Dillo v3.2.0-39-ge8be369a
Loading...
Searching...
No Matches
decode.c
Go to the documentation of this file.
1/*
2 * File: decode.c
3 *
4 * Copyright 2007-2008 Jorge Arellano Cid <jcid@dillo.org>
5 * Copyright 2025 Rodrigo Arias Mallo <rodarima@gmail.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3 of the License, or
10 * (at your option) any later version.
11 */
12
#include "config.h"

#include <zlib.h>
#include <iconv.h>
#include <errno.h>
#include <limits.h> /* INT_MAX */
#include <stdlib.h> /* strtol */

#ifdef ENABLE_BROTLI
#include <brotli/decode.h>
#endif

#include "decode.h"
#include "utf8.hh"
#include "msg.h"
27
28static const int bufsize = 8*1024;
29
34 int inlen)
35{
36 char *inputPtr, *eol;
37 int inputRemaining;
38 int chunkRemaining = *((int *)dc->state);
39 Dstr *output = dStr_sized_new(inlen);
40
41 dStr_append_l(dc->leftover, instr, inlen);
42 inputPtr = dc->leftover->str;
43 inputRemaining = dc->leftover->len;
44
45 while (inputRemaining > 0) {
46 if (chunkRemaining > 2) {
47 /* chunk body to copy */
48 int copylen = MIN(chunkRemaining - 2, inputRemaining);
49 dStr_append_l(output, inputPtr, copylen);
50 chunkRemaining -= copylen;
51 inputRemaining -= copylen;
52 inputPtr += copylen;
53 }
54
55 if ((chunkRemaining == 2) && (inputRemaining > 0)) {
56 /* CR to discard */
57 chunkRemaining--;
58 inputRemaining--;
59 inputPtr++;
60 }
61 if ((chunkRemaining == 1) && (inputRemaining > 0)) {
62 /* LF to discard */
63 chunkRemaining--;
64 inputRemaining--;
65 inputPtr++;
66 }
67
68 /*
69 * A chunk has a one-line header that begins with the chunk length
70 * in hexadecimal.
71 */
72 if (!(eol = (char *)memchr(inputPtr, '\n', inputRemaining))) {
73 break; /* We don't have the whole line yet. */
74 }
75
76 if (!(chunkRemaining = strtol(inputPtr, NULL, 0x10))) {
77 dc->finished = TRUE;
78 break; /* A chunk length of 0 means we're done! */
79 }
80 inputRemaining -= (eol - inputPtr) + 1;
81 inputPtr = eol + 1;
82 chunkRemaining += 2; /* CRLF at the end of every chunk */
83 }
84
85 /* If we have a partial chunk header, save it for next time. */
86 dStr_erase(dc->leftover, 0, inputPtr - dc->leftover->str);
87
88 *(int *)dc->state = chunkRemaining;
89 return output;
90}
91
96
98{
99 dFree(dc->state);
100 dStr_free(dc->leftover, 1);
101 dFree(dc);
102}
103
105{
106 (void)inflateEnd((z_stream *)dc->state);
107
108 dFree(dc->state);
109 dFree(dc->buffer);
110}
111
112/*
113 * BUG: A fair amount of duplicated code exists in the gzip/deflate decoding,
114 * but an attempt to pull out the common code left everything too contorted
115 * for what it accomplished.
116 */
117
121static Dstr *Decode_gzip(Decode *dc, const char *instr, int inlen)
122{
123 int rc = Z_OK;
124
125 z_stream *zs = (z_stream *)dc->state;
126
127 int inputConsumed = 0;
128 Dstr *output = dStr_new("");
129
130 while ((rc == Z_OK) && (inputConsumed < inlen)) {
131 zs->next_in = (Bytef *)instr + inputConsumed;
132 zs->avail_in = inlen - inputConsumed;
133
134 zs->next_out = (Bytef *)dc->buffer;
135 zs->avail_out = bufsize;
136
137 rc = inflate(zs, Z_SYNC_FLUSH);
138
139 dStr_append_l(output, dc->buffer, zs->total_out);
140
141 if ((rc == Z_OK) || (rc == Z_STREAM_END)) {
142 // Z_STREAM_END at end of file
143
144 inputConsumed += zs->total_in;
145 zs->total_out = 0;
146 zs->total_in = 0;
147 } else if (rc == Z_DATA_ERROR) {
148 MSG_ERR("gzip decompression error\n");
149 }
150 }
151 return output;
152}
153
157static Dstr *Decode_raw_deflate(Decode *dc, const char *instr, int inlen)
158{
159 int rc = Z_OK;
160
161 z_stream *zs = (z_stream *)dc->state;
162
163 int inputConsumed = 0;
164 Dstr *output = dStr_new("");
165
166 while ((rc == Z_OK) && (inputConsumed < inlen)) {
167 zs->next_in = (Bytef *)instr + inputConsumed;
168 zs->avail_in = inlen - inputConsumed;
169
170 zs->next_out = (Bytef *)dc->buffer;
171 zs->avail_out = bufsize;
172
173 rc = inflate(zs, Z_SYNC_FLUSH);
174
175 dStr_append_l(output, dc->buffer, zs->total_out);
176
177 if ((rc == Z_OK) || (rc == Z_STREAM_END)) {
178 // Z_STREAM_END at end of file
179
180 inputConsumed += zs->total_in;
181 zs->total_out = 0;
182 zs->total_in = 0;
183 } else if (rc == Z_DATA_ERROR) {
184 MSG_ERR("raw deflate decompression also failed\n");
185 }
186 }
187 return output;
188}
189
194static Dstr *Decode_deflate(Decode *dc, const char *instr, int inlen)
195{
196 int rc = Z_OK;
197
198 z_stream *zs = (z_stream *)dc->state;
199
200 int inputConsumed = 0;
201 Dstr *output = dStr_new("");
202
203 while ((rc == Z_OK) && (inputConsumed < inlen)) {
204 zs->next_in = (Bytef *)instr + inputConsumed;
205 zs->avail_in = inlen - inputConsumed;
206
207 zs->next_out = (Bytef *)dc->buffer;
208 zs->avail_out = bufsize;
209
210 rc = inflate(zs, Z_SYNC_FLUSH);
211
212 dStr_append_l(output, dc->buffer, zs->total_out);
213
214 if ((rc == Z_OK) || (rc == Z_STREAM_END)) {
215 // Z_STREAM_END at end of file
216
217 inputConsumed += zs->total_in;
218 zs->total_out = 0;
219 zs->total_in = 0;
220 } else if (rc == Z_DATA_ERROR) {
221 MSG_WARN("Deflate decompression error. Certain servers illegally fail"
222 " to send data in a zlib wrapper. Let's try raw deflate.\n");
223 dStr_free(output, 1);
224 (void)inflateEnd(zs);
225 dFree(dc->state);
226 dc->state = zs = dNew(z_stream, 1);
227 zs->zalloc = NULL;
228 zs->zfree = NULL;
229 zs->next_in = NULL;
230 zs->avail_in = 0;
232
233 // Negative value means that we want raw deflate.
234 inflateInit2(zs, -MAX_WBITS);
235
236 return Decode_raw_deflate(dc, instr, inlen);
237 }
238 }
239 return output;
240}
241
242#ifdef ENABLE_BROTLI
246static Dstr *Decode_brotli_process(Decode *dc, const char *instr, int inlen)
247{
248 Dstr *output = dStr_new("");
249 BrotliDecoderState *st = (BrotliDecoderState *) dc->state;
250
251 const uint8_t *next_in = (const uint8_t *) instr;
252 size_t avail_in = inlen;
253 BrotliDecoderResult res;
254
255 _MSG("Decode_brotli_process inlen=%d\n", inlen);
256
257 /* Handle empty case */
258 if (avail_in == 0)
259 return output;
260
261 do {
262 /* Always reset output buffer */
263 uint8_t *next_out = (uint8_t *) dc->buffer;
264 size_t avail_out = bufsize;
265
266 _MSG("Decode_brotli_process decoding %zd bytes\n", avail_in);
267
268 res = BrotliDecoderDecompressStream(st,
269 &avail_in, &next_in, &avail_out, &next_out, NULL);
270
271 _MSG("Decode_brotli_process res=%d\n", res);
272
273 if (res == BROTLI_DECODER_RESULT_ERROR) {
274 MSG_ERR("brotli decompression error\n");
275 break;
276 }
277
278 size_t delta = bufsize - avail_out;
279 _MSG("Decode_brotli_process delta=%zd\n", delta);
280 dStr_append_l(output, dc->buffer, delta);
281
282 } while (res == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT);
283
284 _MSG("Decode_brotli_process exitting with res=%d\n", res);
285
286 return output;
287}
288
289static void Decode_brotli_free(Decode *dc)
290{
291 BrotliDecoderState *st = (BrotliDecoderState *) dc->state;
292 BrotliDecoderDestroyInstance(st);
293
294 dFree(dc->buffer);
295}
296
297static Decode *Decode_brotli_init(void)
298{
299 BrotliDecoderState *st = BrotliDecoderCreateInstance(NULL, NULL, NULL);
300 if (st == NULL) {
301 MSG_ERR("Cannot create brotli decoder instance\n");
302 return NULL;
303 }
304
305 Decode *dc = dNew0(Decode, 1);
306
307 dc->buffer = dNew(char, bufsize);
308 dc->state = st;
309 dc->leftover = NULL; /* not used */
310 dc->decode = Decode_brotli_process;
311 dc->free = Decode_brotli_free;
312
313 return dc;
314}
315#endif /* ENABLE_BROTLI */
316
317
321static Dstr *Decode_charset(Decode *dc, const char *instr, int inlen)
322{
323 inbuf_t *inPtr;
324 char *outPtr;
325 size_t inLeft, outRoom;
326
327 Dstr *output = dStr_new("");
328 int rc = 0;
329
330 dStr_append_l(dc->leftover, instr, inlen);
331 inPtr = dc->leftover->str;
332 inLeft = dc->leftover->len;
333
334 while ((rc != EINVAL) && (inLeft > 0)) {
335
336 outPtr = dc->buffer;
337 outRoom = bufsize;
338
339 rc = iconv((iconv_t)dc->state, &inPtr, &inLeft, &outPtr, &outRoom);
340
341 // iconv() on success, number of bytes converted
342 // -1, errno == EILSEQ illegal byte sequence found
343 // EINVAL partial character ends source buffer
344 // E2BIG destination buffer is full
345
346 dStr_append_l(output, dc->buffer, bufsize - outRoom);
347
348 if (rc == -1)
349 rc = errno;
350 if (rc == EILSEQ){
351 inPtr++;
352 inLeft--;
354 sizeof(utf8_replacement_char) - 1);
355 }
356 }
357 dStr_erase(dc->leftover, 0, dc->leftover->len - inLeft);
358
359 return output;
360}
361
363{
364 /* iconv_close() frees dc->state */
365 (void)iconv_close((iconv_t)(dc->state));
366
367 dFree(dc->buffer);
368 dStr_free(dc->leftover, 1);
369}
370
375{
376 DecodeTransfer *dc = NULL;
377
378 if (format && !dStrAsciiCasecmp(format, "chunked")) {
379 int *chunk_remaining = dNew(int, 1);
380 *chunk_remaining = 0;
381 dc = dNew(DecodeTransfer, 1);
382 dc->leftover = dStr_new("");
383 dc->state = chunk_remaining;
384 dc->finished = FALSE;
385 _MSG("chunked!\n");
386 }
387 return dc;
388}
389
391{
392 z_stream *zs = dNew(z_stream, 1);
393 Decode *dc = dNew(Decode, 1);
394
395 zs->zalloc = NULL;
396 zs->zfree = NULL;
397 zs->next_in = NULL;
398 zs->avail_in = 0;
399 dc->state = zs;
400 dc->buffer = dNew(char, bufsize);
401
403 dc->leftover = NULL; /* not used */
404 return dc;
405}
406
410Decode *a_Decode_content_init(const char *format)
411{
412 z_stream *zs;
413 Decode *dc = NULL;
414
415 if (format && *format) {
416 if (!dStrAsciiCasecmp(format, "gzip") ||
417 !dStrAsciiCasecmp(format, "x-gzip")) {
418 _MSG("gzipped data!\n");
419
421 zs = (z_stream *)dc->state;
422 /* 16 is a magic number for gzip decoding */
423 inflateInit2(zs, MAX_WBITS+16);
424
425 dc->decode = Decode_gzip;
426 } else if (!dStrAsciiCasecmp(format, "deflate")) {
427 _MSG("deflated data!\n");
428
430 zs = (z_stream *)dc->state;
431 inflateInit(zs);
432
434#ifdef ENABLE_BROTLI
435 } else if (!dStrAsciiCasecmp(format, "br")) {
436 _MSG("brotli data!\n");
437 dc = Decode_brotli_init();
438#endif
439 } else {
440 MSG("Content-Encoding '%s' not recognized.\n", format);
441 }
442 }
443 return dc;
444}
445
453Decode *a_Decode_charset_init(const char *format)
454{
455 Decode *dc = NULL;
456
457 if (format &&
458 strlen(format) &&
459 dStrAsciiCasecmp(format,"UTF-8")) {
460
461 iconv_t ic = iconv_open("UTF-8", format);
462 if (ic != (iconv_t) -1) {
463 dc = dNew(Decode, 1);
464 dc->state = ic;
465 dc->buffer = dNew(char, bufsize);
466 dc->leftover = dStr_new("");
467
470 } else {
471 MSG_WARN("Unable to convert from character encoding: '%s'\n", format);
472 }
473 }
474 return dc;
475}
476
480Dstr *a_Decode_process(Decode *dc, const char *instr, int inlen)
481{
482 return dc->decode(dc, instr, inlen);
483}
484
485/*
486 * Free the decoder.
487 */
489{
490 if (dc) {
491 dc->free(dc);
492 dFree(dc);
493 }
494}
#define _MSG(...)
Definition bookmarks.c:45
#define MSG(...)
Definition bookmarks.c:46
unsigned char bool_t
Definition d_size.h:21
Decode * a_Decode_charset_init(const char *format)
Initialize decoder to translate from any character set known to iconv() to UTF-8.
Definition decode.c:453
Dstr * a_Decode_transfer_process(DecodeTransfer *dc, const char *instr, int inlen)
Decode 'Transfer-Encoding: chunked' data.
Definition decode.c:33
static Dstr * Decode_charset(Decode *dc, const char *instr, int inlen)
Translate to desired character set (UTF-8)
Definition decode.c:321
void a_Decode_transfer_free(DecodeTransfer *dc)
Definition decode.c:97
Decode * a_Decode_content_init(const char *format)
Initialize content decoder.
Definition decode.c:410
static Dstr * Decode_raw_deflate(Decode *dc, const char *instr, int inlen)
Decode (raw) deflated data.
Definition decode.c:157
DecodeTransfer * a_Decode_transfer_init(const char *format)
Initialize transfer decoder.
Definition decode.c:374
Dstr * a_Decode_process(Decode *dc, const char *instr, int inlen)
Decode data.
Definition decode.c:480
static Decode * Decode_content_init_common(void)
Definition decode.c:390
static Dstr * Decode_deflate(Decode *dc, const char *instr, int inlen)
Decode deflated data, initially presuming that the required zlib wrapper is there.
Definition decode.c:194
void a_Decode_free(Decode *dc)
Definition decode.c:488
static void Decode_charset_free(Decode *dc)
Definition decode.c:362
static const int bufsize
Definition decode.c:28
static Dstr * Decode_gzip(Decode *dc, const char *instr, int inlen)
Decode gzipped data.
Definition decode.c:121
bool_t a_Decode_transfer_finished(DecodeTransfer *dc)
Definition decode.c:92
static void Decode_compression_free(Decode *dc)
Definition decode.c:104
void dFree(void *mem)
Definition dlib.c:68
int dStrAsciiCasecmp(const char *s1, const char *s2)
Definition dlib.c:203
Dstr * dStr_sized_new(int sz)
Create a new string with a given size.
Definition dlib.c:254
void dStr_erase(Dstr *ds, int pos_0, int len)
Erase a substring.
Definition dlib.c:388
void dStr_free(Dstr *ds, int all)
Free a dillo string.
Definition dlib.c:337
void dStr_append_l(Dstr *ds, const char *s, int l)
Append a C string to a Dstr (providing length).
Definition dlib.c:308
Dstr * dStr_new(const char *s)
Create a new string.
Definition dlib.c:325
#define MIN(a, b)
Definition dlib.h:30
#define dNew0(type, count)
Definition dlib.h:51
#define TRUE
Definition dlib.h:23
#define FALSE
Definition dlib.h:19
#define dNew(type, count)
Definition dlib.h:49
#define MSG_ERR(...)
Definition dpid_common.h:23
#define MSG_WARN(...)
Definition msg.h:26
void * state
Definition decode.h:23
Dstr * leftover
Definition decode.h:22
bool_t finished
has the terminating chunk been seen?
Definition decode.h:24
char * buffer
Definition decode.h:11
Dstr *(* decode)(struct Decode *dc, const char *instr, int inlen)
Definition decode.h:14
Dstr * leftover
Definition decode.h:12
void * state
Definition decode.h:13
void(* free)(struct Decode *dc)
Definition decode.h:15
Definition dlib.h:102
Dstr_char_t * str
Definition dlib.h:105
int len
Definition dlib.h:104
static const char utf8_replacement_char[]
Unicode replacement character U+FFFD.
Definition utf8.hh:16