Dillo v3.1.1-99-gf3103cc4
Loading...
Searching...
No Matches
decode.c
Go to the documentation of this file.
1/*
2 * File: decode.c
3 *
4 * Copyright 2007-2008 Jorge Arellano Cid <jcid@dillo.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 3 of the License, or
9 * (at your option) any later version.
10 */
11
12#include <zlib.h>
13#include <iconv.h>
14#include <errno.h>
15#include <stdlib.h> /* strtol */
16
17#include "decode.h"
18#include "utf8.hh"
19#include "msg.h"
20
21static const int bufsize = 8*1024;
22
27 int inlen)
28{
   /*
    * Incremental decoder for chunked data. New input is appended to
    * dc->leftover and parsing always runs over that accumulated buffer, so
    * a chunk header that is split across calls is completed on a later call.
    *
    * dc->state points to an int holding how many bytes of the current chunk
    * are still expected, counting the 2-byte CRLF that closes the chunk.
    *
    * Return: a new Dstr with the chunk payload bytes found during this call.
    */
29 char *inputPtr, *eol;
30 int inputRemaining;
31 int chunkRemaining = *((int *)dc->state);
32 Dstr *output = dStr_sized_new(inlen);
33
34 dStr_append_l(dc->leftover, instr, inlen);
35 inputPtr = dc->leftover->str;
36 inputRemaining = dc->leftover->len;
37
38 while (inputRemaining > 0) {
39 if (chunkRemaining > 2) {
40 /* chunk body to copy */
41 int copylen = MIN(chunkRemaining - 2, inputRemaining);
42 dStr_append_l(output, inputPtr, copylen);
43 chunkRemaining -= copylen;
44 inputRemaining -= copylen;
45 inputPtr += copylen;
46 }
47
   /* The last two bytes of a chunk are its CRLF; drop them one at a time
    * so that either byte may arrive in a later call. */
48 if ((chunkRemaining == 2) && (inputRemaining > 0)) {
49 /* CR to discard */
50 chunkRemaining--;
51 inputRemaining--;
52 inputPtr++;
53 }
54 if ((chunkRemaining == 1) && (inputRemaining > 0)) {
55 /* LF to discard */
56 chunkRemaining--;
57 inputRemaining--;
58 inputPtr++;
59 }
60
61 /*
62 * A chunk has a one-line header that begins with the chunk length
63 * in hexadecimal.
64 */
65 if (!(eol = (char *)memchr(inputPtr, '\n', inputRemaining))) {
66 break; /* We don't have the whole line yet. */
67 }
68
   /*
    * NOTE(review): the strtol() result is stored unchecked in an int, so a
    * negative or out-of-range chunk size is not rejected here.
    * On a zero size the loop is left before inputPtr is advanced, so the
    * terminating header line itself stays in dc->leftover.
    */
69 if (!(chunkRemaining = strtol(inputPtr, NULL, 0x10))) {
70 dc->finished = TRUE;
71 break; /* A chunk length of 0 means we're done! */
72 }
   /* Skip the rest of the header line, including its '\n'. */
73 inputRemaining -= (eol - inputPtr) + 1;
74 inputPtr = eol + 1;
75 chunkRemaining += 2; /* CRLF at the end of every chunk */
76 }
77
78 /* If we have a partial chunk header, save it for next time. */
   /* Everything before inputPtr has been consumed; only the tail is kept. */
79 dStr_erase(dc->leftover, 0, inputPtr - dc->leftover->str);
80
   /* Remember the position inside the current chunk for the next call. */
81 *(int *)dc->state = chunkRemaining;
82 return output;
83}
84
89
91{
   /*
    * Body of a_Decode_transfer_free() according to the symbol index; its
    * signature line is absent from this listing.
    * Releases the chunk counter stored in dc->state, the leftover string,
    * and finally the DecodeTransfer structure itself.
    */
92 dFree(dc->state);
93 dStr_free(dc->leftover, 1);
94 dFree(dc);
95}
96
98{
   /*
    * Body of Decode_compression_free() according to the symbol index; its
    * signature line is absent from this listing.
    * Shuts down the z_stream kept in dc->state, then frees the stream
    * structure and the output buffer. dc itself is not freed here;
    * a_Decode_free() does that after calling dc->free().
    */
99 (void)inflateEnd((z_stream *)dc->state);
100
101 dFree(dc->state);
102 dFree(dc->buffer);
103}
104
105/*
106 * BUG: A fair amount of duplicated code exists in the gzip/deflate decoding,
107 * but an attempt to pull out the common code left everything too contorted
108 * for what it accomplished.
109 */
110
114static Dstr *Decode_gzip(Decode *dc, const char *instr, int inlen)
115{
116 int rc = Z_OK;
117
118 z_stream *zs = (z_stream *)dc->state;
119
120 int inputConsumed = 0;
121 Dstr *output = dStr_new("");
122
123 while ((rc == Z_OK) && (inputConsumed < inlen)) {
124 zs->next_in = (Bytef *)instr + inputConsumed;
125 zs->avail_in = inlen - inputConsumed;
126
127 zs->next_out = (Bytef *)dc->buffer;
128 zs->avail_out = bufsize;
129
130 rc = inflate(zs, Z_SYNC_FLUSH);
131
132 dStr_append_l(output, dc->buffer, zs->total_out);
133
134 if ((rc == Z_OK) || (rc == Z_STREAM_END)) {
135 // Z_STREAM_END at end of file
136
137 inputConsumed += zs->total_in;
138 zs->total_out = 0;
139 zs->total_in = 0;
140 } else if (rc == Z_DATA_ERROR) {
141 MSG_ERR("gzip decompression error\n");
142 }
143 }
144 return output;
145}
146
150static Dstr *Decode_raw_deflate(Decode *dc, const char *instr, int inlen)
151{
152 int rc = Z_OK;
153
154 z_stream *zs = (z_stream *)dc->state;
155
156 int inputConsumed = 0;
157 Dstr *output = dStr_new("");
158
159 while ((rc == Z_OK) && (inputConsumed < inlen)) {
160 zs->next_in = (Bytef *)instr + inputConsumed;
161 zs->avail_in = inlen - inputConsumed;
162
163 zs->next_out = (Bytef *)dc->buffer;
164 zs->avail_out = bufsize;
165
166 rc = inflate(zs, Z_SYNC_FLUSH);
167
168 dStr_append_l(output, dc->buffer, zs->total_out);
169
170 if ((rc == Z_OK) || (rc == Z_STREAM_END)) {
171 // Z_STREAM_END at end of file
172
173 inputConsumed += zs->total_in;
174 zs->total_out = 0;
175 zs->total_in = 0;
176 } else if (rc == Z_DATA_ERROR) {
177 MSG_ERR("raw deflate decompression also failed\n");
178 }
179 }
180 return output;
181}
182
/*
 * Decode deflated data, initially presuming that the required zlib wrapper
 * is there.
 *
 * Inflates 'instr' through the z_stream in dc->state, one dc->buffer of
 * 'bufsize' bytes at a time, and returns the result in a new Dstr.
 * On Z_DATA_ERROR the output collected by this call is thrown away, the
 * stream in dc->state is replaced by a fresh one set up for raw deflate,
 * and the same input is handed to Decode_raw_deflate() instead.
 */
187static Dstr *Decode_deflate(Decode *dc, const char *instr, int inlen)
188{
189 int rc = Z_OK;
190
191 z_stream *zs = (z_stream *)dc->state;
192
193 int inputConsumed = 0;
194 Dstr *output = dStr_new("");
195
196 while ((rc == Z_OK) && (inputConsumed < inlen)) {
197 zs->next_in = (Bytef *)instr + inputConsumed;
198 zs->avail_in = inlen - inputConsumed;
199
200 zs->next_out = (Bytef *)dc->buffer;
201 zs->avail_out = bufsize;
202
203 rc = inflate(zs, Z_SYNC_FLUSH);
204
   /* total_out is zeroed after every successful pass, so here it equals
    * the number of bytes this call placed in dc->buffer. */
205 dStr_append_l(output, dc->buffer, zs->total_out);
206
207 if ((rc == Z_OK) || (rc == Z_STREAM_END)) {
208 // Z_STREAM_END at end of file
209
210 inputConsumed += zs->total_in;
211 zs->total_out = 0;
212 zs->total_in = 0;
213 } else if (rc == Z_DATA_ERROR) {
214 MSG_WARN("Deflate decompression error. Certain servers illegally fail"
215 " to send data in a zlib wrapper. Let's try raw deflate.\n");
   /* Drop what was decoded in this call and rebuild the stream. */
216 dStr_free(output, 1);
217 (void)inflateEnd(zs);
218 dFree(dc->state);
219 dc->state = zs = dNew(z_stream, 1);
220 zs->zalloc = NULL;
221 zs->zfree = NULL;
222 zs->next_in = NULL;
223 zs->avail_in = 0;
   /*
    * NOTE(review): nothing visible here repoints dc->decode at
    * Decode_raw_deflate, and the listing skips a line number at this spot;
    * confirm against the real file that later calls go to the raw decoder.
    * NOTE(review): zs->opaque is not set and the inflateInit2() result is
    * not checked.
    */
225
226 // Negative value means that we want raw deflate.
227 inflateInit2(zs, -MAX_WBITS);
228
229 return Decode_raw_deflate(dc, instr, inlen);
230 }
231 }
232 return output;
233}
234
/*
 * Translate to desired character set (UTF-8).
 *
 * The input is appended to dc->leftover and converted with iconv(), using
 * the descriptor in dc->state, through dc->buffer ('bufsize' bytes per
 * pass). Input that could not be converted yet stays in dc->leftover.
 *
 * Return: a new Dstr with the converted text.
 */
238static Dstr *Decode_charset(Decode *dc, const char *instr, int inlen)
239{
240 inbuf_t *inPtr;
241 char *outPtr;
242 size_t inLeft, outRoom;
243
244 Dstr *output = dStr_new("");
245 int rc = 0;
246
247 dStr_append_l(dc->leftover, instr, inlen);
248 inPtr = dc->leftover->str;
249 inLeft = dc->leftover->len;
250
   /* EINVAL ends the loop: the remaining bytes wait for the next call. */
251 while ((rc != EINVAL) && (inLeft > 0)) {
252
   /* Start every pass with an empty output buffer. */
253 outPtr = dc->buffer;
254 outRoom = bufsize;
255
256 rc = iconv((iconv_t)dc->state, &inPtr, &inLeft, &outPtr, &outRoom);
257
258 // iconv() on success, number of bytes converted
259 // -1, errno == EILSEQ illegal byte sequence found
260 // EINVAL partial character ends source buffer
261 // E2BIG destination buffer is full
262
   /* Whatever was written into dc->buffer during this pass is kept. */
263 dStr_append_l(output, dc->buffer, bufsize - outRoom);
264
   /*
    * From here on rc is either iconv()'s own return value or, after a -1,
    * the errno code.
    * NOTE(review): a non-negative return value is compared against
    * EINVAL/EILSEQ as well; confirm it can never equal one of those codes.
    */
265 if (rc == -1)
266 rc = errno;
267 if (rc == EILSEQ){
   /* Step over the offending input byte. */
268 inPtr++;
269 inLeft--;
   /*
    * NOTE(review): the next line is the tail of a call whose first line is
    * missing from this listing; presumably it appends
    * utf8_replacement_char to the output. Confirm against the real file.
    */
271 sizeof(utf8_replacement_char) - 1);
272 }
273 }
   /* Remove the consumed prefix; only the unconverted inLeft bytes remain. */
274 dStr_erase(dc->leftover, 0, dc->leftover->len - inLeft);
275
276 return output;
277}
278
280{
   /*
    * Body of Decode_charset_free() according to the symbol index; its
    * signature line is absent from this listing.
    * Closes the iconv descriptor kept in dc->state, then frees the output
    * buffer and the leftover string. dc itself is not freed here.
    */
281 /* iconv_close() frees dc->state */
282 (void)iconv_close((iconv_t)(dc->state));
283
284 dFree(dc->buffer);
285 dStr_free(dc->leftover, 1);
286}
287
292{
   /*
    * Body of a_Decode_transfer_init() according to the symbol index; its
    * signature line is absent from this listing.
    * Returns a new DecodeTransfer only when 'format' compares equal to
    * "chunked" under dStrAsciiCasecmp(); otherwise returns NULL.
    */
293 DecodeTransfer *dc = NULL;
294
295 if (format && !dStrAsciiCasecmp(format, "chunked")) {
   /* dc->state carries the bytes still expected from the current chunk;
    * it starts at zero, i.e. a chunk header comes first. */
296 int *chunk_remaining = dNew(int, 1);
297 *chunk_remaining = 0;
298 dc = dNew(DecodeTransfer, 1);
299 dc->leftover = dStr_new("");
300 dc->state = chunk_remaining;
301 dc->finished = FALSE;
302 _MSG("chunked!\n");
303 }
304 return dc;
305}
306
308{
   /*
    * Body of Decode_content_init_common() according to the symbol index;
    * its signature line is absent from this listing.
    * Allocates a Decode with a z_stream in dc->state and an output buffer
    * of 'bufsize' bytes. The stream is not initialised with inflateInit*()
    * here and dc->decode is not set in the visible lines.
    */
309 z_stream *zs = dNew(z_stream, 1);
310 Decode *dc = dNew(Decode, 1);
311
   /*
    * NOTE(review): zs->opaque is not set here; zlib's manual lists it among
    * the fields to initialise before inflateInit. Confirm whether dNew()
    * zeroes the memory.
    */
312 zs->zalloc = NULL;
313 zs->zfree = NULL;
314 zs->next_in = NULL;
315 zs->avail_in = 0;
316 dc->state = zs;
317 dc->buffer = dNew(char, bufsize);
318
   /*
    * NOTE(review): the listing skips a line number here and no assignment
    * to dc->free is visible; a_Decode_free() calls dc->free, so confirm
    * against the real file that it is set.
    */
320 dc->leftover = NULL; /* not used */
321 return dc;
322}
323
/*
 * Initialize content decoder.
 *
 * 'format' is compared with dStrAsciiCasecmp() against "gzip", "x-gzip"
 * and "deflate". Any other non-empty value is reported with MSG() and
 * NULL is returned; NULL is also returned for a NULL or empty 'format'.
 *
 * NOTE(review): in this listing no line assigns dc before it is
 * dereferenced in either branch; the embedded numbering skips a line at
 * each of those spots (presumably the call to the common initializer).
 * Confirm against the real file.
 */
327Decode *a_Decode_content_init(const char *format)
328{
329 z_stream *zs;
330 Decode *dc = NULL;
331
332 if (format && *format) {
333 if (!dStrAsciiCasecmp(format, "gzip") ||
334 !dStrAsciiCasecmp(format, "x-gzip")) {
335 _MSG("gzipped data!\n");
336
338 zs = (z_stream *)dc->state;
339 /* 16 is a magic number for gzip decoding */
   /* NOTE(review): the inflateInit2() result is not checked. */
340 inflateInit2(zs, MAX_WBITS+16);
341
342 dc->decode = Decode_gzip;
343 } else if (!dStrAsciiCasecmp(format, "deflate")) {
344 _MSG("deflated data!\n");
345
347 zs = (z_stream *)dc->state;
   /* NOTE(review): the inflateInit() result is not checked, and no
    * assignment to dc->decode is visible in this branch. */
348 inflateInit(zs);
349
351 } else {
352 MSG("Content-Encoding '%s' not recognized.\n", format);
353 }
354 }
355 return dc;
356}
357
/*
 * Initialize decoder to translate from any character set known to iconv()
 * to UTF-8.
 *
 * Return: a new Decode holding the iconv descriptor in dc->state, an output
 * buffer of 'bufsize' bytes and an empty leftover string. NULL is returned
 * when 'format' is NULL, empty, compares equal to "UTF-8" under
 * dStrAsciiCasecmp(), or when iconv_open() fails (a warning is logged in
 * that last case).
 */
365Decode *a_Decode_charset_init(const char *format)
366{
367 Decode *dc = NULL;
368
369 if (format &&
370 strlen(format) &&
371 dStrAsciiCasecmp(format,"UTF-8")) {
372
373 iconv_t ic = iconv_open("UTF-8", format);
374 if (ic != (iconv_t) -1) {
375 dc = dNew(Decode, 1);
376 dc->state = ic;
377 dc->buffer = dNew(char, bufsize);
378 dc->leftover = dStr_new("");
379
   /*
    * NOTE(review): the listing skips two line numbers here and no
    * assignment to dc->decode or dc->free is visible; a_Decode_process()
    * and a_Decode_free() call them, so confirm against the real file.
    */
382 } else {
383 MSG_WARN("Unable to convert from character encoding: '%s'\n", format);
384 }
385 }
386 return dc;
387}
388
392Dstr *a_Decode_process(Decode *dc, const char *instr, int inlen)
393{
394 return dc->decode(dc, instr, inlen);
395}
396
397/*
398 * Free the decoder.
399 */
401{
   /*
    * Body of a_Decode_free() according to the symbol index; its signature
    * line is absent from this listing.
    * A NULL dc is accepted and ignored. Otherwise the decoder's own free
    * callback releases its resources first, then the structure is freed.
    */
402 if (dc) {
403 dc->free(dc);
404 dFree(dc);
405 }
406}
#define _MSG(...)
Definition bookmarks.c:45
#define MSG(...)
Definition bookmarks.c:46
unsigned char bool_t
Definition d_size.h:21
Decode * a_Decode_charset_init(const char *format)
Initialize decoder to translate from any character set known to iconv() to UTF-8.
Definition decode.c:365
Dstr * a_Decode_transfer_process(DecodeTransfer *dc, const char *instr, int inlen)
Decode 'Transfer-Encoding: chunked' data.
Definition decode.c:26
static Dstr * Decode_charset(Decode *dc, const char *instr, int inlen)
Translate to desired character set (UTF-8)
Definition decode.c:238
void a_Decode_transfer_free(DecodeTransfer *dc)
Definition decode.c:90
Decode * a_Decode_content_init(const char *format)
Initialize content decoder.
Definition decode.c:327
static Dstr * Decode_raw_deflate(Decode *dc, const char *instr, int inlen)
Decode (raw) deflated data.
Definition decode.c:150
DecodeTransfer * a_Decode_transfer_init(const char *format)
Initialize transfer decoder.
Definition decode.c:291
Dstr * a_Decode_process(Decode *dc, const char *instr, int inlen)
Decode data.
Definition decode.c:392
static Decode * Decode_content_init_common(void)
Definition decode.c:307
static Dstr * Decode_deflate(Decode *dc, const char *instr, int inlen)
Decode deflated data, initially presuming that the required zlib wrapper is there.
Definition decode.c:187
void a_Decode_free(Decode *dc)
Definition decode.c:400
static void Decode_charset_free(Decode *dc)
Definition decode.c:279
static const int bufsize
Definition decode.c:21
static Dstr * Decode_gzip(Decode *dc, const char *instr, int inlen)
Decode gzipped data.
Definition decode.c:114
bool_t a_Decode_transfer_finished(DecodeTransfer *dc)
Definition decode.c:85
static void Decode_compression_free(Decode *dc)
Definition decode.c:97
void dFree(void *mem)
Definition dlib.c:68
int dStrAsciiCasecmp(const char *s1, const char *s2)
Definition dlib.c:203
Dstr * dStr_sized_new(int sz)
Create a new string with a given size.
Definition dlib.c:254
void dStr_erase(Dstr *ds, int pos_0, int len)
Erase a substring.
Definition dlib.c:388
void dStr_free(Dstr *ds, int all)
Free a dillo string.
Definition dlib.c:337
void dStr_append_l(Dstr *ds, const char *s, int l)
Append a C string to a Dstr (providing length).
Definition dlib.c:308
Dstr * dStr_new(const char *s)
Create a new string.
Definition dlib.c:325
#define MIN(a, b)
Definition dlib.h:30
#define TRUE
Definition dlib.h:23
#define FALSE
Definition dlib.h:19
#define dNew(type, count)
Definition dlib.h:49
#define MSG_ERR(...)
Definition dpid_common.h:23
#define MSG_WARN(...)
Definition msg.h:26
void * state
Definition decode.h:23
Dstr * leftover
Definition decode.h:22
bool_t finished
has the terminating chunk been seen?
Definition decode.h:24
char * buffer
Definition decode.h:11
Dstr *(* decode)(struct Decode *dc, const char *instr, int inlen)
Definition decode.h:14
Dstr * leftover
Definition decode.h:12
void * state
Definition decode.h:13
void(* free)(struct Decode *dc)
Definition decode.h:15
Definition dlib.h:102
Dstr_char_t * str
Definition dlib.h:105
int len
Definition dlib.h:104
static const char utf8_replacement_char[]
Unicode replacement character U+FFFD.
Definition utf8.hh:16