1 /*
2 * File: decode.c
3 *
4 * Copyright 2007-2008 Jorge Arellano Cid <jcid@dillo.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 3 of the License, or
9 * (at your option) any later version.
10 */
11
#include <zlib.h>
#include <iconv.h>
#include <errno.h>
#include <limits.h>     /* INT_MAX */
#include <stdlib.h>     /* strtol */
#include <string.h>     /* memchr, memset */

#include "decode.h"
#include "utf8.hh"
#include "msg.h"
20
/*
 * Size, in bytes, of the scratch buffer (dc->buffer) that the inflate and
 * iconv decoders fill on every pass.
 */
static const int bufsize = 8 * 1024;
22
23 /*
24 * Decode chunked data
25 */
Decode_chunked(Decode * dc,const char * instr,int inlen)26 static Dstr *Decode_chunked(Decode *dc, const char *instr, int inlen)
27 {
28 char *inputPtr, *eol;
29 int inputRemaining;
30 int chunkRemaining = *((int *)dc->state);
31 Dstr *output = dStr_sized_new(inlen);
32
33 dStr_append_l(dc->leftover, instr, inlen);
34 inputPtr = dc->leftover->str;
35 inputRemaining = dc->leftover->len;
36
37 while (inputRemaining > 0) {
38 if (chunkRemaining > 2) {
39 /* chunk body to copy */
40 int copylen = MIN(chunkRemaining - 2, inputRemaining);
41 dStr_append_l(output, inputPtr, copylen);
42 chunkRemaining -= copylen;
43 inputRemaining -= copylen;
44 inputPtr += copylen;
45 }
46
47 if ((chunkRemaining == 2) && (inputRemaining > 0)) {
48 /* CR to discard */
49 chunkRemaining--;
50 inputRemaining--;
51 inputPtr++;
52 }
53 if ((chunkRemaining == 1) && (inputRemaining > 0)) {
54 /* LF to discard */
55 chunkRemaining--;
56 inputRemaining--;
57 inputPtr++;
58 }
59
60 /*
61 * A chunk has a one-line header that begins with the chunk length
62 * in hexadecimal.
63 */
64 if (!(eol = (char *)memchr(inputPtr, '\n', inputRemaining))) {
65 break; /* We don't have the whole line yet. */
66 }
67
68 if (!(chunkRemaining = strtol(inputPtr, NULL, 0x10))) {
69 break; /* A chunk length of 0 means we're done! */
70 }
71 inputRemaining -= (eol - inputPtr) + 1;
72 inputPtr = eol + 1;
73 chunkRemaining += 2; /* CRLF at the end of every chunk */
74 }
75
76 /* If we have a partial chunk header, save it for next time. */
77 dStr_erase(dc->leftover, 0, inputPtr - dc->leftover->str);
78
79 *(int *)dc->state = chunkRemaining;
80 return output;
81 }
82
/*
 * Release what a chunked-transfer decoder owns: the buffered leftover input
 * and the counter stored in dc->state. The Decode struct itself is not
 * freed here.
 */
static void Decode_chunked_free(Decode *dc)
{
   dStr_free(dc->leftover, 1);
   dFree(dc->state);
}
88
/*
 * Release what a gzip/deflate decoder owns: the scratch output buffer and
 * the z_stream kept in dc->state. The stream is shut down with inflateEnd()
 * before its memory is freed. The Decode struct itself is not freed here.
 */
static void Decode_compression_free(Decode *dc)
{
   z_stream *stream = (z_stream *)dc->state;

   dFree(dc->buffer);

   (void)inflateEnd(stream);
   dFree(stream);
}
96
97 /*
98 * BUG: A fair amount of duplicated code exists in the gzip/deflate decoding,
99 * but an attempt to pull out the common code left everything too contorted
100 * for what it accomplished.
101 */
102
103 /*
104 * Decode gzipped data
105 */
Decode_gzip(Decode * dc,const char * instr,int inlen)106 static Dstr *Decode_gzip(Decode *dc, const char *instr, int inlen)
107 {
108 int rc = Z_OK;
109
110 z_stream *zs = (z_stream *)dc->state;
111
112 int inputConsumed = 0;
113 Dstr *output = dStr_new("");
114
115 while ((rc == Z_OK) && (inputConsumed < inlen)) {
116 zs->next_in = (Bytef *)instr + inputConsumed;
117 zs->avail_in = inlen - inputConsumed;
118
119 zs->next_out = (Bytef *)dc->buffer;
120 zs->avail_out = bufsize;
121
122 rc = inflate(zs, Z_SYNC_FLUSH);
123
124 dStr_append_l(output, dc->buffer, zs->total_out);
125
126 if ((rc == Z_OK) || (rc == Z_STREAM_END)) {
127 // Z_STREAM_END at end of file
128
129 inputConsumed += zs->total_in;
130 zs->total_out = 0;
131 zs->total_in = 0;
132 } else if (rc == Z_DATA_ERROR) {
133 MSG_ERR("gzip decompression error\n");
134 }
135 }
136 return output;
137 }
138
139 /*
140 * Decode (raw) deflated data
141 */
Decode_raw_deflate(Decode * dc,const char * instr,int inlen)142 static Dstr *Decode_raw_deflate(Decode *dc, const char *instr, int inlen)
143 {
144 int rc = Z_OK;
145
146 z_stream *zs = (z_stream *)dc->state;
147
148 int inputConsumed = 0;
149 Dstr *output = dStr_new("");
150
151 while ((rc == Z_OK) && (inputConsumed < inlen)) {
152 zs->next_in = (Bytef *)instr + inputConsumed;
153 zs->avail_in = inlen - inputConsumed;
154
155 zs->next_out = (Bytef *)dc->buffer;
156 zs->avail_out = bufsize;
157
158 rc = inflate(zs, Z_SYNC_FLUSH);
159
160 dStr_append_l(output, dc->buffer, zs->total_out);
161
162 if ((rc == Z_OK) || (rc == Z_STREAM_END)) {
163 // Z_STREAM_END at end of file
164
165 inputConsumed += zs->total_in;
166 zs->total_out = 0;
167 zs->total_in = 0;
168 } else if (rc == Z_DATA_ERROR) {
169 MSG_ERR("raw deflate decompression also failed\n");
170 }
171 }
172 return output;
173 }
174
175 /*
176 * Decode deflated data, initially presuming that the required zlib wrapper
177 * is there. On data error, switch to Decode_raw_deflate().
178 */
Decode_deflate(Decode * dc,const char * instr,int inlen)179 static Dstr *Decode_deflate(Decode *dc, const char *instr, int inlen)
180 {
181 int rc = Z_OK;
182
183 z_stream *zs = (z_stream *)dc->state;
184
185 int inputConsumed = 0;
186 Dstr *output = dStr_new("");
187
188 while ((rc == Z_OK) && (inputConsumed < inlen)) {
189 zs->next_in = (Bytef *)instr + inputConsumed;
190 zs->avail_in = inlen - inputConsumed;
191
192 zs->next_out = (Bytef *)dc->buffer;
193 zs->avail_out = bufsize;
194
195 rc = inflate(zs, Z_SYNC_FLUSH);
196
197 dStr_append_l(output, dc->buffer, zs->total_out);
198
199 if ((rc == Z_OK) || (rc == Z_STREAM_END)) {
200 // Z_STREAM_END at end of file
201
202 inputConsumed += zs->total_in;
203 zs->total_out = 0;
204 zs->total_in = 0;
205 } else if (rc == Z_DATA_ERROR) {
206 MSG_WARN("Deflate decompression error. Certain servers illegally fail"
207 " to send data in a zlib wrapper. Let's try raw deflate.\n");
208 dStr_free(output, 1);
209 (void)inflateEnd(zs);
210 dFree(dc->state);
211 dc->state = zs = dNew(z_stream, 1);;
212 zs->zalloc = NULL;
213 zs->zfree = NULL;
214 zs->next_in = NULL;
215 zs->avail_in = 0;
216 dc->decode = Decode_raw_deflate;
217
218 // Negative value means that we want raw deflate.
219 inflateInit2(zs, -MAX_WBITS);
220
221 return Decode_raw_deflate(dc, instr, inlen);
222 }
223 }
224 return output;
225 }
226
227 /*
228 * Translate to desired character set (UTF-8)
229 */
Decode_charset(Decode * dc,const char * instr,int inlen)230 static Dstr *Decode_charset(Decode *dc, const char *instr, int inlen)
231 {
232 inbuf_t *inPtr;
233 char *outPtr;
234 size_t inLeft, outRoom;
235
236 Dstr *output = dStr_new("");
237 int rc = 0;
238
239 dStr_append_l(dc->leftover, instr, inlen);
240 inPtr = dc->leftover->str;
241 inLeft = dc->leftover->len;
242
243 while ((rc != EINVAL) && (inLeft > 0)) {
244
245 outPtr = dc->buffer;
246 outRoom = bufsize;
247
248 rc = iconv((iconv_t)dc->state, &inPtr, &inLeft, &outPtr, &outRoom);
249
250 // iconv() on success, number of bytes converted
251 // -1, errno == EILSEQ illegal byte sequence found
252 // EINVAL partial character ends source buffer
253 // E2BIG destination buffer is full
254
255 dStr_append_l(output, dc->buffer, bufsize - outRoom);
256
257 if (rc == -1)
258 rc = errno;
259 if (rc == EILSEQ){
260 inPtr++;
261 inLeft--;
262 dStr_append_l(output, utf8_replacement_char,
263 sizeof(utf8_replacement_char) - 1);
264 }
265 }
266 dStr_erase(dc->leftover, 0, dc->leftover->len - inLeft);
267
268 return output;
269 }
270
/*
 * Release what a character-set decoder owns: the leftover input, the
 * scratch output buffer and the iconv descriptor held in dc->state.
 * The Decode struct itself is not freed here.
 */
static void Decode_charset_free(Decode *dc)
{
   iconv_t descriptor = (iconv_t)(dc->state);

   dStr_free(dc->leftover, 1);
   dFree(dc->buffer);

   /* iconv_close() frees dc->state */
   (void)iconv_close(descriptor);
}
279
280 /*
281 * Initialize transfer decoder. Currently handles "chunked".
282 */
a_Decode_transfer_init(const char * format)283 Decode *a_Decode_transfer_init(const char *format)
284 {
285 Decode *dc = NULL;
286
287 if (format && !dStrAsciiCasecmp(format, "chunked")) {
288 int *chunk_remaining = dNew(int, 1);
289 *chunk_remaining = 0;
290 dc = dNew(Decode, 1);
291 dc->leftover = dStr_new("");
292 dc->state = chunk_remaining;
293 dc->decode = Decode_chunked;
294 dc->free = Decode_chunked_free;
295 dc->buffer = NULL; /* not used */
296 _MSG("chunked!\n");
297 }
298 return dc;
299 }
300
Decode_content_init_common()301 static Decode *Decode_content_init_common()
302 {
303 z_stream *zs = dNew(z_stream, 1);
304 Decode *dc = dNew(Decode, 1);
305
306 zs->zalloc = NULL;
307 zs->zfree = NULL;
308 zs->next_in = NULL;
309 zs->avail_in = 0;
310 dc->state = zs;
311 dc->buffer = dNew(char, bufsize);
312
313 dc->free = Decode_compression_free;
314 dc->leftover = NULL; /* not used */
315 return dc;
316 }
317
318 /*
319 * Initialize content decoder. Currently handles 'gzip' and 'deflate'.
320 */
a_Decode_content_init(const char * format)321 Decode *a_Decode_content_init(const char *format)
322 {
323 z_stream *zs;
324 Decode *dc = NULL;
325
326 if (format && *format) {
327 if (!dStrAsciiCasecmp(format, "gzip") ||
328 !dStrAsciiCasecmp(format, "x-gzip")) {
329 _MSG("gzipped data!\n");
330
331 dc = Decode_content_init_common();
332 zs = (z_stream *)dc->state;
333 /* 16 is a magic number for gzip decoding */
334 inflateInit2(zs, MAX_WBITS+16);
335
336 dc->decode = Decode_gzip;
337 } else if (!dStrAsciiCasecmp(format, "deflate")) {
338 MSG("deflated data!\n");
339
340 dc = Decode_content_init_common();
341 zs = (z_stream *)dc->state;
342 inflateInit(zs);
343
344 dc->decode = Decode_deflate;
345 } else {
346 MSG("Content-Encoding '%s' not recognized.\n", format);
347 }
348 }
349 return dc;
350 }
351
352 /*
353 * Initialize decoder to translate from any character set known to iconv()
354 * to UTF-8.
355 *
356 * GNU iconv(1) will provide a list of known character sets if invoked with
357 * the "--list" flag.
358 */
a_Decode_charset_init(const char * format)359 Decode *a_Decode_charset_init(const char *format)
360 {
361 Decode *dc = NULL;
362
363 if (format &&
364 strlen(format) &&
365 dStrAsciiCasecmp(format,"UTF-8")) {
366
367 iconv_t ic = iconv_open("UTF-8", format);
368 if (ic != (iconv_t) -1) {
369 dc = dNew(Decode, 1);
370 dc->state = ic;
371 dc->buffer = dNew(char, bufsize);
372 dc->leftover = dStr_new("");
373
374 dc->decode = Decode_charset;
375 dc->free = Decode_charset_free;
376 } else {
377 MSG_WARN("Unable to convert from character encoding: '%s'\n", format);
378 }
379 }
380 return dc;
381 }
382
383 /*
384 * Decode data.
385 */
a_Decode_process(Decode * dc,const char * instr,int inlen)386 Dstr *a_Decode_process(Decode *dc, const char *instr, int inlen)
387 {
388 return dc->decode(dc, instr, inlen);
389 }
390
391 /*
392 * Free the decoder.
393 */
a_Decode_free(Decode * dc)394 void a_Decode_free(Decode *dc)
395 {
396 if (dc) {
397 dc->free(dc);
398 dFree(dc);
399 }
400 }
401