1 /*
2  * File: decode.c
3  *
4  * Copyright 2007-2008 Jorge Arellano Cid <jcid@dillo.org>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 3 of the License, or
9  * (at your option) any later version.
10  */
11 
#include <zlib.h>
#include <iconv.h>
#include <errno.h>
#include <limits.h>     /* INT_MAX */
#include <stdlib.h>     /* strtol */

#include "decode.h"
#include "utf8.hh"
#include "msg.h"
20 
/*
 * Size, in bytes, of the scratch buffer (dc->buffer) allocated by the
 * compression and charset decoders; it is also the amount of output space
 * offered to zlib/iconv on each conversion step.
 */
static const int bufsize = 8*1024;
22 
23 /*
24  * Decode chunked data
25  */
Decode_chunked(Decode * dc,const char * instr,int inlen)26 static Dstr *Decode_chunked(Decode *dc, const char *instr, int inlen)
27 {
28    char *inputPtr, *eol;
29    int inputRemaining;
30    int chunkRemaining = *((int *)dc->state);
31    Dstr *output = dStr_sized_new(inlen);
32 
33    dStr_append_l(dc->leftover, instr, inlen);
34    inputPtr = dc->leftover->str;
35    inputRemaining = dc->leftover->len;
36 
37    while (inputRemaining > 0) {
38       if (chunkRemaining > 2) {
39          /* chunk body to copy */
40          int copylen = MIN(chunkRemaining - 2, inputRemaining);
41          dStr_append_l(output, inputPtr, copylen);
42          chunkRemaining -= copylen;
43          inputRemaining -= copylen;
44          inputPtr += copylen;
45       }
46 
47       if ((chunkRemaining == 2) && (inputRemaining > 0)) {
48          /* CR to discard */
49          chunkRemaining--;
50          inputRemaining--;
51          inputPtr++;
52       }
53       if ((chunkRemaining == 1) && (inputRemaining > 0)) {
54          /* LF to discard */
55          chunkRemaining--;
56          inputRemaining--;
57          inputPtr++;
58       }
59 
60       /*
61        * A chunk has a one-line header that begins with the chunk length
62        * in hexadecimal.
63        */
64       if (!(eol = (char *)memchr(inputPtr, '\n', inputRemaining))) {
65          break;   /* We don't have the whole line yet. */
66       }
67 
68       if (!(chunkRemaining = strtol(inputPtr, NULL, 0x10))) {
69          break;   /* A chunk length of 0 means we're done! */
70       }
71       inputRemaining -= (eol - inputPtr) + 1;
72       inputPtr = eol + 1;
73       chunkRemaining += 2; /* CRLF at the end of every chunk */
74    }
75 
76    /* If we have a partial chunk header, save it for next time. */
77    dStr_erase(dc->leftover, 0, inputPtr - dc->leftover->str);
78 
79    *(int *)dc->state = chunkRemaining;
80    return output;
81 }
82 
/*
 * Release what a chunked-transfer decoder owns: the pending-input string
 * and the chunk byte counter stored in dc->state. The Decode struct itself
 * is not freed here.
 */
static void Decode_chunked_free(Decode *dc)
{
   Dstr *pendingInput = dc->leftover;
   void *chunkCounter = dc->state;

   dStr_free(pendingInput, 1);
   dFree(chunkCounter);
}
88 
/*
 * Release what a gzip/deflate decoder owns: the zlib stream kept in
 * dc->state and the output scratch buffer. The Decode struct itself is not
 * freed here.
 */
static void Decode_compression_free(Decode *dc)
{
   z_stream *stream = (z_stream *)dc->state;

   /* Shut the zlib stream down before releasing the memory it lives in. */
   (void)inflateEnd(stream);

   dFree(dc->buffer);
   dFree(stream);
}
96 
97 /*
98  * BUG: A fair amount of duplicated code exists in the gzip/deflate decoding,
99  * but an attempt to pull out the common code left everything too contorted
100  * for what it accomplished.
101  */
102 
103 /*
104  * Decode gzipped data
105  */
Decode_gzip(Decode * dc,const char * instr,int inlen)106 static Dstr *Decode_gzip(Decode *dc, const char *instr, int inlen)
107 {
108    int rc = Z_OK;
109 
110    z_stream *zs = (z_stream *)dc->state;
111 
112    int inputConsumed = 0;
113    Dstr *output = dStr_new("");
114 
115    while ((rc == Z_OK) && (inputConsumed < inlen)) {
116       zs->next_in = (Bytef *)instr + inputConsumed;
117       zs->avail_in = inlen - inputConsumed;
118 
119       zs->next_out = (Bytef *)dc->buffer;
120       zs->avail_out = bufsize;
121 
122       rc = inflate(zs, Z_SYNC_FLUSH);
123 
124       dStr_append_l(output, dc->buffer, zs->total_out);
125 
126       if ((rc == Z_OK) || (rc == Z_STREAM_END)) {
127          // Z_STREAM_END at end of file
128 
129          inputConsumed += zs->total_in;
130          zs->total_out = 0;
131          zs->total_in = 0;
132       } else if (rc == Z_DATA_ERROR) {
133          MSG_ERR("gzip decompression error\n");
134       }
135    }
136    return output;
137 }
138 
139 /*
140  * Decode (raw) deflated data
141  */
Decode_raw_deflate(Decode * dc,const char * instr,int inlen)142 static Dstr *Decode_raw_deflate(Decode *dc, const char *instr, int inlen)
143 {
144    int rc = Z_OK;
145 
146    z_stream *zs = (z_stream *)dc->state;
147 
148    int inputConsumed = 0;
149    Dstr *output = dStr_new("");
150 
151    while ((rc == Z_OK) && (inputConsumed < inlen)) {
152       zs->next_in = (Bytef *)instr + inputConsumed;
153       zs->avail_in = inlen - inputConsumed;
154 
155       zs->next_out = (Bytef *)dc->buffer;
156       zs->avail_out = bufsize;
157 
158       rc = inflate(zs, Z_SYNC_FLUSH);
159 
160       dStr_append_l(output, dc->buffer, zs->total_out);
161 
162       if ((rc == Z_OK) || (rc == Z_STREAM_END)) {
163          // Z_STREAM_END at end of file
164 
165          inputConsumed += zs->total_in;
166          zs->total_out = 0;
167          zs->total_in = 0;
168       } else if (rc == Z_DATA_ERROR) {
169          MSG_ERR("raw deflate decompression also failed\n");
170       }
171    }
172    return output;
173 }
174 
175 /*
176  * Decode deflated data, initially presuming that the required zlib wrapper
177  * is there. On data error, switch to Decode_raw_deflate().
178  */
Decode_deflate(Decode * dc,const char * instr,int inlen)179 static Dstr *Decode_deflate(Decode *dc, const char *instr, int inlen)
180 {
181    int rc = Z_OK;
182 
183    z_stream *zs = (z_stream *)dc->state;
184 
185    int inputConsumed = 0;
186    Dstr *output = dStr_new("");
187 
188    while ((rc == Z_OK) && (inputConsumed < inlen)) {
189       zs->next_in = (Bytef *)instr + inputConsumed;
190       zs->avail_in = inlen - inputConsumed;
191 
192       zs->next_out = (Bytef *)dc->buffer;
193       zs->avail_out = bufsize;
194 
195       rc = inflate(zs, Z_SYNC_FLUSH);
196 
197       dStr_append_l(output, dc->buffer, zs->total_out);
198 
199       if ((rc == Z_OK) || (rc == Z_STREAM_END)) {
200          // Z_STREAM_END at end of file
201 
202          inputConsumed += zs->total_in;
203          zs->total_out = 0;
204          zs->total_in = 0;
205       } else if (rc == Z_DATA_ERROR) {
206          MSG_WARN("Deflate decompression error. Certain servers illegally fail"
207                  " to send data in a zlib wrapper. Let's try raw deflate.\n");
208          dStr_free(output, 1);
209          (void)inflateEnd(zs);
210          dFree(dc->state);
211          dc->state = zs = dNew(z_stream, 1);;
212          zs->zalloc = NULL;
213          zs->zfree = NULL;
214          zs->next_in = NULL;
215          zs->avail_in = 0;
216          dc->decode = Decode_raw_deflate;
217 
218          // Negative value means that we want raw deflate.
219          inflateInit2(zs, -MAX_WBITS);
220 
221          return Decode_raw_deflate(dc, instr, inlen);
222       }
223    }
224    return output;
225 }
226 
227 /*
228  * Translate to desired character set (UTF-8)
229  */
Decode_charset(Decode * dc,const char * instr,int inlen)230 static Dstr *Decode_charset(Decode *dc, const char *instr, int inlen)
231 {
232    inbuf_t *inPtr;
233    char *outPtr;
234    size_t inLeft, outRoom;
235 
236    Dstr *output = dStr_new("");
237    int rc = 0;
238 
239    dStr_append_l(dc->leftover, instr, inlen);
240    inPtr = dc->leftover->str;
241    inLeft = dc->leftover->len;
242 
243    while ((rc != EINVAL) && (inLeft > 0)) {
244 
245       outPtr = dc->buffer;
246       outRoom = bufsize;
247 
248       rc = iconv((iconv_t)dc->state, &inPtr, &inLeft, &outPtr, &outRoom);
249 
250       // iconv() on success, number of bytes converted
251       //         -1, errno == EILSEQ illegal byte sequence found
252       //                      EINVAL partial character ends source buffer
253       //                      E2BIG  destination buffer is full
254 
255       dStr_append_l(output, dc->buffer, bufsize - outRoom);
256 
257       if (rc == -1)
258          rc = errno;
259       if (rc == EILSEQ){
260          inPtr++;
261          inLeft--;
262          dStr_append_l(output, utf8_replacement_char,
263                        sizeof(utf8_replacement_char) - 1);
264       }
265    }
266    dStr_erase(dc->leftover, 0, dc->leftover->len - inLeft);
267 
268    return output;
269 }
270 
/*
 * Release what a charset decoder owns: the pending-input string, the output
 * scratch buffer and the iconv descriptor kept in dc->state. The Decode
 * struct itself is not freed here.
 */
static void Decode_charset_free(Decode *dc)
{
   iconv_t converter = (iconv_t)(dc->state);

   dStr_free(dc->leftover, 1);
   dFree(dc->buffer);

   /* iconv_close() frees dc->state */
   (void)iconv_close(converter);
}
279 
280 /*
281  * Initialize transfer decoder. Currently handles "chunked".
282  */
a_Decode_transfer_init(const char * format)283 Decode *a_Decode_transfer_init(const char *format)
284 {
285    Decode *dc = NULL;
286 
287    if (format && !dStrAsciiCasecmp(format, "chunked")) {
288       int *chunk_remaining = dNew(int, 1);
289       *chunk_remaining = 0;
290       dc = dNew(Decode, 1);
291       dc->leftover = dStr_new("");
292       dc->state = chunk_remaining;
293       dc->decode = Decode_chunked;
294       dc->free = Decode_chunked_free;
295       dc->buffer = NULL; /* not used */
296       _MSG("chunked!\n");
297    }
298    return dc;
299 }
300 
Decode_content_init_common()301 static Decode *Decode_content_init_common()
302 {
303    z_stream *zs = dNew(z_stream, 1);
304    Decode *dc = dNew(Decode, 1);
305 
306    zs->zalloc = NULL;
307    zs->zfree = NULL;
308    zs->next_in = NULL;
309    zs->avail_in = 0;
310    dc->state = zs;
311    dc->buffer = dNew(char, bufsize);
312 
313    dc->free = Decode_compression_free;
314    dc->leftover = NULL; /* not used */
315    return dc;
316 }
317 
318 /*
319  * Initialize content decoder. Currently handles 'gzip' and 'deflate'.
320  */
a_Decode_content_init(const char * format)321 Decode *a_Decode_content_init(const char *format)
322 {
323    z_stream *zs;
324    Decode *dc = NULL;
325 
326    if (format && *format) {
327       if (!dStrAsciiCasecmp(format, "gzip") ||
328           !dStrAsciiCasecmp(format, "x-gzip")) {
329          _MSG("gzipped data!\n");
330 
331          dc = Decode_content_init_common();
332          zs = (z_stream *)dc->state;
333          /* 16 is a magic number for gzip decoding */
334          inflateInit2(zs, MAX_WBITS+16);
335 
336          dc->decode = Decode_gzip;
337       } else if (!dStrAsciiCasecmp(format, "deflate")) {
338          MSG("deflated data!\n");
339 
340          dc = Decode_content_init_common();
341          zs = (z_stream *)dc->state;
342          inflateInit(zs);
343 
344          dc->decode = Decode_deflate;
345       } else {
346          MSG("Content-Encoding '%s' not recognized.\n", format);
347       }
348    }
349    return dc;
350 }
351 
352 /*
353  * Initialize decoder to translate from any character set known to iconv()
354  * to UTF-8.
355  *
356  * GNU iconv(1) will provide a list of known character sets if invoked with
357  * the "--list" flag.
358  */
a_Decode_charset_init(const char * format)359 Decode *a_Decode_charset_init(const char *format)
360 {
361    Decode *dc = NULL;
362 
363    if (format &&
364        strlen(format) &&
365        dStrAsciiCasecmp(format,"UTF-8")) {
366 
367       iconv_t ic = iconv_open("UTF-8", format);
368       if (ic != (iconv_t) -1) {
369            dc = dNew(Decode, 1);
370            dc->state = ic;
371            dc->buffer = dNew(char, bufsize);
372            dc->leftover = dStr_new("");
373 
374            dc->decode = Decode_charset;
375            dc->free = Decode_charset_free;
376       } else {
377          MSG_WARN("Unable to convert from character encoding: '%s'\n", format);
378       }
379    }
380    return dc;
381 }
382 
383 /*
384  * Decode data.
385  */
a_Decode_process(Decode * dc,const char * instr,int inlen)386 Dstr *a_Decode_process(Decode *dc, const char *instr, int inlen)
387 {
388    return dc->decode(dc, instr, inlen);
389 }
390 
391 /*
392  * Free the decoder.
393  */
a_Decode_free(Decode * dc)394 void a_Decode_free(Decode *dc)
395 {
396    if (dc) {
397       dc->free(dc);
398       dFree(dc);
399    }
400 }
401