1 /*
2    Handling of compressed HTTP responses
3    Copyright (C) 2001-2004, Joe Orton <joe@manyfish.co.uk>
4 
5    This library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Library General Public
7    License as published by the Free Software Foundation; either
8    version 2 of the License, or (at your option) any later version.
9 
10    This library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Library General Public License for more details.
14 
15    You should have received a copy of the GNU Library General Public
16    License along with this library; if not, write to the Free
17    Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
18    MA 02111-1307, USA
19 
20 */
21 
22 #include "config.h"
23 
24 #ifdef HAVE_STRING_H
25 #include <string.h>
26 #endif
27 #ifdef HAVE_STDLIB_H
28 #include <stdlib.h>
29 #endif
30 
31 #include "ne_request.h"
32 #include "ne_compress.h"
33 #include "ne_utils.h"
34 #include "ne_i18n.h"
35 
36 #include "ne_private.h"
37 
38 #ifdef NEON_ZLIB
39 
40 #include <zlib.h>
41 
42 /* Adds support for the 'gzip' Content-Encoding in HTTP.  gzip is a
43  * file format which wraps the DEFLATE compression algorithm.  zlib
44  * implements DEFLATE: we have to unwrap the gzip format (specified in
45  * RFC1952) as it comes off the wire, and hand off chunks of data to
46  * be inflated. */
47 
48 struct ne_decompress_s {
49     ne_session *session; /* associated session. */
50     /* temporary buffer for holding inflated data. */
51     char outbuf[BUFSIZ];
52     z_stream zstr;
53     int zstrinit; /* non-zero if zstr has been initialized */
54     char *enchdr; /* value of Content-Enconding response header. */
55 
56     /* pass blocks back to this. */
57     ne_block_reader reader;
58     ne_accept_response acceptor;
59     void *userdata;
60 
61     /* buffer for gzip header bytes. */
62     union {
63 	unsigned char buf[10];
64 	struct header {
65 	    unsigned char id1;
66 	    unsigned char id2;
67 	    unsigned char cmeth; /* compression method. */
68 	    unsigned char flags;
69 	    unsigned int mtime; /* breaks when sizeof int != 4 */
70 	    unsigned char xflags;
71 	    unsigned char os;
72 	} hdr;
73     } in;
74     size_t incount;    /* bytes in in.buf */
75 
76     unsigned char footer[8];
77     size_t footcount; /* bytes in footer. */
78 
79     /* CRC32 checksum: odd that zlib uses uLong for this since it is a
80      * 64-bit integer on LP64 platforms. */
81     uLong checksum;
82 
83     /* current state. */
84     enum state {
85 	NE_Z_BEFORE_DATA, /* not received any response blocks yet. */
86 	NE_Z_PASSTHROUGH, /* response not compressed: passing through. */
87 	NE_Z_IN_HEADER, /* received a few bytes of response data, but not
88 			 * got past the gzip header yet. */
89 	NE_Z_POST_HEADER, /* waiting for the end of the NUL-terminated bits. */
90 	NE_Z_INFLATING, /* inflating response bytes. */
91 	NE_Z_AFTER_DATA, /* after data; reading CRC32 & ISIZE */
92 	NE_Z_FINISHED, /* stream is finished. */
93 	NE_Z_ERROR /* inflate bombed. */
94     } state;
95 };
96 
/* The two identification bytes which parse_header() requires at the
 * start of a gzip stream. */
#define ID1 (0x1f)
#define ID2 (0x8b)

/* Result codes returned by parse_header(). */
#define HDR_DONE (0)
#define HDR_EXTENDED (1)
#define HDR_ERROR (2)
103 
104 /* parse_header parses the gzip header, sets the next state and returns
105  *   HDR_DONE: all done, bytes following are raw DEFLATE data.
106  *   HDR_EXTENDED: all done, expect a NUL-termianted string
107  *                 before the DEFLATE data
108  *   HDR_ERROR: invalid header, give up.
109  */
parse_header(ne_decompress * ctx)110 static int parse_header(ne_decompress *ctx)
111 {
112     struct header *h = &ctx->in.hdr;
113 
114     NE_DEBUG(NE_DBG_HTTP, "ID1: %d  ID2: %d, cmeth %d, flags %d\n",
115 	    h->id1, h->id2, h->cmeth, h->flags);
116 
117     if (h->id1 != ID1 || h->id2 != ID2 || h->cmeth != 8) {
118 	ctx->state = NE_Z_ERROR;
119 	ne_set_error(ctx->session, "Compressed stream invalid");
120 	return HDR_ERROR;
121     }
122 
123     NE_DEBUG(NE_DBG_HTTP, "mtime: %d, xflags: %d, os: %d\n",
124 	     h->mtime, h->xflags, h->os);
125 
126     /* TODO: we can only handle one NUL-terminated extensions field
127      * currently.  Really, we should count the number of bits set, and
128      * skip as many fields as bits set (bailing if any reserved bits
129      * are set. */
130     if (h->flags == 8) {
131 	ctx->state = NE_Z_POST_HEADER;
132 	return HDR_EXTENDED;
133     } else if (h->flags != 0) {
134 	ctx->state = NE_Z_ERROR;
135 	ne_set_error(ctx->session, "Compressed stream not supported");
136 	return HDR_ERROR;
137     }
138 
139     NE_DEBUG(NE_DBG_HTTP, "compress: Good stream.\n");
140 
141     ctx->state = NE_Z_INFLATING;
142     return HDR_DONE;
143 }
144 
/* Decode the four bytes at 'buf' as an unsigned 32-bit value stored
 * least-significant byte first; 'buf' must be 'unsigned char *'.  The
 * result has type unsigned long.  Each byte is widened to unsigned
 * long before it is shifted: shifting the promoted (signed) int left
 * by 24 overflows when the top bit of buf[3] is set. */
#define BUF2UINT(buf) \
    (((unsigned long)(buf)[3] << 24) | ((unsigned long)(buf)[2] << 16) \
     | ((unsigned long)(buf)[1] << 8) | (unsigned long)(buf)[0])
147 
148 /* Process extra 'len' bytes of 'buf' which were received after the
149  * DEFLATE data. */
process_footer(ne_decompress * ctx,const unsigned char * buf,size_t len)150 static void process_footer(ne_decompress *ctx,
151 			   const unsigned char *buf, size_t len)
152 {
153     if (len + ctx->footcount > 8) {
154         ne_set_error(ctx->session,
155                      "Too many bytes (%" NE_FMT_SIZE_T ") in gzip footer",
156                      len);
157 	ctx->state = NE_Z_ERROR;
158     } else {
159 	memcpy(ctx->footer + ctx->footcount, buf, len);
160 	ctx->footcount += len;
161 	if (ctx->footcount == 8) {
162 	    uLong crc = BUF2UINT(ctx->footer) & 0xFFFFFFFF;
163 	    if (crc == ctx->checksum) {
164 		ctx->state = NE_Z_FINISHED;
165                 /* reader requires a size=0 call at end-of-response */
166                 ctx->reader(ctx->userdata, NULL, 0);
167 		NE_DEBUG(NE_DBG_HTTP, "compress: Checksum match.\n");
168 	    } else {
169 		NE_DEBUG(NE_DBG_HTTP, "compress: Checksum mismatch: "
170 			 "given %lu vs computed %lu\n", crc, ctx->checksum);
171 		ne_set_error(ctx->session,
172 			     "Checksum invalid for compressed stream");
173 		ctx->state = NE_Z_ERROR;
174 	    }
175 	}
176     }
177 }
178 
179 /* A zlib function failed with 'code'; set the session error string
180  * appropriately. */
set_zlib_error(ne_decompress * ctx,const char * msg,int code)181 static void set_zlib_error(ne_decompress *ctx, const char *msg, int code)
182 {
183     if (ctx->zstr.msg)
184         ne_set_error(ctx->session, _("%s: %s"), msg, ctx->zstr.msg);
185     else {
186         const char *err;
187         switch (code) {
188         case Z_STREAM_ERROR: err = "stream error"; break;
189         case Z_DATA_ERROR: err = "data corrupt"; break;
190         case Z_MEM_ERROR: err = "out of memory"; break;
191         case Z_BUF_ERROR: err = "buffer error"; break;
192         case Z_VERSION_ERROR: err = "library version mismatch"; break;
193         default: err = "unknown error"; break;
194         }
195         ne_set_error(ctx->session, _("%s: %s (code %d)"), msg, err, code);
196     }
197 }
198 
199 /* Inflate response buffer 'buf' of length 'len'. */
do_inflate(ne_decompress * ctx,const char * buf,size_t len)200 static void do_inflate(ne_decompress *ctx, const char *buf, size_t len)
201 {
202     int ret;
203 
204     ctx->zstr.avail_in = len;
205     ctx->zstr.next_in = (unsigned char *)buf;
206     ctx->zstr.total_in = 0;
207 
208     do {
209 	ctx->zstr.avail_out = sizeof ctx->outbuf;
210 	ctx->zstr.next_out = (unsigned char *)ctx->outbuf;
211 	ctx->zstr.total_out = 0;
212 
213 	ret = inflate(&ctx->zstr, Z_NO_FLUSH);
214 
215 	NE_DEBUG(NE_DBG_HTTP,
216 		 "compress: inflate %d, %ld bytes out, %d remaining\n",
217 		 ret, ctx->zstr.total_out, ctx->zstr.avail_in);
218 #if 0
219 	NE_DEBUG(NE_DBG_HTTPBODY,
220 		 "Inflated body block (%ld):\n[%.*s]\n",
221 		 ctx->zstr.total_out, (int)ctx->zstr.total_out,
222 		 ctx->outbuf);
223 #endif
224 	/* update checksum. */
225 	ctx->checksum = crc32(ctx->checksum, (unsigned char *)ctx->outbuf,
226 			      ctx->zstr.total_out);
227 
228 	/* pass on the inflated data, if any */
229         if (ctx->zstr.total_out > 0) {
230             ctx->reader(ctx->userdata, ctx->outbuf, ctx->zstr.total_out);
231         }
232     } while (ret == Z_OK && ctx->zstr.avail_in > 0);
233 
234     if (ret == Z_STREAM_END) {
235 	NE_DEBUG(NE_DBG_HTTP, "compress: end of data stream, remaining %d.\n",
236 		 ctx->zstr.avail_in);
237 	/* process the footer. */
238 	ctx->state = NE_Z_AFTER_DATA;
239 	process_footer(ctx, ctx->zstr.next_in, ctx->zstr.avail_in);
240     } else if (ret != Z_OK) {
241 	ctx->state = NE_Z_ERROR;
242         set_zlib_error(ctx, _("Could not inflate data"), ret);
243     }
244 }
245 
246 /* Callback which is passed blocks of the response body. */
gz_reader(void * ud,const char * buf,size_t len)247 static void gz_reader(void *ud, const char *buf, size_t len)
248 {
249     ne_decompress *ctx = ud;
250     const char *zbuf;
251     size_t count;
252 
253     switch (ctx->state) {
254     case NE_Z_PASSTHROUGH:
255 	/* move along there. */
256 	ctx->reader(ctx->userdata, buf, len);
257 	return;
258 
259     case NE_Z_ERROR:
260 	/* beyond hope. */
261 	break;
262 
263     case NE_Z_FINISHED:
264 	/* Could argue for tolerance, and ignoring trailing content;
265 	 * but it could mean something more serious. */
266 	if (len > 0) {
267 	    ctx->state = NE_Z_ERROR;
268 	    ne_set_error(ctx->session,
269 			 "Unexpected content received after compressed stream");
270 	}
271 	break;
272 
273     case NE_Z_BEFORE_DATA:
274 	/* work out whether this is a compressed response or not. */
275 	if (ctx->enchdr && strcasecmp(ctx->enchdr, "gzip") == 0) {
276             int ret;
277 	    NE_DEBUG(NE_DBG_HTTP, "compress: got gzipped stream.\n");
278 
279             /* inflateInit2() works here where inflateInit() doesn't. */
280             ret = inflateInit2(&ctx->zstr, -MAX_WBITS);
281             if (ret != Z_OK) {
282                 set_zlib_error(ctx, _("Could not initialize zlib"), ret);
283                 return;
284             }
285 	    ctx->zstrinit = 1;
286 
287 	} else {
288 	    /* No Content-Encoding header: pass it on.  TODO: we could
289 	     * hack it and register the real callback now. But that
290 	     * would require add_resp_body_rdr to have defined
291 	     * ordering semantics etc etc */
292 	    ctx->state = NE_Z_PASSTHROUGH;
293 	    ctx->reader(ctx->userdata, buf, len);
294 	    return;
295 	}
296 
297 	ctx->state = NE_Z_IN_HEADER;
298 	/* FALLTHROUGH */
299 
300     case NE_Z_IN_HEADER:
301 	/* copy as many bytes as possible into the buffer. */
302 	if (len + ctx->incount > 10) {
303 	    count = 10 - ctx->incount;
304 	} else {
305 	    count = len;
306 	}
307 	memcpy(ctx->in.buf + ctx->incount, buf, count);
308 	ctx->incount += count;
309 	/* have we got the full header yet? */
310 	if (ctx->incount != 10) {
311 	    return;
312 	}
313 
314 	buf += count;
315 	len -= count;
316 
317 	switch (parse_header(ctx)) {
318 	case HDR_EXTENDED:
319 	    if (len == 0)
320 		return;
321 	    break;
322 	case HDR_DONE:
323 	    if (len > 0) {
324 		do_inflate(ctx, buf, len);
325 	    }
326         default:
327 	    return;
328 	}
329 
330 	/* FALLTHROUGH */
331 
332     case NE_Z_POST_HEADER:
333 	/* eating the filename string. */
334 	zbuf = memchr(buf, '\0', len);
335 	if (zbuf == NULL) {
336 	    /* not found it yet. */
337 	    return;
338 	}
339 
340 	NE_DEBUG(NE_DBG_HTTP,
341 		 "compresss: skipped %" NE_FMT_SIZE_T " header bytes.\n",
342 		 zbuf - buf);
343 	/* found end of string. */
344 	len -= (1 + zbuf - buf);
345 	buf = zbuf + 1;
346 	ctx->state = NE_Z_INFLATING;
347 	if (len == 0) {
348 	    /* end of string was at end of buffer. */
349 	    return;
350 	}
351 
352 	/* FALLTHROUGH */
353 
354     case NE_Z_INFLATING:
355 	do_inflate(ctx, buf, len);
356 	break;
357 
358     case NE_Z_AFTER_DATA:
359 	process_footer(ctx, (unsigned char *)buf, len);
360 	break;
361     }
362 
363 }
364 
/* Release the decompression context 'ctx' and report how the response
 * ended.  Returns NE_OK if no data was received, the response was
 * passed through uncompressed, or the compressed stream completed;
 * returns NE_ERROR if an error was recorded earlier or the compressed
 * stream stopped part-way (in which case the session error is set
 * here).  'ctx' must not be used afterwards. */
int ne_decompress_destroy(ne_decompress *ctx)
{
    int result = NE_ERROR;

    /* inflateEnd only fails if it's passed NULL etc; ignore
     * return value. */
    if (ctx->zstrinit) {
        inflateEnd(&ctx->zstr);
    }

    if (ctx->enchdr) {
        ne_free(ctx->enchdr);
    }

    if (ctx->state == NE_Z_BEFORE_DATA
        || ctx->state == NE_Z_PASSTHROUGH
        || ctx->state == NE_Z_FINISHED) {
        result = NE_OK;
    } else if (ctx->state != NE_Z_ERROR) {
        /* stopped somewhere in the middle: truncated response.  (For
         * NE_Z_ERROR the session error is already set.) */
        ne_set_error(ctx->session, "Compressed response was truncated");
    }

    ne_free(ctx);
    return result;
}
397 
398 /* Prepare for a compressed response */
gz_pre_send(ne_request * r,void * ud,ne_buffer * req)399 static void gz_pre_send(ne_request *r, void *ud, ne_buffer *req)
400 {
401     ne_decompress *ctx = ud;
402 
403     NE_DEBUG(NE_DBG_HTTP, "compress: Initialization.\n");
404 
405     /* (Re-)Initialize the context */
406     ctx->state = NE_Z_BEFORE_DATA;
407     if (ctx->zstrinit) inflateEnd(&ctx->zstr);
408     ctx->zstrinit = 0;
409     ctx->incount = ctx->footcount = 0;
410     ctx->checksum = crc32(0L, Z_NULL, 0);
411     if (ctx->enchdr) {
412         ne_free(ctx->enchdr);
413         ctx->enchdr = NULL;
414     }
415 }
416 
417 /* Kill the pre-send hook */
gz_destroy(ne_request * req,void * userdata)418 static void gz_destroy(ne_request *req, void *userdata)
419 {
420     ne_kill_pre_send(ne_get_session(req), gz_pre_send, userdata);
421 }
422 
423 /* Wrapper for user-passed acceptor function. */
gz_acceptor(void * userdata,ne_request * req,const ne_status * st)424 static int gz_acceptor(void *userdata, ne_request *req, const ne_status *st)
425 {
426     ne_decompress *ctx = userdata;
427     return ctx->acceptor(ctx->userdata, req, st);
428 }
429 
ne_decompress_reader(ne_request * req,ne_accept_response acpt,ne_block_reader rdr,void * userdata)430 ne_decompress *ne_decompress_reader(ne_request *req, ne_accept_response acpt,
431 				    ne_block_reader rdr, void *userdata)
432 {
433     ne_decompress *ctx = ne_calloc(sizeof *ctx);
434 
435     ne_add_request_header(req, "Accept-Encoding", "gzip");
436 
437     ne_add_response_header_handler(req, "Content-Encoding",
438 				   ne_duplicate_header, &ctx->enchdr);
439 
440     ne_add_response_body_reader(req, gz_acceptor, gz_reader, ctx);
441 
442     ctx->reader = rdr;
443     ctx->userdata = userdata;
444     ctx->session = ne_get_session(req);
445     ctx->acceptor = acpt;
446 
447     ne_hook_pre_send(ctx->session, gz_pre_send, ctx);
448     ne_hook_destroy_request(ctx->session, gz_destroy, ctx);
449 
450     return ctx;
451 }
452 
453 #else /* !NEON_ZLIB */
454 
455 /* Pass-through interface present to provide ABI compatibility. */
456 
ne_decompress_reader(ne_request * req,ne_accept_response acpt,ne_block_reader rdr,void * userdata)457 ne_decompress *ne_decompress_reader(ne_request *req, ne_accept_response acpt,
458 				    ne_block_reader rdr, void *userdata)
459 {
460     ne_add_response_body_reader(req, acpt, rdr, userdata);
461     /* an arbitrary return value: don't confuse them by returning NULL. */
462     return (ne_decompress *)req;
463 }
464 
/* Build without zlib: there is nothing to release, so 'dc' is ignored
 * and 0 is always returned. */
int ne_decompress_destroy(ne_decompress *dc)
{
    (void)dc;
    return 0;
}
469 
470 #endif /* NEON_ZLIB */
471