1 /*
2 Handling of compressed HTTP responses
3 Copyright (C) 2001-2004, Joe Orton <joe@manyfish.co.uk>
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public
7 License as published by the Free Software Foundation; either
8 version 2 of the License, or (at your option) any later version.
9
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public
16 License along with this library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
18 MA 02111-1307, USA
19
20 */
21
22 #include "config.h"
23
24 #ifdef HAVE_STRING_H
25 #include <string.h>
26 #endif
27 #ifdef HAVE_STDLIB_H
28 #include <stdlib.h>
29 #endif
30
31 #include "ne_request.h"
32 #include "ne_compress.h"
33 #include "ne_utils.h"
34 #include "ne_i18n.h"
35
36 #include "ne_private.h"
37
38 #ifdef NEON_ZLIB
39
40 #include <zlib.h>
41
42 /* Adds support for the 'gzip' Content-Encoding in HTTP. gzip is a
43 * file format which wraps the DEFLATE compression algorithm. zlib
44 * implements DEFLATE: we have to unwrap the gzip format (specified in
45 * RFC1952) as it comes off the wire, and hand off chunks of data to
46 * be inflated. */
47
48 struct ne_decompress_s {
49 ne_session *session; /* associated session. */
50 /* temporary buffer for holding inflated data. */
51 char outbuf[BUFSIZ];
52 z_stream zstr;
53 int zstrinit; /* non-zero if zstr has been initialized */
54 char *enchdr; /* value of Content-Enconding response header. */
55
56 /* pass blocks back to this. */
57 ne_block_reader reader;
58 ne_accept_response acceptor;
59 void *userdata;
60
61 /* buffer for gzip header bytes. */
62 union {
63 unsigned char buf[10];
64 struct header {
65 unsigned char id1;
66 unsigned char id2;
67 unsigned char cmeth; /* compression method. */
68 unsigned char flags;
69 unsigned int mtime; /* breaks when sizeof int != 4 */
70 unsigned char xflags;
71 unsigned char os;
72 } hdr;
73 } in;
74 size_t incount; /* bytes in in.buf */
75
76 unsigned char footer[8];
77 size_t footcount; /* bytes in footer. */
78
79 /* CRC32 checksum: odd that zlib uses uLong for this since it is a
80 * 64-bit integer on LP64 platforms. */
81 uLong checksum;
82
83 /* current state. */
84 enum state {
85 NE_Z_BEFORE_DATA, /* not received any response blocks yet. */
86 NE_Z_PASSTHROUGH, /* response not compressed: passing through. */
87 NE_Z_IN_HEADER, /* received a few bytes of response data, but not
88 * got past the gzip header yet. */
89 NE_Z_POST_HEADER, /* waiting for the end of the NUL-terminated bits. */
90 NE_Z_INFLATING, /* inflating response bytes. */
91 NE_Z_AFTER_DATA, /* after data; reading CRC32 & ISIZE */
92 NE_Z_FINISHED, /* stream is finished. */
93 NE_Z_ERROR /* inflate bombed. */
94 } state;
95 };
96
/* The two magic identification bytes which open every gzip stream. */
enum {
    ID1 = 0x1f,
    ID2 = 0x8b
};

/* Result codes returned by parse_header(). */
enum {
    HDR_DONE = 0,
    HDR_EXTENDED = 1,
    HDR_ERROR = 2
};
103
104 /* parse_header parses the gzip header, sets the next state and returns
105 * HDR_DONE: all done, bytes following are raw DEFLATE data.
106 * HDR_EXTENDED: all done, expect a NUL-termianted string
107 * before the DEFLATE data
108 * HDR_ERROR: invalid header, give up.
109 */
parse_header(ne_decompress * ctx)110 static int parse_header(ne_decompress *ctx)
111 {
112 struct header *h = &ctx->in.hdr;
113
114 NE_DEBUG(NE_DBG_HTTP, "ID1: %d ID2: %d, cmeth %d, flags %d\n",
115 h->id1, h->id2, h->cmeth, h->flags);
116
117 if (h->id1 != ID1 || h->id2 != ID2 || h->cmeth != 8) {
118 ctx->state = NE_Z_ERROR;
119 ne_set_error(ctx->session, "Compressed stream invalid");
120 return HDR_ERROR;
121 }
122
123 NE_DEBUG(NE_DBG_HTTP, "mtime: %d, xflags: %d, os: %d\n",
124 h->mtime, h->xflags, h->os);
125
126 /* TODO: we can only handle one NUL-terminated extensions field
127 * currently. Really, we should count the number of bits set, and
128 * skip as many fields as bits set (bailing if any reserved bits
129 * are set. */
130 if (h->flags == 8) {
131 ctx->state = NE_Z_POST_HEADER;
132 return HDR_EXTENDED;
133 } else if (h->flags != 0) {
134 ctx->state = NE_Z_ERROR;
135 ne_set_error(ctx->session, "Compressed stream not supported");
136 return HDR_ERROR;
137 }
138
139 NE_DEBUG(NE_DBG_HTTP, "compress: Good stream.\n");
140
141 ctx->state = NE_Z_INFLATING;
142 return HDR_DONE;
143 }
144
/* Assemble the four bytes at 'buf' (least significant byte first) into
 * an unsigned value; 'buf' must be 'unsigned char *'.  Every byte is
 * widened to unsigned long before it is shifted: shifting the promoted
 * (signed) int instead overflows whenever the top byte is >= 0x80. */
#define BUF2UINT(buf) \
    (((unsigned long)(buf)[3] << 24) | ((unsigned long)(buf)[2] << 16) | \
     ((unsigned long)(buf)[1] << 8) | (unsigned long)(buf)[0])
147
148 /* Process extra 'len' bytes of 'buf' which were received after the
149 * DEFLATE data. */
process_footer(ne_decompress * ctx,const unsigned char * buf,size_t len)150 static void process_footer(ne_decompress *ctx,
151 const unsigned char *buf, size_t len)
152 {
153 if (len + ctx->footcount > 8) {
154 ne_set_error(ctx->session,
155 "Too many bytes (%" NE_FMT_SIZE_T ") in gzip footer",
156 len);
157 ctx->state = NE_Z_ERROR;
158 } else {
159 memcpy(ctx->footer + ctx->footcount, buf, len);
160 ctx->footcount += len;
161 if (ctx->footcount == 8) {
162 uLong crc = BUF2UINT(ctx->footer) & 0xFFFFFFFF;
163 if (crc == ctx->checksum) {
164 ctx->state = NE_Z_FINISHED;
165 /* reader requires a size=0 call at end-of-response */
166 ctx->reader(ctx->userdata, NULL, 0);
167 NE_DEBUG(NE_DBG_HTTP, "compress: Checksum match.\n");
168 } else {
169 NE_DEBUG(NE_DBG_HTTP, "compress: Checksum mismatch: "
170 "given %lu vs computed %lu\n", crc, ctx->checksum);
171 ne_set_error(ctx->session,
172 "Checksum invalid for compressed stream");
173 ctx->state = NE_Z_ERROR;
174 }
175 }
176 }
177 }
178
179 /* A zlib function failed with 'code'; set the session error string
180 * appropriately. */
set_zlib_error(ne_decompress * ctx,const char * msg,int code)181 static void set_zlib_error(ne_decompress *ctx, const char *msg, int code)
182 {
183 if (ctx->zstr.msg)
184 ne_set_error(ctx->session, _("%s: %s"), msg, ctx->zstr.msg);
185 else {
186 const char *err;
187 switch (code) {
188 case Z_STREAM_ERROR: err = "stream error"; break;
189 case Z_DATA_ERROR: err = "data corrupt"; break;
190 case Z_MEM_ERROR: err = "out of memory"; break;
191 case Z_BUF_ERROR: err = "buffer error"; break;
192 case Z_VERSION_ERROR: err = "library version mismatch"; break;
193 default: err = "unknown error"; break;
194 }
195 ne_set_error(ctx->session, _("%s: %s (code %d)"), msg, err, code);
196 }
197 }
198
199 /* Inflate response buffer 'buf' of length 'len'. */
do_inflate(ne_decompress * ctx,const char * buf,size_t len)200 static void do_inflate(ne_decompress *ctx, const char *buf, size_t len)
201 {
202 int ret;
203
204 ctx->zstr.avail_in = len;
205 ctx->zstr.next_in = (unsigned char *)buf;
206 ctx->zstr.total_in = 0;
207
208 do {
209 ctx->zstr.avail_out = sizeof ctx->outbuf;
210 ctx->zstr.next_out = (unsigned char *)ctx->outbuf;
211 ctx->zstr.total_out = 0;
212
213 ret = inflate(&ctx->zstr, Z_NO_FLUSH);
214
215 NE_DEBUG(NE_DBG_HTTP,
216 "compress: inflate %d, %ld bytes out, %d remaining\n",
217 ret, ctx->zstr.total_out, ctx->zstr.avail_in);
218 #if 0
219 NE_DEBUG(NE_DBG_HTTPBODY,
220 "Inflated body block (%ld):\n[%.*s]\n",
221 ctx->zstr.total_out, (int)ctx->zstr.total_out,
222 ctx->outbuf);
223 #endif
224 /* update checksum. */
225 ctx->checksum = crc32(ctx->checksum, (unsigned char *)ctx->outbuf,
226 ctx->zstr.total_out);
227
228 /* pass on the inflated data, if any */
229 if (ctx->zstr.total_out > 0) {
230 ctx->reader(ctx->userdata, ctx->outbuf, ctx->zstr.total_out);
231 }
232 } while (ret == Z_OK && ctx->zstr.avail_in > 0);
233
234 if (ret == Z_STREAM_END) {
235 NE_DEBUG(NE_DBG_HTTP, "compress: end of data stream, remaining %d.\n",
236 ctx->zstr.avail_in);
237 /* process the footer. */
238 ctx->state = NE_Z_AFTER_DATA;
239 process_footer(ctx, ctx->zstr.next_in, ctx->zstr.avail_in);
240 } else if (ret != Z_OK) {
241 ctx->state = NE_Z_ERROR;
242 set_zlib_error(ctx, _("Could not inflate data"), ret);
243 }
244 }
245
246 /* Callback which is passed blocks of the response body. */
gz_reader(void * ud,const char * buf,size_t len)247 static void gz_reader(void *ud, const char *buf, size_t len)
248 {
249 ne_decompress *ctx = ud;
250 const char *zbuf;
251 size_t count;
252
253 switch (ctx->state) {
254 case NE_Z_PASSTHROUGH:
255 /* move along there. */
256 ctx->reader(ctx->userdata, buf, len);
257 return;
258
259 case NE_Z_ERROR:
260 /* beyond hope. */
261 break;
262
263 case NE_Z_FINISHED:
264 /* Could argue for tolerance, and ignoring trailing content;
265 * but it could mean something more serious. */
266 if (len > 0) {
267 ctx->state = NE_Z_ERROR;
268 ne_set_error(ctx->session,
269 "Unexpected content received after compressed stream");
270 }
271 break;
272
273 case NE_Z_BEFORE_DATA:
274 /* work out whether this is a compressed response or not. */
275 if (ctx->enchdr && strcasecmp(ctx->enchdr, "gzip") == 0) {
276 int ret;
277 NE_DEBUG(NE_DBG_HTTP, "compress: got gzipped stream.\n");
278
279 /* inflateInit2() works here where inflateInit() doesn't. */
280 ret = inflateInit2(&ctx->zstr, -MAX_WBITS);
281 if (ret != Z_OK) {
282 set_zlib_error(ctx, _("Could not initialize zlib"), ret);
283 return;
284 }
285 ctx->zstrinit = 1;
286
287 } else {
288 /* No Content-Encoding header: pass it on. TODO: we could
289 * hack it and register the real callback now. But that
290 * would require add_resp_body_rdr to have defined
291 * ordering semantics etc etc */
292 ctx->state = NE_Z_PASSTHROUGH;
293 ctx->reader(ctx->userdata, buf, len);
294 return;
295 }
296
297 ctx->state = NE_Z_IN_HEADER;
298 /* FALLTHROUGH */
299
300 case NE_Z_IN_HEADER:
301 /* copy as many bytes as possible into the buffer. */
302 if (len + ctx->incount > 10) {
303 count = 10 - ctx->incount;
304 } else {
305 count = len;
306 }
307 memcpy(ctx->in.buf + ctx->incount, buf, count);
308 ctx->incount += count;
309 /* have we got the full header yet? */
310 if (ctx->incount != 10) {
311 return;
312 }
313
314 buf += count;
315 len -= count;
316
317 switch (parse_header(ctx)) {
318 case HDR_EXTENDED:
319 if (len == 0)
320 return;
321 break;
322 case HDR_DONE:
323 if (len > 0) {
324 do_inflate(ctx, buf, len);
325 }
326 default:
327 return;
328 }
329
330 /* FALLTHROUGH */
331
332 case NE_Z_POST_HEADER:
333 /* eating the filename string. */
334 zbuf = memchr(buf, '\0', len);
335 if (zbuf == NULL) {
336 /* not found it yet. */
337 return;
338 }
339
340 NE_DEBUG(NE_DBG_HTTP,
341 "compresss: skipped %" NE_FMT_SIZE_T " header bytes.\n",
342 zbuf - buf);
343 /* found end of string. */
344 len -= (1 + zbuf - buf);
345 buf = zbuf + 1;
346 ctx->state = NE_Z_INFLATING;
347 if (len == 0) {
348 /* end of string was at end of buffer. */
349 return;
350 }
351
352 /* FALLTHROUGH */
353
354 case NE_Z_INFLATING:
355 do_inflate(ctx, buf, len);
356 break;
357
358 case NE_Z_AFTER_DATA:
359 process_footer(ctx, (unsigned char *)buf, len);
360 break;
361 }
362
363 }
364
/* Release the decompression context 'ctx' and report how the response
 * ended.  Returns NE_OK if no data was ever seen, the response was
 * passed through uncompressed, or the compressed stream completed.
 * Returns NE_ERROR if an error was recorded earlier, or if the
 * compressed stream stopped part-way (in which case the session error
 * is set here). */
int ne_decompress_destroy(ne_decompress *ctx)
{
    int result = NE_ERROR;

    /* inflateEnd only fails if it's passed NULL etc, so its return
     * value is ignored. */
    if (ctx->zstrinit) {
        inflateEnd(&ctx->zstr);
    }

    if (ctx->enchdr) {
        ne_free(ctx->enchdr);
    }

    if (ctx->state == NE_Z_BEFORE_DATA
        || ctx->state == NE_Z_PASSTHROUGH
        || ctx->state == NE_Z_FINISHED) {
        result = NE_OK;
    } else if (ctx->state != NE_Z_ERROR) {
        /* stopped in the middle of the stream: truncated response.
         * (For NE_Z_ERROR the session error is already set.) */
        ne_set_error(ctx->session, "Compressed response was truncated");
    }

    ne_free(ctx);
    return result;
}
397
398 /* Prepare for a compressed response */
gz_pre_send(ne_request * r,void * ud,ne_buffer * req)399 static void gz_pre_send(ne_request *r, void *ud, ne_buffer *req)
400 {
401 ne_decompress *ctx = ud;
402
403 NE_DEBUG(NE_DBG_HTTP, "compress: Initialization.\n");
404
405 /* (Re-)Initialize the context */
406 ctx->state = NE_Z_BEFORE_DATA;
407 if (ctx->zstrinit) inflateEnd(&ctx->zstr);
408 ctx->zstrinit = 0;
409 ctx->incount = ctx->footcount = 0;
410 ctx->checksum = crc32(0L, Z_NULL, 0);
411 if (ctx->enchdr) {
412 ne_free(ctx->enchdr);
413 ctx->enchdr = NULL;
414 }
415 }
416
417 /* Kill the pre-send hook */
gz_destroy(ne_request * req,void * userdata)418 static void gz_destroy(ne_request *req, void *userdata)
419 {
420 ne_kill_pre_send(ne_get_session(req), gz_pre_send, userdata);
421 }
422
423 /* Wrapper for user-passed acceptor function. */
gz_acceptor(void * userdata,ne_request * req,const ne_status * st)424 static int gz_acceptor(void *userdata, ne_request *req, const ne_status *st)
425 {
426 ne_decompress *ctx = userdata;
427 return ctx->acceptor(ctx->userdata, req, st);
428 }
429
ne_decompress_reader(ne_request * req,ne_accept_response acpt,ne_block_reader rdr,void * userdata)430 ne_decompress *ne_decompress_reader(ne_request *req, ne_accept_response acpt,
431 ne_block_reader rdr, void *userdata)
432 {
433 ne_decompress *ctx = ne_calloc(sizeof *ctx);
434
435 ne_add_request_header(req, "Accept-Encoding", "gzip");
436
437 ne_add_response_header_handler(req, "Content-Encoding",
438 ne_duplicate_header, &ctx->enchdr);
439
440 ne_add_response_body_reader(req, gz_acceptor, gz_reader, ctx);
441
442 ctx->reader = rdr;
443 ctx->userdata = userdata;
444 ctx->session = ne_get_session(req);
445 ctx->acceptor = acpt;
446
447 ne_hook_pre_send(ctx->session, gz_pre_send, ctx);
448 ne_hook_destroy_request(ctx->session, gz_destroy, ctx);
449
450 return ctx;
451 }
452
453 #else /* !NEON_ZLIB */
454
455 /* Pass-through interface present to provide ABI compatibility. */
456
ne_decompress_reader(ne_request * req,ne_accept_response acpt,ne_block_reader rdr,void * userdata)457 ne_decompress *ne_decompress_reader(ne_request *req, ne_accept_response acpt,
458 ne_block_reader rdr, void *userdata)
459 {
460 ne_add_response_body_reader(req, acpt, rdr, userdata);
461 /* an arbitrary return value: don't confuse them by returning NULL. */
462 return (ne_decompress *)req;
463 }
464
/* Built without zlib: there is no context to release, so 'dc' is
 * ignored and zero is always returned. */
int ne_decompress_destroy(ne_decompress *dc)
{
    (void)dc;

    return 0;
}
469
470 #endif /* NEON_ZLIB */
471