1 /*
2 Handling of compressed HTTP responses
3 Copyright (C) 2001-2021, Joe Orton <joe@manyfish.co.uk>
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public
7 License as published by the Free Software Foundation; either
8 version 2 of the License, or (at your option) any later version.
9
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Library General Public License for more details.
14
15 You should have received a copy of the GNU Library General Public
16 License along with this library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
18 MA 02111-1307, USA
19
20 */
21
22 #include "config.h"
23
24 #ifdef HAVE_STRING_H
25 #include <string.h>
26 #endif
27 #ifdef HAVE_STDLIB_H
28 #include <stdlib.h>
29 #endif
30
31 #include "ne_request.h"
32 #include "ne_compress.h"
33 #include "ne_utils.h"
34 #include "ne_internal.h"
35
36 #ifdef NE_HAVE_ZLIB
37
38 #include <zlib.h>
39
40 /* Adds support for the 'gzip' Content-Encoding in HTTP. gzip is a
41 * file format which wraps the DEFLATE compression algorithm. zlib
42 * implements DEFLATE: we have to unwrap the gzip format (specified in
43 * RFC1952) as it comes off the wire, and hand off chunks of data to
44 * be inflated. */
45
46 struct ne_decompress_s {
47 ne_request *request; /* associated request. */
48 ne_session *session; /* associated session. */
49 /* temporary buffer for holding inflated data. */
50 char outbuf[NE_BUFSIZ];
51 z_stream zstr;
52 int zstrinit; /* non-zero if zstr has been initialized */
53
54 /* pass blocks back to this. */
55 ne_block_reader reader;
56 ne_accept_response acceptor;
57 void *userdata;
58
59 /* buffer for gzip header bytes. */
60 unsigned char header[10];
61 size_t hdrcount; /* bytes in header */
62
63 unsigned char footer[8];
64 size_t footcount; /* bytes in footer. */
65
66 /* CRC32 checksum: odd that zlib uses uLong for this since it is a
67 * 64-bit integer on LP64 platforms. */
68 uLong checksum;
69
70 /* current state. */
71 enum state {
72 NE_Z_BEFORE_DATA, /* not received any response blocks yet. */
73 NE_Z_PASSTHROUGH, /* response not compressed: passing through. */
74 NE_Z_IN_HEADER, /* received a few bytes of response data, but not
75 * got past the gzip header yet. */
76 NE_Z_POST_HEADER, /* waiting for the end of the NUL-terminated bits. */
77 NE_Z_INFLATING, /* inflating response bytes. */
78 NE_Z_AFTER_DATA, /* after data; reading CRC32 & ISIZE */
79 NE_Z_FINISHED /* stream is finished. */
80 } state;
81 };
82
/* Assemble an unsigned int from the four bytes at 'buf', least
 * significant byte first; 'buf' must be 'unsigned char *'. */
#define BUF2UINT(buf)                          \
    ((((unsigned int)(buf)[3]) << 24) |        \
     (((unsigned int)(buf)[2]) << 16) |        \
     (((unsigned int)(buf)[1]) << 8) |         \
     ((unsigned int)(buf)[0]))

/* The two identification bytes which begin every gzip header. */
#define ID1 0x1f
#define ID2 0x8b

/* Result codes of parse_header(). */
enum {
    HDR_DONE = 0,
    HDR_EXTENDED = 1,
    HDR_ERROR = 2
};

/* Accessors for the fields of the buffered 10-byte gzip header. */
#define HDR_ID1(ctx)    ((ctx)->header[0])
#define HDR_ID2(ctx)    ((ctx)->header[1])
#define HDR_CMETH(ctx)  ((ctx)->header[2])
#define HDR_FLAGS(ctx)  ((ctx)->header[3])
#define HDR_MTIME(ctx)  (BUF2UINT(&(ctx)->header[4]))
#define HDR_XFLAGS(ctx) ((ctx)->header[8])
#define HDR_OS(ctx)     ((ctx)->header[9])
100
101 /* parse_header parses the gzip header, sets the next state and returns
102 * HDR_DONE: all done, bytes following are raw DEFLATE data.
103 * HDR_EXTENDED: all done, expect a NUL-termianted string
104 * before the DEFLATE data
105 * HDR_ERROR: invalid header, give up (session error is set).
106 */
parse_header(ne_decompress * ctx)107 static int parse_header(ne_decompress *ctx)
108 {
109 NE_DEBUG(NE_DBG_HTTP, "ID1: %d ID2: %d, cmeth %d, flags %d\n",
110 HDR_ID1(ctx), HDR_ID2(ctx), HDR_CMETH(ctx), HDR_FLAGS(ctx));
111
112 if (HDR_ID1(ctx) != ID1 || HDR_ID2(ctx) != ID2 || HDR_CMETH(ctx) != 8) {
113 ne_set_error(ctx->session, "Compressed stream invalid");
114 return HDR_ERROR;
115 }
116
117 NE_DEBUG(NE_DBG_HTTP, "mtime: %d, xflags: %d, os: %d\n",
118 HDR_MTIME(ctx), HDR_XFLAGS(ctx), HDR_OS(ctx));
119
120 /* TODO: we can only handle one NUL-terminated extensions field
121 * currently. Really, we should count the number of bits set, and
122 * skip as many fields as bits set (bailing if any reserved bits
123 * are set. */
124 if (HDR_FLAGS(ctx) == 8) {
125 ctx->state = NE_Z_POST_HEADER;
126 return HDR_EXTENDED;
127 } else if (HDR_FLAGS(ctx) != 0) {
128 ne_set_error(ctx->session, "Compressed stream not supported");
129 return HDR_ERROR;
130 }
131
132 NE_DEBUG(NE_DBG_HTTP, "compress: Good stream.\n");
133
134 ctx->state = NE_Z_INFLATING;
135 return HDR_DONE;
136 }
137
138 /* Process extra 'len' bytes of 'buf' which were received after the
139 * DEFLATE data. */
process_footer(ne_decompress * ctx,const unsigned char * buf,size_t len)140 static int process_footer(ne_decompress *ctx,
141 const unsigned char *buf, size_t len)
142 {
143 if (len + ctx->footcount > 8) {
144 ne_set_error(ctx->session,
145 "Too many bytes (%" NE_FMT_SIZE_T ") in gzip footer",
146 len);
147 return -1;
148 } else {
149 memcpy(ctx->footer + ctx->footcount, buf, len);
150 ctx->footcount += len;
151 if (ctx->footcount == 8) {
152 uLong crc = BUF2UINT(ctx->footer) & 0xFFFFFFFF;
153 if (crc == ctx->checksum) {
154 ctx->state = NE_Z_FINISHED;
155 NE_DEBUG(NE_DBG_HTTP, "compress: End of response; checksum match.\n");
156 } else {
157 NE_DEBUG(NE_DBG_HTTP, "compress: End of response; checksum mismatch: "
158 "given %lu vs computed %lu\n", crc, ctx->checksum);
159 ne_set_error(ctx->session,
160 "Checksum invalid for compressed stream");
161 return -1;
162 }
163 }
164 }
165 return 0;
166 }
167
168 /* A zlib function failed with 'code'; set the session error string
169 * appropriately. */
set_zlib_error(ne_decompress * ctx,const char * msg,int code)170 static void set_zlib_error(ne_decompress *ctx, const char *msg, int code)
171 {
172 if (ctx->zstr.msg)
173 ne_set_error(ctx->session, "%s: %s", msg, ctx->zstr.msg);
174 else {
175 const char *err;
176 switch (code) {
177 case Z_STREAM_ERROR: err = "stream error"; break;
178 case Z_DATA_ERROR: err = "data corrupt"; break;
179 case Z_MEM_ERROR: err = "out of memory"; break;
180 case Z_BUF_ERROR: err = "buffer error"; break;
181 case Z_VERSION_ERROR: err = "library version mismatch"; break;
182 default: err = "unknown error"; break;
183 }
184 ne_set_error(ctx->session, _("%s: %s (code %d)"), msg, err, code);
185 }
186 }
187
188 /* Inflate response buffer 'buf' of length 'len'. */
do_inflate(ne_decompress * ctx,const char * buf,size_t len)189 static int do_inflate(ne_decompress *ctx, const char *buf, size_t len)
190 {
191 int ret;
192
193 ctx->zstr.avail_in = len;
194 ctx->zstr.next_in = (unsigned char *)buf;
195 ctx->zstr.total_in = 0;
196
197 do {
198 ctx->zstr.avail_out = sizeof ctx->outbuf;
199 ctx->zstr.next_out = (unsigned char *)ctx->outbuf;
200 ctx->zstr.total_out = 0;
201
202 ret = inflate(&ctx->zstr, Z_NO_FLUSH);
203
204 NE_DEBUG(NE_DBG_HTTP,
205 "compress: inflate %d, %ld bytes out, %d remaining\n",
206 ret, ctx->zstr.total_out, ctx->zstr.avail_in);
207 #if 0
208 NE_DEBUG(NE_DBG_HTTPBODY,
209 "Inflated body block (%ld):\n[%.*s]\n",
210 ctx->zstr.total_out, (int)ctx->zstr.total_out,
211 ctx->outbuf);
212 #endif
213 /* update checksum. */
214 ctx->checksum = crc32(ctx->checksum, (unsigned char *)ctx->outbuf,
215 ctx->zstr.total_out);
216
217 /* pass on the inflated data, if any */
218 if (ctx->zstr.total_out > 0) {
219 int rret = ctx->reader(ctx->userdata, ctx->outbuf,
220 ctx->zstr.total_out);
221 if (rret) return rret;
222 }
223 } while (ret == Z_OK && ctx->zstr.avail_in > 0);
224
225 if (ret == Z_STREAM_END) {
226 NE_DEBUG(NE_DBG_HTTP, "compress: end of data stream, %d bytes remain.\n",
227 ctx->zstr.avail_in);
228 /* process the footer. */
229 ctx->state = NE_Z_AFTER_DATA;
230 return process_footer(ctx, ctx->zstr.next_in, ctx->zstr.avail_in);
231 } else if (ret != Z_OK) {
232 set_zlib_error(ctx, _("Could not inflate data"), ret);
233 return NE_ERROR;
234 }
235 return 0;
236 }
237
238 /* Callback which is passed blocks of the response body. */
gz_reader(void * ud,const char * buf,size_t len)239 static int gz_reader(void *ud, const char *buf, size_t len)
240 {
241 ne_decompress *ctx = ud;
242 const char *zbuf;
243 size_t count;
244 const char *hdr;
245
246 if (len == 0) {
247 /* End of response: */
248 switch (ctx->state) {
249 case NE_Z_BEFORE_DATA:
250 hdr = ne_get_response_header(ctx->request, "Content-Encoding");
251 if (hdr && ne_strcasecmp(hdr, "gzip") == 0) {
252 /* response was truncated: return error. */
253 break;
254 }
255 /* else, fall through */
256 case NE_Z_FINISHED: /* complete gzip response */
257 case NE_Z_PASSTHROUGH: /* complete uncompressed response */
258 return ctx->reader(ctx->userdata, buf, 0);
259 default:
260 /* invalid state: truncated response. */
261 break;
262 }
263 /* else: truncated response, fail. */
264 ne_set_error(ctx->session, "Compressed response was truncated");
265 return NE_ERROR;
266 }
267
268 switch (ctx->state) {
269 case NE_Z_PASSTHROUGH:
270 /* move along there. */
271 return ctx->reader(ctx->userdata, buf, len);
272
273 case NE_Z_FINISHED:
274 /* Could argue for tolerance, and ignoring trailing content;
275 * but it could mean something more serious. */
276 if (len > 0) {
277 ne_set_error(ctx->session,
278 "Unexpected content received after compressed stream");
279 return NE_ERROR;
280 }
281 break;
282
283 case NE_Z_BEFORE_DATA:
284 /* work out whether this is a compressed response or not. */
285 hdr = ne_get_response_header(ctx->request, "Content-Encoding");
286 if (hdr && ne_strcasecmp(hdr, "gzip") == 0) {
287 int ret;
288 NE_DEBUG(NE_DBG_HTTP, "compress: got gzipped stream.\n");
289
290 /* inflateInit2() works here where inflateInit() doesn't. */
291 ret = inflateInit2(&ctx->zstr, -MAX_WBITS);
292 if (ret != Z_OK) {
293 set_zlib_error(ctx, _("Could not initialize zlib"), ret);
294 return -1;
295 }
296 ctx->zstrinit = 1;
297
298 } else {
299 /* No Content-Encoding header: pass it on. TODO: we could
300 * hack it and register the real callback now. But that
301 * would require add_resp_body_rdr to have defined
302 * ordering semantics etc etc */
303 ctx->state = NE_Z_PASSTHROUGH;
304 return ctx->reader(ctx->userdata, buf, len);
305 }
306
307 ctx->state = NE_Z_IN_HEADER;
308 /* FALLTHROUGH */
309
310 case NE_Z_IN_HEADER:
311 /* copy as many bytes as possible into the buffer. */
312 if (len + ctx->hdrcount > 10) {
313 count = 10 - ctx->hdrcount;
314 } else {
315 count = len;
316 }
317 memcpy(ctx->header + ctx->hdrcount, buf, count);
318 ctx->hdrcount += count;
319 /* have we got the full header yet? */
320 if (ctx->hdrcount != 10) {
321 return 0;
322 }
323
324 buf += count;
325 len -= count;
326
327 switch (parse_header(ctx)) {
328 case HDR_EXTENDED:
329 if (len == 0)
330 return 0;
331 break;
332 case HDR_ERROR:
333 return NE_ERROR;
334 case HDR_DONE:
335 if (len > 0) {
336 return do_inflate(ctx, buf, len);
337 }
338 break;
339 }
340
341 /* FALLTHROUGH */
342
343 case NE_Z_POST_HEADER:
344 /* eating the filename string. */
345 zbuf = memchr(buf, '\0', len);
346 if (zbuf == NULL) {
347 /* not found it yet. */
348 return 0;
349 }
350
351 NE_DEBUG(NE_DBG_HTTP,
352 "compress: skipped %" NE_FMT_SIZE_T " header bytes.\n",
353 zbuf - buf);
354 /* found end of string. */
355 len -= (1 + zbuf - buf);
356 buf = zbuf + 1;
357 ctx->state = NE_Z_INFLATING;
358 if (len == 0) {
359 /* end of string was at end of buffer. */
360 return 0;
361 }
362
363 /* FALLTHROUGH */
364
365 case NE_Z_INFLATING:
366 return do_inflate(ctx, buf, len);
367
368 case NE_Z_AFTER_DATA:
369 return process_footer(ctx, (unsigned char *)buf, len);
370 }
371
372 return 0;
373 }
374
375 /* Prepare for a compressed response; may be called many times per
376 * request, for auth retries etc. */
gz_pre_send(ne_request * r,void * ud,ne_buffer * req)377 static void gz_pre_send(ne_request *r, void *ud, ne_buffer *req)
378 {
379 ne_decompress *ctx = ud;
380
381 if (ctx->request == r) {
382 NE_DEBUG(NE_DBG_HTTP, "compress: Initialization.\n");
383
384 /* (Re-)Initialize the context */
385 ctx->state = NE_Z_BEFORE_DATA;
386 if (ctx->zstrinit) inflateEnd(&ctx->zstr);
387 ctx->zstrinit = 0;
388 ctx->hdrcount = ctx->footcount = 0;
389 ctx->checksum = crc32(0L, Z_NULL, 0);
390 }
391 }
392
393 /* Wrapper for user-passed acceptor function. */
gz_acceptor(void * userdata,ne_request * req,const ne_status * st)394 static int gz_acceptor(void *userdata, ne_request *req, const ne_status *st)
395 {
396 ne_decompress *ctx = userdata;
397 return ctx->acceptor(ctx->userdata, req, st);
398 }
399
400 /* A slightly ugly hack: the pre_send hook is scoped per-session, so
401 * must check that the invoking request is this one, before doing
402 * anything, and must be unregistered when the context is
403 * destroyed. */
ne_decompress_reader(ne_request * req,ne_accept_response acpt,ne_block_reader rdr,void * userdata)404 ne_decompress *ne_decompress_reader(ne_request *req, ne_accept_response acpt,
405 ne_block_reader rdr, void *userdata)
406 {
407 ne_decompress *ctx = ne_calloc(sizeof *ctx);
408
409 ne_add_request_header(req, "Accept-Encoding", "gzip");
410
411 ne_add_response_body_reader(req, gz_acceptor, gz_reader, ctx);
412
413 ctx->reader = rdr;
414 ctx->userdata = userdata;
415 ctx->session = ne_get_session(req);
416 ctx->request = req;
417 ctx->acceptor = acpt;
418
419 ne_hook_pre_send(ne_get_session(req), gz_pre_send, ctx);
420
421 return ctx;
422 }
423
/* Destroy a context created by ne_decompress_reader(): release any
 * zlib state, remove the per-session pre_send hook and free the
 * context itself. */
void ne_decompress_destroy(ne_decompress *ctx)
{
    if (ctx->zstrinit) {
        inflateEnd(&ctx->zstr);
    }

    ne_unhook_pre_send(ctx->session, gz_pre_send, ctx);

    ne_free(ctx);
}
432
433 #else /* !NE_HAVE_ZLIB */
434
435 /* Pass-through interface present to provide ABI compatibility. */
436
ne_decompress_reader(ne_request * req,ne_accept_response acpt,ne_block_reader rdr,void * userdata)437 ne_decompress *ne_decompress_reader(ne_request *req, ne_accept_response acpt,
438 ne_block_reader rdr, void *userdata)
439 {
440 ne_add_response_body_reader(req, acpt, rdr, userdata);
441 /* an arbitrary return value: don't confuse them by returning NULL. */
442 return (ne_decompress *)req;
443 }
444
/* Nothing to release: in this configuration ne_decompress_reader()
 * does not allocate a context. */
void ne_decompress_destroy(ne_decompress *dc)
{
    (void)dc;
}
448
449 #endif /* NE_HAVE_ZLIB */
450