1 /*
2    Handling of compressed HTTP responses
3    Copyright (C) 2001-2021, Joe Orton <joe@manyfish.co.uk>
4 
5    This library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Library General Public
7    License as published by the Free Software Foundation; either
8    version 2 of the License, or (at your option) any later version.
9 
10    This library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Library General Public License for more details.
14 
15    You should have received a copy of the GNU Library General Public
16    License along with this library; if not, write to the Free
17    Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
18    MA 02111-1307, USA
19 
20 */
21 
22 #include "config.h"
23 
24 #ifdef HAVE_STRING_H
25 #include <string.h>
26 #endif
27 #ifdef HAVE_STDLIB_H
28 #include <stdlib.h>
29 #endif
30 
31 #include "ne_request.h"
32 #include "ne_compress.h"
33 #include "ne_utils.h"
34 #include "ne_internal.h"
35 
36 #ifdef NE_HAVE_ZLIB
37 
38 #include <zlib.h>
39 
40 /* Adds support for the 'gzip' Content-Encoding in HTTP.  gzip is a
41  * file format which wraps the DEFLATE compression algorithm.  zlib
42  * implements DEFLATE: we have to unwrap the gzip format (specified in
43  * RFC1952) as it comes off the wire, and hand off chunks of data to
44  * be inflated. */
45 
46 struct ne_decompress_s {
47     ne_request *request; /* associated request. */
48     ne_session *session; /* associated session. */
49     /* temporary buffer for holding inflated data. */
50     char outbuf[NE_BUFSIZ];
51     z_stream zstr;
52     int zstrinit; /* non-zero if zstr has been initialized */
53 
54     /* pass blocks back to this. */
55     ne_block_reader reader;
56     ne_accept_response acceptor;
57     void *userdata;
58 
59     /* buffer for gzip header bytes. */
60     unsigned char header[10];
61     size_t hdrcount;    /* bytes in header */
62 
63     unsigned char footer[8];
64     size_t footcount; /* bytes in footer. */
65 
66     /* CRC32 checksum: odd that zlib uses uLong for this since it is a
67      * 64-bit integer on LP64 platforms. */
68     uLong checksum;
69 
70     /* current state. */
71     enum state {
72 	NE_Z_BEFORE_DATA, /* not received any response blocks yet. */
73 	NE_Z_PASSTHROUGH, /* response not compressed: passing through. */
74 	NE_Z_IN_HEADER, /* received a few bytes of response data, but not
75 			 * got past the gzip header yet. */
76 	NE_Z_POST_HEADER, /* waiting for the end of the NUL-terminated bits. */
77 	NE_Z_INFLATING, /* inflating response bytes. */
78 	NE_Z_AFTER_DATA, /* after data; reading CRC32 & ISIZE */
79 	NE_Z_FINISHED /* stream is finished. */
80     } state;
81 };
82 
/* Assemble an unsigned int from the four bytes at 'buf', taking the
 * least significant byte first; 'buf' must be 'unsigned char *' */
#define BUF2UINT(buf)                       \
    (((unsigned int)(buf)[0])               \
     | (((unsigned int)(buf)[1]) << 8)      \
     | (((unsigned int)(buf)[2]) << 16)     \
     | (((unsigned int)(buf)[3]) << 24))

/* Values which the first two header bytes must have. */
#define ID1 0x1f
#define ID2 0x8b

/* Return values of parse_header(). */
#define HDR_DONE 0
#define HDR_EXTENDED 1
#define HDR_ERROR 2

/* Accessors for the individual fields of the buffered header held in
 * the decompression context 'ctx'. */
#define HDR_ID1(ctx) ((ctx)->header[0])
#define HDR_ID2(ctx) ((ctx)->header[1])
#define HDR_CMETH(ctx) ((ctx)->header[2])
#define HDR_FLAGS(ctx) ((ctx)->header[3])
#define HDR_MTIME(ctx) (BUF2UINT(&(ctx)->header[4]))
#define HDR_XFLAGS(ctx) ((ctx)->header[8])
#define HDR_OS(ctx) ((ctx)->header[9])
100 
101 /* parse_header parses the gzip header, sets the next state and returns
102  *   HDR_DONE: all done, bytes following are raw DEFLATE data.
103  *   HDR_EXTENDED: all done, expect a NUL-termianted string
104  *                 before the DEFLATE data
105  *   HDR_ERROR: invalid header, give up (session error is set).
106  */
parse_header(ne_decompress * ctx)107 static int parse_header(ne_decompress *ctx)
108 {
109     NE_DEBUG(NE_DBG_HTTP, "ID1: %d  ID2: %d, cmeth %d, flags %d\n",
110              HDR_ID1(ctx), HDR_ID2(ctx), HDR_CMETH(ctx), HDR_FLAGS(ctx));
111 
112     if (HDR_ID1(ctx) != ID1 || HDR_ID2(ctx) != ID2 || HDR_CMETH(ctx) != 8) {
113 	ne_set_error(ctx->session, "Compressed stream invalid");
114 	return HDR_ERROR;
115     }
116 
117     NE_DEBUG(NE_DBG_HTTP, "mtime: %d, xflags: %d, os: %d\n",
118 	     HDR_MTIME(ctx), HDR_XFLAGS(ctx), HDR_OS(ctx));
119 
120     /* TODO: we can only handle one NUL-terminated extensions field
121      * currently.  Really, we should count the number of bits set, and
122      * skip as many fields as bits set (bailing if any reserved bits
123      * are set. */
124     if (HDR_FLAGS(ctx) == 8) {
125 	ctx->state = NE_Z_POST_HEADER;
126 	return HDR_EXTENDED;
127     } else if (HDR_FLAGS(ctx) != 0) {
128 	ne_set_error(ctx->session, "Compressed stream not supported");
129 	return HDR_ERROR;
130     }
131 
132     NE_DEBUG(NE_DBG_HTTP, "compress: Good stream.\n");
133 
134     ctx->state = NE_Z_INFLATING;
135     return HDR_DONE;
136 }
137 
138 /* Process extra 'len' bytes of 'buf' which were received after the
139  * DEFLATE data. */
process_footer(ne_decompress * ctx,const unsigned char * buf,size_t len)140 static int process_footer(ne_decompress *ctx,
141 			   const unsigned char *buf, size_t len)
142 {
143     if (len + ctx->footcount > 8) {
144         ne_set_error(ctx->session,
145                      "Too many bytes (%" NE_FMT_SIZE_T ") in gzip footer",
146                      len);
147         return -1;
148     } else {
149 	memcpy(ctx->footer + ctx->footcount, buf, len);
150 	ctx->footcount += len;
151 	if (ctx->footcount == 8) {
152 	    uLong crc = BUF2UINT(ctx->footer) & 0xFFFFFFFF;
153 	    if (crc == ctx->checksum) {
154 		ctx->state = NE_Z_FINISHED;
155 		NE_DEBUG(NE_DBG_HTTP, "compress: End of response; checksum match.\n");
156 	    } else {
157 		NE_DEBUG(NE_DBG_HTTP, "compress: End of response; checksum mismatch: "
158 			 "given %lu vs computed %lu\n", crc, ctx->checksum);
159 		ne_set_error(ctx->session,
160 			     "Checksum invalid for compressed stream");
161                 return -1;
162 	    }
163 	}
164     }
165     return 0;
166 }
167 
168 /* A zlib function failed with 'code'; set the session error string
169  * appropriately. */
set_zlib_error(ne_decompress * ctx,const char * msg,int code)170 static void set_zlib_error(ne_decompress *ctx, const char *msg, int code)
171 {
172     if (ctx->zstr.msg)
173         ne_set_error(ctx->session, "%s: %s", msg, ctx->zstr.msg);
174     else {
175         const char *err;
176         switch (code) {
177         case Z_STREAM_ERROR: err = "stream error"; break;
178         case Z_DATA_ERROR: err = "data corrupt"; break;
179         case Z_MEM_ERROR: err = "out of memory"; break;
180         case Z_BUF_ERROR: err = "buffer error"; break;
181         case Z_VERSION_ERROR: err = "library version mismatch"; break;
182         default: err = "unknown error"; break;
183         }
184         ne_set_error(ctx->session, _("%s: %s (code %d)"), msg, err, code);
185     }
186 }
187 
188 /* Inflate response buffer 'buf' of length 'len'. */
do_inflate(ne_decompress * ctx,const char * buf,size_t len)189 static int do_inflate(ne_decompress *ctx, const char *buf, size_t len)
190 {
191     int ret;
192 
193     ctx->zstr.avail_in = len;
194     ctx->zstr.next_in = (unsigned char *)buf;
195     ctx->zstr.total_in = 0;
196 
197     do {
198 	ctx->zstr.avail_out = sizeof ctx->outbuf;
199 	ctx->zstr.next_out = (unsigned char *)ctx->outbuf;
200 	ctx->zstr.total_out = 0;
201 
202 	ret = inflate(&ctx->zstr, Z_NO_FLUSH);
203 
204 	NE_DEBUG(NE_DBG_HTTP,
205 		 "compress: inflate %d, %ld bytes out, %d remaining\n",
206 		 ret, ctx->zstr.total_out, ctx->zstr.avail_in);
207 #if 0
208 	NE_DEBUG(NE_DBG_HTTPBODY,
209 		 "Inflated body block (%ld):\n[%.*s]\n",
210 		 ctx->zstr.total_out, (int)ctx->zstr.total_out,
211 		 ctx->outbuf);
212 #endif
213 	/* update checksum. */
214 	ctx->checksum = crc32(ctx->checksum, (unsigned char *)ctx->outbuf,
215 			      ctx->zstr.total_out);
216 
217 	/* pass on the inflated data, if any */
218         if (ctx->zstr.total_out > 0) {
219             int rret = ctx->reader(ctx->userdata, ctx->outbuf,
220                                    ctx->zstr.total_out);
221             if (rret) return rret;
222         }
223     } while (ret == Z_OK && ctx->zstr.avail_in > 0);
224 
225     if (ret == Z_STREAM_END) {
226 	NE_DEBUG(NE_DBG_HTTP, "compress: end of data stream, %d bytes remain.\n",
227 		 ctx->zstr.avail_in);
228 	/* process the footer. */
229 	ctx->state = NE_Z_AFTER_DATA;
230 	return process_footer(ctx, ctx->zstr.next_in, ctx->zstr.avail_in);
231     } else if (ret != Z_OK) {
232         set_zlib_error(ctx, _("Could not inflate data"), ret);
233         return NE_ERROR;
234     }
235     return 0;
236 }
237 
238 /* Callback which is passed blocks of the response body. */
gz_reader(void * ud,const char * buf,size_t len)239 static int gz_reader(void *ud, const char *buf, size_t len)
240 {
241     ne_decompress *ctx = ud;
242     const char *zbuf;
243     size_t count;
244     const char *hdr;
245 
246     if (len == 0) {
247         /* End of response: */
248         switch (ctx->state) {
249         case NE_Z_BEFORE_DATA:
250             hdr = ne_get_response_header(ctx->request, "Content-Encoding");
251             if (hdr && ne_strcasecmp(hdr, "gzip") == 0) {
252                 /* response was truncated: return error. */
253                 break;
254             }
255             /* else, fall through */
256         case NE_Z_FINISHED: /* complete gzip response */
257         case NE_Z_PASSTHROUGH: /* complete uncompressed response */
258             return ctx->reader(ctx->userdata, buf, 0);
259         default:
260             /* invalid state: truncated response. */
261             break;
262         }
263 	/* else: truncated response, fail. */
264 	ne_set_error(ctx->session, "Compressed response was truncated");
265 	return NE_ERROR;
266     }
267 
268     switch (ctx->state) {
269     case NE_Z_PASSTHROUGH:
270 	/* move along there. */
271 	return ctx->reader(ctx->userdata, buf, len);
272 
273     case NE_Z_FINISHED:
274 	/* Could argue for tolerance, and ignoring trailing content;
275 	 * but it could mean something more serious. */
276 	if (len > 0) {
277 	    ne_set_error(ctx->session,
278 			 "Unexpected content received after compressed stream");
279             return NE_ERROR;
280 	}
281         break;
282 
283     case NE_Z_BEFORE_DATA:
284 	/* work out whether this is a compressed response or not. */
285         hdr = ne_get_response_header(ctx->request, "Content-Encoding");
286         if (hdr && ne_strcasecmp(hdr, "gzip") == 0) {
287             int ret;
288 	    NE_DEBUG(NE_DBG_HTTP, "compress: got gzipped stream.\n");
289 
290             /* inflateInit2() works here where inflateInit() doesn't. */
291             ret = inflateInit2(&ctx->zstr, -MAX_WBITS);
292             if (ret != Z_OK) {
293                 set_zlib_error(ctx, _("Could not initialize zlib"), ret);
294                 return -1;
295             }
296 	    ctx->zstrinit = 1;
297 
298 	} else {
299 	    /* No Content-Encoding header: pass it on.  TODO: we could
300 	     * hack it and register the real callback now. But that
301 	     * would require add_resp_body_rdr to have defined
302 	     * ordering semantics etc etc */
303 	    ctx->state = NE_Z_PASSTHROUGH;
304 	    return ctx->reader(ctx->userdata, buf, len);
305 	}
306 
307 	ctx->state = NE_Z_IN_HEADER;
308 	/* FALLTHROUGH */
309 
310     case NE_Z_IN_HEADER:
311 	/* copy as many bytes as possible into the buffer. */
312 	if (len + ctx->hdrcount > 10) {
313 	    count = 10 - ctx->hdrcount;
314 	} else {
315 	    count = len;
316 	}
317 	memcpy(ctx->header + ctx->hdrcount, buf, count);
318 	ctx->hdrcount += count;
319 	/* have we got the full header yet? */
320 	if (ctx->hdrcount != 10) {
321 	    return 0;
322 	}
323 
324 	buf += count;
325 	len -= count;
326 
327 	switch (parse_header(ctx)) {
328 	case HDR_EXTENDED:
329 	    if (len == 0)
330 		return 0;
331 	    break;
332         case HDR_ERROR:
333             return NE_ERROR;
334 	case HDR_DONE:
335 	    if (len > 0) {
336 		return do_inflate(ctx, buf, len);
337 	    }
338             break;
339 	}
340 
341 	/* FALLTHROUGH */
342 
343     case NE_Z_POST_HEADER:
344 	/* eating the filename string. */
345 	zbuf = memchr(buf, '\0', len);
346 	if (zbuf == NULL) {
347 	    /* not found it yet. */
348 	    return 0;
349 	}
350 
351 	NE_DEBUG(NE_DBG_HTTP,
352 		 "compress: skipped %" NE_FMT_SIZE_T " header bytes.\n",
353 		 zbuf - buf);
354 	/* found end of string. */
355 	len -= (1 + zbuf - buf);
356 	buf = zbuf + 1;
357 	ctx->state = NE_Z_INFLATING;
358 	if (len == 0) {
359 	    /* end of string was at end of buffer. */
360 	    return 0;
361 	}
362 
363 	/* FALLTHROUGH */
364 
365     case NE_Z_INFLATING:
366 	return do_inflate(ctx, buf, len);
367 
368     case NE_Z_AFTER_DATA:
369 	return process_footer(ctx, (unsigned char *)buf, len);
370     }
371 
372     return 0;
373 }
374 
375 /* Prepare for a compressed response; may be called many times per
376  * request, for auth retries etc. */
gz_pre_send(ne_request * r,void * ud,ne_buffer * req)377 static void gz_pre_send(ne_request *r, void *ud, ne_buffer *req)
378 {
379     ne_decompress *ctx = ud;
380 
381     if (ctx->request == r) {
382         NE_DEBUG(NE_DBG_HTTP, "compress: Initialization.\n");
383 
384         /* (Re-)Initialize the context */
385         ctx->state = NE_Z_BEFORE_DATA;
386         if (ctx->zstrinit) inflateEnd(&ctx->zstr);
387         ctx->zstrinit = 0;
388         ctx->hdrcount = ctx->footcount = 0;
389         ctx->checksum = crc32(0L, Z_NULL, 0);
390     }
391 }
392 
393 /* Wrapper for user-passed acceptor function. */
gz_acceptor(void * userdata,ne_request * req,const ne_status * st)394 static int gz_acceptor(void *userdata, ne_request *req, const ne_status *st)
395 {
396     ne_decompress *ctx = userdata;
397     return ctx->acceptor(ctx->userdata, req, st);
398 }
399 
400 /* A slightly ugly hack: the pre_send hook is scoped per-session, so
401  * must check that the invoking request is this one, before doing
402  * anything, and must be unregistered when the context is
403  * destroyed. */
ne_decompress_reader(ne_request * req,ne_accept_response acpt,ne_block_reader rdr,void * userdata)404 ne_decompress *ne_decompress_reader(ne_request *req, ne_accept_response acpt,
405 				    ne_block_reader rdr, void *userdata)
406 {
407     ne_decompress *ctx = ne_calloc(sizeof *ctx);
408 
409     ne_add_request_header(req, "Accept-Encoding", "gzip");
410 
411     ne_add_response_body_reader(req, gz_acceptor, gz_reader, ctx);
412 
413     ctx->reader = rdr;
414     ctx->userdata = userdata;
415     ctx->session = ne_get_session(req);
416     ctx->request = req;
417     ctx->acceptor = acpt;
418 
419     ne_hook_pre_send(ne_get_session(req), gz_pre_send, ctx);
420 
421     return ctx;
422 }
423 
/* Destroy the context: the pre-send hook is unregistered from the
 * session, any initialized inflate stream is released, and the
 * context itself is freed. */
void ne_decompress_destroy(ne_decompress *ctx)
{
    ne_unhook_pre_send(ctx->session, gz_pre_send, ctx);

    if (ctx->zstrinit) {
        inflateEnd(&ctx->zstr);
    }

    ne_free(ctx);
}
432 
433 #else /* !NE_HAVE_ZLIB */
434 
435 /* Pass-through interface present to provide ABI compatibility. */
436 
ne_decompress_reader(ne_request * req,ne_accept_response acpt,ne_block_reader rdr,void * userdata)437 ne_decompress *ne_decompress_reader(ne_request *req, ne_accept_response acpt,
438 				    ne_block_reader rdr, void *userdata)
439 {
440     ne_add_response_body_reader(req, acpt, rdr, userdata);
441     /* an arbitrary return value: don't confuse them by returning NULL. */
442     return (ne_decompress *)req;
443 }
444 
/* Built without zlib: ne_decompress_reader() allocated nothing, so
 * there is nothing to release here. */
void ne_decompress_destroy(ne_decompress *dc)
{
    (void)dc;
}
448 
449 #endif /* NE_HAVE_ZLIB */
450