1 /*
2  * Copyright (c) 2012 Tim Ruehsen
3  * Copyright (c) 2015-2021 Free Software Foundation, Inc.
4  *
5  * This file is part of libwget.
6  *
7  * Libwget is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU Lesser General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version.
11  *
12  * Libwget is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public License
18  * along with libwget.  If not, see <https://www.gnu.org/licenses/>.
19  *
20  *
21  * HTTP decompression routines
22  *
23  * Changelog
24  * 20.06.2012  Tim Ruehsen  created
25  * 31.12.2013  Tim Ruehsen  added XZ / LZMA decompression
26  * 02.01.2014  Tim Ruehsen  added BZIP2 decompression
27  * 24.02.2017  Tim Ruehsen  added Brotli decompression
28  *
29  * References
30  *   https://en.wikipedia.org/wiki/HTTP_compression
31  *   https://wiki.mozilla.org/LZMA2_Compression
32  *   https://groups.google.com/forum/#!topic/mozilla.dev.platform/CBhSPWs3HS8
33  *   https://github.com/google/brotli
34  */
35 
#include <config.h>

#include <limits.h>
#include <stdio.h>
#include <string.h>
40 
41 #ifdef WITH_ZLIB
42 #define ZLIB_CONST
43 #include <zlib.h>
44 #endif
45 
46 #ifdef WITH_BZIP2
47 #include <bzlib.h>
48 #endif
49 
50 #ifdef WITH_LZMA
51 #include <lzma.h>
52 #endif
53 
54 #ifdef WITH_BROTLIDEC
55 #include <brotli/decode.h>
56 #endif
57 
58 #ifdef WITH_ZSTD
59 #include <zstd.h>
60 #endif
61 
62 #ifdef WITH_LZIP
63 #include <lzlib.h>
64 #endif
65 
66 #include <wget.h>
67 #include "private.h"
68 
69 typedef int wget_decompressor_decompress_fn(wget_decompressor *dc, const char *src, size_t srclen);
70 typedef void wget_decompressor_exit_fn(wget_decompressor *dc);
71 
72 struct wget_decompressor_st {
73 #ifdef WITH_ZLIB
74 	z_stream
75 		z_strm;
76 #endif
77 #ifdef WITH_LZMA
78 	lzma_stream
79 		lzma_strm;
80 #endif
81 #ifdef WITH_BZIP2
82 	bz_stream
83 		bz_strm;
84 #endif
85 #ifdef WITH_BROTLIDEC
86 	BrotliDecoderState
87 		*brotli_strm;
88 #endif
89 #ifdef WITH_ZSTD
90 	ZSTD_DStream
91 		*zstd_strm;
92 #endif
93 #ifdef WITH_LZIP
94 	struct LZ_Decoder
95 		*lzip_strm;
96 #endif
97 
98 	wget_decompressor_sink_fn
99 		*sink; // decompressed data goes here
100 	wget_decompressor_error_handler
101 		*error_handler; // called on error
102 	wget_decompressor_decompress_fn
103 		*decompress;
104 	wget_decompressor_exit_fn
105 		*exit;
106 	void
107 		*context; // given to sink()
108 	wget_content_encoding
109 		encoding;
110 };
111 
112 #ifdef WITH_ZLIB
/*
 * Set up a zlib inflate stream for gzip encoded content.
 *
 * Returns 0 on success, -1 if zlib refused to initialize.
 */
static int gzip_init(z_stream *strm)
{
	// 15 is the base windowBits value, the addend selects the format:
	//   +16: decode gzip format only
	//   +32: decode gzip and zlib (autodetect)
	const int window_bits = 15 + 32;

	memset(strm, 0, sizeof(*strm));

	if (inflateInit2(strm, window_bits) == Z_OK)
		return 0;

	error_printf(_("Failed to init gzip decompression\n"));
	return -1;
}
126 
/*
 * Inflate one chunk of gzip/zlib data and pass the result to the sink.
 *
 * Output is produced through a fixed-size stack buffer, so the sink may be
 * called several times per input chunk. An empty chunk is forwarded to the
 * sink as an empty write without touching zlib.
 *
 * Returns 0 on success, -1 on a zlib error.
 */
static int gzip_decompress(wget_decompressor *dc, const char *src, size_t srclen)
{
	char outbuf[10240];
	z_stream *zs = &dc->z_strm;
	int rc;

	if (srclen == 0) {
		// nothing to inflate; calling into zlib here would only yield an error
		if (dc->sink)
			dc->sink(dc->context, "", 0);

		return 0;
	}

	zs->next_in = (const unsigned char *) src;
	zs->avail_in = (unsigned int) srclen;

	for (;;) {
		zs->next_out = (unsigned char *) outbuf;
		zs->avail_out = sizeof(outbuf);

		rc = inflate(zs, Z_SYNC_FLUSH);
		if (rc != Z_OK && rc != Z_STREAM_END)
			break;

		if (zs->avail_out < sizeof(outbuf) && dc->sink)
			dc->sink(dc->context, outbuf, sizeof(outbuf) - zs->avail_out);

		// go on only while zlib keeps filling the buffer completely
		if (rc != Z_OK || zs->avail_out)
			break;
	}

	switch (rc) {
	case Z_OK:
	case Z_BUF_ERROR: // not treated as a failure
	case Z_STREAM_END:
		return 0;
	default:
		error_printf(_("Failed to uncompress gzip stream (%d)\n"), rc);
		return -1;
	}
}
162 
/*
 * Release the zlib inflate state; a failure is only reported, not returned.
 */
static void gzip_exit(wget_decompressor *dc)
{
	const int rc = inflateEnd(&dc->z_strm);

	if (rc != Z_OK)
		error_printf(_("Failed to close gzip stream (%d)\n"), rc);
}
171 
/*
 * Set up a zlib inflate stream with zlib's default settings,
 * used for 'deflate' encoded content.
 *
 * Returns 0 on success, -1 if zlib refused to initialize.
 */
static int deflate_init(z_stream *strm)
{
	memset(strm, 0, sizeof(*strm));

	if (inflateInit(strm) == Z_OK)
		return 0;

	error_printf(_("Failed to init deflate decompression\n"));
	return -1;
}
183 #endif // WITH_ZLIB
184 
185 #ifdef WITH_LZMA
/*
 * Set up a liblzma decoder via lzma_auto_decoder().
 *
 * Returns 0 on success, -1 if liblzma refused to initialize.
 */
static int lzma_init(lzma_stream *strm)
{
	memset(strm, 0, sizeof(*strm));

	// alternative that was considered:
	//   lzma_stream_decoder(strm, UINT64_MAX, LZMA_TELL_UNSUPPORTED_CHECK | LZMA_CONCATENATED)
	if (lzma_auto_decoder(strm, UINT64_MAX, 0) == LZMA_OK)
		return 0;

	error_printf(_("Failed to init LZMA decompression\n"));
	return -1;
}
198 
/*
 * Decode one chunk of LZMA/XZ data and pass the result to the sink.
 *
 * Output is produced through a fixed-size stack buffer, so the sink may be
 * called several times per input chunk. An empty chunk is forwarded to the
 * sink as an empty write without touching liblzma.
 *
 * Returns 0 on success, -1 on a liblzma error.
 */
static int lzma_decompress(wget_decompressor *dc, const char *src, size_t srclen)
{
	char outbuf[10240];
	lzma_stream *ls = &dc->lzma_strm;
	int rc;

	if (srclen == 0) {
		// nothing to decode; calling into liblzma here would only yield an error
		if (dc->sink)
			dc->sink(dc->context, "", 0);

		return 0;
	}

	ls->next_in = (const uint8_t *) src;
	ls->avail_in = srclen;

	for (;;) {
		ls->next_out = (unsigned char *) outbuf;
		ls->avail_out = sizeof(outbuf);

		rc = lzma_code(ls, LZMA_RUN);
		if (rc != LZMA_OK && rc != LZMA_STREAM_END)
			break;

		if (ls->avail_out < sizeof(outbuf) && dc->sink)
			dc->sink(dc->context, outbuf, sizeof(outbuf) - ls->avail_out);

		// go on only while the decoder keeps filling the buffer completely
		if (rc != LZMA_OK || ls->avail_out)
			break;
	}

	if (rc != LZMA_OK && rc != LZMA_STREAM_END) {
		error_printf(_("Failed to uncompress LZMA stream (%d)\n"), rc);
		return -1;
	}

	return 0;
}
234 
/*
 * Release the liblzma decoder state.
 */
static void lzma_exit(wget_decompressor *dc)
{
	lzma_stream *ls = &dc->lzma_strm;

	lzma_end(ls);
}
239 #endif // WITH_LZMA
240 
241 #ifdef WITH_BROTLIDEC
/*
 * Create a Brotli decoder instance and store it in *strm.
 *
 * Returns 0 on success, -1 if no instance could be created
 * (*strm is NULL in that case).
 */
static int brotli_init(BrotliDecoderState **strm)
{
	BrotliDecoderState *state = BrotliDecoderCreateInstance(NULL, NULL, NULL);

	*strm = state;
	if (state)
		return 0;

	error_printf(_("Failed to init Brotli decompression\n"));
	return -1;
}
251 
/*
 * Decode one chunk of Brotli data and pass the result to the sink.
 *
 * Output is produced through a fixed-size stack buffer; the decoder is
 * called again as long as it asks for more output space. An empty chunk
 * is forwarded to the sink as an empty write without touching the decoder.
 *
 * Returns 0 on success (including "needs more input"), -1 on a decoder error.
 */
static int brotli_decompress(wget_decompressor *dc, const char *src, size_t srclen)
{
	uint8_t outbuf[10240];
	BrotliDecoderResult result;

	if (srclen == 0) {
		// nothing to decode; calling into the decoder here would only yield an error
		if (dc->sink)
			dc->sink(dc->context, "", 0);

		return 0;
	}

	BrotliDecoderState *state = dc->brotli_strm;
	const uint8_t *in = (const uint8_t *) src;
	size_t in_left = srclen;

	do {
		uint8_t *out = (unsigned char *) outbuf;
		size_t out_left = sizeof(outbuf);

		result = BrotliDecoderDecompressStream(state, &in_left, &in, &out_left, &out, NULL);

		// hand over whatever was produced, regardless of the result code
		if (out_left != sizeof(outbuf) && dc->sink)
			dc->sink(dc->context, (char *) outbuf, sizeof(outbuf) - out_left);
	} while (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT);

	switch (result) {
	case BROTLI_DECODER_RESULT_SUCCESS:
	case BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT:
		return 0;
	default:
		break;
	}

	BrotliDecoderErrorCode code = BrotliDecoderGetErrorCode(state);
	error_printf(_("Failed to uncompress Brotli stream (%u): %s\n"), result, BrotliDecoderErrorString(code));

	return -1;
}
292 
/*
 * Destroy the Brotli decoder instance.
 */
static void brotli_exit(wget_decompressor *dc)
{
	BrotliDecoderState *state = dc->brotli_strm;

	BrotliDecoderDestroyInstance(state);
}
297 #endif // WITH_BROTLIDEC
298 
299 #ifdef WITH_ZSTD
/*
 * Create and initialize a Zstandard decompression stream in *strm.
 *
 * Returns 0 on success, -1 on failure (*strm is NULL in that case).
 */
static int zstd_init(ZSTD_DStream **strm)
{
	ZSTD_DStream *ds = ZSTD_createDStream();

	*strm = ds;
	if (!ds) {
		error_printf(_("Failed to create Zstandard decompression\n"));
		return -1;
	}

	const size_t rc = ZSTD_initDStream(ds);
	if (!ZSTD_isError(rc))
		return 0;

	// initialization failed: report, then drop the half-built stream
	error_printf(_("Failed to init Zstandard decompression: %s\n"), ZSTD_getErrorName(rc));
	ZSTD_freeDStream(ds);
	*strm = NULL;
	return -1;
}
317 
/*
 * Decode one chunk of Zstandard data and pass the result to the sink.
 *
 * Output is produced through a fixed-size stack buffer, so the sink may be
 * called several times per input chunk. An empty chunk is forwarded to the
 * sink as an empty write without touching the decoder.
 *
 * Returns 0 on success, -1 on a decoder error.
 */
static int zstd_decompress(wget_decompressor *dc, const char *src, size_t srclen)
{
	ZSTD_DStream *strm;
	uint8_t dst[10240];
	int output_full;

	if (!srclen) {
		// special case to avoid decompress errors
		if (dc->sink)
			dc->sink(dc->context, "", 0);

		return 0;
	}

	strm = dc->zstd_strm;

	ZSTD_inBuffer input = { .src = src, .size = srclen, .pos = 0 };

	do {
		ZSTD_outBuffer output = { .dst = dst, .size = sizeof(dst), .pos = 0 };

		size_t rc = ZSTD_decompressStream(strm, &output , &input);
		if (ZSTD_isError(rc)) {
			error_printf(_("Failed to uncompress Zstandard stream: %s\n"), ZSTD_getErrorName(rc));
			return -1;
		}

		if (output.pos && dc->sink)
			dc->sink(dc->context, (char *)dst, output.pos);

		// A completely filled output buffer means the decoder may still hold
		// decoded data internally, even if all input has been consumed.
		// Keep calling until it has flushed everything, otherwise the tail
		// of the last chunk would never reach the sink.
		output_full = (output.pos == output.size);
	} while (input.pos < input.size || output_full);

	return 0;
}
350 
/*
 * Free the Zstandard decompression stream.
 */
static void zstd_exit(wget_decompressor *dc)
{
	ZSTD_DStream *ds = dc->zstd_strm;

	ZSTD_freeDStream(ds);
}
355 #endif // WITH_ZSTD
356 
357 #ifdef WITH_LZIP
lzip_init(struct LZ_Decoder ** strm)358 static int lzip_init(struct LZ_Decoder **strm)
359 {
360 	if ((*strm = LZ_decompress_open()) == NULL) {
361 		error_printf(_("Failed to create lzip decompression\n"));
362 		return -1;
363 	}
364 
365 	// docs say, we have to check the pointer
366 	enum LZ_Errno err;
367 	if ((err = LZ_decompress_errno(*strm)) != LZ_ok) {
368 		error_printf(_("Failed to create lzip decompression: %d %s\n"), (int) err, LZ_strerror(err));
369 		LZ_decompress_close(*strm);
370 		return -1;
371 	}
372 
373 	return 0;
374 }
375 
/*
 * Read all decoded data currently available from the lzip decoder and
 * pass it to the sink.
 *
 * Returns 0 on success, -1 if the decoder reports an error afterwards.
 */
static int lzip_drain(wget_decompressor *dc)
{
	struct LZ_Decoder *decoder = dc->lzip_strm;
	uint8_t outbuf[10240];

	for (;;) {
		int nread = LZ_decompress_read(decoder, outbuf, sizeof(outbuf));

		if (nread <= 0)
			break;

		if (dc->sink)
			dc->sink(dc->context, (char *) outbuf, nread);
	}

	const enum LZ_Errno err = LZ_decompress_errno(decoder);
	if (err == LZ_ok)
		return 0;

	error_printf(_("Failed to uncompress lzip stream: %d %s\n"), (int) err, LZ_strerror(err));
	return -1;
}
395 
/*
 * Feed one chunk of lzip data to the decoder and pass everything it
 * produces to the sink.
 *
 * The decoder may accept only part of the input per write, so writing
 * and draining alternate until the decoder takes no more bytes. An empty
 * chunk is forwarded to the sink as an empty write without touching the
 * decoder.
 *
 * Returns 0 on success, -1 on a decoder error.
 */
static int lzip_decompress(wget_decompressor *dc, const char *src, size_t srclen)
{
	struct LZ_Decoder *strm;
	size_t available_in;
	const uint8_t *next_in;
	int wbytes;

	if (!srclen) {
		// special case to avoid decompress errors
		if (dc->sink)
			dc->sink(dc->context, "", 0);

		return 0;
	}

	strm = dc->lzip_strm;
	next_in = (const uint8_t *) src;
	available_in = srclen;

	do {
		// LZ_decompress_write() takes an int size: cap each write so that a
		// huge srclen can neither be truncated nor turn negative
		int chunk = available_in > (size_t) INT_MAX ? INT_MAX : (int) available_in;

		wbytes = LZ_decompress_write(strm, next_in, chunk);
		if (wbytes < 0) {
			// never apply a negative count to the input pointer / counter
			enum LZ_Errno err = LZ_decompress_errno(strm);
			error_printf(_("Failed to uncompress lzip stream: %d %s\n"), (int) err, LZ_strerror(err));
			return -1;
		}

		next_in += wbytes;
		available_in -= (size_t) wbytes;

		if (lzip_drain(dc) < 0)
			return -1;
	} while (wbytes > 0);

	return 0;
}
426 
/*
 * Finish the lzip stream, deliver any data still pending in the decoder
 * and close it. Errors while draining are not propagated.
 */
static void lzip_exit(wget_decompressor *dc)
{
	struct LZ_Decoder *decoder = dc->lzip_strm;
	const int finish_rc = LZ_decompress_finish(decoder);

	if (finish_rc == 0)
		lzip_drain(dc);

	LZ_decompress_close(decoder);
}
436 #endif // WITH_LZIP
437 
438 #ifdef WITH_BZIP2
/*
 * Set up a bzip2 decompression stream.
 *
 * Returns 0 on success, -1 if libbz2 refused to initialize.
 */
static int bzip2_init(bz_stream *strm)
{
	memset(strm, 0, sizeof(*strm));

	if (BZ2_bzDecompressInit(strm, 0, 0) == BZ_OK)
		return 0;

	error_printf(_("Failed to init bzip2 decompression\n"));
	return -1;
}
450 
/*
 * Decode one chunk of bzip2 data and pass the result to the sink.
 *
 * Output is produced through a fixed-size stack buffer, so the sink may be
 * called several times per input chunk. An empty chunk is forwarded to the
 * sink as an empty write without touching libbz2.
 *
 * Returns 0 on success, -1 on a libbz2 error.
 */
static int bzip2_decompress(wget_decompressor *dc, const char *src, size_t srclen)
{
	char outbuf[10240];
	bz_stream *bs = &dc->bz_strm;
	int rc;

	if (srclen == 0) {
		// nothing to decode; calling into libbz2 here would only yield an error
		if (dc->sink)
			dc->sink(dc->context, "", 0);

		return 0;
	}

	bs->next_in = (char *) src;
	bs->avail_in = (unsigned int) srclen;

	for (;;) {
		bs->next_out = outbuf;
		bs->avail_out = sizeof(outbuf);

		rc = BZ2_bzDecompress(bs);
		if (rc != BZ_OK && rc != BZ_STREAM_END)
			break;

		if (bs->avail_out < sizeof(outbuf) && dc->sink)
			dc->sink(dc->context, outbuf, sizeof(outbuf) - bs->avail_out);

		// go on only while the decoder keeps filling the buffer completely
		if (rc != BZ_OK || bs->avail_out)
			break;
	}

	if (rc != BZ_OK && rc != BZ_STREAM_END) {
		error_printf(_("Failed to uncompress bzip2 stream (%d)\n"), rc);
		return -1;
	}

	return 0;
}
486 
/*
 * Release the bzip2 decompression state.
 */
static void bzip2_exit(wget_decompressor *dc)
{
	bz_stream *bs = &dc->bz_strm;

	BZ2_bzDecompressEnd(bs);
}
491 #endif // WITH_BZIP2
492 
/*
 * Pass-through "decoder": hands the input to the sink unchanged.
 * Always returns 0.
 */
static int identity(wget_decompressor *dc, const char *src, size_t srclen)
{
	wget_decompressor_sink_fn *deliver = dc->sink;

	if (deliver)
		deliver(dc->context, src, srclen);

	return 0;
}
500 
wget_decompress_open(wget_content_encoding encoding,wget_decompressor_sink_fn * sink,void * context)501 wget_decompressor *wget_decompress_open(
502 	wget_content_encoding encoding,
503 	wget_decompressor_sink_fn *sink,
504 	void *context)
505 {
506 	int rc = 0;
507 	wget_decompressor *dc = wget_calloc(1, sizeof(wget_decompressor));
508 
509 	if (!dc)
510 		return NULL;
511 
512 	if (encoding == wget_content_encoding_gzip) {
513 #ifdef WITH_ZLIB
514 		if ((rc = gzip_init(&dc->z_strm)) == 0) {
515 			dc->decompress = gzip_decompress;
516 			dc->exit = gzip_exit;
517 		}
518 #endif
519 	} else if (encoding == wget_content_encoding_deflate) {
520 #ifdef WITH_ZLIB
521 		if ((rc = deflate_init(&dc->z_strm)) == 0) {
522 			dc->decompress = gzip_decompress;
523 			dc->exit = gzip_exit;
524 		}
525 #endif
526 	} else if (encoding == wget_content_encoding_bzip2) {
527 #ifdef WITH_BZIP2
528 		if ((rc = bzip2_init(&dc->bz_strm)) == 0) {
529 			dc->decompress = bzip2_decompress;
530 			dc->exit = bzip2_exit;
531 		}
532 #endif
533 	} else if (encoding == wget_content_encoding_lzma || encoding == wget_content_encoding_xz) {
534 #ifdef WITH_LZMA
535 		if ((rc = lzma_init(&dc->lzma_strm)) == 0) {
536 			dc->decompress = lzma_decompress;
537 			dc->exit = lzma_exit;
538 		}
539 #endif
540 	} else if (encoding == wget_content_encoding_brotli) {
541 #ifdef WITH_BROTLIDEC
542 		if ((rc = brotli_init(&dc->brotli_strm)) == 0) {
543 			dc->decompress = brotli_decompress;
544 			dc->exit = brotli_exit;
545 		}
546 #endif
547 	} else if (encoding == wget_content_encoding_zstd) {
548 #ifdef WITH_ZSTD
549 		if ((rc = zstd_init(&dc->zstd_strm)) == 0) {
550 			dc->decompress = zstd_decompress;
551 			dc->exit = zstd_exit;
552 		}
553 #endif
554 	} else if (encoding == wget_content_encoding_lzip) {
555 #ifdef WITH_LZIP
556 		if ((rc = lzip_init(&dc->lzip_strm)) == 0) {
557 			dc->decompress = lzip_decompress;
558 			dc->exit = lzip_exit;
559 		}
560 #endif
561 	}
562 
563 	if (!dc->decompress) {
564 		// identity
565 		if (encoding != wget_content_encoding_identity)
566 			debug_printf("Falling back to Content-Encoding 'identity'\n");
567 		dc->decompress = identity;
568 	}
569 
570 	if (rc) {
571 		xfree(dc);
572 		return NULL;
573 	}
574 
575 	dc->encoding = encoding;
576 	dc->sink = sink;
577 	dc->context = context;
578 	return dc;
579 }
580 
/**
 * \param[in] dc Decompressor to release, may be NULL
 *
 * Runs the codec's cleanup routine, if one is set, and frees \p dc.
 * Does nothing when \p dc is NULL.
 */
void wget_decompress_close(wget_decompressor *dc)
{
	if (!dc)
		return;

	if (dc->exit)
		dc->exit(dc);

	xfree(dc);
}
589 
/**
 * \param[in] dc Decompressor, may be NULL
 * \param[in] src Input data
 * \param[in] srclen Number of bytes at \p src
 * \return Always 0
 *
 * Runs one chunk of input through the decompressor's decode routine.
 * A non-zero result of that routine is passed to the error handler, if one
 * is set; it is not reflected in the return value.
 */
int wget_decompress(wget_decompressor *dc, const char *src, size_t srclen)
{
	if (!dc)
		return 0;

	const int rc = dc->decompress(dc, src, srclen);

	if (rc != 0 && dc->error_handler)
		dc->error_handler(dc, rc);

	// failures are reported through the error handler only
	return 0;
}
601 
/**
 * \param[in] dc Decompressor, may be NULL
 * \param[in] error_handler Function to call when decoding fails, or NULL for none
 *
 * Sets the function that wget_decompress() calls with the decoder's
 * non-zero result code. Does nothing when \p dc is NULL.
 */
void wget_decompress_set_error_handler(wget_decompressor *dc, wget_decompressor_error_handler *error_handler)
{
	if (!dc)
		return;

	dc->error_handler = error_handler;
}
607 
/**
 * \param[in] dc Decompressor, may be NULL
 * \return The context pointer stored in \p dc, or NULL when \p dc is NULL
 */
void *wget_decompress_get_context(wget_decompressor *dc)
{
	if (!dc)
		return NULL;

	return dc->context;
}
612 
613 static char _encoding_names[][9] = {
614 	[wget_content_encoding_identity] = "identity",
615 	[wget_content_encoding_gzip] = "gzip",
616 	[wget_content_encoding_deflate] = "deflate",
617 	[wget_content_encoding_xz] = "xz",
618 	[wget_content_encoding_lzma] = "lzma",
619 	[wget_content_encoding_bzip2] = "bzip2",
620 	[wget_content_encoding_brotli] = "br",
621 	[wget_content_encoding_zstd] = "zstd",
622 	[wget_content_encoding_lzip] = "lzip",
623 };
624 
wget_content_encoding_by_name(const char * name)625 wget_content_encoding wget_content_encoding_by_name(const char *name)
626 {
627 	if (name) {
628 		for (wget_content_encoding it = 0; it < wget_content_encoding_max; it++) {
629 			if (!strcmp(_encoding_names[it], name))
630 				return it;
631 		}
632 
633 		if (!strcmp("none", name))
634 			return wget_content_encoding_identity;
635 	}
636 
637 	return wget_content_encoding_unknown;
638 }
639 
/**
 * \param[in] type Content encoding
 * \return The encoding's name, or NULL when \p type is outside
 *   the range 0 .. wget_content_encoding_max - 1
 */
const char *wget_content_encoding_to_name(wget_content_encoding type)
{
	if (type < 0 || type >= wget_content_encoding_max)
		return NULL;

	return _encoding_names[type];
}
647