/*
 * Copyright (c) 2012 Tim Ruehsen
 * Copyright (c) 2015-2021 Free Software Foundation, Inc.
 *
 * This file is part of libwget.
 *
 * Libwget is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Libwget is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with libwget. If not, see <https://www.gnu.org/licenses/>.
 *
 *
 * HTTP decompression routines
 *
 * Changelog
 * 20.06.2012 Tim Ruehsen created
 * 31.12.2013 Tim Ruehsen added XZ / LZMA decompression
 * 02.01.2014 Tim Ruehsen added BZIP2 decompression
 * 24.02.2017 Tim Ruehsen added Brotli decompression
 *
 * References
 * https://en.wikipedia.org/wiki/HTTP_compression
 * https://wiki.mozilla.org/LZMA2_Compression
 * https://groups.google.com/forum/#!topic/mozilla.dev.platform/CBhSPWs3HS8
 * https://github.com/google/brotli
 */
35
36 #include <config.h>
37
38 #include <stdio.h>
39 #include <string.h>
40
41 #ifdef WITH_ZLIB
42 #define ZLIB_CONST
43 #include <zlib.h>
44 #endif
45
46 #ifdef WITH_BZIP2
47 #include <bzlib.h>
48 #endif
49
50 #ifdef WITH_LZMA
51 #include <lzma.h>
52 #endif
53
54 #ifdef WITH_BROTLIDEC
55 #include <brotli/decode.h>
56 #endif
57
58 #ifdef WITH_ZSTD
59 #include <zstd.h>
60 #endif
61
62 #ifdef WITH_LZIP
63 #include <lzlib.h>
64 #endif
65
66 #include <wget.h>
67 #include "private.h"
68
69 typedef int wget_decompressor_decompress_fn(wget_decompressor *dc, const char *src, size_t srclen);
70 typedef void wget_decompressor_exit_fn(wget_decompressor *dc);
71
72 struct wget_decompressor_st {
73 #ifdef WITH_ZLIB
74 z_stream
75 z_strm;
76 #endif
77 #ifdef WITH_LZMA
78 lzma_stream
79 lzma_strm;
80 #endif
81 #ifdef WITH_BZIP2
82 bz_stream
83 bz_strm;
84 #endif
85 #ifdef WITH_BROTLIDEC
86 BrotliDecoderState
87 *brotli_strm;
88 #endif
89 #ifdef WITH_ZSTD
90 ZSTD_DStream
91 *zstd_strm;
92 #endif
93 #ifdef WITH_LZIP
94 struct LZ_Decoder
95 *lzip_strm;
96 #endif
97
98 wget_decompressor_sink_fn
99 *sink; // decompressed data goes here
100 wget_decompressor_error_handler
101 *error_handler; // called on error
102 wget_decompressor_decompress_fn
103 *decompress;
104 wget_decompressor_exit_fn
105 *exit;
106 void
107 *context; // given to sink()
108 wget_content_encoding
109 encoding;
110 };
111
112 #ifdef WITH_ZLIB
/*
 * Prepare a zlib stream for inflating gzip or zlib wrapped data.
 *
 * Returns 0 on success, -1 if zlib could not be initialized.
 */
static int gzip_init(z_stream *strm)
{
	// 15 is the base windowBits value; on top of that
	//   +16 would decode the gzip format only,
	//   +32 decodes gzip and zlib (autodetect)
	const int window_bits = 15 + 32;

	memset(strm, 0, sizeof(*strm));

	if (inflateInit2(strm, window_bits) == Z_OK)
		return 0;

	error_printf(_("Failed to init gzip decompression\n"));
	return -1;
}
126
/*
 * Inflate one chunk of compressed data with zlib and pass the result to the sink.
 *
 * dc:     decompressor owning the zlib stream (dc->z_strm)
 * src:    compressed input
 * srclen: number of bytes at src
 *
 * Returns 0 on success, -1 on a zlib error.
 */
static int gzip_decompress(wget_decompressor *dc, const char *src, size_t srclen)
{
	char outbuf[10240];
	z_stream *zs = &dc->z_strm;
	int rc;

	if (srclen == 0) {
		// special case to avoid decompress errors
		if (dc->sink)
			dc->sink(dc->context, "", 0);

		return 0;
	}

	zs->next_in = (const unsigned char *) src;
	zs->avail_in = (unsigned int) srclen;

	for (;;) {
		zs->next_out = (unsigned char *) outbuf;
		zs->avail_out = sizeof(outbuf);

		rc = inflate(zs, Z_SYNC_FLUSH);
		if (rc != Z_OK && rc != Z_STREAM_END)
			break;

		size_t produced = sizeof(outbuf) - zs->avail_out;
		if (produced && dc->sink)
			dc->sink(dc->context, outbuf, produced);

		// go on only while zlib keeps filling the output buffer completely
		if (rc == Z_STREAM_END || zs->avail_out)
			break;
	}

	switch (rc) {
	case Z_OK:
	case Z_BUF_ERROR: // not treated as a failure
	case Z_STREAM_END:
		return 0;
	default:
		error_printf(_("Failed to uncompress gzip stream (%d)\n"), rc);
		return -1;
	}
}
162
/*
 * Release the zlib stream of dc; a failure is only logged.
 */
static void gzip_exit(wget_decompressor *dc)
{
	int rc = inflateEnd(&dc->z_strm);

	if (rc != Z_OK)
		error_printf(_("Failed to close gzip stream (%d)\n"), rc);
}
171
/*
 * Prepare a zlib stream for the 'deflate' content encoding, using zlib's
 * default inflate settings.
 *
 * Returns 0 on success, -1 if zlib could not be initialized.
 */
static int deflate_init(z_stream *strm)
{
	memset(strm, 0, sizeof(*strm));

	if (inflateInit(strm) == Z_OK)
		return 0;

	error_printf(_("Failed to init deflate decompression\n"));
	return -1;
}
183 #endif // WITH_ZLIB
184
185 #ifdef WITH_LZMA
/*
 * Prepare a liblzma stream using the auto-detecting decoder.
 *
 * Alternative kept for reference:
 *   lzma_stream_decoder(strm, UINT64_MAX, LZMA_TELL_UNSUPPORTED_CHECK | LZMA_CONCATENATED)
 *
 * Returns 0 on success, -1 if liblzma could not be initialized.
 */
static int lzma_init(lzma_stream *strm)
{
	memset(strm, 0, sizeof(*strm));

	if (lzma_auto_decoder(strm, UINT64_MAX, 0) == LZMA_OK)
		return 0;

	error_printf(_("Failed to init LZMA decompression\n"));
	return -1;
}
198
/*
 * Decode one chunk of compressed data with liblzma and pass the result to the sink.
 *
 * dc:     decompressor owning the liblzma stream (dc->lzma_strm)
 * src:    compressed input
 * srclen: number of bytes at src
 *
 * Returns 0 on success, -1 on a liblzma error.
 */
static int lzma_decompress(wget_decompressor *dc, const char *src, size_t srclen)
{
	char outbuf[10240];
	lzma_stream *ls = &dc->lzma_strm;
	int rc;

	if (srclen == 0) {
		// special case to avoid decompress errors
		if (dc->sink)
			dc->sink(dc->context, "", 0);

		return 0;
	}

	ls->next_in = (const uint8_t *) src;
	ls->avail_in = srclen;

	for (;;) {
		ls->next_out = (unsigned char *) outbuf;
		ls->avail_out = sizeof(outbuf);

		rc = lzma_code(ls, LZMA_RUN);
		if (rc != LZMA_OK && rc != LZMA_STREAM_END)
			break;

		size_t produced = sizeof(outbuf) - ls->avail_out;
		if (produced && dc->sink)
			dc->sink(dc->context, outbuf, produced);

		// go on only while the decoder keeps filling the output buffer completely
		if (rc == LZMA_STREAM_END || ls->avail_out)
			break;
	}

	if (rc != LZMA_OK && rc != LZMA_STREAM_END) {
		error_printf(_("Failed to uncompress LZMA stream (%d)\n"), rc);
		return -1;
	}

	return 0;
}
234
/*
 * Release the liblzma stream of dc.
 */
static void lzma_exit(wget_decompressor *dc)
{
	lzma_stream *ls = &dc->lzma_strm;

	lzma_end(ls);
}
239 #endif // WITH_LZMA
240
241 #ifdef WITH_BROTLIDEC
/*
 * Create a Brotli decoder instance and store it in *strm
 * (*strm is NULL if the creation failed).
 *
 * Returns 0 on success, -1 on failure.
 */
static int brotli_init(BrotliDecoderState **strm)
{
	BrotliDecoderState *state = BrotliDecoderCreateInstance(NULL, NULL, NULL);

	*strm = state;
	if (state)
		return 0;

	error_printf(_("Failed to init Brotli decompression\n"));
	return -1;
}
251
/*
 * Decode one chunk of Brotli data and pass the result to the sink.
 *
 * dc:     decompressor owning the Brotli decoder (dc->brotli_strm)
 * src:    compressed input
 * srclen: number of bytes at src
 *
 * Returns 0 when the decoder finished or waits for more input, -1 on a decoder error.
 */
static int brotli_decompress(wget_decompressor *dc, const char *src, size_t srclen)
{
	uint8_t outbuf[10240];

	if (srclen == 0) {
		// special case to avoid decompress errors
		if (dc->sink)
			dc->sink(dc->context, "", 0);

		return 0;
	}

	BrotliDecoderState *state = dc->brotli_strm;
	const uint8_t *in = (const uint8_t *) src;
	size_t in_left = srclen;
	BrotliDecoderResult result;

	for (;;) {
		uint8_t *out = (unsigned char *) outbuf;
		size_t out_left = sizeof(outbuf);

		result = BrotliDecoderDecompressStream(state, &in_left, &in, &out_left, &out, NULL);

		// deliver whatever was produced, regardless of the result code
		size_t produced = sizeof(outbuf) - out_left;
		if (produced && dc->sink)
			dc->sink(dc->context, (char *) outbuf, produced);

		if (result != BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT)
			break;
	}

	switch (result) {
	case BROTLI_DECODER_RESULT_SUCCESS:
	case BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT:
		return 0;
	default:
		break;
	}

	BrotliDecoderErrorCode code = BrotliDecoderGetErrorCode(state);
	error_printf(_("Failed to uncompress Brotli stream (%u): %s\n"), result, BrotliDecoderErrorString(code));

	return -1;
}
292
/*
 * Destroy the Brotli decoder instance of dc.
 */
static void brotli_exit(wget_decompressor *dc)
{
	BrotliDecoderState *state = dc->brotli_strm;

	BrotliDecoderDestroyInstance(state);
}
297 #endif // WITH_BROTLIDEC
298
299 #ifdef WITH_ZSTD
/*
 * Create and initialize a Zstandard decompression stream and store it in *strm.
 * On failure *strm is NULL.
 *
 * Returns 0 on success, -1 on failure.
 */
static int zstd_init(ZSTD_DStream **strm)
{
	ZSTD_DStream *stream = ZSTD_createDStream();

	*strm = stream;
	if (!stream) {
		error_printf(_("Failed to create Zstandard decompression\n"));
		return -1;
	}

	size_t rc = ZSTD_initDStream(stream);
	if (!ZSTD_isError(rc))
		return 0;

	error_printf(_("Failed to init Zstandard decompression: %s\n"), ZSTD_getErrorName(rc));
	ZSTD_freeDStream(stream);
	*strm = NULL; // do not leave a dangling handle behind
	return -1;
}
317
/*
 * Decode one chunk of Zstandard data and pass the result to the sink.
 *
 * dc:     decompressor owning the Zstandard stream (dc->zstd_strm)
 * src:    compressed input
 * srclen: number of bytes at src
 *
 * Returns 0 on success, -1 on a Zstandard error.
 */
static int zstd_decompress(wget_decompressor *dc, const char *src, size_t srclen)
{
	ZSTD_DStream *strm;
	uint8_t dst[10240];
	size_t produced;

	if (!srclen) {
		// special case to avoid decompress errors
		if (dc->sink)
			dc->sink(dc->context, "", 0);

		return 0;
	}

	strm = dc->zstd_strm;

	ZSTD_inBuffer input = { .src = src, .size = srclen, .pos = 0 };

	do {
		ZSTD_outBuffer output = { .dst = dst, .size = sizeof(dst), .pos = 0 };

		size_t rc = ZSTD_decompressStream(strm, &output, &input);
		if (ZSTD_isError(rc)) {
			error_printf(_("Failed to uncompress Zstandard stream: %s\n"), ZSTD_getErrorName(rc));
			return -1;
		}

		produced = output.pos;
		if (produced && dc->sink)
			dc->sink(dc->context, (char *) dst, produced);

		// A completely filled output buffer means the decoder may still hold
		// decoded data internally, even if all input has been consumed.
		// Call it again to flush, otherwise that data would be lost when no
		// further input arrives.
	} while (input.pos < input.size || produced == sizeof(dst));

	return 0;
}
350
/*
 * Free the Zstandard decompression stream of dc.
 */
static void zstd_exit(wget_decompressor *dc)
{
	ZSTD_DStream *stream = dc->zstd_strm;

	ZSTD_freeDStream(stream);
}
355 #endif // WITH_ZSTD
356
357 #ifdef WITH_LZIP
lzip_init(struct LZ_Decoder ** strm)358 static int lzip_init(struct LZ_Decoder **strm)
359 {
360 if ((*strm = LZ_decompress_open()) == NULL) {
361 error_printf(_("Failed to create lzip decompression\n"));
362 return -1;
363 }
364
365 // docs say, we have to check the pointer
366 enum LZ_Errno err;
367 if ((err = LZ_decompress_errno(*strm)) != LZ_ok) {
368 error_printf(_("Failed to create lzip decompression: %d %s\n"), (int) err, LZ_strerror(err));
369 LZ_decompress_close(*strm);
370 return -1;
371 }
372
373 return 0;
374 }
375
/*
 * Read all decompressed data currently available from the lzip decoder and
 * pass it to the sink.
 *
 * Returns 0 on success, -1 if the decoder reports an error afterwards.
 */
static int lzip_drain(wget_decompressor *dc)
{
	uint8_t outbuf[10240];
	struct LZ_Decoder *decoder = dc->lzip_strm;

	for (;;) {
		int nread = LZ_decompress_read(decoder, outbuf, sizeof(outbuf));

		if (nread <= 0)
			break;

		if (dc->sink)
			dc->sink(dc->context, (char *) outbuf, nread);
	}

	enum LZ_Errno code = LZ_decompress_errno(decoder);
	if (code == LZ_ok)
		return 0;

	error_printf(_("Failed to uncompress lzip stream: %d %s\n"), (int) code, LZ_strerror(code));
	return -1;
}
395
/*
 * Feed one chunk of lzip data into the decoder and pass everything it
 * produces to the sink.
 *
 * dc:     decompressor owning the lzip decoder (dc->lzip_strm)
 * src:    compressed input
 * srclen: number of bytes at src
 *
 * Returns 0 on success, -1 on a decoder error.
 */
static int lzip_decompress(wget_decompressor *dc, const char *src, size_t srclen)
{
	struct LZ_Decoder *strm;
	int available_in;
	const uint8_t *next_in;
	int wbytes;

	if (!srclen) {
		// special case to avoid decompress errors
		if (dc->sink)
			dc->sink(dc->context, "", 0);

		return 0;
	}

	strm = dc->lzip_strm;
	next_in = (const uint8_t *) src;
	available_in = (int) srclen;

	do {
		wbytes = LZ_decompress_write(strm, next_in, available_in);
		if (wbytes < 0) {
			// The decoder refused the input. Never apply the negative count
			// to next_in / available_in. Let lzip_drain() deliver what is
			// already decoded and report the decoder's error, then fail.
			lzip_drain(dc);
			return -1;
		}

		next_in += wbytes;
		available_in -= wbytes;

		// make room in the decoder before writing the next part
		if (lzip_drain(dc) < 0)
			return -1;
	} while (wbytes > 0);

	return 0;
}
426
/*
 * Finish the lzip stream, deliver any remaining decompressed data to the
 * sink and close the decoder.
 */
static void lzip_exit(wget_decompressor *dc)
{
	struct LZ_Decoder *decoder = dc->lzip_strm;
	int finished = LZ_decompress_finish(decoder);

	// only drain if finishing the stream succeeded
	if (finished == 0)
		lzip_drain(dc);

	LZ_decompress_close(decoder);
}
436 #endif // WITH_LZIP
437
438 #ifdef WITH_BZIP2
/*
 * Prepare a libbz2 stream for decompression.
 *
 * Returns 0 on success, -1 if libbz2 could not be initialized.
 */
static int bzip2_init(bz_stream *strm)
{
	memset(strm, 0, sizeof(*strm));

	if (BZ2_bzDecompressInit(strm, 0, 0) == BZ_OK)
		return 0;

	error_printf(_("Failed to init bzip2 decompression\n"));
	return -1;
}
450
/*
 * Decode one chunk of bzip2 data and pass the result to the sink.
 *
 * dc:     decompressor owning the libbz2 stream (dc->bz_strm)
 * src:    compressed input
 * srclen: number of bytes at src
 *
 * Returns 0 on success, -1 on a libbz2 error.
 */
static int bzip2_decompress(wget_decompressor *dc, const char *src, size_t srclen)
{
	char outbuf[10240];
	bz_stream *bs = &dc->bz_strm;
	int rc;

	if (srclen == 0) {
		// special case to avoid decompress errors
		if (dc->sink)
			dc->sink(dc->context, "", 0);

		return 0;
	}

	// bz_stream wants a non-const input pointer
	bs->next_in = (char *) src;
	bs->avail_in = (unsigned int) srclen;

	for (;;) {
		bs->next_out = outbuf;
		bs->avail_out = sizeof(outbuf);

		rc = BZ2_bzDecompress(bs);
		if (rc != BZ_OK && rc != BZ_STREAM_END)
			break;

		size_t produced = sizeof(outbuf) - bs->avail_out;
		if (produced && dc->sink)
			dc->sink(dc->context, outbuf, produced);

		// go on only while the decoder keeps filling the output buffer completely
		if (rc == BZ_STREAM_END || bs->avail_out)
			break;
	}

	if (rc != BZ_OK && rc != BZ_STREAM_END) {
		error_printf(_("Failed to uncompress bzip2 stream (%d)\n"), rc);
		return -1;
	}

	return 0;
}
486
/*
 * Release the libbz2 stream of dc.
 */
static void bzip2_exit(wget_decompressor *dc)
{
	bz_stream *bs = &dc->bz_strm;

	BZ2_bzDecompressEnd(bs);
}
491 #endif // WITH_BZIP2
492
/*
 * Pass-through "decompressor": hands the input unchanged to the sink.
 *
 * Always returns 0.
 */
static int identity(wget_decompressor *dc, const char *src, size_t srclen)
{
	wget_decompressor_sink_fn *deliver = dc->sink;

	if (deliver)
		deliver(dc->context, src, srclen);

	return 0;
}
500
wget_decompress_open(wget_content_encoding encoding,wget_decompressor_sink_fn * sink,void * context)501 wget_decompressor *wget_decompress_open(
502 wget_content_encoding encoding,
503 wget_decompressor_sink_fn *sink,
504 void *context)
505 {
506 int rc = 0;
507 wget_decompressor *dc = wget_calloc(1, sizeof(wget_decompressor));
508
509 if (!dc)
510 return NULL;
511
512 if (encoding == wget_content_encoding_gzip) {
513 #ifdef WITH_ZLIB
514 if ((rc = gzip_init(&dc->z_strm)) == 0) {
515 dc->decompress = gzip_decompress;
516 dc->exit = gzip_exit;
517 }
518 #endif
519 } else if (encoding == wget_content_encoding_deflate) {
520 #ifdef WITH_ZLIB
521 if ((rc = deflate_init(&dc->z_strm)) == 0) {
522 dc->decompress = gzip_decompress;
523 dc->exit = gzip_exit;
524 }
525 #endif
526 } else if (encoding == wget_content_encoding_bzip2) {
527 #ifdef WITH_BZIP2
528 if ((rc = bzip2_init(&dc->bz_strm)) == 0) {
529 dc->decompress = bzip2_decompress;
530 dc->exit = bzip2_exit;
531 }
532 #endif
533 } else if (encoding == wget_content_encoding_lzma || encoding == wget_content_encoding_xz) {
534 #ifdef WITH_LZMA
535 if ((rc = lzma_init(&dc->lzma_strm)) == 0) {
536 dc->decompress = lzma_decompress;
537 dc->exit = lzma_exit;
538 }
539 #endif
540 } else if (encoding == wget_content_encoding_brotli) {
541 #ifdef WITH_BROTLIDEC
542 if ((rc = brotli_init(&dc->brotli_strm)) == 0) {
543 dc->decompress = brotli_decompress;
544 dc->exit = brotli_exit;
545 }
546 #endif
547 } else if (encoding == wget_content_encoding_zstd) {
548 #ifdef WITH_ZSTD
549 if ((rc = zstd_init(&dc->zstd_strm)) == 0) {
550 dc->decompress = zstd_decompress;
551 dc->exit = zstd_exit;
552 }
553 #endif
554 } else if (encoding == wget_content_encoding_lzip) {
555 #ifdef WITH_LZIP
556 if ((rc = lzip_init(&dc->lzip_strm)) == 0) {
557 dc->decompress = lzip_decompress;
558 dc->exit = lzip_exit;
559 }
560 #endif
561 }
562
563 if (!dc->decompress) {
564 // identity
565 if (encoding != wget_content_encoding_identity)
566 debug_printf("Falling back to Content-Encoding 'identity'\n");
567 dc->decompress = identity;
568 }
569
570 if (rc) {
571 xfree(dc);
572 return NULL;
573 }
574
575 dc->encoding = encoding;
576 dc->sink = sink;
577 dc->context = context;
578 return dc;
579 }
580
/*
 * Shut down the backend of dc (if it has a cleanup routine) and free dc.
 * Does nothing if dc is NULL.
 */
void wget_decompress_close(wget_decompressor *dc)
{
	if (!dc)
		return;

	if (dc->exit)
		dc->exit(dc);

	xfree(dc);
}
589
/*
 * Feed srclen bytes at src into the decompressor; the decompressed data is
 * delivered through the sink given to wget_decompress_open().
 *
 * If the backend fails and an error handler is set, the handler is called
 * with the backend's status.
 *
 * Returns 0 on success or if dc is NULL, otherwise the backend's non-zero
 * status (the backends in this file use -1).
 */
int wget_decompress(wget_decompressor *dc, const char *src, size_t srclen)
{
	if (!dc)
		return 0;

	int rc = dc->decompress(dc, src, srclen);

	if (rc && dc->error_handler)
		dc->error_handler(dc, rc);

	// report the failure to the caller as well instead of always claiming success
	return rc;
}
601
/*
 * Install the callback that wget_decompress() invokes when the backend
 * reports an error. Does nothing if dc is NULL.
 */
void wget_decompress_set_error_handler(wget_decompressor *dc, wget_decompressor_error_handler *error_handler)
{
	if (!dc)
		return;

	dc->error_handler = error_handler;
}
607
/*
 * Return the context pointer given to wget_decompress_open(),
 * or NULL if dc is NULL.
 */
void *wget_decompress_get_context(wget_decompressor *dc)
{
	if (!dc)
		return NULL;

	return dc->context;
}
612
613 static char _encoding_names[][9] = {
614 [wget_content_encoding_identity] = "identity",
615 [wget_content_encoding_gzip] = "gzip",
616 [wget_content_encoding_deflate] = "deflate",
617 [wget_content_encoding_xz] = "xz",
618 [wget_content_encoding_lzma] = "lzma",
619 [wget_content_encoding_bzip2] = "bzip2",
620 [wget_content_encoding_brotli] = "br",
621 [wget_content_encoding_zstd] = "zstd",
622 [wget_content_encoding_lzip] = "lzip",
623 };
624
wget_content_encoding_by_name(const char * name)625 wget_content_encoding wget_content_encoding_by_name(const char *name)
626 {
627 if (name) {
628 for (wget_content_encoding it = 0; it < wget_content_encoding_max; it++) {
629 if (!strcmp(_encoding_names[it], name))
630 return it;
631 }
632
633 if (!strcmp("none", name))
634 return wget_content_encoding_identity;
635 }
636
637 return wget_content_encoding_unknown;
638 }
639
/*
 * Map a wget_content_encoding value to its name.
 *
 * Returns NULL if type is outside the range [0, wget_content_encoding_max).
 */
const char *wget_content_encoding_to_name(wget_content_encoding type)
{
	if (type < 0 || type >= wget_content_encoding_max)
		return NULL;

	return _encoding_names[type];
}
647