1 /*-
2  * Copyright (c) 2010-2012 Michihiro NAKAJIMA
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "archive_platform.h"
27 
28 #ifdef HAVE_ERRNO_H
29 #include <errno.h>
30 #endif
31 #ifdef HAVE_LIMITS_H
32 #include <limits.h>
33 #endif
34 #ifdef HAVE_STDLIB_H
35 #include <stdlib.h>
36 #endif
37 #ifdef HAVE_STRING_H
38 #include <string.h>
39 #endif
40 #ifdef HAVE_ZLIB_H
41 #include <zlib.h>
42 #endif
43 
44 #include "archive.h"
45 #include "archive_entry.h"
46 #include "archive_entry_locale.h"
47 #include "archive_private.h"
48 #include "archive_read_private.h"
49 #include "archive_endian.h"
50 
51 
52 struct lzx_dec {
53 	/* Decoding status. */
54 	int     		 state;
55 
56 	/*
57 	 * Window to see last decoded data, from 32KBi to 2MBi.
58 	 */
59 	int			 w_size;
60 	int			 w_mask;
61 	/* Window buffer, which is a loop buffer. */
62 	unsigned char		*w_buff;
63 	/* The insert position to the window. */
64 	int			 w_pos;
65 	/* The position where we can copy decoded code from the window. */
66 	int     		 copy_pos;
67 	/* The length how many bytes we can copy decoded code from
68 	 * the window. */
69 	int     		 copy_len;
70 	/* Translation reversal for x86 processor CALL byte sequence(E8).
71 	 * This is used for LZX only. */
72 	uint32_t		 translation_size;
73 	char			 translation;
74 	char			 block_type;
75 #define VERBATIM_BLOCK		1
76 #define ALIGNED_OFFSET_BLOCK	2
77 #define UNCOMPRESSED_BLOCK	3
78 	size_t			 block_size;
79 	size_t			 block_bytes_avail;
80 	/* Repeated offset. */
81 	int			 r0, r1, r2;
82 	unsigned char		 rbytes[4];
83 	int			 rbytes_avail;
84 	int			 length_header;
85 	int			 position_slot;
86 	int			 offset_bits;
87 
88 	struct lzx_pos_tbl {
89 		int		 base;
90 		int		 footer_bits;
91 	}			*pos_tbl;
92 	/*
93 	 * Bit stream reader.
94 	 */
95 	struct lzx_br {
96 #define CACHE_TYPE		uint64_t
97 #define CACHE_BITS		(8 * sizeof(CACHE_TYPE))
98 	 	/* Cache buffer. */
99 		CACHE_TYPE	 cache_buffer;
100 		/* Indicates how many bits avail in cache_buffer. */
101 		int		 cache_avail;
102 		unsigned char	 odd;
103 		char		 have_odd;
104 	} br;
105 
106 	/*
107 	 * Huffman coding.
108 	 */
109 	struct huffman {
110 		int		 len_size;
111 		int		 freq[17];
112 		unsigned char	*bitlen;
113 
114 		/*
115 		 * Use a index table. It's faster than searching a huffman
116 		 * coding tree, which is a binary tree. But a use of a large
117 		 * index table causes L1 cache read miss many times.
118 		 */
119 #define HTBL_BITS	10
120 		int		 max_bits;
121 		int		 shift_bits;
122 		int		 tbl_bits;
123 		int		 tree_used;
124 		int		 tree_avail;
125 		/* Direct access table. */
126 		uint16_t	*tbl;
127 		/* Binary tree table for extra bits over the direct access. */
128 		struct htree_t {
129 			uint16_t left;
130 			uint16_t right;
131 		}		*tree;
132 	}			 at, lt, mt, pt;
133 
134 	int			 loop;
135 	int			 error;
136 };
137 
138 static const int slots[] = {
139 	30, 32, 34, 36, 38, 42, 50, 66, 98, 162, 290
140 };
141 #define SLOT_BASE	15
142 #define SLOT_MAX	21/*->25*/
143 
144 struct lzx_stream {
145 	const unsigned char	*next_in;
146 	int64_t			 avail_in;
147 	int64_t			 total_in;
148 	unsigned char		*next_out;
149 	int64_t			 avail_out;
150 	int64_t			 total_out;
151 	struct lzx_dec		*ds;
152 };
153 
154 /*
155  * Cabinet file definitions.
156  */
157 /* CFHEADER offset */
158 #define CFHEADER_signature	0
159 #define CFHEADER_cbCabinet	8
160 #define CFHEADER_coffFiles	16
161 #define CFHEADER_versionMinor	24
162 #define CFHEADER_versionMajor	25
163 #define CFHEADER_cFolders	26
164 #define CFHEADER_cFiles		28
165 #define CFHEADER_flags		30
166 #define CFHEADER_setID		32
167 #define CFHEADER_iCabinet	34
168 #define CFHEADER_cbCFHeader	36
169 #define CFHEADER_cbCFFolder	38
170 #define CFHEADER_cbCFData	39
171 
172 /* CFFOLDER offset */
173 #define CFFOLDER_coffCabStart	0
174 #define CFFOLDER_cCFData	4
175 #define CFFOLDER_typeCompress	6
176 #define CFFOLDER_abReserve	8
177 
178 /* CFFILE offset */
179 #define CFFILE_cbFile		0
180 #define CFFILE_uoffFolderStart	4
181 #define CFFILE_iFolder		8
182 #define CFFILE_date_time	10
183 #define CFFILE_attribs		14
184 
185 /* CFDATA offset */
186 #define CFDATA_csum		0
187 #define CFDATA_cbData		4
188 #define CFDATA_cbUncomp		6
189 
190 static const char *compression_name[] = {
191 	"NONE",
192 	"MSZIP",
193 	"Quantum",
194 	"LZX",
195 };
196 
197 struct cfdata {
198 	/* Sum value of this CFDATA. */
199 	uint32_t		 sum;
200 	uint16_t		 compressed_size;
201 	uint16_t		 compressed_bytes_remaining;
202 	uint16_t		 uncompressed_size;
203 	uint16_t		 uncompressed_bytes_remaining;
204 	/* To know how many bytes we have decompressed. */
205 	uint16_t		 uncompressed_avail;
206 	/* Offset from the beginning of compressed data of this CFDATA */
207 	uint16_t		 read_offset;
208 	int64_t			 unconsumed;
209 	/* To keep memory image of this CFDATA to compute the sum. */
210 	size_t			 memimage_size;
211 	unsigned char		*memimage;
212 	/* Result of calculation of sum. */
213 	uint32_t		 sum_calculated;
214 	unsigned char		 sum_extra[4];
215 	int			 sum_extra_avail;
216 	const void		*sum_ptr;
217 };
218 
219 struct cffolder {
220 	uint32_t		 cfdata_offset_in_cab;
221 	uint16_t		 cfdata_count;
222 	uint16_t		 comptype;
223 #define COMPTYPE_NONE		0x0000
224 #define COMPTYPE_MSZIP		0x0001
225 #define COMPTYPE_QUANTUM	0x0002
226 #define COMPTYPE_LZX		0x0003
227 	uint16_t		 compdata;
228 	const char		*compname;
229 	/* At the time reading CFDATA */
230 	struct cfdata		 cfdata;
231 	int			 cfdata_index;
232 	/* Flags to mark progress of decompression. */
233 	char			 decompress_init;
234 };
235 
236 struct cffile {
237 	uint32_t		 uncompressed_size;
238 	uint32_t		 offset;
239 	time_t			 mtime;
240 	uint16_t	 	 folder;
241 #define iFoldCONTINUED_FROM_PREV	0xFFFD
242 #define iFoldCONTINUED_TO_NEXT		0xFFFE
243 #define iFoldCONTINUED_PREV_AND_NEXT	0xFFFF
244 	unsigned char		 attr;
245 #define ATTR_RDONLY		0x01
246 #define ATTR_NAME_IS_UTF	0x80
247 	struct archive_string 	 pathname;
248 };
249 
250 struct cfheader {
251 	/* Total bytes of all file size in a Cabinet. */
252 	uint32_t		 total_bytes;
253 	uint32_t		 files_offset;
254 	uint16_t		 folder_count;
255 	uint16_t		 file_count;
256 	uint16_t		 flags;
257 #define PREV_CABINET	0x0001
258 #define NEXT_CABINET	0x0002
259 #define RESERVE_PRESENT	0x0004
260 	uint16_t		 setid;
261 	uint16_t		 cabinet;
262 	/* Version number. */
263 	unsigned char		 major;
264 	unsigned char		 minor;
265 	unsigned char		 cffolder;
266 	unsigned char		 cfdata;
267 	/* All folders in a cabinet. */
268 	struct cffolder		*folder_array;
269 	/* All files in a cabinet. */
270 	struct cffile		*file_array;
271 	int			 file_index;
272 };
273 
274 struct cab {
275 	/* entry_bytes_remaining is the number of bytes we expect.	    */
276 	int64_t			 entry_offset;
277 	int64_t			 entry_bytes_remaining;
278 	int64_t			 entry_unconsumed;
279 	int64_t			 entry_compressed_bytes_read;
280 	int64_t			 entry_uncompressed_bytes_read;
281 	struct cffolder		*entry_cffolder;
282 	struct cffile		*entry_cffile;
283 	struct cfdata		*entry_cfdata;
284 
285 	/* Offset from beginning of a cabinet file. */
286 	int64_t			 cab_offset;
287 	struct cfheader		 cfheader;
288 	struct archive_wstring	 ws;
289 
290 	/* Flag to mark progress that an archive was read their first header.*/
291 	char			 found_header;
292 	char			 end_of_archive;
293 	char			 end_of_entry;
294 	char			 end_of_entry_cleanup;
295 	char			 read_data_invoked;
296 	int64_t			 bytes_skipped;
297 
298 	unsigned char		*uncompressed_buffer;
299 	size_t			 uncompressed_buffer_size;
300 
301 	int			 init_default_conversion;
302 	struct archive_string_conv *sconv;
303 	struct archive_string_conv *sconv_default;
304 	struct archive_string_conv *sconv_utf8;
305 	char			 format_name[64];
306 
307 #ifdef HAVE_ZLIB_H
308 	z_stream		 stream;
309 	char			 stream_valid;
310 #endif
311 	struct lzx_stream	 xstrm;
312 };
313 
314 static int	archive_read_format_cab_bid(struct archive_read *, int);
315 static int	archive_read_format_cab_options(struct archive_read *,
316 		    const char *, const char *);
317 static int	archive_read_format_cab_read_header(struct archive_read *,
318 		    struct archive_entry *);
319 static int	archive_read_format_cab_read_data(struct archive_read *,
320 		    const void **, size_t *, int64_t *);
321 static int	archive_read_format_cab_read_data_skip(struct archive_read *);
322 static int	archive_read_format_cab_cleanup(struct archive_read *);
323 
324 static int	cab_skip_sfx(struct archive_read *);
325 static time_t	cab_dos_time(const unsigned char *);
326 static int	cab_read_data(struct archive_read *, const void **,
327 		    size_t *, int64_t *);
328 static int	cab_read_header(struct archive_read *);
329 static uint32_t cab_checksum_cfdata_4(const void *, size_t bytes, uint32_t);
330 static uint32_t cab_checksum_cfdata(const void *, size_t bytes, uint32_t);
331 static void	cab_checksum_update(struct archive_read *, size_t);
332 static int	cab_checksum_finish(struct archive_read *);
333 static int	cab_next_cfdata(struct archive_read *);
334 static const void *cab_read_ahead_cfdata(struct archive_read *, ssize_t *);
335 static const void *cab_read_ahead_cfdata_none(struct archive_read *, ssize_t *);
336 static const void *cab_read_ahead_cfdata_deflate(struct archive_read *,
337 		    ssize_t *);
338 static const void *cab_read_ahead_cfdata_lzx(struct archive_read *,
339 		    ssize_t *);
340 static int64_t	cab_consume_cfdata(struct archive_read *, int64_t);
341 static int64_t	cab_minimum_consume_cfdata(struct archive_read *, int64_t);
342 static int	lzx_decode_init(struct lzx_stream *, int);
343 static int	lzx_read_blocks(struct lzx_stream *, int);
344 static int	lzx_decode_blocks(struct lzx_stream *, int);
345 static void	lzx_decode_free(struct lzx_stream *);
346 static void	lzx_translation(struct lzx_stream *, void *, size_t, uint32_t);
347 static void	lzx_cleanup_bitstream(struct lzx_stream *);
348 static int	lzx_decode(struct lzx_stream *, int);
349 static int	lzx_read_pre_tree(struct lzx_stream *);
350 static int	lzx_read_bitlen(struct lzx_stream *, struct huffman *, int);
351 static int	lzx_huffman_init(struct huffman *, size_t, int);
352 static void	lzx_huffman_free(struct huffman *);
353 static int	lzx_make_huffman_table(struct huffman *);
354 static inline int lzx_decode_huffman(struct huffman *, unsigned);
355 static int	lzx_decode_huffman_tree(struct huffman *, unsigned, int);
356 
357 
358 int
archive_read_support_format_cab(struct archive * _a)359 archive_read_support_format_cab(struct archive *_a)
360 {
361 	struct archive_read *a = (struct archive_read *)_a;
362 	struct cab *cab;
363 	int r;
364 
365 	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
366 	    ARCHIVE_STATE_NEW, "archive_read_support_format_cab");
367 
368 	cab = (struct cab *)calloc(1, sizeof(*cab));
369 	if (cab == NULL) {
370 		archive_set_error(&a->archive, ENOMEM,
371 		    "Can't allocate CAB data");
372 		return (ARCHIVE_FATAL);
373 	}
374 	archive_string_init(&cab->ws);
375 	archive_wstring_ensure(&cab->ws, 256);
376 
377 	r = __archive_read_register_format(a,
378 	    cab,
379 	    "cab",
380 	    archive_read_format_cab_bid,
381 	    archive_read_format_cab_options,
382 	    archive_read_format_cab_read_header,
383 	    archive_read_format_cab_read_data,
384 	    archive_read_format_cab_read_data_skip,
385 	    NULL,
386 	    archive_read_format_cab_cleanup,
387 	    NULL,
388 	    NULL);
389 
390 	if (r != ARCHIVE_OK)
391 		free(cab);
392 	return (ARCHIVE_OK);
393 }
394 
395 static int
find_cab_magic(const char * p)396 find_cab_magic(const char *p)
397 {
398 	switch (p[4]) {
399 	case 0:
400 		/*
401 		 * Note: Self-Extraction program has 'MSCF' string in their
402 		 * program. If we were finding 'MSCF' string only, we got
403 		 * wrong place for Cabinet header, thus, we have to check
404 		 * following four bytes which are reserved and must be set
405 		 * to zero.
406 		 */
407 		if (memcmp(p, "MSCF\0\0\0\0", 8) == 0)
408 			return 0;
409 		return 5;
410 	case 'F': return 1;
411 	case 'C': return 2;
412 	case 'S': return 3;
413 	case 'M': return 4;
414 	default:  return 5;
415 	}
416 }
417 
418 static int
archive_read_format_cab_bid(struct archive_read * a,int best_bid)419 archive_read_format_cab_bid(struct archive_read *a, int best_bid)
420 {
421 	const char *p;
422 	ssize_t bytes_avail, offset, window;
423 
424 	/* If there's already a better bid than we can ever
425 	   make, don't bother testing. */
426 	if (best_bid > 64)
427 		return (-1);
428 
429 	if ((p = __archive_read_ahead(a, 8, NULL)) == NULL)
430 		return (-1);
431 
432 	if (memcmp(p, "MSCF\0\0\0\0", 8) == 0)
433 		return (64);
434 
435 	/*
436 	 * Attempt to handle self-extracting archives
437 	 * by noting a PE header and searching forward
438 	 * up to 128k for a 'MSCF' marker.
439 	 */
440 	if (p[0] == 'M' && p[1] == 'Z') {
441 		offset = 0;
442 		window = 4096;
443 		while (offset < (1024 * 128)) {
444 			const char *h = __archive_read_ahead(a, offset + window,
445 			    &bytes_avail);
446 			if (h == NULL) {
447 				/* Remaining bytes are less than window. */
448 				window >>= 1;
449 				if (window < 128)
450 					return (0);
451 				continue;
452 			}
453 			p = h + offset;
454 			while (p + 8 < h + bytes_avail) {
455 				int next;
456 				if ((next = find_cab_magic(p)) == 0)
457 					return (64);
458 				p += next;
459 			}
460 			offset = p - h;
461 		}
462 	}
463 	return (0);
464 }
465 
466 static int
archive_read_format_cab_options(struct archive_read * a,const char * key,const char * val)467 archive_read_format_cab_options(struct archive_read *a,
468     const char *key, const char *val)
469 {
470 	struct cab *cab;
471 	int ret = ARCHIVE_FAILED;
472 
473 	cab = (struct cab *)(a->format->data);
474 	if (strcmp(key, "hdrcharset")  == 0) {
475 		if (val == NULL || val[0] == 0)
476 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
477 			    "cab: hdrcharset option needs a character-set name");
478 		else {
479 			cab->sconv = archive_string_conversion_from_charset(
480 			    &a->archive, val, 0);
481 			if (cab->sconv != NULL)
482 				ret = ARCHIVE_OK;
483 			else
484 				ret = ARCHIVE_FATAL;
485 		}
486 		return (ret);
487 	}
488 
489 	/* Note: The "warn" return is just to inform the options
490 	 * supervisor that we didn't handle it.  It will generate
491 	 * a suitable error if no one used this option. */
492 	return (ARCHIVE_WARN);
493 }
494 
495 static int
cab_skip_sfx(struct archive_read * a)496 cab_skip_sfx(struct archive_read *a)
497 {
498 	const char *p, *q;
499 	size_t skip;
500 	ssize_t bytes, window;
501 
502 	window = 4096;
503 	for (;;) {
504 		const char *h = __archive_read_ahead(a, window, &bytes);
505 		if (h == NULL) {
506 			/* Remaining size are less than window. */
507 			window >>= 1;
508 			if (window < 128) {
509 				archive_set_error(&a->archive,
510 				    ARCHIVE_ERRNO_FILE_FORMAT,
511 				    "Couldn't find out CAB header");
512 				return (ARCHIVE_FATAL);
513 			}
514 			continue;
515 		}
516 		p = h;
517 		q = p + bytes;
518 
519 		/*
520 		 * Scan ahead until we find something that looks
521 		 * like the cab header.
522 		 */
523 		while (p + 8 < q) {
524 			int next;
525 			if ((next = find_cab_magic(p)) == 0) {
526 				skip = p - h;
527 				__archive_read_consume(a, skip);
528 				return (ARCHIVE_OK);
529 			}
530 			p += next;
531 		}
532 		skip = p - h;
533 		__archive_read_consume(a, skip);
534 	}
535 }
536 
537 static int
truncated_error(struct archive_read * a)538 truncated_error(struct archive_read *a)
539 {
540 	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
541 	    "Truncated CAB header");
542 	return (ARCHIVE_FATAL);
543 }
544 
545 static ssize_t
cab_strnlen(const unsigned char * p,size_t maxlen)546 cab_strnlen(const unsigned char *p, size_t maxlen)
547 {
548 	size_t i;
549 
550 	for (i = 0; i <= maxlen; i++) {
551 		if (p[i] == 0)
552 			break;
553 	}
554 	if (i > maxlen)
555 		return (-1);/* invalid */
556 	return ((ssize_t)i);
557 }
558 
559 /* Read bytes as much as remaining. */
560 static const void *
cab_read_ahead_remaining(struct archive_read * a,size_t min,ssize_t * avail)561 cab_read_ahead_remaining(struct archive_read *a, size_t min, ssize_t *avail)
562 {
563 	const void *p;
564 
565 	while (min > 0) {
566 		p = __archive_read_ahead(a, min, avail);
567 		if (p != NULL)
568 			return (p);
569 		min--;
570 	}
571 	return (NULL);
572 }
573 
574 /* Convert a path separator '\' -> '/' */
575 static int
cab_convert_path_separator_1(struct archive_string * fn,unsigned char attr)576 cab_convert_path_separator_1(struct archive_string *fn, unsigned char attr)
577 {
578 	size_t i;
579 	int mb;
580 
581 	/* Easy check if we have '\' in multi-byte string. */
582 	mb = 0;
583 	for (i = 0; i < archive_strlen(fn); i++) {
584 		if (fn->s[i] == '\\') {
585 			if (mb) {
586 				/* This may be second byte of multi-byte
587 				 * character. */
588 				break;
589 			}
590 			fn->s[i] = '/';
591 			mb = 0;
592 		} else if ((fn->s[i] & 0x80) && !(attr & ATTR_NAME_IS_UTF))
593 			mb = 1;
594 		else
595 			mb = 0;
596 	}
597 	if (i == archive_strlen(fn))
598 		return (0);
599 	return (-1);
600 }
601 
602 /*
603  * Replace a character '\' with '/' in wide character.
604  */
605 static void
cab_convert_path_separator_2(struct cab * cab,struct archive_entry * entry)606 cab_convert_path_separator_2(struct cab *cab, struct archive_entry *entry)
607 {
608 	const wchar_t *wp;
609 	size_t i;
610 
611 	/* If a conversion to wide character failed, force the replacement. */
612 	if ((wp = archive_entry_pathname_w(entry)) != NULL) {
613 		archive_wstrcpy(&(cab->ws), wp);
614 		for (i = 0; i < archive_strlen(&(cab->ws)); i++) {
615 			if (cab->ws.s[i] == L'\\')
616 				cab->ws.s[i] = L'/';
617 		}
618 		archive_entry_copy_pathname_w(entry, cab->ws.s);
619 	}
620 }
621 
622 /*
623  * Read CFHEADER, CFFOLDER and CFFILE.
624  */
625 static int
cab_read_header(struct archive_read * a)626 cab_read_header(struct archive_read *a)
627 {
628 	const unsigned char *p;
629 	struct cab *cab;
630 	struct cfheader *hd;
631 	size_t bytes, used;
632 	ssize_t len;
633 	int64_t skip;
634 	int err, i;
635 	int cur_folder, prev_folder;
636 	uint32_t offset32;
637 
638 	a->archive.archive_format = ARCHIVE_FORMAT_CAB;
639 	if (a->archive.archive_format_name == NULL)
640 		a->archive.archive_format_name = "CAB";
641 
642 	if ((p = __archive_read_ahead(a, 42, NULL)) == NULL)
643 		return (truncated_error(a));
644 
645 	cab = (struct cab *)(a->format->data);
646 	if (cab->found_header == 0 &&
647 	    p[0] == 'M' && p[1] == 'Z') {
648 		/* This is an executable?  Must be self-extracting... */
649 		err = cab_skip_sfx(a);
650 		if (err < ARCHIVE_WARN)
651 			return (err);
652 
653 		/* Re-read header after processing the SFX. */
654 		if ((p = __archive_read_ahead(a, 42, NULL)) == NULL)
655 			return (truncated_error(a));
656 	}
657 
658 	cab->cab_offset = 0;
659 	/*
660 	 * Read CFHEADER.
661 	 */
662 	hd = &cab->cfheader;
663 	if (p[CFHEADER_signature+0] != 'M' || p[CFHEADER_signature+1] != 'S' ||
664 	    p[CFHEADER_signature+2] != 'C' || p[CFHEADER_signature+3] != 'F') {
665 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
666 		    "Couldn't find out CAB header");
667 		return (ARCHIVE_FATAL);
668 	}
669 	hd->total_bytes = archive_le32dec(p + CFHEADER_cbCabinet);
670 	hd->files_offset = archive_le32dec(p + CFHEADER_coffFiles);
671 	hd->minor = p[CFHEADER_versionMinor];
672 	hd->major = p[CFHEADER_versionMajor];
673 	hd->folder_count = archive_le16dec(p + CFHEADER_cFolders);
674 	if (hd->folder_count == 0)
675 		goto invalid;
676 	hd->file_count = archive_le16dec(p + CFHEADER_cFiles);
677 	if (hd->file_count == 0)
678 		goto invalid;
679 	hd->flags = archive_le16dec(p + CFHEADER_flags);
680 	hd->setid = archive_le16dec(p + CFHEADER_setID);
681 	hd->cabinet = archive_le16dec(p + CFHEADER_iCabinet);
682 	used = CFHEADER_iCabinet + 2;
683 	if (hd->flags & RESERVE_PRESENT) {
684 		uint16_t cfheader;
685 		cfheader = archive_le16dec(p + CFHEADER_cbCFHeader);
686 		if (cfheader > 60000U)
687 			goto invalid;
688 		hd->cffolder = p[CFHEADER_cbCFFolder];
689 		hd->cfdata = p[CFHEADER_cbCFData];
690 		used += 4;/* cbCFHeader, cbCFFolder and cbCFData */
691 		used += cfheader;/* abReserve */
692 	} else
693 		hd->cffolder = 0;/* Avoid compiling warning. */
694 	if (hd->flags & PREV_CABINET) {
695 		/* How many bytes are used for szCabinetPrev. */
696 		if ((p = __archive_read_ahead(a, used+256, NULL)) == NULL)
697 			return (truncated_error(a));
698 		if ((len = cab_strnlen(p + used, 255)) <= 0)
699 			goto invalid;
700 		used += len + 1;
701 		/* How many bytes are used for szDiskPrev. */
702 		if ((p = __archive_read_ahead(a, used+256, NULL)) == NULL)
703 			return (truncated_error(a));
704 		if ((len = cab_strnlen(p + used, 255)) <= 0)
705 			goto invalid;
706 		used += len + 1;
707 	}
708 	if (hd->flags & NEXT_CABINET) {
709 		/* How many bytes are used for szCabinetNext. */
710 		if ((p = __archive_read_ahead(a, used+256, NULL)) == NULL)
711 			return (truncated_error(a));
712 		if ((len = cab_strnlen(p + used, 255)) <= 0)
713 			goto invalid;
714 		used += len + 1;
715 		/* How many bytes are used for szDiskNext. */
716 		if ((p = __archive_read_ahead(a, used+256, NULL)) == NULL)
717 			return (truncated_error(a));
718 		if ((len = cab_strnlen(p + used, 255)) <= 0)
719 			goto invalid;
720 		used += len + 1;
721 	}
722 	__archive_read_consume(a, used);
723 	cab->cab_offset += used;
724 	used = 0;
725 
726 	/*
727 	 * Read CFFOLDER.
728 	 */
729 	hd->folder_array = (struct cffolder *)calloc(
730 	    hd->folder_count, sizeof(struct cffolder));
731 	if (hd->folder_array == NULL)
732 		goto nomem;
733 
734 	bytes = 8;
735 	if (hd->flags & RESERVE_PRESENT)
736 		bytes += hd->cffolder;
737 	bytes *= hd->folder_count;
738 	if ((p = __archive_read_ahead(a, bytes, NULL)) == NULL)
739 		return (truncated_error(a));
740 	offset32 = 0;
741 	for (i = 0; i < hd->folder_count; i++) {
742 		struct cffolder *folder = &(hd->folder_array[i]);
743 		folder->cfdata_offset_in_cab =
744 		    archive_le32dec(p + CFFOLDER_coffCabStart);
745 		folder->cfdata_count = archive_le16dec(p+CFFOLDER_cCFData);
746 		folder->comptype =
747 		    archive_le16dec(p+CFFOLDER_typeCompress) & 0x0F;
748 		folder->compdata =
749 		    archive_le16dec(p+CFFOLDER_typeCompress) >> 8;
750 		/* Get a compression name. */
751 		if (folder->comptype <
752 		    sizeof(compression_name) / sizeof(compression_name[0]))
753 			folder->compname = compression_name[folder->comptype];
754 		else
755 			folder->compname = "UNKNOWN";
756 		p += 8;
757 		used += 8;
758 		if (hd->flags & RESERVE_PRESENT) {
759 			p += hd->cffolder;/* abReserve */
760 			used += hd->cffolder;
761 		}
762 		/*
763 		 * Sanity check if each data is acceptable.
764 		 */
765 		if (offset32 >= folder->cfdata_offset_in_cab)
766 			goto invalid;
767 		offset32 = folder->cfdata_offset_in_cab;
768 
769 		/* Set a request to initialize zlib for the CFDATA of
770 		 * this folder. */
771 		folder->decompress_init = 0;
772 	}
773 	__archive_read_consume(a, used);
774 	cab->cab_offset += used;
775 
776 	/*
777 	 * Read CFFILE.
778 	 */
779 	/* Seek read pointer to the offset of CFFILE if needed. */
780 	skip = (int64_t)hd->files_offset - cab->cab_offset;
781 	if (skip <  0) {
782 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
783 		    "Invalid offset of CFFILE %jd < %jd",
784 		    (intmax_t)hd->files_offset, (intmax_t)cab->cab_offset);
785 		return (ARCHIVE_FATAL);
786 	}
787 	if (skip) {
788 		__archive_read_consume(a, skip);
789 		cab->cab_offset += skip;
790 	}
791 	/* Allocate memory for CFDATA */
792 	hd->file_array = (struct cffile *)calloc(
793 	    hd->file_count, sizeof(struct cffile));
794 	if (hd->file_array == NULL)
795 		goto nomem;
796 
797 	prev_folder = -1;
798 	for (i = 0; i < hd->file_count; i++) {
799 		struct cffile *file = &(hd->file_array[i]);
800 		ssize_t avail;
801 
802 		if ((p = __archive_read_ahead(a, 16, NULL)) == NULL)
803 			return (truncated_error(a));
804 		file->uncompressed_size = archive_le32dec(p + CFFILE_cbFile);
805 		file->offset = archive_le32dec(p + CFFILE_uoffFolderStart);
806 		file->folder = archive_le16dec(p + CFFILE_iFolder);
807 		file->mtime = cab_dos_time(p + CFFILE_date_time);
808 		file->attr = (uint8_t)archive_le16dec(p + CFFILE_attribs);
809 		__archive_read_consume(a, 16);
810 
811 		cab->cab_offset += 16;
812 		if ((p = cab_read_ahead_remaining(a, 256, &avail)) == NULL)
813 			return (truncated_error(a));
814 		if ((len = cab_strnlen(p, avail-1)) <= 0)
815 			goto invalid;
816 
817 		/* Copy a pathname.  */
818 		archive_string_init(&(file->pathname));
819 		archive_strncpy(&(file->pathname), p, len);
820 		__archive_read_consume(a, len + 1);
821 		cab->cab_offset += len + 1;
822 
823 		/*
824 		 * Sanity check if each data is acceptable.
825 		 */
826 		if (file->uncompressed_size > 0x7FFF8000)
827 			goto invalid;/* Too large */
828 		if ((int64_t)file->offset + (int64_t)file->uncompressed_size
829 		    > ARCHIVE_LITERAL_LL(0x7FFF8000))
830 			goto invalid;/* Too large */
831 		switch (file->folder) {
832 		case iFoldCONTINUED_TO_NEXT:
833 			/* This must be last file in a folder. */
834 			if (i != hd->file_count -1)
835 				goto invalid;
836 			cur_folder = hd->folder_count -1;
837 			break;
838 		case iFoldCONTINUED_PREV_AND_NEXT:
839 			/* This must be only one file in a folder. */
840 			if (hd->file_count != 1)
841 				goto invalid;
842 			/* FALL THROUGH */
843 		case iFoldCONTINUED_FROM_PREV:
844 			/* This must be first file in a folder. */
845 			if (i != 0)
846 				goto invalid;
847 			prev_folder = cur_folder = 0;
848 			offset32 = file->offset;
849 			break;
850 		default:
851 			if (file->folder >= hd->folder_count)
852 				goto invalid;
853 			cur_folder = file->folder;
854 			break;
855 		}
856 		/* Dot not back track. */
857 		if (cur_folder < prev_folder)
858 			goto invalid;
859 		if (cur_folder != prev_folder)
860 			offset32 = 0;
861 		prev_folder = cur_folder;
862 
863 		/* Make sure there are not any blanks from last file
864 		 * contents. */
865 		if (offset32 != file->offset)
866 			goto invalid;
867 		offset32 += file->uncompressed_size;
868 
869 		/* CFDATA is available for file contents. */
870 		if (file->uncompressed_size > 0 &&
871 		    hd->folder_array[cur_folder].cfdata_count == 0)
872 			goto invalid;
873 	}
874 
875 	if (hd->cabinet != 0 || hd->flags & (PREV_CABINET | NEXT_CABINET)) {
876 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
877 		    "Multivolume cabinet file is unsupported");
878 		return (ARCHIVE_WARN);
879 	}
880 	return (ARCHIVE_OK);
881 invalid:
882 	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
883 	    "Invalid CAB header");
884 	return (ARCHIVE_FATAL);
885 nomem:
886 	archive_set_error(&a->archive, ENOMEM,
887 	    "Can't allocate memory for CAB data");
888 	return (ARCHIVE_FATAL);
889 }
890 
891 static int
archive_read_format_cab_read_header(struct archive_read * a,struct archive_entry * entry)892 archive_read_format_cab_read_header(struct archive_read *a,
893     struct archive_entry *entry)
894 {
895 	struct cab *cab;
896 	struct cfheader *hd;
897 	struct cffolder *prev_folder;
898 	struct cffile *file;
899 	struct archive_string_conv *sconv;
900 	int err = ARCHIVE_OK, r;
901 
902 	cab = (struct cab *)(a->format->data);
903 	if (cab->found_header == 0) {
904 		err = cab_read_header(a);
905 		if (err < ARCHIVE_WARN)
906 			return (err);
907 		/* We've found the header. */
908 		cab->found_header = 1;
909 	}
910 	hd = &cab->cfheader;
911 
912 	if (hd->file_index >= hd->file_count) {
913 		cab->end_of_archive = 1;
914 		return (ARCHIVE_EOF);
915 	}
916 	file = &hd->file_array[hd->file_index++];
917 
918 	cab->end_of_entry = 0;
919 	cab->end_of_entry_cleanup = 0;
920 	cab->entry_compressed_bytes_read = 0;
921 	cab->entry_uncompressed_bytes_read = 0;
922 	cab->entry_unconsumed = 0;
923 	cab->entry_cffile = file;
924 
925 	/*
926 	 * Choose a proper folder.
927 	 */
928 	prev_folder = cab->entry_cffolder;
929 	switch (file->folder) {
930 	case iFoldCONTINUED_FROM_PREV:
931 	case iFoldCONTINUED_PREV_AND_NEXT:
932 		cab->entry_cffolder = &hd->folder_array[0];
933 		break;
934 	case iFoldCONTINUED_TO_NEXT:
935 		cab->entry_cffolder = &hd->folder_array[hd->folder_count-1];
936 		break;
937 	default:
938 		cab->entry_cffolder = &hd->folder_array[file->folder];
939 		break;
940 	}
941 	/* If a cffolder of this file is changed, reset a cfdata to read
942 	 * file contents from next cfdata. */
943 	if (prev_folder != cab->entry_cffolder)
944 		cab->entry_cfdata = NULL;
945 
946 	/* If a pathname is UTF-8, prepare a string conversion object
947 	 * for UTF-8 and use it. */
948 	if (file->attr & ATTR_NAME_IS_UTF) {
949 		if (cab->sconv_utf8 == NULL) {
950 			cab->sconv_utf8 =
951 			    archive_string_conversion_from_charset(
952 				&(a->archive), "UTF-8", 1);
953 			if (cab->sconv_utf8 == NULL)
954 				return (ARCHIVE_FATAL);
955 		}
956 		sconv = cab->sconv_utf8;
957 	} else if (cab->sconv != NULL) {
958 		/* Choose the conversion specified by the option. */
959 		sconv = cab->sconv;
960 	} else {
961 		/* Choose the default conversion. */
962 		if (!cab->init_default_conversion) {
963 			cab->sconv_default =
964 			    archive_string_default_conversion_for_read(
965 			      &(a->archive));
966 			cab->init_default_conversion = 1;
967 		}
968 		sconv = cab->sconv_default;
969 	}
970 
971 	/*
972 	 * Set a default value and common data
973 	 */
974 	r = cab_convert_path_separator_1(&(file->pathname), file->attr);
975 	if (archive_entry_copy_pathname_l(entry, file->pathname.s,
976 	    archive_strlen(&(file->pathname)), sconv) != 0) {
977 		if (errno == ENOMEM) {
978 			archive_set_error(&a->archive, ENOMEM,
979 			    "Can't allocate memory for Pathname");
980 			return (ARCHIVE_FATAL);
981 		}
982 		archive_set_error(&a->archive,
983 		    ARCHIVE_ERRNO_FILE_FORMAT,
984 		    "Pathname cannot be converted "
985 		    "from %s to current locale.",
986 		    archive_string_conversion_charset_name(sconv));
987 		err = ARCHIVE_WARN;
988 	}
989 	if (r < 0) {
990 		/* Convert a path separator '\' -> '/' */
991 		cab_convert_path_separator_2(cab, entry);
992 	}
993 
994 	archive_entry_set_size(entry, file->uncompressed_size);
995 	if (file->attr & ATTR_RDONLY)
996 		archive_entry_set_mode(entry, AE_IFREG | 0555);
997 	else
998 		archive_entry_set_mode(entry, AE_IFREG | 0666);
999 	archive_entry_set_mtime(entry, file->mtime, 0);
1000 
1001 	cab->entry_bytes_remaining = file->uncompressed_size;
1002 	cab->entry_offset = 0;
1003 	/* We don't need compress data. */
1004 	if (file->uncompressed_size == 0)
1005 		cab->end_of_entry_cleanup = cab->end_of_entry = 1;
1006 
1007 	/* Set up a more descriptive format name. */
1008 	sprintf(cab->format_name, "CAB %d.%d (%s)",
1009 	    hd->major, hd->minor, cab->entry_cffolder->compname);
1010 	a->archive.archive_format_name = cab->format_name;
1011 
1012 	return (err);
1013 }
1014 
1015 static int
archive_read_format_cab_read_data(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)1016 archive_read_format_cab_read_data(struct archive_read *a,
1017     const void **buff, size_t *size, int64_t *offset)
1018 {
1019 	struct cab *cab = (struct cab *)(a->format->data);
1020 	int r;
1021 
1022 	switch (cab->entry_cffile->folder) {
1023 	case iFoldCONTINUED_FROM_PREV:
1024 	case iFoldCONTINUED_TO_NEXT:
1025 	case iFoldCONTINUED_PREV_AND_NEXT:
1026 		*buff = NULL;
1027 		*size = 0;
1028 		*offset = 0;
1029 		archive_clear_error(&a->archive);
1030 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1031 		    "Cannot restore this file split in multivolume.");
1032 		return (ARCHIVE_FAILED);
1033 	default:
1034 		break;
1035 	}
1036 	if (cab->read_data_invoked == 0) {
1037 		if (cab->bytes_skipped) {
1038 			if (cab->entry_cfdata == NULL) {
1039 				r = cab_next_cfdata(a);
1040 				if (r < 0)
1041 					return (r);
1042 			}
1043 			if (cab_consume_cfdata(a, cab->bytes_skipped) < 0)
1044 				return (ARCHIVE_FATAL);
1045 			cab->bytes_skipped = 0;
1046 		}
1047 		cab->read_data_invoked = 1;
1048 	}
1049 	if (cab->entry_unconsumed) {
1050 		/* Consume as much as the compressor actually used. */
1051 		r = (int)cab_consume_cfdata(a, cab->entry_unconsumed);
1052 		cab->entry_unconsumed = 0;
1053 		if (r < 0)
1054 			return (r);
1055 	}
1056 	if (cab->end_of_archive || cab->end_of_entry) {
1057 		if (!cab->end_of_entry_cleanup) {
1058 			/* End-of-entry cleanup done. */
1059 			cab->end_of_entry_cleanup = 1;
1060 		}
1061 		*offset = cab->entry_offset;
1062 		*size = 0;
1063 		*buff = NULL;
1064 		return (ARCHIVE_EOF);
1065 	}
1066 
1067 	return (cab_read_data(a, buff, size, offset));
1068 }
1069 
1070 static uint32_t
cab_checksum_cfdata_4(const void * p,size_t bytes,uint32_t seed)1071 cab_checksum_cfdata_4(const void *p, size_t bytes, uint32_t seed)
1072 {
1073 	const unsigned char *b;
1074 	unsigned u32num;
1075 	uint32_t sum;
1076 
1077 	u32num = (unsigned)bytes / 4;
1078 	sum = seed;
1079 	b = p;
1080 	for (;u32num > 0; --u32num) {
1081 		sum ^= archive_le32dec(b);
1082 		b += 4;
1083 	}
1084 	return (sum);
1085 }
1086 
1087 static uint32_t
cab_checksum_cfdata(const void * p,size_t bytes,uint32_t seed)1088 cab_checksum_cfdata(const void *p, size_t bytes, uint32_t seed)
1089 {
1090 	const unsigned char *b;
1091 	uint32_t sum;
1092 	uint32_t t;
1093 
1094 	sum = cab_checksum_cfdata_4(p, bytes, seed);
1095 	b = p;
1096 	b += bytes & ~3;
1097 	t = 0;
1098 	switch (bytes & 3) {
1099 	case 3:
1100 		t |= ((uint32_t)(*b++)) << 16;
1101 		/* FALL THROUGH */
1102 	case 2:
1103 		t |= ((uint32_t)(*b++)) << 8;
1104 		/* FALL THROUGH */
1105 	case 1:
1106 		t |= *b;
1107 		/* FALL THROUGH */
1108 	default:
1109 		break;
1110 	}
1111 	sum ^= t;
1112 
1113 	return (sum);
1114 }
1115 
1116 static void
cab_checksum_update(struct archive_read * a,size_t bytes)1117 cab_checksum_update(struct archive_read *a, size_t bytes)
1118 {
1119 	struct cab *cab = (struct cab *)(a->format->data);
1120 	struct cfdata *cfdata = cab->entry_cfdata;
1121 	const unsigned char *p;
1122 	size_t sumbytes;
1123 
1124 	if (cfdata->sum == 0 || cfdata->sum_ptr == NULL)
1125 		return;
1126 	/*
1127 	 * Calculate the sum of this CFDATA.
1128 	 * Make sure CFDATA must be calculated in four bytes.
1129 	 */
1130 	p = cfdata->sum_ptr;
1131 	sumbytes = bytes;
1132 	if (cfdata->sum_extra_avail) {
1133 		while (cfdata->sum_extra_avail < 4 && sumbytes > 0) {
1134 			cfdata->sum_extra[
1135 			    cfdata->sum_extra_avail++] = *p++;
1136 			sumbytes--;
1137 		}
1138 		if (cfdata->sum_extra_avail == 4) {
1139 			cfdata->sum_calculated = cab_checksum_cfdata_4(
1140 			    cfdata->sum_extra, 4, cfdata->sum_calculated);
1141 			cfdata->sum_extra_avail = 0;
1142 		}
1143 	}
1144 	if (sumbytes) {
1145 		int odd = sumbytes & 3;
1146 		if (sumbytes - odd > 0)
1147 			cfdata->sum_calculated = cab_checksum_cfdata_4(
1148 			    p, sumbytes - odd, cfdata->sum_calculated);
1149 		if (odd)
1150 			memcpy(cfdata->sum_extra, p + sumbytes - odd, odd);
1151 		cfdata->sum_extra_avail = odd;
1152 	}
1153 	cfdata->sum_ptr = NULL;
1154 }
1155 
1156 static int
cab_checksum_finish(struct archive_read * a)1157 cab_checksum_finish(struct archive_read *a)
1158 {
1159 	struct cab *cab = (struct cab *)(a->format->data);
1160 	struct cfdata *cfdata = cab->entry_cfdata;
1161 	int l;
1162 
1163 	/* Do not need to compute a sum. */
1164 	if (cfdata->sum == 0)
1165 		return (ARCHIVE_OK);
1166 
1167 	/*
1168 	 * Calculate the sum of remaining CFDATA.
1169 	 */
1170 	if (cfdata->sum_extra_avail) {
1171 		cfdata->sum_calculated =
1172 		    cab_checksum_cfdata(cfdata->sum_extra,
1173 		       cfdata->sum_extra_avail, cfdata->sum_calculated);
1174 		cfdata->sum_extra_avail = 0;
1175 	}
1176 
1177 	l = 4;
1178 	if (cab->cfheader.flags & RESERVE_PRESENT)
1179 		l += cab->cfheader.cfdata;
1180 	cfdata->sum_calculated = cab_checksum_cfdata(
1181 	    cfdata->memimage + CFDATA_cbData, l, cfdata->sum_calculated);
1182 	if (cfdata->sum_calculated != cfdata->sum) {
1183 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1184 		    "Checksum error CFDATA[%d] %x:%x in %d bytes",
1185 		    cab->entry_cffolder->cfdata_index -1,
1186 		    cfdata->sum, cfdata->sum_calculated,
1187 		    cfdata->compressed_size);
1188 		return (ARCHIVE_FAILED);
1189 	}
1190 	return (ARCHIVE_OK);
1191 }
1192 
1193 /*
1194  * Read CFDATA if needed.
1195  */
1196 static int
cab_next_cfdata(struct archive_read * a)1197 cab_next_cfdata(struct archive_read *a)
1198 {
1199 	struct cab *cab = (struct cab *)(a->format->data);
1200 	struct cfdata *cfdata = cab->entry_cfdata;
1201 
1202 	/* There are remaining bytes in current CFDATA, use it first. */
1203 	if (cfdata != NULL && cfdata->uncompressed_bytes_remaining > 0)
1204 		return (ARCHIVE_OK);
1205 
1206 	if (cfdata == NULL) {
1207 		int64_t skip;
1208 
1209 		cab->entry_cffolder->cfdata_index = 0;
1210 
1211 		/* Seek read pointer to the offset of CFDATA if needed. */
1212 		skip = cab->entry_cffolder->cfdata_offset_in_cab
1213 			- cab->cab_offset;
1214 		if (skip < 0) {
1215 			int folder_index;
1216 			switch (cab->entry_cffile->folder) {
1217 			case iFoldCONTINUED_FROM_PREV:
1218 			case iFoldCONTINUED_PREV_AND_NEXT:
1219 				folder_index = 0;
1220 				break;
1221 			case iFoldCONTINUED_TO_NEXT:
1222 				folder_index = cab->cfheader.folder_count-1;
1223 				break;
1224 			default:
1225 				folder_index = cab->entry_cffile->folder;
1226 				break;
1227 			}
1228 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1229 			    "Invalid offset of CFDATA in folder(%d) %jd < %jd",
1230 			    folder_index,
1231 			    (intmax_t)cab->entry_cffolder->cfdata_offset_in_cab,
1232 			    (intmax_t)cab->cab_offset);
1233 			return (ARCHIVE_FATAL);
1234 		}
1235 		if (skip > 0) {
1236 			if (__archive_read_consume(a, skip) < 0)
1237 				return (ARCHIVE_FATAL);
1238 			cab->cab_offset =
1239 			    cab->entry_cffolder->cfdata_offset_in_cab;
1240 		}
1241 	}
1242 
1243 	/*
1244 	 * Read a CFDATA.
1245 	 */
1246 	if (cab->entry_cffolder->cfdata_index <
1247 	    cab->entry_cffolder->cfdata_count) {
1248 		const unsigned char *p;
1249 		int l;
1250 
1251 		cfdata = &(cab->entry_cffolder->cfdata);
1252 		cab->entry_cffolder->cfdata_index++;
1253 		cab->entry_cfdata = cfdata;
1254 		cfdata->sum_calculated = 0;
1255 		cfdata->sum_extra_avail = 0;
1256 		cfdata->sum_ptr = NULL;
1257 		l = 8;
1258 		if (cab->cfheader.flags & RESERVE_PRESENT)
1259 			l += cab->cfheader.cfdata;
1260 		if ((p = __archive_read_ahead(a, l, NULL)) == NULL)
1261 			return (truncated_error(a));
1262 		cfdata->sum = archive_le32dec(p + CFDATA_csum);
1263 		cfdata->compressed_size = archive_le16dec(p + CFDATA_cbData);
1264 		cfdata->compressed_bytes_remaining = cfdata->compressed_size;
1265 		cfdata->uncompressed_size =
1266 		    archive_le16dec(p + CFDATA_cbUncomp);
1267 		cfdata->uncompressed_bytes_remaining =
1268 		    cfdata->uncompressed_size;
1269 		cfdata->uncompressed_avail = 0;
1270 		cfdata->read_offset = 0;
1271 		cfdata->unconsumed = 0;
1272 
1273 		/*
1274 		 * Sanity check if data size is acceptable.
1275 		 */
1276 		if (cfdata->compressed_size == 0 ||
1277 		    cfdata->compressed_size > (0x8000+6144))
1278 			goto invalid;
1279 		if (cfdata->uncompressed_size > 0x8000)
1280 			goto invalid;
1281 		if (cfdata->uncompressed_size == 0) {
1282 			switch (cab->entry_cffile->folder) {
1283 			case iFoldCONTINUED_PREV_AND_NEXT:
1284 			case iFoldCONTINUED_TO_NEXT:
1285 				break;
1286 			case iFoldCONTINUED_FROM_PREV:
1287 			default:
1288 				goto invalid;
1289 			}
1290 		}
1291 		/* If CFDATA is not last in a folder, an uncompressed
1292 		 * size must be 0x8000(32KBi) */
1293 		if ((cab->entry_cffolder->cfdata_index <
1294 		     cab->entry_cffolder->cfdata_count) &&
1295 		       cfdata->uncompressed_size != 0x8000)
1296 			goto invalid;
1297 
1298 		/* A compressed data size and an uncompressed data size must
1299 		 * be the same in no compression mode. */
1300 		if (cab->entry_cffolder->comptype == COMPTYPE_NONE &&
1301 		    cfdata->compressed_size != cfdata->uncompressed_size)
1302 			goto invalid;
1303 
1304 		/*
1305 		 * Save CFDATA image for sum check.
1306 		 */
1307 		if (cfdata->memimage_size < (size_t)l) {
1308 			free(cfdata->memimage);
1309 			cfdata->memimage = malloc(l);
1310 			if (cfdata->memimage == NULL) {
1311 				archive_set_error(&a->archive, ENOMEM,
1312 				    "Can't allocate memory for CAB data");
1313 				return (ARCHIVE_FATAL);
1314 			}
1315 			cfdata->memimage_size = l;
1316 		}
1317 		memcpy(cfdata->memimage, p, l);
1318 
1319 		/* Consume bytes as much as we used. */
1320 		__archive_read_consume(a, l);
1321 		cab->cab_offset += l;
1322 	} else if (cab->entry_cffolder->cfdata_count > 0) {
1323 		/* Run out of all CFDATA in a folder. */
1324 		cfdata->compressed_size = 0;
1325 		cfdata->uncompressed_size = 0;
1326 		cfdata->compressed_bytes_remaining = 0;
1327 		cfdata->uncompressed_bytes_remaining = 0;
1328 	} else {
1329 		/* Current folder does not have any CFDATA. */
1330 		cfdata = &(cab->entry_cffolder->cfdata);
1331 		cab->entry_cfdata = cfdata;
1332 		memset(cfdata, 0, sizeof(*cfdata));
1333 	}
1334 	return (ARCHIVE_OK);
1335 invalid:
1336 	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1337 	    "Invalid CFDATA");
1338 	return (ARCHIVE_FATAL);
1339 }
1340 
1341 /*
1342  * Read ahead CFDATA.
1343  */
1344 static const void *
cab_read_ahead_cfdata(struct archive_read * a,ssize_t * avail)1345 cab_read_ahead_cfdata(struct archive_read *a, ssize_t *avail)
1346 {
1347 	struct cab *cab = (struct cab *)(a->format->data);
1348 	int err;
1349 
1350 	err = cab_next_cfdata(a);
1351 	if (err < ARCHIVE_OK) {
1352 		*avail = err;
1353 		return (NULL);
1354 	}
1355 
1356 	switch (cab->entry_cffolder->comptype) {
1357 	case COMPTYPE_NONE:
1358 		return (cab_read_ahead_cfdata_none(a, avail));
1359 	case COMPTYPE_MSZIP:
1360 		return (cab_read_ahead_cfdata_deflate(a, avail));
1361 	case COMPTYPE_LZX:
1362 		return (cab_read_ahead_cfdata_lzx(a, avail));
1363 	default: /* Unsupported compression. */
1364 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1365 		    "Unsupported CAB compression : %s",
1366 		    cab->entry_cffolder->compname);
1367 		*avail = ARCHIVE_FAILED;
1368 		return (NULL);
1369 	}
1370 }
1371 
1372 /*
1373  * Read ahead CFDATA as uncompressed data.
1374  */
1375 static const void *
cab_read_ahead_cfdata_none(struct archive_read * a,ssize_t * avail)1376 cab_read_ahead_cfdata_none(struct archive_read *a, ssize_t *avail)
1377 {
1378 	struct cab *cab = (struct cab *)(a->format->data);
1379 	struct cfdata *cfdata;
1380 	const void *d;
1381 
1382 	cfdata = cab->entry_cfdata;
1383 
1384 	/*
1385 	 * Note: '1' here is a performance optimization.
1386 	 * Recall that the decompression layer returns a count of
1387 	 * available bytes; asking for more than that forces the
1388 	 * decompressor to combine reads by copying data.
1389 	 */
1390 	d = __archive_read_ahead(a, 1, avail);
1391 	if (*avail <= 0) {
1392 		*avail = truncated_error(a);
1393 		return (NULL);
1394 	}
1395 	if (*avail > cfdata->uncompressed_bytes_remaining)
1396 		*avail = cfdata->uncompressed_bytes_remaining;
1397 	cfdata->uncompressed_avail = cfdata->uncompressed_size;
1398 	cfdata->unconsumed = *avail;
1399 	cfdata->sum_ptr = d;
1400 	return (d);
1401 }
1402 
1403 /*
1404  * Read ahead CFDATA as deflate data.
1405  */
1406 #ifdef HAVE_ZLIB_H
1407 static const void *
cab_read_ahead_cfdata_deflate(struct archive_read * a,ssize_t * avail)1408 cab_read_ahead_cfdata_deflate(struct archive_read *a, ssize_t *avail)
1409 {
1410 	struct cab *cab = (struct cab *)(a->format->data);
1411 	struct cfdata *cfdata;
1412 	const void *d;
1413 	int r, mszip;
1414 	uint16_t uavail;
1415 	char eod = 0;
1416 
1417 	cfdata = cab->entry_cfdata;
1418 	/* If the buffer hasn't been allocated, allocate it now. */
1419 	if (cab->uncompressed_buffer == NULL) {
1420 		cab->uncompressed_buffer_size = 0x8000;
1421 		cab->uncompressed_buffer
1422 		    = (unsigned char *)malloc(cab->uncompressed_buffer_size);
1423 		if (cab->uncompressed_buffer == NULL) {
1424 			archive_set_error(&a->archive, ENOMEM,
1425 			    "No memory for CAB reader");
1426 			*avail = ARCHIVE_FATAL;
1427 			return (NULL);
1428 		}
1429 	}
1430 
1431 	uavail = cfdata->uncompressed_avail;
1432 	if (uavail == cfdata->uncompressed_size) {
1433 		d = cab->uncompressed_buffer + cfdata->read_offset;
1434 		*avail = uavail - cfdata->read_offset;
1435 		return (d);
1436 	}
1437 
1438 	if (!cab->entry_cffolder->decompress_init) {
1439 		cab->stream.next_in = NULL;
1440 		cab->stream.avail_in = 0;
1441 		cab->stream.total_in = 0;
1442 		cab->stream.next_out = NULL;
1443 		cab->stream.avail_out = 0;
1444 		cab->stream.total_out = 0;
1445 		if (cab->stream_valid)
1446 			r = inflateReset(&cab->stream);
1447 		else
1448 			r = inflateInit2(&cab->stream,
1449 			    -15 /* Don't check for zlib header */);
1450 		if (r != Z_OK) {
1451 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1452 			    "Can't initialize deflate decompression.");
1453 			*avail = ARCHIVE_FATAL;
1454 			return (NULL);
1455 		}
1456 		/* Stream structure has been set up. */
1457 		cab->stream_valid = 1;
1458 		/* We've initialized decompression for this stream. */
1459 		cab->entry_cffolder->decompress_init = 1;
1460 	}
1461 
1462 	if (cfdata->compressed_bytes_remaining == cfdata->compressed_size)
1463 		mszip = 2;
1464 	else
1465 		mszip = 0;
1466 	eod = 0;
1467 	cab->stream.total_out = uavail;
1468 	/*
1469 	 * We always uncompress all data in current CFDATA.
1470 	 */
1471 	while (!eod && cab->stream.total_out < cfdata->uncompressed_size) {
1472 		ssize_t bytes_avail;
1473 
1474 		cab->stream.next_out =
1475 		    cab->uncompressed_buffer + cab->stream.total_out;
1476 		cab->stream.avail_out =
1477 		    cfdata->uncompressed_size - cab->stream.total_out;
1478 
1479 		d = __archive_read_ahead(a, 1, &bytes_avail);
1480 		if (bytes_avail <= 0) {
1481 			*avail = truncated_error(a);
1482 			return (NULL);
1483 		}
1484 		if (bytes_avail > cfdata->compressed_bytes_remaining)
1485 			bytes_avail = cfdata->compressed_bytes_remaining;
1486 		/*
1487 		 * A bug in zlib.h: stream.next_in should be marked 'const'
1488 		 * but isn't (the library never alters data through the
1489 		 * next_in pointer, only reads it).  The result: this ugly
1490 		 * cast to remove 'const'.
1491 		 */
1492 		cab->stream.next_in = (Bytef *)(uintptr_t)d;
1493 		cab->stream.avail_in = (uInt)bytes_avail;
1494 		cab->stream.total_in = 0;
1495 
1496 		/* Cut out a tow-byte MSZIP signature(0x43, 0x4b). */
1497 		if (mszip > 0) {
1498 			if (bytes_avail <= 0)
1499 				goto nomszip;
1500 			if (bytes_avail <= mszip) {
1501 				if (mszip == 2) {
1502 					if (cab->stream.next_in[0] != 0x43)
1503 						goto nomszip;
1504 					if (bytes_avail > 1 &&
1505 					    cab->stream.next_in[1] != 0x4b)
1506 						goto nomszip;
1507 				} else if (cab->stream.next_in[0] != 0x4b)
1508 					goto nomszip;
1509 				cfdata->unconsumed = bytes_avail;
1510 				cfdata->sum_ptr = d;
1511 				if (cab_minimum_consume_cfdata(
1512 				    a, cfdata->unconsumed) < 0) {
1513 					*avail = ARCHIVE_FATAL;
1514 					return (NULL);
1515 				}
1516 				mszip -= (int)bytes_avail;
1517 				continue;
1518 			}
1519 			if (mszip == 1 && cab->stream.next_in[0] != 0x4b)
1520 				goto nomszip;
1521 			else if (cab->stream.next_in[0] != 0x43 ||
1522 			    cab->stream.next_in[1] != 0x4b)
1523 				goto nomszip;
1524 			cab->stream.next_in += mszip;
1525 			cab->stream.avail_in -= mszip;
1526 			cab->stream.total_in += mszip;
1527 			mszip = 0;
1528 		}
1529 
1530 		r = inflate(&cab->stream, 0);
1531 		switch (r) {
1532 		case Z_OK:
1533 			break;
1534 		case Z_STREAM_END:
1535 			eod = 1;
1536 			break;
1537 		default:
1538 			goto zlibfailed;
1539 		}
1540 		cfdata->unconsumed = cab->stream.total_in;
1541 		cfdata->sum_ptr = d;
1542 		if (cab_minimum_consume_cfdata(a, cfdata->unconsumed) < 0) {
1543 			*avail = ARCHIVE_FATAL;
1544 			return (NULL);
1545 		}
1546 	}
1547 	uavail = (uint16_t)cab->stream.total_out;
1548 
1549 	if (uavail < cfdata->uncompressed_size) {
1550 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1551 		    "Invalid uncompressed size (%d < %d)",
1552 		    uavail, cfdata->uncompressed_size);
1553 		*avail = ARCHIVE_FATAL;
1554 		return (NULL);
1555 	}
1556 
1557 	/*
1558 	 * Note: I suspect there is a bug in makecab.exe because, in rare
1559 	 * case, compressed bytes are still remaining regardless we have
1560 	 * gotten all uncompressed bytes, which size is recorded in CFDATA,
1561 	 * as much as we need, and we have to use the garbage so as to
1562 	 * correctly compute the sum of CFDATA accordingly.
1563 	 */
1564 	if (cfdata->compressed_bytes_remaining > 0) {
1565 		ssize_t bytes_avail;
1566 
1567 		d = __archive_read_ahead(a, cfdata->compressed_bytes_remaining,
1568 		    &bytes_avail);
1569 		if (bytes_avail <= 0) {
1570 			*avail = truncated_error(a);
1571 			return (NULL);
1572 		}
1573 		cfdata->unconsumed = cfdata->compressed_bytes_remaining;
1574 		cfdata->sum_ptr = d;
1575 		if (cab_minimum_consume_cfdata(a, cfdata->unconsumed) < 0) {
1576 			*avail = ARCHIVE_FATAL;
1577 			return (NULL);
1578 		}
1579 	}
1580 
1581 	/*
1582 	 * Set dictionary data for decompressing of next CFDATA, which
1583 	 * in the same folder. This is why we always do decompress CFDATA
1584 	 * even if beginning CFDATA or some of CFDATA are not used in
1585 	 * skipping file data.
1586 	 */
1587 	if (cab->entry_cffolder->cfdata_index <
1588 	    cab->entry_cffolder->cfdata_count) {
1589 		r = inflateReset(&cab->stream);
1590 		if (r != Z_OK)
1591 			goto zlibfailed;
1592 		r = inflateSetDictionary(&cab->stream,
1593 		    cab->uncompressed_buffer, cfdata->uncompressed_size);
1594 		if (r != Z_OK)
1595 			goto zlibfailed;
1596 	}
1597 
1598 	d = cab->uncompressed_buffer + cfdata->read_offset;
1599 	*avail = uavail - cfdata->read_offset;
1600 	cfdata->uncompressed_avail = uavail;
1601 
1602 	return (d);
1603 
1604 zlibfailed:
1605 	switch (r) {
1606 	case Z_MEM_ERROR:
1607 		archive_set_error(&a->archive, ENOMEM,
1608 		    "Out of memory for deflate decompression");
1609 		break;
1610 	default:
1611 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1612 		    "Deflate decompression failed (%d)", r);
1613 		break;
1614 	}
1615 	*avail = ARCHIVE_FATAL;
1616 	return (NULL);
1617 nomszip:
1618 	archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1619 	    "CFDATA incorrect(no MSZIP signature)");
1620 	*avail = ARCHIVE_FATAL;
1621 	return (NULL);
1622 }
1623 
1624 #else /* HAVE_ZLIB_H */
1625 
1626 static const void *
cab_read_ahead_cfdata_deflate(struct archive_read * a,ssize_t * avail)1627 cab_read_ahead_cfdata_deflate(struct archive_read *a, ssize_t *avail)
1628 {
1629 	*avail = ARCHIVE_FATAL;
1630 	archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1631 	    "libarchive compiled without deflate support (no libz)");
1632 	return (NULL);
1633 }
1634 
1635 #endif /* HAVE_ZLIB_H */
1636 
1637 static const void *
cab_read_ahead_cfdata_lzx(struct archive_read * a,ssize_t * avail)1638 cab_read_ahead_cfdata_lzx(struct archive_read *a, ssize_t *avail)
1639 {
1640 	struct cab *cab = (struct cab *)(a->format->data);
1641 	struct cfdata *cfdata;
1642 	const void *d;
1643 	int r;
1644 	uint16_t uavail;
1645 
1646 	cfdata = cab->entry_cfdata;
1647 	/* If the buffer hasn't been allocated, allocate it now. */
1648 	if (cab->uncompressed_buffer == NULL) {
1649 		cab->uncompressed_buffer_size = 0x8000;
1650 		cab->uncompressed_buffer
1651 		    = (unsigned char *)malloc(cab->uncompressed_buffer_size);
1652 		if (cab->uncompressed_buffer == NULL) {
1653 			archive_set_error(&a->archive, ENOMEM,
1654 			    "No memory for CAB reader");
1655 			*avail = ARCHIVE_FATAL;
1656 			return (NULL);
1657 		}
1658 	}
1659 
1660 	uavail = cfdata->uncompressed_avail;
1661 	if (uavail == cfdata->uncompressed_size) {
1662 		d = cab->uncompressed_buffer + cfdata->read_offset;
1663 		*avail = uavail - cfdata->read_offset;
1664 		return (d);
1665 	}
1666 
1667 	if (!cab->entry_cffolder->decompress_init) {
1668 		r = lzx_decode_init(&cab->xstrm,
1669 		    cab->entry_cffolder->compdata);
1670 		if (r != ARCHIVE_OK) {
1671 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1672 			    "Can't initialize LZX decompression.");
1673 			*avail = ARCHIVE_FATAL;
1674 			return (NULL);
1675 		}
1676 		/* We've initialized decompression for this stream. */
1677 		cab->entry_cffolder->decompress_init = 1;
1678 	}
1679 
1680 	/* Clean up remaining bits of previous CFDATA. */
1681 	lzx_cleanup_bitstream(&cab->xstrm);
1682 	cab->xstrm.total_out = uavail;
1683 	while (cab->xstrm.total_out < cfdata->uncompressed_size) {
1684 		ssize_t bytes_avail;
1685 
1686 		cab->xstrm.next_out =
1687 		    cab->uncompressed_buffer + cab->xstrm.total_out;
1688 		cab->xstrm.avail_out =
1689 		    cfdata->uncompressed_size - cab->xstrm.total_out;
1690 
1691 		d = __archive_read_ahead(a, 1, &bytes_avail);
1692 		if (bytes_avail <= 0) {
1693 			archive_set_error(&a->archive,
1694 			    ARCHIVE_ERRNO_FILE_FORMAT,
1695 			    "Truncated CAB file data");
1696 			*avail = ARCHIVE_FATAL;
1697 			return (NULL);
1698 		}
1699 		if (bytes_avail > cfdata->compressed_bytes_remaining)
1700 			bytes_avail = cfdata->compressed_bytes_remaining;
1701 
1702 		cab->xstrm.next_in = d;
1703 		cab->xstrm.avail_in = bytes_avail;
1704 		cab->xstrm.total_in = 0;
1705 		r = lzx_decode(&cab->xstrm,
1706 		    cfdata->compressed_bytes_remaining == bytes_avail);
1707 		switch (r) {
1708 		case ARCHIVE_OK:
1709 		case ARCHIVE_EOF:
1710 			break;
1711 		default:
1712 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1713 			    "LZX decompression failed (%d)", r);
1714 			*avail = ARCHIVE_FATAL;
1715 			return (NULL);
1716 		}
1717 		cfdata->unconsumed = cab->xstrm.total_in;
1718 		cfdata->sum_ptr = d;
1719 		if (cab_minimum_consume_cfdata(a, cfdata->unconsumed) < 0) {
1720 			*avail = ARCHIVE_FATAL;
1721 			return (NULL);
1722 		}
1723 	}
1724 
1725 	uavail = (uint16_t)cab->xstrm.total_out;
1726 	/*
1727 	 * Make sure a read pointer advances to next CFDATA.
1728 	 */
1729 	if (cfdata->compressed_bytes_remaining > 0) {
1730 		ssize_t bytes_avail;
1731 
1732 		d = __archive_read_ahead(a, cfdata->compressed_bytes_remaining,
1733 		    &bytes_avail);
1734 		if (bytes_avail <= 0) {
1735 			*avail = truncated_error(a);
1736 			return (NULL);
1737 		}
1738 		cfdata->unconsumed = cfdata->compressed_bytes_remaining;
1739 		cfdata->sum_ptr = d;
1740 		if (cab_minimum_consume_cfdata(a, cfdata->unconsumed) < 0) {
1741 			*avail = ARCHIVE_FATAL;
1742 			return (NULL);
1743 		}
1744 	}
1745 
1746 	/*
1747 	 * Translation reversal of x86 processor CALL byte sequence(E8).
1748 	 */
1749 	lzx_translation(&cab->xstrm, cab->uncompressed_buffer,
1750 	    cfdata->uncompressed_size,
1751 	    (cab->entry_cffolder->cfdata_index-1) * 0x8000);
1752 
1753 	d = cab->uncompressed_buffer + cfdata->read_offset;
1754 	*avail = uavail - cfdata->read_offset;
1755 	cfdata->uncompressed_avail = uavail;
1756 
1757 	return (d);
1758 }
1759 
1760 /*
1761  * Consume CFDATA.
1762  * We always decompress CFDATA to consume CFDATA as much as we need
1763  * in uncompressed bytes because all CFDATA in a folder are related
1764  * so we do not skip any CFDATA without decompressing.
1765  * Note: If the folder of a CFFILE is iFoldCONTINUED_PREV_AND_NEXT or
1766  * iFoldCONTINUED_FROM_PREV, we won't decompress because a CFDATA for
1767  * the CFFILE is remaining bytes of previous Multivolume CAB file.
1768  */
1769 static int64_t
cab_consume_cfdata(struct archive_read * a,int64_t consumed_bytes)1770 cab_consume_cfdata(struct archive_read *a, int64_t consumed_bytes)
1771 {
1772 	struct cab *cab = (struct cab *)(a->format->data);
1773 	struct cfdata *cfdata;
1774 	int64_t cbytes, rbytes;
1775 	int err;
1776 
1777 	rbytes = cab_minimum_consume_cfdata(a, consumed_bytes);
1778 	if (rbytes < 0)
1779 		return (ARCHIVE_FATAL);
1780 
1781 	cfdata = cab->entry_cfdata;
1782 	while (rbytes > 0) {
1783 		ssize_t avail;
1784 
1785 		if (cfdata->compressed_size == 0) {
1786 			archive_set_error(&a->archive,
1787 			    ARCHIVE_ERRNO_FILE_FORMAT,
1788 			    "Invalid CFDATA");
1789 			return (ARCHIVE_FATAL);
1790 		}
1791 		cbytes = cfdata->uncompressed_bytes_remaining;
1792 		if (cbytes > rbytes)
1793 			cbytes = rbytes;
1794 		rbytes -= cbytes;
1795 
1796 		if (cfdata->uncompressed_avail == 0 &&
1797 		   (cab->entry_cffile->folder == iFoldCONTINUED_PREV_AND_NEXT ||
1798 		    cab->entry_cffile->folder == iFoldCONTINUED_FROM_PREV)) {
1799 			/* We have not read any data yet. */
1800 			if (cbytes == cfdata->uncompressed_bytes_remaining) {
1801 				/* Skip whole current CFDATA. */
1802 				__archive_read_consume(a,
1803 				    cfdata->compressed_size);
1804 				cab->cab_offset += cfdata->compressed_size;
1805 				cfdata->compressed_bytes_remaining = 0;
1806 				cfdata->uncompressed_bytes_remaining = 0;
1807 				err = cab_next_cfdata(a);
1808 				if (err < 0)
1809 					return (err);
1810 				cfdata = cab->entry_cfdata;
1811 				if (cfdata->uncompressed_size == 0) {
1812 					switch (cab->entry_cffile->folder) {
1813 					case iFoldCONTINUED_PREV_AND_NEXT:
1814 					case iFoldCONTINUED_TO_NEXT:
1815 					case iFoldCONTINUED_FROM_PREV:
1816 						rbytes = 0;
1817 						break;
1818 					default:
1819 						break;
1820 					}
1821 				}
1822 				continue;
1823 			}
1824 			cfdata->read_offset += (uint16_t)cbytes;
1825 			cfdata->uncompressed_bytes_remaining -= (uint16_t)cbytes;
1826 			break;
1827 		} else if (cbytes == 0) {
1828 			err = cab_next_cfdata(a);
1829 			if (err < 0)
1830 				return (err);
1831 			cfdata = cab->entry_cfdata;
1832 			if (cfdata->uncompressed_size == 0) {
1833 				switch (cab->entry_cffile->folder) {
1834 				case iFoldCONTINUED_PREV_AND_NEXT:
1835 				case iFoldCONTINUED_TO_NEXT:
1836 				case iFoldCONTINUED_FROM_PREV:
1837 					return (ARCHIVE_FATAL);
1838 				default:
1839 					break;
1840 				}
1841 			}
1842 			continue;
1843 		}
1844 		while (cbytes > 0) {
1845 			(void)cab_read_ahead_cfdata(a, &avail);
1846 			if (avail <= 0)
1847 				return (ARCHIVE_FATAL);
1848 			if (avail > cbytes)
1849 				avail = (ssize_t)cbytes;
1850 			if (cab_minimum_consume_cfdata(a, avail) < 0)
1851 				return (ARCHIVE_FATAL);
1852 			cbytes -= avail;
1853 		}
1854 	}
1855 	return (consumed_bytes);
1856 }
1857 
1858 /*
1859  * Consume CFDATA as much as we have already gotten and
1860  * compute the sum of CFDATA.
1861  */
1862 static int64_t
cab_minimum_consume_cfdata(struct archive_read * a,int64_t consumed_bytes)1863 cab_minimum_consume_cfdata(struct archive_read *a, int64_t consumed_bytes)
1864 {
1865 	struct cab *cab = (struct cab *)(a->format->data);
1866 	struct cfdata *cfdata;
1867 	int64_t cbytes, rbytes;
1868 	int err;
1869 
1870 	cfdata = cab->entry_cfdata;
1871 	rbytes = consumed_bytes;
1872 	if (cab->entry_cffolder->comptype == COMPTYPE_NONE) {
1873 		if (consumed_bytes < cfdata->unconsumed)
1874 			cbytes = consumed_bytes;
1875 		else
1876 			cbytes = cfdata->unconsumed;
1877 		rbytes -= cbytes;
1878 		cfdata->read_offset += (uint16_t)cbytes;
1879 		cfdata->uncompressed_bytes_remaining -= (uint16_t)cbytes;
1880 		cfdata->unconsumed -= cbytes;
1881 	} else {
1882 		cbytes = cfdata->uncompressed_avail - cfdata->read_offset;
1883 		if (cbytes > 0) {
1884 			if (consumed_bytes < cbytes)
1885 				cbytes = consumed_bytes;
1886 			rbytes -= cbytes;
1887 			cfdata->read_offset += (uint16_t)cbytes;
1888 			cfdata->uncompressed_bytes_remaining -= (uint16_t)cbytes;
1889 		}
1890 
1891 		if (cfdata->unconsumed) {
1892 			cbytes = cfdata->unconsumed;
1893 			cfdata->unconsumed = 0;
1894 		} else
1895 			cbytes = 0;
1896 	}
1897 	if (cbytes) {
1898 		/* Compute the sum. */
1899 		cab_checksum_update(a, (size_t)cbytes);
1900 
1901 		/* Consume as much as the compressor actually used. */
1902 		__archive_read_consume(a, cbytes);
1903 		cab->cab_offset += cbytes;
1904 		cfdata->compressed_bytes_remaining -= (uint16_t)cbytes;
1905 		if (cfdata->compressed_bytes_remaining == 0) {
1906 			err = cab_checksum_finish(a);
1907 			if (err < 0)
1908 				return (err);
1909 		}
1910 	}
1911 	return (rbytes);
1912 }
1913 
1914 /*
1915  * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
1916  * cab->end_of_entry if it consumes all of the data.
1917  */
1918 static int
cab_read_data(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)1919 cab_read_data(struct archive_read *a, const void **buff,
1920     size_t *size, int64_t *offset)
1921 {
1922 	struct cab *cab = (struct cab *)(a->format->data);
1923 	ssize_t bytes_avail;
1924 
1925 	if (cab->entry_bytes_remaining == 0) {
1926 		*buff = NULL;
1927 		*size = 0;
1928 		*offset = cab->entry_offset;
1929 		cab->end_of_entry = 1;
1930 		return (ARCHIVE_OK);
1931 	}
1932 
1933 	*buff = cab_read_ahead_cfdata(a, &bytes_avail);
1934 	if (bytes_avail <= 0) {
1935 		*buff = NULL;
1936 		*size = 0;
1937 		*offset = 0;
1938 		if (bytes_avail == 0 &&
1939 		    cab->entry_cfdata->uncompressed_size == 0) {
1940 			/* All of CFDATA in a folder has been handled. */
1941 			archive_set_error(&a->archive,
1942 			    ARCHIVE_ERRNO_FILE_FORMAT, "Invalid CFDATA");
1943 			return (ARCHIVE_FATAL);
1944 		} else
1945 			return ((int)bytes_avail);
1946 	}
1947 	if (bytes_avail > cab->entry_bytes_remaining)
1948 		bytes_avail = (ssize_t)cab->entry_bytes_remaining;
1949 
1950 	*size = bytes_avail;
1951 	*offset = cab->entry_offset;
1952 	cab->entry_offset += bytes_avail;
1953 	cab->entry_bytes_remaining -= bytes_avail;
1954 	if (cab->entry_bytes_remaining == 0)
1955 		cab->end_of_entry = 1;
1956 	cab->entry_unconsumed = bytes_avail;
1957 	if (cab->entry_cffolder->comptype == COMPTYPE_NONE) {
1958 		/* Don't consume more than current entry used. */
1959 		if (cab->entry_cfdata->unconsumed > cab->entry_unconsumed)
1960 			cab->entry_cfdata->unconsumed = cab->entry_unconsumed;
1961 	}
1962 	return (ARCHIVE_OK);
1963 }
1964 
1965 static int
archive_read_format_cab_read_data_skip(struct archive_read * a)1966 archive_read_format_cab_read_data_skip(struct archive_read *a)
1967 {
1968 	struct cab *cab;
1969 	int64_t bytes_skipped;
1970 	int r;
1971 
1972 	cab = (struct cab *)(a->format->data);
1973 
1974 	if (cab->end_of_archive)
1975 		return (ARCHIVE_EOF);
1976 
1977 	if (!cab->read_data_invoked) {
1978 		cab->bytes_skipped += cab->entry_bytes_remaining;
1979 		cab->entry_bytes_remaining = 0;
1980 		/* This entry is finished and done. */
1981 		cab->end_of_entry_cleanup = cab->end_of_entry = 1;
1982 		return (ARCHIVE_OK);
1983 	}
1984 
1985 	if (cab->entry_unconsumed) {
1986 		/* Consume as much as the compressor actually used. */
1987 		r = (int)cab_consume_cfdata(a, cab->entry_unconsumed);
1988 		cab->entry_unconsumed = 0;
1989 		if (r < 0)
1990 			return (r);
1991 	} else if (cab->entry_cfdata == NULL) {
1992 		r = cab_next_cfdata(a);
1993 		if (r < 0)
1994 			return (r);
1995 	}
1996 
1997 	/* if we've already read to end of data, we're done. */
1998 	if (cab->end_of_entry_cleanup)
1999 		return (ARCHIVE_OK);
2000 
2001 	/*
2002 	 * If the length is at the beginning, we can skip the
2003 	 * compressed data much more quickly.
2004 	 */
2005 	bytes_skipped = cab_consume_cfdata(a, cab->entry_bytes_remaining);
2006 	if (bytes_skipped < 0)
2007 		return (ARCHIVE_FATAL);
2008 
2009 	/* If the compression type is none(uncompressed), we've already
2010 	 * consumed data as much as the current entry size. */
2011 	if (cab->entry_cffolder->comptype == COMPTYPE_NONE &&
2012 	    cab->entry_cfdata != NULL)
2013 		cab->entry_cfdata->unconsumed = 0;
2014 
2015 	/* This entry is finished and done. */
2016 	cab->end_of_entry_cleanup = cab->end_of_entry = 1;
2017 	return (ARCHIVE_OK);
2018 }
2019 
2020 static int
archive_read_format_cab_cleanup(struct archive_read * a)2021 archive_read_format_cab_cleanup(struct archive_read *a)
2022 {
2023 	struct cab *cab = (struct cab *)(a->format->data);
2024 	struct cfheader *hd = &cab->cfheader;
2025 	int i;
2026 
2027 	if (hd->folder_array != NULL) {
2028 		for (i = 0; i < hd->folder_count; i++)
2029 			free(hd->folder_array[i].cfdata.memimage);
2030 		free(hd->folder_array);
2031 	}
2032 	if (hd->file_array != NULL) {
2033 		for (i = 0; i < cab->cfheader.file_count; i++)
2034 			archive_string_free(&(hd->file_array[i].pathname));
2035 		free(hd->file_array);
2036 	}
2037 #ifdef HAVE_ZLIB_H
2038 	if (cab->stream_valid)
2039 		inflateEnd(&cab->stream);
2040 #endif
2041 	lzx_decode_free(&cab->xstrm);
2042 	archive_wstring_free(&cab->ws);
2043 	free(cab->uncompressed_buffer);
2044 	free(cab);
2045 	(a->format->data) = NULL;
2046 	return (ARCHIVE_OK);
2047 }
2048 
2049 /* Convert an MSDOS-style date/time into Unix-style time. */
2050 static time_t
cab_dos_time(const unsigned char * p)2051 cab_dos_time(const unsigned char *p)
2052 {
2053 	int msTime, msDate;
2054 	struct tm ts;
2055 
2056 	msDate = archive_le16dec(p);
2057 	msTime = archive_le16dec(p+2);
2058 
2059 	memset(&ts, 0, sizeof(ts));
2060 	ts.tm_year = ((msDate >> 9) & 0x7f) + 80;   /* Years since 1900. */
2061 	ts.tm_mon = ((msDate >> 5) & 0x0f) - 1;     /* Month number.     */
2062 	ts.tm_mday = msDate & 0x1f;		    /* Day of month.     */
2063 	ts.tm_hour = (msTime >> 11) & 0x1f;
2064 	ts.tm_min = (msTime >> 5) & 0x3f;
2065 	ts.tm_sec = (msTime << 1) & 0x3e;
2066 	ts.tm_isdst = -1;
2067 	return (mktime(&ts));
2068 }
2069 
2070 /*****************************************************************
2071  *
2072  * LZX decompression code.
2073  *
2074  *****************************************************************/
2075 
2076 /*
2077  * Initialize LZX decoder.
2078  *
2079  * Returns ARCHIVE_OK if initialization was successful.
2080  * Returns ARCHIVE_FAILED if w_bits has unsupported value.
2081  * Returns ARCHIVE_FATAL if initialization failed; memory allocation
2082  * error occurred.
2083  */
2084 static int
lzx_decode_init(struct lzx_stream * strm,int w_bits)2085 lzx_decode_init(struct lzx_stream *strm, int w_bits)
2086 {
2087 	struct lzx_dec *ds;
2088 	int slot, w_size, w_slot;
2089 	int base, footer;
2090 	int base_inc[18];
2091 
2092 	if (strm->ds == NULL) {
2093 		strm->ds = calloc(1, sizeof(*strm->ds));
2094 		if (strm->ds == NULL)
2095 			return (ARCHIVE_FATAL);
2096 	}
2097 	ds = strm->ds;
2098 	ds->error = ARCHIVE_FAILED;
2099 
2100 	/* Allow bits from 15(32KBi) up to 21(2MBi) */
2101 	if (w_bits < SLOT_BASE || w_bits > SLOT_MAX)
2102 		return (ARCHIVE_FAILED);
2103 
2104 	ds->error = ARCHIVE_FATAL;
2105 
2106 	/*
2107 	 * Alloc window
2108 	 */
2109 	w_size = ds->w_size;
2110 	w_slot = slots[w_bits - SLOT_BASE];
2111 	ds->w_size = 1U << w_bits;
2112 	ds->w_mask = ds->w_size -1;
2113 	if (ds->w_buff == NULL || w_size != ds->w_size) {
2114 		free(ds->w_buff);
2115 		ds->w_buff = malloc(ds->w_size);
2116 		if (ds->w_buff == NULL)
2117 			return (ARCHIVE_FATAL);
2118 		free(ds->pos_tbl);
2119 		ds->pos_tbl = malloc(sizeof(ds->pos_tbl[0]) * w_slot);
2120 		if (ds->pos_tbl == NULL)
2121 			return (ARCHIVE_FATAL);
2122 		lzx_huffman_free(&(ds->mt));
2123 	}
2124 
2125 	for (footer = 0; footer < 18; footer++)
2126 		base_inc[footer] = 1 << footer;
2127 	base = footer = 0;
2128 	for (slot = 0; slot < w_slot; slot++) {
2129 		int n;
2130 		if (footer == 0)
2131 			base = slot;
2132 		else
2133 			base += base_inc[footer];
2134 		if (footer < 17) {
2135 			footer = -2;
2136 			for (n = base; n; n >>= 1)
2137 				footer++;
2138 			if (footer <= 0)
2139 				footer = 0;
2140 		}
2141 		ds->pos_tbl[slot].base = base;
2142 		ds->pos_tbl[slot].footer_bits = footer;
2143 	}
2144 
2145 	ds->w_pos = 0;
2146 	ds->state = 0;
2147 	ds->br.cache_buffer = 0;
2148 	ds->br.cache_avail = 0;
2149 	ds->r0 = ds->r1 = ds->r2 = 1;
2150 
2151 	/* Initialize aligned offset tree. */
2152 	if (lzx_huffman_init(&(ds->at), 8, 8) != ARCHIVE_OK)
2153 		return (ARCHIVE_FATAL);
2154 
2155 	/* Initialize pre-tree. */
2156 	if (lzx_huffman_init(&(ds->pt), 20, 10) != ARCHIVE_OK)
2157 		return (ARCHIVE_FATAL);
2158 
2159 	/* Initialize Main tree. */
2160 	if (lzx_huffman_init(&(ds->mt), 256+(w_slot<<3), 16)
2161 	    != ARCHIVE_OK)
2162 		return (ARCHIVE_FATAL);
2163 
2164 	/* Initialize Length tree. */
2165 	if (lzx_huffman_init(&(ds->lt), 249, 16) != ARCHIVE_OK)
2166 		return (ARCHIVE_FATAL);
2167 
2168 	ds->error = 0;
2169 
2170 	return (ARCHIVE_OK);
2171 }
2172 
2173 /*
2174  * Release LZX decoder.
2175  */
2176 static void
lzx_decode_free(struct lzx_stream * strm)2177 lzx_decode_free(struct lzx_stream *strm)
2178 {
2179 
2180 	if (strm->ds == NULL)
2181 		return;
2182 	free(strm->ds->w_buff);
2183 	free(strm->ds->pos_tbl);
2184 	lzx_huffman_free(&(strm->ds->at));
2185 	lzx_huffman_free(&(strm->ds->pt));
2186 	lzx_huffman_free(&(strm->ds->mt));
2187 	lzx_huffman_free(&(strm->ds->lt));
2188 	free(strm->ds);
2189 	strm->ds = NULL;
2190 }
2191 
2192 /*
2193  * E8 Call Translation reversal.
2194  */
2195 static void
lzx_translation(struct lzx_stream * strm,void * p,size_t size,uint32_t offset)2196 lzx_translation(struct lzx_stream *strm, void *p, size_t size, uint32_t offset)
2197 {
2198 	struct lzx_dec *ds = strm->ds;
2199 	unsigned char *b, *end;
2200 
2201 	if (!ds->translation || size <= 10)
2202 		return;
2203 	b = p;
2204 	end = b + size - 10;
2205 	while (b < end && (b = memchr(b, 0xE8, end - b)) != NULL) {
2206 		size_t i = b - (unsigned char *)p;
2207 		int32_t cp, displacement, value;
2208 
2209 		cp = (int32_t)(offset + (uint32_t)i);
2210 		value = archive_le32dec(&b[1]);
2211 		if (value >= -cp && value < (int32_t)ds->translation_size) {
2212 			if (value >= 0)
2213 				displacement = value - cp;
2214 			else
2215 				displacement = value + ds->translation_size;
2216 			archive_le32enc(&b[1], (uint32_t)displacement);
2217 		}
2218 		b += 5;
2219 	}
2220 }
2221 
2222 /*
2223  * Bit stream reader.
2224  */
2225 /* Check that the cache buffer has enough bits. */
2226 #define lzx_br_has(br, n)	((br)->cache_avail >= n)
2227 /* Get compressed data by bit. */
2228 #define lzx_br_bits(br, n)				\
2229 	(((uint32_t)((br)->cache_buffer >>		\
2230 		((br)->cache_avail - (n)))) & cache_masks[n])
2231 #define lzx_br_bits_forced(br, n)			\
2232 	(((uint32_t)((br)->cache_buffer <<		\
2233 		((n) - (br)->cache_avail))) & cache_masks[n])
2234 /* Read ahead to make sure the cache buffer has enough compressed data we
2235  * will use.
2236  *  True  : completed, there is enough data in the cache buffer.
2237  *  False : we met that strm->next_in is empty, we have to get following
2238  *          bytes. */
2239 #define lzx_br_read_ahead_0(strm, br, n)	\
2240 	(lzx_br_has((br), (n)) || lzx_br_fillup(strm, br))
2241 /*  True  : the cache buffer has some bits as much as we need.
2242  *  False : there are no enough bits in the cache buffer to be used,
2243  *          we have to get following bytes if we could. */
2244 #define lzx_br_read_ahead(strm, br, n)	\
2245 	(lzx_br_read_ahead_0((strm), (br), (n)) || lzx_br_has((br), (n)))
2246 
2247 /* Notify how many bits we consumed. */
2248 #define lzx_br_consume(br, n)	((br)->cache_avail -= (n))
2249 #define lzx_br_consume_unaligned_bits(br) ((br)->cache_avail &= ~0x0f)
2250 
2251 #define lzx_br_is_unaligned(br)	((br)->cache_avail & 0x0f)
2252 
2253 static const uint32_t cache_masks[] = {
2254 	0x00000000, 0x00000001, 0x00000003, 0x00000007,
2255 	0x0000000F, 0x0000001F, 0x0000003F, 0x0000007F,
2256 	0x000000FF, 0x000001FF, 0x000003FF, 0x000007FF,
2257 	0x00000FFF, 0x00001FFF, 0x00003FFF, 0x00007FFF,
2258 	0x0000FFFF, 0x0001FFFF, 0x0003FFFF, 0x0007FFFF,
2259 	0x000FFFFF, 0x001FFFFF, 0x003FFFFF, 0x007FFFFF,
2260 	0x00FFFFFF, 0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF,
2261 	0x0FFFFFFF, 0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF,
2262 	0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
2263 };
2264 
2265 /*
2266  * Shift away used bits in the cache data and fill it up with following bits.
2267  * Call this when cache buffer does not have enough bits you need.
2268  *
2269  * Returns 1 if the cache buffer is full.
2270  * Returns 0 if the cache buffer is not full; input buffer is empty.
2271  */
2272 static int
lzx_br_fillup(struct lzx_stream * strm,struct lzx_br * br)2273 lzx_br_fillup(struct lzx_stream *strm, struct lzx_br *br)
2274 {
2275 /*
2276  * x86 processor family can read misaligned data without an access error.
2277  */
2278 	int n = CACHE_BITS - br->cache_avail;
2279 
2280 	for (;;) {
2281 		switch (n >> 4) {
2282 		case 4:
2283 			if (strm->avail_in >= 8) {
2284 				br->cache_buffer =
2285 				    ((uint64_t)strm->next_in[1]) << 56 |
2286 				    ((uint64_t)strm->next_in[0]) << 48 |
2287 				    ((uint64_t)strm->next_in[3]) << 40 |
2288 				    ((uint64_t)strm->next_in[2]) << 32 |
2289 				    ((uint32_t)strm->next_in[5]) << 24 |
2290 				    ((uint32_t)strm->next_in[4]) << 16 |
2291 				    ((uint32_t)strm->next_in[7]) << 8 |
2292 				     (uint32_t)strm->next_in[6];
2293 				strm->next_in += 8;
2294 				strm->avail_in -= 8;
2295 				br->cache_avail += 8 * 8;
2296 				return (1);
2297 			}
2298 			break;
2299 		case 3:
2300 			if (strm->avail_in >= 6) {
2301 				br->cache_buffer =
2302 		 		   (br->cache_buffer << 48) |
2303 				    ((uint64_t)strm->next_in[1]) << 40 |
2304 				    ((uint64_t)strm->next_in[0]) << 32 |
2305 				    ((uint32_t)strm->next_in[3]) << 24 |
2306 				    ((uint32_t)strm->next_in[2]) << 16 |
2307 				    ((uint32_t)strm->next_in[5]) << 8 |
2308 				     (uint32_t)strm->next_in[4];
2309 				strm->next_in += 6;
2310 				strm->avail_in -= 6;
2311 				br->cache_avail += 6 * 8;
2312 				return (1);
2313 			}
2314 			break;
2315 		case 0:
2316 			/* We have enough compressed data in
2317 			 * the cache buffer.*/
2318 			return (1);
2319 		default:
2320 			break;
2321 		}
2322 		if (strm->avail_in < 2) {
2323 			/* There is not enough compressed data to
2324 			 * fill up the cache buffer. */
2325 			if (strm->avail_in == 1) {
2326 				br->odd = *strm->next_in++;
2327 				strm->avail_in--;
2328 				br->have_odd = 1;
2329 			}
2330 			return (0);
2331 		}
2332 		br->cache_buffer =
2333 		   (br->cache_buffer << 16) |
2334 		    archive_le16dec(strm->next_in);
2335 		strm->next_in += 2;
2336 		strm->avail_in -= 2;
2337 		br->cache_avail += 16;
2338 		n -= 16;
2339 	}
2340 }
2341 
2342 static void
lzx_br_fixup(struct lzx_stream * strm,struct lzx_br * br)2343 lzx_br_fixup(struct lzx_stream *strm, struct lzx_br *br)
2344 {
2345 	int n = CACHE_BITS - br->cache_avail;
2346 
2347 	if (br->have_odd && n >= 16 && strm->avail_in > 0) {
2348 		br->cache_buffer =
2349 		   (br->cache_buffer << 16) |
2350 		   ((uint16_t)(*strm->next_in)) << 8 | br->odd;
2351 		strm->next_in++;
2352 		strm->avail_in--;
2353 		br->cache_avail += 16;
2354 		br->have_odd = 0;
2355 	}
2356 }
2357 
2358 static void
lzx_cleanup_bitstream(struct lzx_stream * strm)2359 lzx_cleanup_bitstream(struct lzx_stream *strm)
2360 {
2361 	strm->ds->br.cache_avail = 0;
2362 	strm->ds->br.have_odd = 0;
2363 }
2364 
2365 /*
2366  * Decode LZX.
2367  *
2368  * 1. Returns ARCHIVE_OK if output buffer or input buffer are empty.
2369  *    Please set available buffer and call this function again.
2370  * 2. Returns ARCHIVE_EOF if decompression has been completed.
2371  * 3. Returns ARCHIVE_FAILED if an error occurred; compressed data
2372  *    is broken or you do not set 'last' flag properly.
2373  */
2374 #define ST_RD_TRANSLATION	0
2375 #define ST_RD_TRANSLATION_SIZE	1
2376 #define ST_RD_BLOCK_TYPE	2
2377 #define ST_RD_BLOCK_SIZE	3
2378 #define ST_RD_ALIGNMENT		4
2379 #define ST_RD_R0		5
2380 #define ST_RD_R1		6
2381 #define ST_RD_R2		7
2382 #define ST_COPY_UNCOMP1		8
2383 #define ST_COPY_UNCOMP2		9
2384 #define ST_RD_ALIGNED_OFFSET	10
2385 #define ST_RD_VERBATIM		11
2386 #define ST_RD_PRE_MAIN_TREE_256	12
2387 #define ST_MAIN_TREE_256	13
2388 #define ST_RD_PRE_MAIN_TREE_REM	14
2389 #define ST_MAIN_TREE_REM	15
2390 #define ST_RD_PRE_LENGTH_TREE	16
2391 #define ST_LENGTH_TREE		17
2392 #define ST_MAIN			18
2393 #define ST_LENGTH		19
2394 #define ST_OFFSET		20
2395 #define ST_REAL_POS		21
2396 #define ST_COPY			22
2397 
2398 static int
lzx_decode(struct lzx_stream * strm,int last)2399 lzx_decode(struct lzx_stream *strm, int last)
2400 {
2401 	struct lzx_dec *ds = strm->ds;
2402 	int64_t avail_in;
2403 	int r;
2404 
2405 	if (ds->error)
2406 		return (ds->error);
2407 
2408 	avail_in = strm->avail_in;
2409 	lzx_br_fixup(strm, &(ds->br));
2410 	do {
2411 		if (ds->state < ST_MAIN)
2412 			r = lzx_read_blocks(strm, last);
2413 		else {
2414 			int64_t bytes_written = strm->avail_out;
2415 			r = lzx_decode_blocks(strm, last);
2416 			bytes_written -= strm->avail_out;
2417 			strm->next_out += bytes_written;
2418 			strm->total_out += bytes_written;
2419 		}
2420 	} while (r == 100);
2421 	strm->total_in += avail_in - strm->avail_in;
2422 	return (r);
2423 }
2424 
2425 static int
lzx_read_blocks(struct lzx_stream * strm,int last)2426 lzx_read_blocks(struct lzx_stream *strm, int last)
2427 {
2428 	struct lzx_dec *ds = strm->ds;
2429 	struct lzx_br *br = &(ds->br);
2430 	int i, r;
2431 
2432 	for (;;) {
2433 		switch (ds->state) {
2434 		case ST_RD_TRANSLATION:
2435 			if (!lzx_br_read_ahead(strm, br, 1)) {
2436 				ds->state = ST_RD_TRANSLATION;
2437 				if (last)
2438 					goto failed;
2439 				return (ARCHIVE_OK);
2440 			}
2441 			ds->translation = lzx_br_bits(br, 1);
2442 			lzx_br_consume(br, 1);
2443 			/* FALL THROUGH */
2444 		case ST_RD_TRANSLATION_SIZE:
2445 			if (ds->translation) {
2446 				if (!lzx_br_read_ahead(strm, br, 32)) {
2447 					ds->state = ST_RD_TRANSLATION_SIZE;
2448 					if (last)
2449 						goto failed;
2450 					return (ARCHIVE_OK);
2451 				}
2452 				ds->translation_size = lzx_br_bits(br, 16);
2453 				lzx_br_consume(br, 16);
2454 				ds->translation_size <<= 16;
2455 				ds->translation_size |= lzx_br_bits(br, 16);
2456 				lzx_br_consume(br, 16);
2457 			}
2458 			/* FALL THROUGH */
2459 		case ST_RD_BLOCK_TYPE:
2460 			if (!lzx_br_read_ahead(strm, br, 3)) {
2461 				ds->state = ST_RD_BLOCK_TYPE;
2462 				if (last)
2463 					goto failed;
2464 				return (ARCHIVE_OK);
2465 			}
2466 			ds->block_type = lzx_br_bits(br, 3);
2467 			lzx_br_consume(br, 3);
2468 			/* Check a block type. */
2469 			switch (ds->block_type) {
2470 			case VERBATIM_BLOCK:
2471 			case ALIGNED_OFFSET_BLOCK:
2472 			case UNCOMPRESSED_BLOCK:
2473 				break;
2474 			default:
2475 				goto failed;/* Invalid */
2476 			}
2477 			/* FALL THROUGH */
2478 		case ST_RD_BLOCK_SIZE:
2479 			if (!lzx_br_read_ahead(strm, br, 24)) {
2480 				ds->state = ST_RD_BLOCK_SIZE;
2481 				if (last)
2482 					goto failed;
2483 				return (ARCHIVE_OK);
2484 			}
2485 			ds->block_size = lzx_br_bits(br, 8);
2486 			lzx_br_consume(br, 8);
2487 			ds->block_size <<= 16;
2488 			ds->block_size |= lzx_br_bits(br, 16);
2489 			lzx_br_consume(br, 16);
2490 			if (ds->block_size == 0)
2491 				goto failed;
2492 			ds->block_bytes_avail = ds->block_size;
2493 			if (ds->block_type != UNCOMPRESSED_BLOCK) {
2494 				if (ds->block_type == VERBATIM_BLOCK)
2495 					ds->state = ST_RD_VERBATIM;
2496 				else
2497 					ds->state = ST_RD_ALIGNED_OFFSET;
2498 				break;
2499 			}
2500 			/* FALL THROUGH */
2501 		case ST_RD_ALIGNMENT:
2502 			/*
2503 			 * Handle an Uncompressed Block.
2504 			 */
2505 			/* Skip padding to align following field on
2506 			 * 16-bit boundary. */
2507 			if (lzx_br_is_unaligned(br))
2508 				lzx_br_consume_unaligned_bits(br);
2509 			else {
2510 				if (lzx_br_read_ahead(strm, br, 16))
2511 					lzx_br_consume(br, 16);
2512 				else {
2513 					ds->state = ST_RD_ALIGNMENT;
2514 					if (last)
2515 						goto failed;
2516 					return (ARCHIVE_OK);
2517 				}
2518 			}
2519 			/* Preparation to read repeated offsets R0,R1 and R2. */
2520 			ds->rbytes_avail = 0;
2521 			ds->state = ST_RD_R0;
2522 			/* FALL THROUGH */
2523 		case ST_RD_R0:
2524 		case ST_RD_R1:
2525 		case ST_RD_R2:
2526 			do {
2527 				uint16_t u16;
2528 				/* Drain bits in the cache buffer of
2529 				 * bit-stream. */
2530 				if (lzx_br_has(br, 32)) {
2531 					u16 = lzx_br_bits(br, 16);
2532 					lzx_br_consume(br, 16);
2533 					archive_le16enc(ds->rbytes, u16);
2534 					u16 = lzx_br_bits(br, 16);
2535 					lzx_br_consume(br, 16);
2536 					archive_le16enc(ds->rbytes+2, u16);
2537 					ds->rbytes_avail = 4;
2538 				} else if (lzx_br_has(br, 16)) {
2539 					u16 = lzx_br_bits(br, 16);
2540 					lzx_br_consume(br, 16);
2541 					archive_le16enc(ds->rbytes, u16);
2542 					ds->rbytes_avail = 2;
2543 				}
2544 				if (ds->rbytes_avail < 4 && ds->br.have_odd) {
2545 					ds->rbytes[ds->rbytes_avail++] =
2546 					    ds->br.odd;
2547 					ds->br.have_odd = 0;
2548 				}
2549 				while (ds->rbytes_avail < 4) {
2550 					if (strm->avail_in <= 0) {
2551 						if (last)
2552 							goto failed;
2553 						return (ARCHIVE_OK);
2554 					}
2555 					ds->rbytes[ds->rbytes_avail++] =
2556 					    *strm->next_in++;
2557 					strm->avail_in--;
2558 				}
2559 				ds->rbytes_avail = 0;
2560 				if (ds->state == ST_RD_R0) {
2561 					ds->r0 = archive_le32dec(ds->rbytes);
2562 					if (ds->r0 < 0)
2563 						goto failed;
2564 					ds->state = ST_RD_R1;
2565 				} else if (ds->state == ST_RD_R1) {
2566 					ds->r1 = archive_le32dec(ds->rbytes);
2567 					if (ds->r1 < 0)
2568 						goto failed;
2569 					ds->state = ST_RD_R2;
2570 				} else if (ds->state == ST_RD_R2) {
2571 					ds->r2 = archive_le32dec(ds->rbytes);
2572 					if (ds->r2 < 0)
2573 						goto failed;
2574 					/* We've gotten all repeated offsets. */
2575 					ds->state = ST_COPY_UNCOMP1;
2576 				}
2577 			} while (ds->state != ST_COPY_UNCOMP1);
2578 			/* FALL THROUGH */
2579 		case ST_COPY_UNCOMP1:
2580 			/*
2581 			 * Copy bytes form next_in to next_out directly.
2582 			 */
2583 			while (ds->block_bytes_avail) {
2584 				int l;
2585 
2586 				if (strm->avail_out <= 0)
2587 					/* Output buffer is empty. */
2588 					return (ARCHIVE_OK);
2589 				if (strm->avail_in <= 0) {
2590 					/* Input buffer is empty. */
2591 					if (last)
2592 						goto failed;
2593 					return (ARCHIVE_OK);
2594 				}
2595 				l = (int)ds->block_bytes_avail;
2596 				if (l > ds->w_size - ds->w_pos)
2597 					l = ds->w_size - ds->w_pos;
2598 				if (l > strm->avail_out)
2599 					l = (int)strm->avail_out;
2600 				if (l > strm->avail_in)
2601 					l = (int)strm->avail_in;
2602 				memcpy(strm->next_out, strm->next_in, l);
2603 				memcpy(&(ds->w_buff[ds->w_pos]),
2604 				    strm->next_in, l);
2605 				strm->next_in += l;
2606 				strm->avail_in -= l;
2607 				strm->next_out += l;
2608 				strm->avail_out -= l;
2609 				strm->total_out += l;
2610 				ds->w_pos = (ds->w_pos + l) & ds->w_mask;
2611 				ds->block_bytes_avail -= l;
2612 			}
2613 			/* FALL THROUGH */
2614 		case ST_COPY_UNCOMP2:
2615 			/* Re-align; skip padding byte. */
2616 			if (ds->block_size & 1) {
2617 				if (strm->avail_in <= 0) {
2618 					/* Input buffer is empty. */
2619 					ds->state = ST_COPY_UNCOMP2;
2620 					if (last)
2621 						goto failed;
2622 					return (ARCHIVE_OK);
2623 				}
2624 				strm->next_in++;
2625 				strm->avail_in --;
2626 			}
2627 			/* This block ended. */
2628 			ds->state = ST_RD_BLOCK_TYPE;
2629 			return (ARCHIVE_EOF);
2630 			/********************/
2631 		case ST_RD_ALIGNED_OFFSET:
2632 			/*
2633 			 * Read Aligned offset tree.
2634 			 */
2635 			if (!lzx_br_read_ahead(strm, br, 3 * ds->at.len_size)) {
2636 				ds->state = ST_RD_ALIGNED_OFFSET;
2637 				if (last)
2638 					goto failed;
2639 				return (ARCHIVE_OK);
2640 			}
2641 			memset(ds->at.freq, 0, sizeof(ds->at.freq));
2642 			for (i = 0; i < ds->at.len_size; i++) {
2643 				ds->at.bitlen[i] = lzx_br_bits(br, 3);
2644 				ds->at.freq[ds->at.bitlen[i]]++;
2645 				lzx_br_consume(br, 3);
2646 			}
2647 			if (!lzx_make_huffman_table(&ds->at))
2648 				goto failed;
2649 			/* FALL THROUGH */
2650 		case ST_RD_VERBATIM:
2651 			ds->loop = 0;
2652 			/* FALL THROUGH */
2653 		case ST_RD_PRE_MAIN_TREE_256:
2654 			/*
2655 			 * Read Pre-tree for first 256 elements of main tree.
2656 			 */
2657 			if (!lzx_read_pre_tree(strm)) {
2658 				ds->state = ST_RD_PRE_MAIN_TREE_256;
2659 				if (last)
2660 					goto failed;
2661 				return (ARCHIVE_OK);
2662 			}
2663 			if (!lzx_make_huffman_table(&ds->pt))
2664 				goto failed;
2665 			ds->loop = 0;
2666 			/* FALL THROUGH */
2667 		case ST_MAIN_TREE_256:
2668 			/*
2669 			 * Get path lengths of first 256 elements of main tree.
2670 			 */
2671 			r = lzx_read_bitlen(strm, &ds->mt, 256);
2672 			if (r < 0)
2673 				goto failed;
2674 			else if (!r) {
2675 				ds->state = ST_MAIN_TREE_256;
2676 				if (last)
2677 					goto failed;
2678 				return (ARCHIVE_OK);
2679 			}
2680 			ds->loop = 0;
2681 			/* FALL THROUGH */
2682 		case ST_RD_PRE_MAIN_TREE_REM:
2683 			/*
2684 			 * Read Pre-tree for remaining elements of main tree.
2685 			 */
2686 			if (!lzx_read_pre_tree(strm)) {
2687 				ds->state = ST_RD_PRE_MAIN_TREE_REM;
2688 				if (last)
2689 					goto failed;
2690 				return (ARCHIVE_OK);
2691 			}
2692 			if (!lzx_make_huffman_table(&ds->pt))
2693 				goto failed;
2694 			ds->loop = 256;
2695 			/* FALL THROUGH */
2696 		case ST_MAIN_TREE_REM:
2697 			/*
2698 			 * Get path lengths of remaining elements of main tree.
2699 			 */
2700 			r = lzx_read_bitlen(strm, &ds->mt, -1);
2701 			if (r < 0)
2702 				goto failed;
2703 			else if (!r) {
2704 				ds->state = ST_MAIN_TREE_REM;
2705 				if (last)
2706 					goto failed;
2707 				return (ARCHIVE_OK);
2708 			}
2709 			if (!lzx_make_huffman_table(&ds->mt))
2710 				goto failed;
2711 			ds->loop = 0;
2712 			/* FALL THROUGH */
2713 		case ST_RD_PRE_LENGTH_TREE:
2714 			/*
2715 			 * Read Pre-tree for remaining elements of main tree.
2716 			 */
2717 			if (!lzx_read_pre_tree(strm)) {
2718 				ds->state = ST_RD_PRE_LENGTH_TREE;
2719 				if (last)
2720 					goto failed;
2721 				return (ARCHIVE_OK);
2722 			}
2723 			if (!lzx_make_huffman_table(&ds->pt))
2724 				goto failed;
2725 			ds->loop = 0;
2726 			/* FALL THROUGH */
2727 		case ST_LENGTH_TREE:
2728 			/*
2729 			 * Get path lengths of remaining elements of main tree.
2730 			 */
2731 			r = lzx_read_bitlen(strm, &ds->lt, -1);
2732 			if (r < 0)
2733 				goto failed;
2734 			else if (!r) {
2735 				ds->state = ST_LENGTH_TREE;
2736 				if (last)
2737 					goto failed;
2738 				return (ARCHIVE_OK);
2739 			}
2740 			if (!lzx_make_huffman_table(&ds->lt))
2741 				goto failed;
2742 			ds->state = ST_MAIN;
2743 			return (100);
2744 		}
2745 	}
2746 failed:
2747 	return (ds->error = ARCHIVE_FAILED);
2748 }
2749 
2750 static int
lzx_decode_blocks(struct lzx_stream * strm,int last)2751 lzx_decode_blocks(struct lzx_stream *strm, int last)
2752 {
2753 	struct lzx_dec *ds = strm->ds;
2754 	struct lzx_br bre = ds->br;
2755 	struct huffman *at = &(ds->at), *lt = &(ds->lt), *mt = &(ds->mt);
2756 	const struct lzx_pos_tbl *pos_tbl = ds->pos_tbl;
2757 	unsigned char *noutp = strm->next_out;
2758 	unsigned char *endp = noutp + strm->avail_out;
2759 	unsigned char *w_buff = ds->w_buff;
2760 	unsigned char *at_bitlen = at->bitlen;
2761 	unsigned char *lt_bitlen = lt->bitlen;
2762 	unsigned char *mt_bitlen = mt->bitlen;
2763 	size_t block_bytes_avail = ds->block_bytes_avail;
2764 	int at_max_bits = at->max_bits;
2765 	int lt_max_bits = lt->max_bits;
2766 	int mt_max_bits = mt->max_bits;
2767 	int c, copy_len = ds->copy_len, copy_pos = ds->copy_pos;
2768 	int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size;
2769 	int length_header = ds->length_header;
2770 	int offset_bits = ds->offset_bits;
2771 	int position_slot = ds->position_slot;
2772 	int r0 = ds->r0, r1 = ds->r1, r2 = ds->r2;
2773 	int state = ds->state;
2774 	char block_type = ds->block_type;
2775 
2776 	for (;;) {
2777 		switch (state) {
2778 		case ST_MAIN:
2779 			for (;;) {
2780 				if (block_bytes_avail == 0) {
2781 					/* This block ended. */
2782 					ds->state = ST_RD_BLOCK_TYPE;
2783 					ds->br = bre;
2784 					ds->block_bytes_avail =
2785 					    block_bytes_avail;
2786 					ds->copy_len = copy_len;
2787 					ds->copy_pos = copy_pos;
2788 					ds->length_header = length_header;
2789 					ds->position_slot = position_slot;
2790 					ds->r0 = r0; ds->r1 = r1; ds->r2 = r2;
2791 					ds->w_pos = w_pos;
2792 					strm->avail_out = endp - noutp;
2793 					return (ARCHIVE_EOF);
2794 				}
2795 				if (noutp >= endp)
2796 					/* Output buffer is empty. */
2797 					goto next_data;
2798 
2799 				if (!lzx_br_read_ahead(strm, &bre,
2800 				    mt_max_bits)) {
2801 					if (!last)
2802 						goto next_data;
2803 					/* Remaining bits are less than
2804 					 * maximum bits(mt.max_bits) but maybe
2805 					 * it still remains as much as we need,
2806 					 * so we should try to use it with
2807 					 * dummy bits. */
2808 					c = lzx_decode_huffman(mt,
2809 					      lzx_br_bits_forced(
2810 				 	        &bre, mt_max_bits));
2811 					lzx_br_consume(&bre, mt_bitlen[c]);
2812 					if (!lzx_br_has(&bre, 0))
2813 						goto failed;/* Over read. */
2814 				} else {
2815 					c = lzx_decode_huffman(mt,
2816 					      lzx_br_bits(&bre, mt_max_bits));
2817 					lzx_br_consume(&bre, mt_bitlen[c]);
2818 				}
2819 				if (c > UCHAR_MAX)
2820 					break;
2821 				/*
2822 				 * 'c' is exactly literal code.
2823 				 */
2824 				/* Save a decoded code to reference it
2825 				 * afterward. */
2826 				w_buff[w_pos] = c;
2827 				w_pos = (w_pos + 1) & w_mask;
2828 				/* Store the decoded code to output buffer. */
2829 				*noutp++ = c;
2830 				block_bytes_avail--;
2831 			}
2832 			/*
2833 			 * Get a match code, its length and offset.
2834 			 */
2835 			c -= UCHAR_MAX + 1;
2836 			length_header = c & 7;
2837 			position_slot = c >> 3;
2838 			/* FALL THROUGH */
2839 		case ST_LENGTH:
2840 			/*
2841 			 * Get a length.
2842 			 */
2843 			if (length_header == 7) {
2844 				if (!lzx_br_read_ahead(strm, &bre,
2845 				    lt_max_bits)) {
2846 					if (!last) {
2847 						state = ST_LENGTH;
2848 						goto next_data;
2849 					}
2850 					c = lzx_decode_huffman(lt,
2851 					      lzx_br_bits_forced(
2852 					        &bre, lt_max_bits));
2853 					lzx_br_consume(&bre, lt_bitlen[c]);
2854 					if (!lzx_br_has(&bre, 0))
2855 						goto failed;/* Over read. */
2856 				} else {
2857 					c = lzx_decode_huffman(lt,
2858 					    lzx_br_bits(&bre, lt_max_bits));
2859 					lzx_br_consume(&bre, lt_bitlen[c]);
2860 				}
2861 				copy_len = c + 7 + 2;
2862 			} else
2863 				copy_len = length_header + 2;
2864 			if ((size_t)copy_len > block_bytes_avail)
2865 				goto failed;
2866 			/*
2867 			 * Get an offset.
2868 			 */
2869 			switch (position_slot) {
2870 			case 0: /* Use repeated offset 0. */
2871 				copy_pos = r0;
2872 				state = ST_REAL_POS;
2873 				continue;
2874 			case 1: /* Use repeated offset 1. */
2875 				copy_pos = r1;
2876 				/* Swap repeated offset. */
2877 				r1 = r0;
2878 				r0 = copy_pos;
2879 				state = ST_REAL_POS;
2880 				continue;
2881 			case 2: /* Use repeated offset 2. */
2882 				copy_pos = r2;
2883 				/* Swap repeated offset. */
2884 				r2 = r0;
2885 				r0 = copy_pos;
2886 				state = ST_REAL_POS;
2887 				continue;
2888 			default:
2889 				offset_bits =
2890 				    pos_tbl[position_slot].footer_bits;
2891 				break;
2892 			}
2893 			/* FALL THROUGH */
2894 		case ST_OFFSET:
2895 			/*
2896 			 * Get the offset, which is a distance from
2897 			 * current window position.
2898 			 */
2899 			if (block_type == ALIGNED_OFFSET_BLOCK &&
2900 			    offset_bits >= 3) {
2901 				int offbits = offset_bits - 3;
2902 
2903 				if (!lzx_br_read_ahead(strm, &bre, offbits)) {
2904 					state = ST_OFFSET;
2905 					if (last)
2906 						goto failed;
2907 					goto next_data;
2908 				}
2909 				copy_pos = lzx_br_bits(&bre, offbits) << 3;
2910 
2911 				/* Get an aligned number. */
2912 				if (!lzx_br_read_ahead(strm, &bre,
2913 				    offbits + at_max_bits)) {
2914 					if (!last) {
2915 						state = ST_OFFSET;
2916 						goto next_data;
2917 					}
2918 					lzx_br_consume(&bre, offbits);
2919 					c = lzx_decode_huffman(at,
2920 					      lzx_br_bits_forced(&bre,
2921 					        at_max_bits));
2922 					lzx_br_consume(&bre, at_bitlen[c]);
2923 					if (!lzx_br_has(&bre, 0))
2924 						goto failed;/* Over read. */
2925 				} else {
2926 					lzx_br_consume(&bre, offbits);
2927 					c = lzx_decode_huffman(at,
2928 					      lzx_br_bits(&bre, at_max_bits));
2929 					lzx_br_consume(&bre, at_bitlen[c]);
2930 				}
2931 				/* Add an aligned number. */
2932 				copy_pos += c;
2933 			} else {
2934 				if (!lzx_br_read_ahead(strm, &bre,
2935 				    offset_bits)) {
2936 					state = ST_OFFSET;
2937 					if (last)
2938 						goto failed;
2939 					goto next_data;
2940 				}
2941 				copy_pos = lzx_br_bits(&bre, offset_bits);
2942 				lzx_br_consume(&bre, offset_bits);
2943 			}
2944 			copy_pos += pos_tbl[position_slot].base -2;
2945 
2946 			/* Update repeated offset LRU queue. */
2947 			r2 = r1;
2948 			r1 = r0;
2949 			r0 = copy_pos;
2950 			/* FALL THROUGH */
2951 		case ST_REAL_POS:
2952 			/*
2953 			 * Compute a real position in window.
2954 			 */
2955 			copy_pos = (w_pos - copy_pos) & w_mask;
2956 			/* FALL THROUGH */
2957 		case ST_COPY:
2958 			/*
2959 			 * Copy several bytes as extracted data from the window
2960 			 * into the output buffer.
2961 			 */
2962 			for (;;) {
2963 				const unsigned char *s;
2964 				int l;
2965 
2966 				l = copy_len;
2967 				if (copy_pos > w_pos) {
2968 					if (l > w_size - copy_pos)
2969 						l = w_size - copy_pos;
2970 				} else {
2971 					if (l > w_size - w_pos)
2972 						l = w_size - w_pos;
2973 				}
2974 				if (noutp + l >= endp)
2975 					l = (int)(endp - noutp);
2976 				s = w_buff + copy_pos;
2977 				if (l >= 8 && ((copy_pos + l < w_pos)
2978 				  || (w_pos + l < copy_pos))) {
2979 					memcpy(w_buff + w_pos, s, l);
2980 					memcpy(noutp, s, l);
2981 				} else {
2982 					unsigned char *d;
2983 					int li;
2984 
2985 					d = w_buff + w_pos;
2986 					for (li = 0; li < l; li++)
2987 						noutp[li] = d[li] = s[li];
2988 				}
2989 				noutp += l;
2990 				copy_pos = (copy_pos + l) & w_mask;
2991 				w_pos = (w_pos + l) & w_mask;
2992 				block_bytes_avail -= l;
2993 				if (copy_len <= l)
2994 					/* A copy of current pattern ended. */
2995 					break;
2996 				copy_len -= l;
2997 				if (noutp >= endp) {
2998 					/* Output buffer is empty. */
2999 					state = ST_COPY;
3000 					goto next_data;
3001 				}
3002 			}
3003 			state = ST_MAIN;
3004 			break;
3005 		}
3006 	}
3007 failed:
3008 	return (ds->error = ARCHIVE_FAILED);
3009 next_data:
3010 	ds->br = bre;
3011 	ds->block_bytes_avail = block_bytes_avail;
3012 	ds->copy_len = copy_len;
3013 	ds->copy_pos = copy_pos;
3014 	ds->length_header = length_header;
3015 	ds->offset_bits = offset_bits;
3016 	ds->position_slot = position_slot;
3017 	ds->r0 = r0; ds->r1 = r1; ds->r2 = r2;
3018 	ds->state = state;
3019 	ds->w_pos = w_pos;
3020 	strm->avail_out = endp - noutp;
3021 	return (ARCHIVE_OK);
3022 }
3023 
3024 static int
lzx_read_pre_tree(struct lzx_stream * strm)3025 lzx_read_pre_tree(struct lzx_stream *strm)
3026 {
3027 	struct lzx_dec *ds = strm->ds;
3028 	struct lzx_br *br = &(ds->br);
3029 	int i;
3030 
3031 	if (ds->loop == 0)
3032 		memset(ds->pt.freq, 0, sizeof(ds->pt.freq));
3033 	for (i = ds->loop; i < ds->pt.len_size; i++) {
3034 		if (!lzx_br_read_ahead(strm, br, 4)) {
3035 			ds->loop = i;
3036 			return (0);
3037 		}
3038 		ds->pt.bitlen[i] = lzx_br_bits(br, 4);
3039 		ds->pt.freq[ds->pt.bitlen[i]]++;
3040 		lzx_br_consume(br, 4);
3041 	}
3042 	ds->loop = i;
3043 	return (1);
3044 }
3045 
3046 /*
3047  * Read a bunch of bit-lengths from pre-tree.
3048  */
3049 static int
lzx_read_bitlen(struct lzx_stream * strm,struct huffman * d,int end)3050 lzx_read_bitlen(struct lzx_stream *strm, struct huffman *d, int end)
3051 {
3052 	struct lzx_dec *ds = strm->ds;
3053 	struct lzx_br *br = &(ds->br);
3054 	int c, i, j, ret, same;
3055 	unsigned rbits;
3056 
3057 	i = ds->loop;
3058 	if (i == 0)
3059 		memset(d->freq, 0, sizeof(d->freq));
3060 	ret = 0;
3061 	if (end < 0)
3062 		end = d->len_size;
3063 	while (i < end) {
3064 		ds->loop = i;
3065 		if (!lzx_br_read_ahead(strm, br, ds->pt.max_bits))
3066 			goto getdata;
3067 		rbits = lzx_br_bits(br, ds->pt.max_bits);
3068 		c = lzx_decode_huffman(&(ds->pt), rbits);
3069 		switch (c) {
3070 		case 17:/* several zero lengths, from 4 to 19. */
3071 			if (!lzx_br_read_ahead(strm, br, ds->pt.bitlen[c]+4))
3072 				goto getdata;
3073 			lzx_br_consume(br, ds->pt.bitlen[c]);
3074 			same = lzx_br_bits(br, 4) + 4;
3075 			if (i + same > end)
3076 				return (-1);/* Invalid */
3077 			lzx_br_consume(br, 4);
3078 			for (j = 0; j < same; j++)
3079 				d->bitlen[i++] = 0;
3080 			break;
3081 		case 18:/* many zero lengths, from 20 to 51. */
3082 			if (!lzx_br_read_ahead(strm, br, ds->pt.bitlen[c]+5))
3083 				goto getdata;
3084 			lzx_br_consume(br, ds->pt.bitlen[c]);
3085 			same = lzx_br_bits(br, 5) + 20;
3086 			if (i + same > end)
3087 				return (-1);/* Invalid */
3088 			lzx_br_consume(br, 5);
3089 			memset(d->bitlen + i, 0, same);
3090 			i += same;
3091 			break;
3092 		case 19:/* a few same lengths. */
3093 			if (!lzx_br_read_ahead(strm, br,
3094 			    ds->pt.bitlen[c]+1+ds->pt.max_bits))
3095 				goto getdata;
3096 			lzx_br_consume(br, ds->pt.bitlen[c]);
3097 			same = lzx_br_bits(br, 1) + 4;
3098 			if (i + same > end)
3099 				return (-1);
3100 			lzx_br_consume(br, 1);
3101 			rbits = lzx_br_bits(br, ds->pt.max_bits);
3102 			c = lzx_decode_huffman(&(ds->pt), rbits);
3103 			lzx_br_consume(br, ds->pt.bitlen[c]);
3104 			c = (d->bitlen[i] - c + 17) % 17;
3105 			if (c < 0)
3106 				return (-1);/* Invalid */
3107 			for (j = 0; j < same; j++)
3108 				d->bitlen[i++] = c;
3109 			d->freq[c] += same;
3110 			break;
3111 		default:
3112 			lzx_br_consume(br, ds->pt.bitlen[c]);
3113 			c = (d->bitlen[i] - c + 17) % 17;
3114 			if (c < 0)
3115 				return (-1);/* Invalid */
3116 			d->freq[c]++;
3117 			d->bitlen[i++] = c;
3118 			break;
3119 		}
3120 	}
3121 	ret = 1;
3122 getdata:
3123 	ds->loop = i;
3124 	return (ret);
3125 }
3126 
3127 static int
lzx_huffman_init(struct huffman * hf,size_t len_size,int tbl_bits)3128 lzx_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits)
3129 {
3130 	int bits;
3131 
3132 	if (hf->bitlen == NULL || hf->len_size != (int)len_size) {
3133 		free(hf->bitlen);
3134 		hf->bitlen = calloc(len_size,  sizeof(hf->bitlen[0]));
3135 		if (hf->bitlen == NULL)
3136 			return (ARCHIVE_FATAL);
3137 		hf->len_size = (int)len_size;
3138 	} else
3139 		memset(hf->bitlen, 0, len_size *  sizeof(hf->bitlen[0]));
3140 	if (hf->tbl == NULL) {
3141 		if (tbl_bits < HTBL_BITS)
3142 			bits = tbl_bits;
3143 		else
3144 			bits = HTBL_BITS;
3145 		hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0]));
3146 		if (hf->tbl == NULL)
3147 			return (ARCHIVE_FATAL);
3148 		hf->tbl_bits = tbl_bits;
3149 	}
3150 	if (hf->tree == NULL && tbl_bits > HTBL_BITS) {
3151 		hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4);
3152 		hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0]));
3153 		if (hf->tree == NULL)
3154 			return (ARCHIVE_FATAL);
3155 	}
3156 	return (ARCHIVE_OK);
3157 }
3158 
3159 static void
lzx_huffman_free(struct huffman * hf)3160 lzx_huffman_free(struct huffman *hf)
3161 {
3162 	free(hf->bitlen);
3163 	free(hf->tbl);
3164 	free(hf->tree);
3165 }
3166 
3167 /*
3168  * Make a huffman coding table.
3169  */
3170 static int
lzx_make_huffman_table(struct huffman * hf)3171 lzx_make_huffman_table(struct huffman *hf)
3172 {
3173 	uint16_t *tbl;
3174 	const unsigned char *bitlen;
3175 	int bitptn[17], weight[17];
3176 	int i, maxbits = 0, ptn, tbl_size, w;
3177 	int diffbits, len_avail;
3178 
3179 	/*
3180 	 * Initialize bit patterns.
3181 	 */
3182 	ptn = 0;
3183 	for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) {
3184 		bitptn[i] = ptn;
3185 		weight[i] = w;
3186 		if (hf->freq[i]) {
3187 			ptn += hf->freq[i] * w;
3188 			maxbits = i;
3189 		}
3190 	}
3191 	if ((ptn & 0xffff) != 0 || maxbits > hf->tbl_bits)
3192 		return (0);/* Invalid */
3193 
3194 	hf->max_bits = maxbits;
3195 
3196 	/*
3197 	 * Cut out extra bits which we won't house in the table.
3198 	 * This preparation reduces the same calculation in the for-loop
3199 	 * making the table.
3200 	 */
3201 	if (maxbits < 16) {
3202 		int ebits = 16 - maxbits;
3203 		for (i = 1; i <= maxbits; i++) {
3204 			bitptn[i] >>= ebits;
3205 			weight[i] >>= ebits;
3206 		}
3207 	}
3208 	if (maxbits > HTBL_BITS) {
3209 		int htbl_max;
3210 		uint16_t *p;
3211 
3212 		diffbits = maxbits - HTBL_BITS;
3213 		for (i = 1; i <= HTBL_BITS; i++) {
3214 			bitptn[i] >>= diffbits;
3215 			weight[i] >>= diffbits;
3216 		}
3217 		htbl_max = bitptn[HTBL_BITS] +
3218 		    weight[HTBL_BITS] * hf->freq[HTBL_BITS];
3219 		p = &(hf->tbl[htbl_max]);
3220 		while (p < &hf->tbl[1U<<HTBL_BITS])
3221 			*p++ = 0;
3222 	} else
3223 		diffbits = 0;
3224 	hf->shift_bits = diffbits;
3225 
3226 	/*
3227 	 * Make the table.
3228 	 */
3229 	tbl_size = 1 << HTBL_BITS;
3230 	tbl = hf->tbl;
3231 	bitlen = hf->bitlen;
3232 	len_avail = hf->len_size;
3233 	hf->tree_used = 0;
3234 	for (i = 0; i < len_avail; i++) {
3235 		uint16_t *p;
3236 		int len, cnt;
3237 		uint16_t bit;
3238 		int extlen;
3239 		struct htree_t *ht;
3240 
3241 		if (bitlen[i] == 0)
3242 			continue;
3243 		/* Get a bit pattern */
3244 		len = bitlen[i];
3245 		ptn = bitptn[len];
3246 		cnt = weight[len];
3247 		if (len <= HTBL_BITS) {
3248 			/* Calculate next bit pattern */
3249 			if ((bitptn[len] = ptn + cnt) > tbl_size)
3250 				return (0);/* Invalid */
3251 			/* Update the table */
3252 			p = &(tbl[ptn]);
3253 			while (--cnt >= 0)
3254 				p[cnt] = (uint16_t)i;
3255 			continue;
3256 		}
3257 
3258 		/*
3259 		 * A bit length is too big to be housed to a direct table,
3260 		 * so we use a tree model for its extra bits.
3261 		 */
3262 		bitptn[len] = ptn + cnt;
3263 		bit = 1U << (diffbits -1);
3264 		extlen = len - HTBL_BITS;
3265 
3266 		p = &(tbl[ptn >> diffbits]);
3267 		if (*p == 0) {
3268 			*p = len_avail + hf->tree_used;
3269 			ht = &(hf->tree[hf->tree_used++]);
3270 			if (hf->tree_used > hf->tree_avail)
3271 				return (0);/* Invalid */
3272 			ht->left = 0;
3273 			ht->right = 0;
3274 		} else {
3275 			if (*p < len_avail ||
3276 			    *p >= (len_avail + hf->tree_used))
3277 				return (0);/* Invalid */
3278 			ht = &(hf->tree[*p - len_avail]);
3279 		}
3280 		while (--extlen > 0) {
3281 			if (ptn & bit) {
3282 				if (ht->left < len_avail) {
3283 					ht->left = len_avail + hf->tree_used;
3284 					ht = &(hf->tree[hf->tree_used++]);
3285 					if (hf->tree_used > hf->tree_avail)
3286 						return (0);/* Invalid */
3287 					ht->left = 0;
3288 					ht->right = 0;
3289 				} else {
3290 					ht = &(hf->tree[ht->left - len_avail]);
3291 				}
3292 			} else {
3293 				if (ht->right < len_avail) {
3294 					ht->right = len_avail + hf->tree_used;
3295 					ht = &(hf->tree[hf->tree_used++]);
3296 					if (hf->tree_used > hf->tree_avail)
3297 						return (0);/* Invalid */
3298 					ht->left = 0;
3299 					ht->right = 0;
3300 				} else {
3301 					ht = &(hf->tree[ht->right - len_avail]);
3302 				}
3303 			}
3304 			bit >>= 1;
3305 		}
3306 		if (ptn & bit) {
3307 			if (ht->left != 0)
3308 				return (0);/* Invalid */
3309 			ht->left = (uint16_t)i;
3310 		} else {
3311 			if (ht->right != 0)
3312 				return (0);/* Invalid */
3313 			ht->right = (uint16_t)i;
3314 		}
3315 	}
3316 	return (1);
3317 }
3318 
3319 static int
lzx_decode_huffman_tree(struct huffman * hf,unsigned rbits,int c)3320 lzx_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c)
3321 {
3322 	struct htree_t *ht;
3323 	int extlen;
3324 
3325 	ht = hf->tree;
3326 	extlen = hf->shift_bits;
3327 	while (c >= hf->len_size) {
3328 		c -= hf->len_size;
3329 		if (extlen-- <= 0 || c >= hf->tree_used)
3330 			return (0);
3331 		if (rbits & (1U << extlen))
3332 			c = ht[c].left;
3333 		else
3334 			c = ht[c].right;
3335 	}
3336 	return (c);
3337 }
3338 
3339 static inline int
lzx_decode_huffman(struct huffman * hf,unsigned rbits)3340 lzx_decode_huffman(struct huffman *hf, unsigned rbits)
3341 {
3342 	int c;
3343 	/*
3344 	 * At first search an index table for a bit pattern.
3345 	 * If it fails, search a huffman tree for.
3346 	 */
3347 	c = hf->tbl[rbits >> hf->shift_bits];
3348 	if (c < hf->len_size)
3349 		return (c);
3350 	/* This bit pattern needs to be found out at a huffman tree. */
3351 	return (lzx_decode_huffman_tree(hf, rbits, c));
3352 }
3353 
3354