/*
 * deflate_decompress.c - a decompressor for DEFLATE
 *
 * Copyright 2016 Eric Biggers
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * ---------------------------------------------------------------------------
 *
 * This is a highly optimized DEFLATE decompressor.  When compiled with gcc on
 * x86_64, it decompresses data in about 52% of the time of zlib (48% if BMI2
 * instructions are available).  On other architectures it should still be
 * significantly faster than zlib, but the difference may be smaller.
 *
 * Why this is faster than zlib's implementation:
 *
 * - Word accesses rather than byte accesses when reading input
 * - Word accesses rather than byte accesses when copying matches
 * - Faster Huffman decoding combined with various DEFLATE-specific tricks
 * - Larger bitbuffer variable that doesn't need to be filled as often
 * - Other optimizations to remove unnecessary branches
 * - Only full-buffer decompression is supported, so the code doesn't need to
 *   support stopping and resuming decompression.
 * - On x86_64, a version of the decompression routine is compiled with BMI2
 *   instructions and is used automatically at runtime when supported.
 */

#include <limits.h>

#include "deflate_constants.h"
#include "unaligned.h"

#include "libdeflate.h"

/*
 * If the expression passed to SAFETY_CHECK() evaluates to false, then the
 * decompression routine immediately returns LIBDEFLATE_BAD_DATA, indicating the
 * compressed data is invalid.
 *
 * Theoretically, these checks could be disabled for specialized applications
 * where all input to the decompressor will be trusted.
 */
#if 0
#  pragma message("UNSAFE DECOMPRESSION IS ENABLED. THIS MUST ONLY BE USED IF THE DECOMPRESSOR INPUT WILL ALWAYS BE TRUSTED!")
#  define SAFETY_CHECK(expr)	(void)(expr)
#else
#  define SAFETY_CHECK(expr)	if (unlikely(!(expr))) return LIBDEFLATE_BAD_DATA
#endif

/*
 * Each TABLEBITS number is the base-2 logarithm of the number of entries in the
 * main portion of the corresponding decode table.  Each number should be large
 * enough to ensure that, for typical data, the vast majority of symbols can be
 * decoded by a direct lookup of the next TABLEBITS bits of compressed data.
 * However, this must be balanced against the fact that a larger table requires
 * more memory and takes longer to fill.
 *
 * Note: you cannot change a TABLEBITS number without also changing the
 * corresponding ENOUGH number!
 */
#define PRECODE_TABLEBITS	7
#define LITLEN_TABLEBITS	10
#define OFFSET_TABLEBITS	8

/*
 * Each ENOUGH number is the maximum number of decode table entries that may be
 * required for the corresponding Huffman code, including the main table and all
 * subtables.  Each number depends on three parameters:
 *
 *	(1) the maximum number of symbols in the code (DEFLATE_NUM_*_SYMS)
 *	(2) the number of main table bits (the TABLEBITS numbers defined above)
 *	(3) the maximum allowed codeword length (DEFLATE_MAX_*_CODEWORD_LEN)
 *
 * The ENOUGH numbers were computed using the utility program 'enough' from
 * zlib.  This program enumerates all possible relevant Huffman codes to find
 * the worst-case usage of decode table entries.
 */
#define PRECODE_ENOUGH		128	/* enough 19 7 7	*/
#define LITLEN_ENOUGH		1334	/* enough 288 10 15	*/
#define OFFSET_ENOUGH		402	/* enough 32 8 15	*/

/*
 * Type for codeword lengths.
 */
typedef u8 len_t;

/*
 * The main DEFLATE decompressor structure.  Since this implementation only
 * supports full buffer decompression, this structure does not store the entire
 * decompression state, but rather only some arrays that are too large to
 * comfortably allocate on the stack.
 */
struct libdeflate_decompressor {

	/*
	 * The arrays aren't all needed at the same time.  'precode_lens' and
	 * 'precode_decode_table' are unneeded after 'lens' has been filled.
	 * Furthermore, 'lens' need not be retained after building the litlen
	 * and offset decode tables.  In fact, 'lens' can be in union with
	 * 'litlen_decode_table' provided that 'offset_decode_table' is separate
	 * and is built first.
	 */

	union {
		len_t precode_lens[DEFLATE_NUM_PRECODE_SYMS];

		struct {
			len_t lens[DEFLATE_NUM_LITLEN_SYMS +
				   DEFLATE_NUM_OFFSET_SYMS +
				   DEFLATE_MAX_LENS_OVERRUN];

			u32 precode_decode_table[PRECODE_ENOUGH];
		} l;

		u32 litlen_decode_table[LITLEN_ENOUGH];
	} u;

	u32 offset_decode_table[OFFSET_ENOUGH];

	/* used only during build_decode_table() */
	u16 sorted_syms[DEFLATE_MAX_NUM_SYMS];

	bool static_codes_loaded;
};

/*****************************************************************************
 *				Input bitstream                              *
 *****************************************************************************/

/*
 * The state of the "input bitstream" consists of the following variables:
 *
 *	- in_next: pointer to the next unread byte in the input buffer
 *
 *	- in_end: pointer just past the end of the input buffer
 *
 *	- bitbuf: a word-sized variable containing bits that have been read from
 *		  the input buffer.  The buffered bits are right-aligned
 *		  (they're the low-order bits).
 *
 *	- bitsleft: number of bits in 'bitbuf' that are valid.
 *
 * To make it easier for the compiler to optimize the code by keeping variables
 * in registers, these are declared as normal variables and manipulated using
 * macros.
 */

/*
 * The type for the bitbuffer variable ('bitbuf' described above).  For best
 * performance, this should have size equal to a machine word.
 *
 * 64-bit platforms have a significant advantage: they get a bigger bitbuffer
 * which they have to fill less often.
 */
typedef machine_word_t bitbuf_t;

/*
 * Number of bits the bitbuffer variable can hold.
 *
 * This is one less than the obvious value because of the optimized arithmetic
 * in FILL_BITS_WORDWISE() that leaves 'bitsleft' in the range
 * [WORDBITS - 8, WORDBITS - 1] rather than [WORDBITS - 7, WORDBITS].
 */
#define BITBUF_NBITS	(8 * sizeof(bitbuf_t) - 1)

/*
 * The maximum number of bits that can be ensured in the bitbuffer variable,
 * i.e. the maximum value of 'n' that can be passed to ENSURE_BITS(n).  The
 * decoder only reads whole bytes from memory, so this is the lowest value of
 * 'bitsleft' at which another byte cannot be read without first consuming some
 * bits.
 */
#define MAX_ENSURE	(BITBUF_NBITS - 7)
/*
 * Evaluates to true if 'n' is a valid argument to ENSURE_BITS(n), or false if
 * 'n' is too large to be passed to ENSURE_BITS(n).  Note: if 'n' is a
 * compile-time constant, then this expression will be a compile-time constant
 * as well.  Therefore, CAN_ENSURE() can be used to choose between alternative
 * implementations at compile time.
 */
#define CAN_ENSURE(n)	((n) <= MAX_ENSURE)
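
/*
 * An illustrative sketch (not code the decoder actually uses): because
 * CAN_ENSURE(n) is a compile-time constant for constant 'n', code like the
 * following compiles down to just one of the two branches.  On a 64-bit build
 * (MAX_ENSURE == 56) a step needing, say, 30 bits takes the single-refill
 * path; on a 32-bit build (MAX_ENSURE == 24) it takes the two-refill path
 * instead:
 *
 *	if (CAN_ENSURE(30)) {
 *		ENSURE_BITS(30);
 *		... consume up to 30 bits ...
 *	} else {
 *		ENSURE_BITS(15);
 *		... consume up to 15 bits, then refill and consume the rest ...
 *	}
 */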

/*
 * Fill the bitbuffer variable, reading one byte at a time.
 *
 * If we would overread the input buffer, we just don't read anything, leaving
 * the bits zeroed but marking them filled.  This simplifies the decompressor
 * because it removes the need to distinguish between real overreads and
 * overreads that occur only because of the decompressor's own lookahead.
 *
 * The disadvantage is that real overreads are not detected immediately.
 * However, this is safe because the decompressor is still guaranteed to make
 * forward progress when presented with never-ending 0 bits.  Within an
 * existing block, output will keep being produced; a new block, meanwhile,
 * can only be an uncompressed block (since the type code for uncompressed
 * blocks is 0), and for those we check for a previous overread.  Even without
 * that check, an uncompressed block would fail to validate because LEN would
 * not equal ~NLEN.  So the decompressor will eventually either detect that
 * the output buffer is full, or detect invalid input, or finish the final
 * block.
 */
#define FILL_BITS_BYTEWISE()					\
do {								\
	if (likely(in_next != in_end))				\
		bitbuf |= (bitbuf_t)*in_next++ << bitsleft;	\
	else							\
		overrun_count++;				\
	bitsleft += 8;						\
} while (bitsleft <= BITBUF_NBITS - 8)

/*
 * Fill the bitbuffer variable by reading the next word from the input buffer
 * and branchlessly updating 'in_next' and 'bitsleft' based on how many bits
 * were filled.  This can be significantly faster than FILL_BITS_BYTEWISE().
 * However, for this to work correctly, the word must be interpreted in
 * little-endian format.  In addition, the memory access may be unaligned.
 * Therefore, this method is most efficient on little-endian architectures that
 * support fast unaligned access, such as x86 and x86_64.
 *
 * For faster updating of 'bitsleft', we consider the bitbuffer size in bits to
 * be 1 less than the word size, so that its value is all 1 bits.  Then the
 * number of bits filled is the value obtained by taking the 0 bits of
 * 'bitsleft' in positions >= 3 and changing them to 1.  E.g. if words are 64
 * bits and bitsleft = 16 = b010000, then we refill b101000 = 40 bits = 5
 * bytes.  This uses only 4 operations to update 'in_next' and 'bitsleft': one
 * each of +, ^, >>, and |.  (Not counting operations the compiler optimizes
 * out.)  In contrast, the alternative of:
 *
 *	in_next += (BITBUF_NBITS - bitsleft) >> 3;
 *	bitsleft += (BITBUF_NBITS - bitsleft) & ~7;
 *
 * (where BITBUF_NBITS would be WORDBITS rather than WORDBITS - 1) would on
 * average refill an extra bit, but uses 5 operations: two +, and one each of
 * -, >>, and &.  Also the - and & must be completed before 'bitsleft' can be
 * updated, while the current solution updates 'bitsleft' with no dependencies.
 */
#define FILL_BITS_WORDWISE()					\
do {								\
	/* BITBUF_NBITS must be all 1's in binary, see above */	\
	STATIC_ASSERT((BITBUF_NBITS & (BITBUF_NBITS + 1)) == 0);\
								\
	bitbuf |= get_unaligned_leword(in_next) << bitsleft;	\
	in_next += (bitsleft ^ BITBUF_NBITS) >> 3;		\
	bitsleft |= BITBUF_NBITS & ~7;				\
} while (0)
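
/*
 * An illustrative sketch (not used by the decoder): the branchless update
 * from FILL_BITS_WORDWISE() extracted into a standalone helper, to make the
 * arithmetic above concrete.  On a 64-bit build (BITBUF_NBITS == 63), passing
 * bitsleft == 16 yields bytes_read == 5 and new_bitsleft == 56, matching the
 * example above (40 bits == 5 bytes refilled).
 */
static forceinline void
example_wordwise_refill_amounts(unsigned bitsleft,
				unsigned *bytes_read_ret,
				unsigned *new_bitsleft_ret)
{
	/* Same arithmetic as FILL_BITS_WORDWISE() above */
	*bytes_read_ret = (bitsleft ^ BITBUF_NBITS) >> 3;
	*new_bitsleft_ret = bitsleft | (BITBUF_NBITS & ~7);
}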

/*
 * Does the bitbuffer variable currently contain at least 'n' bits?
 */
#define HAVE_BITS(n) (bitsleft >= (n))

/*
 * Load more bits from the input buffer until the specified number of bits is
 * present in the bitbuffer variable.  'n' cannot be too large; see MAX_ENSURE
 * and CAN_ENSURE().
 */
#define ENSURE_BITS(n)						\
if (!HAVE_BITS(n)) {						\
	if (CPU_IS_LITTLE_ENDIAN() &&				\
	    UNALIGNED_ACCESS_IS_FAST &&				\
	    likely(in_end - in_next >= sizeof(bitbuf_t)))	\
		FILL_BITS_WORDWISE();				\
	else							\
		FILL_BITS_BYTEWISE();				\
}

/*
 * Return the next 'n' bits from the bitbuffer variable without removing them.
 */
#define BITS(n) ((u32)bitbuf & (((u32)1 << (n)) - 1))

/*
 * Remove the next 'n' bits from the bitbuffer variable.
 */
#define REMOVE_BITS(n) (bitbuf >>= (n), bitsleft -= (n))

/*
 * Remove and return the next 'n' bits from the bitbuffer variable.
 */
#define POP_BITS(n) (tmp32 = BITS(n), REMOVE_BITS(n), tmp32)
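
/*
 * An illustrative sketch (the real decode loop lives in
 * decompress_template.h): how the macros above combine to read the 3-bit
 * DEFLATE block header.  The local variable names are the ones the macros
 * expect to find in scope.
 */
static forceinline void
example_read_block_header(const u8 *in, size_t in_nbytes,
			  unsigned *is_final_block_ret,
			  unsigned *block_type_ret)
{
	const u8 *in_next = in;
	const u8 * const in_end = in + in_nbytes;
	bitbuf_t bitbuf = 0;
	unsigned bitsleft = 0;
	size_t overrun_count = 0;
	u32 tmp32;

	ENSURE_BITS(3);
	*is_final_block_ret = POP_BITS(1);	/* BFINAL */
	*block_type_ret = POP_BITS(2);		/* BTYPE */
}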

/*
 * Verify that the input buffer hasn't been overread, then align the input to
 * the next byte boundary, discarding any remaining bits in the current byte.
 *
 * Note that if the bitbuffer variable currently contains more than 7 bits, then
 * we must rewind 'in_next', effectively putting those bits back.  Only the bits
 * in what would be the "current" byte if we were reading one byte at a time can
 * be actually discarded.
 */
#define ALIGN_INPUT()							\
do {									\
	SAFETY_CHECK(overrun_count <= (bitsleft >> 3));			\
	in_next -= (bitsleft >> 3) - overrun_count;			\
	overrun_count = 0;						\
	bitbuf = 0;							\
	bitsleft = 0;							\
} while (0)

/*
 * Read a 16-bit value from the input.  This must have been preceded by a call
 * to ALIGN_INPUT(), and the caller must have already checked for overrun.
 */
#define READ_U16() (tmp16 = get_unaligned_le16(in_next), in_next += 2, tmp16)
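
/*
 * An illustrative sketch: READ_U16() exists for reading the LEN and NLEN
 * fields of an uncompressed block, which follow the byte realignment done by
 * ALIGN_INPUT().  DEFLATE requires NLEN to be the one's complement of LEN:
 */
static forceinline bool
example_stored_block_lens_ok(const u8 *byte_aligned_in, u16 *len_ret)
{
	const u8 *in_next = byte_aligned_in;
	u16 tmp16;
	u16 len, nlen;

	len = READ_U16();
	nlen = READ_U16();
	*len_ret = len;
	return len == (u16)~nlen;
}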

/*****************************************************************************
 *                              Huffman decoding                             *
 *****************************************************************************/

/*
 * A decode table for order TABLEBITS consists of a main table of (1 <<
 * TABLEBITS) entries followed by a variable number of subtables.
 *
 * The decoding algorithm takes the next TABLEBITS bits of compressed data and
 * uses them as an index into the decode table.  The resulting entry is either a
 * "direct entry", meaning that it contains the value desired, or a "subtable
 * pointer", meaning that the entry references a subtable that must be indexed
 * using more bits of the compressed data to decode the symbol.
 *
 * Each decode table (a main table along with its subtables, if any) is
 * associated with a Huffman code.  Logically, the result of a decode table
 * lookup is a symbol from the alphabet from which the corresponding Huffman
 * code was constructed.  A symbol with codeword length n <= TABLEBITS is
 * associated with 2**(TABLEBITS - n) direct entries in the table, whereas a
 * symbol with codeword length n > TABLEBITS is associated with one or more
 * subtable entries.
 *
 * On top of this basic design, we implement several optimizations:
 *
 * - We store the length of each codeword directly in each of its decode table
 *   entries.  This allows the codeword length to be produced without indexing
 *   an additional table.
 *
 * - When beneficial, we don't store the Huffman symbol itself, but instead data
 *   generated from it.  For example, when decoding an offset symbol in DEFLATE,
 *   it's more efficient if we can decode the offset base and number of extra
 *   offset bits directly rather than decoding the offset symbol and then
 *   looking up both of those values in an additional table or tables.
 *
 * The size of each decode table entry is 32 bits, which provides slightly
 * better performance than 16-bit entries on 32- and 64-bit processors, provided
 * that the table doesn't get so large that it takes up too much memory and
 * starts generating cache misses.  The bits of each decode table entry are
 * defined as follows:
 *
 * - Bits 30 -- 31: flags (see below)
 * - Bits 8 -- 29: decode result: a Huffman symbol or related data
 * - Bits 0 -- 7: codeword length
 */

/*
 * This flag is set in all main decode table entries that represent subtable
 * pointers.
 */
#define HUFFDEC_SUBTABLE_POINTER	0x80000000

/*
 * This flag is set in all entries in the litlen decode table that represent
 * literals.
 */
#define HUFFDEC_LITERAL			0x40000000

/* Mask for extracting the codeword length from a decode table entry.  */
#define HUFFDEC_LENGTH_MASK		0xFF

/* Shift to extract the decode result from a decode table entry.  */
#define HUFFDEC_RESULT_SHIFT		8

/* Shift a decode result into its position in the decode table entry.  */
#define HUFFDEC_RESULT_ENTRY(result)	((u32)(result) << HUFFDEC_RESULT_SHIFT)
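
/*
 * An illustrative sketch (the real decode loop in decompress_template.h also
 * dispatches on the HUFFDEC_SUBTABLE_POINTER and HUFFDEC_LITERAL flags):
 * unpacking a decode table entry into the codeword length and the decode
 * result.
 */
static forceinline u32
example_unpack_entry(u32 entry, unsigned *codeword_len_ret)
{
	/* Bits 0 -- 7: codeword length (or, for a subtable pointer, the
	 * number of bits with which the subtable is indexed) */
	*codeword_len_ret = entry & HUFFDEC_LENGTH_MASK;

	/* Bits 8 -- 29: decode result, with the flag bits masked off */
	return (entry & ~(HUFFDEC_SUBTABLE_POINTER | HUFFDEC_LITERAL)) >>
	       HUFFDEC_RESULT_SHIFT;
}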

/* The decode result for each precode symbol.  There is no special optimization
 * for the precode; the decode result is simply the symbol value.  */
static const u32 precode_decode_results[DEFLATE_NUM_PRECODE_SYMS] = {
#define ENTRY(presym)	HUFFDEC_RESULT_ENTRY(presym)
	ENTRY(0)   , ENTRY(1)   , ENTRY(2)   , ENTRY(3)   ,
	ENTRY(4)   , ENTRY(5)   , ENTRY(6)   , ENTRY(7)   ,
	ENTRY(8)   , ENTRY(9)   , ENTRY(10)  , ENTRY(11)  ,
	ENTRY(12)  , ENTRY(13)  , ENTRY(14)  , ENTRY(15)  ,
	ENTRY(16)  , ENTRY(17)  , ENTRY(18)  ,
#undef ENTRY
};

/* The decode result for each litlen symbol.  For literals, this is the literal
 * value itself and the HUFFDEC_LITERAL flag.  For lengths, this is the length
 * base and the number of extra length bits.  */
static const u32 litlen_decode_results[DEFLATE_NUM_LITLEN_SYMS] = {

	/* Literals  */
#define ENTRY(literal)	(HUFFDEC_LITERAL | HUFFDEC_RESULT_ENTRY(literal))
	ENTRY(0)   , ENTRY(1)   , ENTRY(2)   , ENTRY(3)   ,
	ENTRY(4)   , ENTRY(5)   , ENTRY(6)   , ENTRY(7)   ,
	ENTRY(8)   , ENTRY(9)   , ENTRY(10)  , ENTRY(11)  ,
	ENTRY(12)  , ENTRY(13)  , ENTRY(14)  , ENTRY(15)  ,
	ENTRY(16)  , ENTRY(17)  , ENTRY(18)  , ENTRY(19)  ,
	ENTRY(20)  , ENTRY(21)  , ENTRY(22)  , ENTRY(23)  ,
	ENTRY(24)  , ENTRY(25)  , ENTRY(26)  , ENTRY(27)  ,
	ENTRY(28)  , ENTRY(29)  , ENTRY(30)  , ENTRY(31)  ,
	ENTRY(32)  , ENTRY(33)  , ENTRY(34)  , ENTRY(35)  ,
	ENTRY(36)  , ENTRY(37)  , ENTRY(38)  , ENTRY(39)  ,
	ENTRY(40)  , ENTRY(41)  , ENTRY(42)  , ENTRY(43)  ,
	ENTRY(44)  , ENTRY(45)  , ENTRY(46)  , ENTRY(47)  ,
	ENTRY(48)  , ENTRY(49)  , ENTRY(50)  , ENTRY(51)  ,
	ENTRY(52)  , ENTRY(53)  , ENTRY(54)  , ENTRY(55)  ,
	ENTRY(56)  , ENTRY(57)  , ENTRY(58)  , ENTRY(59)  ,
	ENTRY(60)  , ENTRY(61)  , ENTRY(62)  , ENTRY(63)  ,
	ENTRY(64)  , ENTRY(65)  , ENTRY(66)  , ENTRY(67)  ,
	ENTRY(68)  , ENTRY(69)  , ENTRY(70)  , ENTRY(71)  ,
	ENTRY(72)  , ENTRY(73)  , ENTRY(74)  , ENTRY(75)  ,
	ENTRY(76)  , ENTRY(77)  , ENTRY(78)  , ENTRY(79)  ,
	ENTRY(80)  , ENTRY(81)  , ENTRY(82)  , ENTRY(83)  ,
	ENTRY(84)  , ENTRY(85)  , ENTRY(86)  , ENTRY(87)  ,
	ENTRY(88)  , ENTRY(89)  , ENTRY(90)  , ENTRY(91)  ,
	ENTRY(92)  , ENTRY(93)  , ENTRY(94)  , ENTRY(95)  ,
	ENTRY(96)  , ENTRY(97)  , ENTRY(98)  , ENTRY(99)  ,
	ENTRY(100) , ENTRY(101) , ENTRY(102) , ENTRY(103) ,
	ENTRY(104) , ENTRY(105) , ENTRY(106) , ENTRY(107) ,
	ENTRY(108) , ENTRY(109) , ENTRY(110) , ENTRY(111) ,
	ENTRY(112) , ENTRY(113) , ENTRY(114) , ENTRY(115) ,
	ENTRY(116) , ENTRY(117) , ENTRY(118) , ENTRY(119) ,
	ENTRY(120) , ENTRY(121) , ENTRY(122) , ENTRY(123) ,
	ENTRY(124) , ENTRY(125) , ENTRY(126) , ENTRY(127) ,
	ENTRY(128) , ENTRY(129) , ENTRY(130) , ENTRY(131) ,
	ENTRY(132) , ENTRY(133) , ENTRY(134) , ENTRY(135) ,
	ENTRY(136) , ENTRY(137) , ENTRY(138) , ENTRY(139) ,
	ENTRY(140) , ENTRY(141) , ENTRY(142) , ENTRY(143) ,
	ENTRY(144) , ENTRY(145) , ENTRY(146) , ENTRY(147) ,
	ENTRY(148) , ENTRY(149) , ENTRY(150) , ENTRY(151) ,
	ENTRY(152) , ENTRY(153) , ENTRY(154) , ENTRY(155) ,
	ENTRY(156) , ENTRY(157) , ENTRY(158) , ENTRY(159) ,
	ENTRY(160) , ENTRY(161) , ENTRY(162) , ENTRY(163) ,
	ENTRY(164) , ENTRY(165) , ENTRY(166) , ENTRY(167) ,
	ENTRY(168) , ENTRY(169) , ENTRY(170) , ENTRY(171) ,
	ENTRY(172) , ENTRY(173) , ENTRY(174) , ENTRY(175) ,
	ENTRY(176) , ENTRY(177) , ENTRY(178) , ENTRY(179) ,
	ENTRY(180) , ENTRY(181) , ENTRY(182) , ENTRY(183) ,
	ENTRY(184) , ENTRY(185) , ENTRY(186) , ENTRY(187) ,
	ENTRY(188) , ENTRY(189) , ENTRY(190) , ENTRY(191) ,
	ENTRY(192) , ENTRY(193) , ENTRY(194) , ENTRY(195) ,
	ENTRY(196) , ENTRY(197) , ENTRY(198) , ENTRY(199) ,
	ENTRY(200) , ENTRY(201) , ENTRY(202) , ENTRY(203) ,
	ENTRY(204) , ENTRY(205) , ENTRY(206) , ENTRY(207) ,
	ENTRY(208) , ENTRY(209) , ENTRY(210) , ENTRY(211) ,
	ENTRY(212) , ENTRY(213) , ENTRY(214) , ENTRY(215) ,
	ENTRY(216) , ENTRY(217) , ENTRY(218) , ENTRY(219) ,
	ENTRY(220) , ENTRY(221) , ENTRY(222) , ENTRY(223) ,
	ENTRY(224) , ENTRY(225) , ENTRY(226) , ENTRY(227) ,
	ENTRY(228) , ENTRY(229) , ENTRY(230) , ENTRY(231) ,
	ENTRY(232) , ENTRY(233) , ENTRY(234) , ENTRY(235) ,
	ENTRY(236) , ENTRY(237) , ENTRY(238) , ENTRY(239) ,
	ENTRY(240) , ENTRY(241) , ENTRY(242) , ENTRY(243) ,
	ENTRY(244) , ENTRY(245) , ENTRY(246) , ENTRY(247) ,
	ENTRY(248) , ENTRY(249) , ENTRY(250) , ENTRY(251) ,
	ENTRY(252) , ENTRY(253) , ENTRY(254) , ENTRY(255) ,
#undef ENTRY

#define HUFFDEC_EXTRA_LENGTH_BITS_MASK	0xFF
#define HUFFDEC_LENGTH_BASE_SHIFT	8
#define HUFFDEC_END_OF_BLOCK_LENGTH	0

#define ENTRY(length_base, num_extra_bits)	HUFFDEC_RESULT_ENTRY(	\
	((u32)(length_base) << HUFFDEC_LENGTH_BASE_SHIFT) | (num_extra_bits))

	/* End of block  */
	ENTRY(HUFFDEC_END_OF_BLOCK_LENGTH, 0),

	/* Lengths  */
	ENTRY(3  , 0) , ENTRY(4  , 0) , ENTRY(5  , 0) , ENTRY(6  , 0),
	ENTRY(7  , 0) , ENTRY(8  , 0) , ENTRY(9  , 0) , ENTRY(10 , 0),
	ENTRY(11 , 1) , ENTRY(13 , 1) , ENTRY(15 , 1) , ENTRY(17 , 1),
	ENTRY(19 , 2) , ENTRY(23 , 2) , ENTRY(27 , 2) , ENTRY(31 , 2),
	ENTRY(35 , 3) , ENTRY(43 , 3) , ENTRY(51 , 3) , ENTRY(59 , 3),
	ENTRY(67 , 4) , ENTRY(83 , 4) , ENTRY(99 , 4) , ENTRY(115, 4),
	ENTRY(131, 5) , ENTRY(163, 5) , ENTRY(195, 5) , ENTRY(227, 5),
	ENTRY(258, 0) , ENTRY(258, 0) , ENTRY(258, 0) ,
#undef ENTRY
};

/* The decode result for each offset symbol.  This is the offset base and the
 * number of extra offset bits.  */
static const u32 offset_decode_results[DEFLATE_NUM_OFFSET_SYMS] = {

#define HUFFDEC_EXTRA_OFFSET_BITS_SHIFT 16
#define HUFFDEC_OFFSET_BASE_MASK (((u32)1 << HUFFDEC_EXTRA_OFFSET_BITS_SHIFT) - 1)

#define ENTRY(offset_base, num_extra_bits)	HUFFDEC_RESULT_ENTRY(	\
		((u32)(num_extra_bits) << HUFFDEC_EXTRA_OFFSET_BITS_SHIFT) | \
		(offset_base))
	ENTRY(1     , 0)  , ENTRY(2     , 0)  , ENTRY(3     , 0)  , ENTRY(4     , 0)  ,
	ENTRY(5     , 1)  , ENTRY(7     , 1)  , ENTRY(9     , 2)  , ENTRY(13    , 2)  ,
	ENTRY(17    , 3)  , ENTRY(25    , 3)  , ENTRY(33    , 4)  , ENTRY(49    , 4)  ,
	ENTRY(65    , 5)  , ENTRY(97    , 5)  , ENTRY(129   , 6)  , ENTRY(193   , 6)  ,
	ENTRY(257   , 7)  , ENTRY(385   , 7)  , ENTRY(513   , 8)  , ENTRY(769   , 8)  ,
	ENTRY(1025  , 9)  , ENTRY(1537  , 9)  , ENTRY(2049  , 10) , ENTRY(3073  , 10) ,
	ENTRY(4097  , 11) , ENTRY(6145  , 11) , ENTRY(8193  , 12) , ENTRY(12289 , 12) ,
	ENTRY(16385 , 13) , ENTRY(24577 , 13) , ENTRY(32769 , 14) , ENTRY(49153 , 14) ,
#undef ENTRY
};
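
/*
 * An illustrative sketch: recovering a match length and offset from the
 * packed decode results above.  Here '*_result' is the decode result portion
 * of a table entry (entry >> HUFFDEC_RESULT_SHIFT, as in the unpacking sketch
 * earlier) and 'extra' is the value of the extra bits read from the
 * bitstream; the *number* of extra bits comes from
 * 'result & HUFFDEC_EXTRA_LENGTH_BITS_MASK' for lengths and
 * 'result >> HUFFDEC_EXTRA_OFFSET_BITS_SHIFT' for offsets.
 */
static forceinline u32
example_match_length(u32 litlen_result, u32 extra)
{
	return (litlen_result >> HUFFDEC_LENGTH_BASE_SHIFT) + extra;
}

static forceinline u32
example_match_offset(u32 offset_result, u32 extra)
{
	return (offset_result & HUFFDEC_OFFSET_BASE_MASK) + extra;
}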

/*
 * Build a table for fast decoding of symbols from a Huffman code.  As input,
 * this function takes the codeword length of each symbol which may be used in
 * the code.  As output, it produces a decode table for the canonical Huffman
 * code described by the codeword lengths.  The decode table is built with the
 * assumption that it will be indexed with "bit-reversed" codewords, where the
 * low-order bit is the first bit of the codeword.  This format is used for all
 * Huffman codes in DEFLATE.
 *
 * @decode_table
 *	The array in which the decode table will be generated.  This array must
 *	have sufficient length; see the definition of the ENOUGH numbers.
 * @lens
 *	An array which provides, for each symbol, the length of the
 *	corresponding codeword in bits, or 0 if the symbol is unused.  This may
 *	alias @decode_table, since nothing is written to @decode_table until all
 *	@lens have been consumed.  All codeword lengths are assumed to be <=
 *	@max_codeword_len but are otherwise considered untrusted.  If they do
 *	not form a valid Huffman code, then the decode table is not built and
 *	%false is returned.
 * @num_syms
 *	The number of symbols in the code, including all unused symbols.
 * @decode_results
 *	An array which provides, for each symbol, the actual value to store into
 *	the decode table.  This value will be directly produced as the result of
 *	decoding that symbol, thereby moving the indirection out of the decode
 *	loop and into the table initialization.
 * @table_bits
 *	The log base-2 of the number of main table entries to use.
 * @max_codeword_len
 *	The maximum allowed codeword length for this Huffman code.
 *	Must be <= DEFLATE_MAX_CODEWORD_LEN.
 * @sorted_syms
 *	A temporary array of length @num_syms.
 *
 * Returns %true if successful; %false if the codeword lengths do not form a
 * valid Huffman code.
 */
static bool
build_decode_table(u32 decode_table[],
		   const len_t lens[],
		   const unsigned num_syms,
		   const u32 decode_results[],
		   const unsigned table_bits,
		   const unsigned max_codeword_len,
		   u16 *sorted_syms)
{
	unsigned len_counts[DEFLATE_MAX_CODEWORD_LEN + 1];
	unsigned offsets[DEFLATE_MAX_CODEWORD_LEN + 1];
	unsigned sym;		/* current symbol */
	unsigned codeword;	/* current codeword, bit-reversed */
	unsigned len;		/* current codeword length in bits */
	unsigned count;		/* num codewords remaining with this length */
	u32 codespace_used;	/* codespace used out of '2^max_codeword_len' */
	unsigned cur_table_end; /* end index of current table */
	unsigned subtable_prefix; /* codeword prefix of current subtable */
	unsigned subtable_start;  /* start index of current subtable */
	unsigned subtable_bits;   /* log2 of current subtable length */

	/* Count how many codewords have each length, including 0. */
	for (len = 0; len <= max_codeword_len; len++)
		len_counts[len] = 0;
	for (sym = 0; sym < num_syms; sym++)
		len_counts[lens[sym]]++;

	/*
	 * Sort the symbols primarily by increasing codeword length and
	 * secondarily by increasing symbol value; or equivalently by their
	 * codewords in lexicographic order, since a canonical code is assumed.
	 *
	 * For efficiency, also compute 'codespace_used' in the same pass over
	 * 'len_counts[]' used to build 'offsets[]' for sorting.
	 */

	/* Ensure that 'codespace_used' cannot overflow. */
	STATIC_ASSERT(sizeof(codespace_used) == 4);
	STATIC_ASSERT(UINT32_MAX / (1U << (DEFLATE_MAX_CODEWORD_LEN - 1)) >=
		      DEFLATE_MAX_NUM_SYMS);

	offsets[0] = 0;
	offsets[1] = len_counts[0];
	codespace_used = 0;
	for (len = 1; len < max_codeword_len; len++) {
		offsets[len + 1] = offsets[len] + len_counts[len];
		codespace_used = (codespace_used << 1) + len_counts[len];
	}
	codespace_used = (codespace_used << 1) + len_counts[len];

	for (sym = 0; sym < num_syms; sym++)
		sorted_syms[offsets[lens[sym]]++] = sym;

	sorted_syms += offsets[0]; /* Skip unused symbols */

	/* lens[] is done being used, so we can write to decode_table[] now. */

	/*
	 * Check whether the lengths form a complete code (exactly fills the
	 * codespace), an incomplete code (doesn't fill the codespace), or an
	 * overfull code (overflows the codespace).  A codeword of length 'n'
	 * uses proportion '1/(2^n)' of the codespace.  An overfull code is
	 * nonsensical, so is considered invalid.  An incomplete code is
	 * considered valid only in two specific cases; see below.
	 */

	/* overfull code? */
	if (unlikely(codespace_used > (1U << max_codeword_len)))
		return false;

	/* incomplete code? */
	if (unlikely(codespace_used < (1U << max_codeword_len))) {
		u32 entry;
		unsigned i;

		if (codespace_used == 0) {
			/*
			 * An empty code is allowed.  This can happen for the
			 * offset code in DEFLATE, since a dynamic Huffman block
			 * need not contain any matches.
			 */

			/* sym=0, len=1 (arbitrary) */
			entry = decode_results[0] | 1;
		} else {
			/*
			 * Allow codes with a single used symbol, with codeword
			 * length 1.  The DEFLATE RFC is unclear regarding this
			 * case.  What zlib's decompressor does is permit this
			 * for the litlen and offset codes and assume the
			 * codeword is '0' rather than '1'.  We do the same
			 * except we allow this for precodes too, since there's
			 * no convincing reason to treat the codes differently.
			 * We also assign both codewords '0' and '1' to the
			 * symbol to avoid having to handle '1' specially.
			 */
			if (codespace_used != (1U << (max_codeword_len - 1)) ||
			    len_counts[1] != 1)
				return false;
			entry = decode_results[*sorted_syms] | 1;
		}
		/*
		 * Note: the decode table still must be fully initialized, in
		 * case the stream is malformed and contains bits from the part
		 * of the codespace the incomplete code doesn't use.
		 */
		for (i = 0; i < (1U << table_bits); i++)
			decode_table[i] = entry;
		return true;
	}

	/*
	 * The lengths form a complete code.  Now, enumerate the codewords in
	 * lexicographic order and fill the decode table entries for each one.
	 *
	 * First, process all codewords with len <= table_bits.  Each one gets
	 * '2^(table_bits-len)' direct entries in the table.
	 *
	 * Since DEFLATE uses bit-reversed codewords, these entries aren't
	 * consecutive but rather are spaced '2^len' entries apart.  This makes
	 * filling them naively somewhat awkward and inefficient, since strided
	 * stores are less cache-friendly and preclude the use of word or
	 * vector-at-a-time stores to fill multiple entries per instruction.
	 *
	 * To optimize this, we incrementally double the table size.  When
	 * processing codewords with length 'len', the table is treated as
	 * having only '2^len' entries, so each codeword uses just one entry.
	 * Then, each time 'len' is incremented, the table size is doubled and
	 * the first half is copied to the second half.  This significantly
	 * improves performance over naively doing strided stores.
	 *
	 * Note that some entries copied for each table doubling may not have
	 * been initialized yet, but it doesn't matter since they're guaranteed
	 * to be initialized later (because the Huffman code is complete).
	 */
	codeword = 0;
	len = 1;
	while ((count = len_counts[len]) == 0)
		len++;
	cur_table_end = 1U << len;
	while (len <= table_bits) {
		/* Process all 'count' codewords with length 'len' bits. */
		do {
			unsigned bit;

			/* Fill the first entry for the current codeword. */
			decode_table[codeword] =
				decode_results[*sorted_syms++] | len;

			if (codeword == cur_table_end - 1) {
				/* Last codeword (all 1's) */
				for (; len < table_bits; len++) {
					memcpy(&decode_table[cur_table_end],
					       decode_table,
					       cur_table_end *
						sizeof(decode_table[0]));
					cur_table_end <<= 1;
				}
				return true;
			}
			/*
			 * To advance to the lexicographically next codeword in
			 * the canonical code, the codeword must be incremented,
			 * then 0's must be appended to the codeword as needed
			 * to match the next codeword's length.
			 *
			 * Since the codeword is bit-reversed, appending 0's is
			 * a no-op.  However, incrementing it is nontrivial.  To
			 * do so efficiently, use the 'bsr' instruction to find
			 * the last (highest order) 0 bit in the codeword, set
			 * it, and clear any later (higher order) 1 bits.  But
			 * 'bsr' actually finds the highest order 1 bit, so to
			 * use it first flip all bits in the codeword by XOR'ing
			 * it with (1U << len) - 1 == cur_table_end - 1.
			 */
			bit = 1U << bsr32(codeword ^ (cur_table_end - 1));
			codeword &= bit - 1;
			codeword |= bit;
		} while (--count);

		/* Advance to the next codeword length. */
		do {
			if (++len <= table_bits) {
				memcpy(&decode_table[cur_table_end],
				       decode_table,
				       cur_table_end * sizeof(decode_table[0]));
				cur_table_end <<= 1;
			}
		} while ((count = len_counts[len]) == 0);
	}

	/* Process codewords with len > table_bits.  These require subtables. */
	cur_table_end = 1U << table_bits;
	subtable_prefix = -1;
	subtable_start = 0;
	for (;;) {
		u32 entry;
		unsigned i;
		unsigned stride;
		unsigned bit;

		/*
		 * Start a new subtable if the first 'table_bits' bits of the
		 * codeword don't match the prefix of the current subtable.
		 */
		if ((codeword & ((1U << table_bits) - 1)) != subtable_prefix) {
			subtable_prefix = (codeword & ((1U << table_bits) - 1));
			subtable_start = cur_table_end;
			/*
			 * Calculate the subtable length.  If the codeword has
			 * length 'table_bits + n', then the subtable needs
			 * '2^n' entries.  But it may need more; if fewer than
			 * '2^n' codewords of length 'table_bits + n' remain,
			 * then the length will need to be incremented to bring
			 * in longer codewords until the subtable can be
			 * completely filled.  Note that because the Huffman
			 * code is complete, it will always be possible to fill
			 * the subtable eventually.
			 */
			subtable_bits = len - table_bits;
			codespace_used = count;
			while (codespace_used < (1U << subtable_bits)) {
				subtable_bits++;
				codespace_used = (codespace_used << 1) +
					len_counts[table_bits + subtable_bits];
			}
			cur_table_end = subtable_start + (1U << subtable_bits);

			/*
			 * Create the entry that points from the main table to
			 * the subtable.  This entry contains the index of the
			 * start of the subtable and the number of bits with
			 * which the subtable is indexed (the log base 2 of the
			 * number of entries it contains).
			 */
			decode_table[subtable_prefix] =
				HUFFDEC_SUBTABLE_POINTER |
				HUFFDEC_RESULT_ENTRY(subtable_start) |
				subtable_bits;
		}

		/* Fill the subtable entries for the current codeword. */
		entry = decode_results[*sorted_syms++] | (len - table_bits);
		i = subtable_start + (codeword >> table_bits);
		stride = 1U << (len - table_bits);
		do {
			decode_table[i] = entry;
			i += stride;
		} while (i < cur_table_end);

		/* Advance to the next codeword. */
		if (codeword == (1U << len) - 1) /* last codeword (all 1's)? */
			return true;
		bit = 1U << bsr32(codeword ^ ((1U << len) - 1));
		codeword &= bit - 1;
		codeword |= bit;
		count--;
		while (count == 0)
			count = len_counts[++len];
	}
}
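
/*
 * An illustrative sketch: the bit-reversed codeword increment used twice in
 * build_decode_table() above, as a standalone helper.  Given a bit-reversed
 * codeword of length 'len' bits, it returns the bit-reversed form of the
 * lexicographically next canonical codeword.  E.g. for len == 3, 0b100
 * (canonical 001) maps to 0b010 (canonical 010).  The caller must stop at the
 * all-1's codeword, as build_decode_table() does, since bsr32(0) is undefined.
 */
static forceinline unsigned
example_next_bit_reversed_codeword(unsigned codeword, unsigned len)
{
	/* Flip all 'len' bits so that bsr32() finds the highest-order 0 bit,
	 * then set that bit and clear all higher-order bits. */
	unsigned bit = 1U << bsr32(codeword ^ ((1U << len) - 1));

	return (codeword & (bit - 1)) | bit;
}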

/* Build the decode table for the precode.  */
static bool
build_precode_decode_table(struct libdeflate_decompressor *d)
{
	/* When you change TABLEBITS, you must change ENOUGH, and vice versa! */
	STATIC_ASSERT(PRECODE_TABLEBITS == 7 && PRECODE_ENOUGH == 128);

	return build_decode_table(d->u.l.precode_decode_table,
				  d->u.precode_lens,
				  DEFLATE_NUM_PRECODE_SYMS,
				  precode_decode_results,
				  PRECODE_TABLEBITS,
				  DEFLATE_MAX_PRE_CODEWORD_LEN,
				  d->sorted_syms);
}

/* Build the decode table for the literal/length code.  */
static bool
build_litlen_decode_table(struct libdeflate_decompressor *d,
			  unsigned num_litlen_syms, unsigned num_offset_syms)
{
	/* When you change TABLEBITS, you must change ENOUGH, and vice versa! */
	STATIC_ASSERT(LITLEN_TABLEBITS == 10 && LITLEN_ENOUGH == 1334);

	return build_decode_table(d->u.litlen_decode_table,
				  d->u.l.lens,
				  num_litlen_syms,
				  litlen_decode_results,
				  LITLEN_TABLEBITS,
				  DEFLATE_MAX_LITLEN_CODEWORD_LEN,
				  d->sorted_syms);
}

/* Build the decode table for the offset code.  */
static bool
build_offset_decode_table(struct libdeflate_decompressor *d,
			  unsigned num_litlen_syms, unsigned num_offset_syms)
{
	/* When you change TABLEBITS, you must change ENOUGH, and vice versa! */
	STATIC_ASSERT(OFFSET_TABLEBITS == 8 && OFFSET_ENOUGH == 402);

	return build_decode_table(d->offset_decode_table,
				  d->u.l.lens + num_litlen_syms,
				  num_offset_syms,
				  offset_decode_results,
				  OFFSET_TABLEBITS,
				  DEFLATE_MAX_OFFSET_CODEWORD_LEN,
				  d->sorted_syms);
}

static forceinline machine_word_t
repeat_byte(u8 b)
{
	machine_word_t v;

	STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64);

	v = b;
	v |= v << 8;
	v |= v << 16;
	v |= v << ((WORDBITS == 64) ? 32 : 0);
	return v;
}

static forceinline void
copy_word_unaligned(const void *src, void *dst)
{
	store_word_unaligned(load_word_unaligned(src), dst);
}
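
/*
 * An illustrative sketch (the real match copy lives in decompress_template.h
 * and is gated on having enough slack in the output buffer): how the two
 * helpers above accelerate match copying.  This assumes length >= 1, and the
 * word-at-a-time stores may write up to sizeof(machine_word_t) - 1 bytes past
 * 'out_next + length', so the caller must guarantee that much extra space.
 */
static forceinline void
example_copy_match(u8 *out_next, u32 length, u32 offset)
{
	const u8 *src = out_next - offset;
	u8 *dst = out_next;
	u8 * const end = out_next + length;

	if (offset == 1) {
		/* RLE of the previous byte: broadcast it to a full word,
		 * then store whole words. */
		machine_word_t v = repeat_byte(*src);

		do {
			store_word_unaligned(v, dst);
			dst += sizeof(machine_word_t);
		} while (dst < end);
	} else if (offset >= sizeof(machine_word_t)) {
		/* Source and destination don't overlap within a word:
		 * copy whole words at a time. */
		do {
			copy_word_unaligned(src, dst);
			src += sizeof(machine_word_t);
			dst += sizeof(machine_word_t);
		} while (dst < end);
	} else {
		/* Small overlapping offsets: fall back to byte copies. */
		do {
			*dst++ = *src++;
		} while (dst < end);
	}
}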

/*****************************************************************************
 *                         Main decompression routine                        *
 *****************************************************************************/

typedef enum libdeflate_result (*decompress_func_t)
	(struct libdeflate_decompressor * restrict d,
	 const void * restrict in, size_t in_nbytes,
	 void * restrict out, size_t out_nbytes_avail,
	 size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret);

#undef DEFAULT_IMPL
#undef DISPATCH
#if defined(__i386__) || defined(__x86_64__)
#  include "x86/decompress_impl.h"
#endif

#ifndef DEFAULT_IMPL
#  define FUNCNAME deflate_decompress_default
#  define ATTRIBUTES
#  include "decompress_template.h"
#  define DEFAULT_IMPL deflate_decompress_default
#endif

#ifdef DISPATCH
static enum libdeflate_result
dispatch(struct libdeflate_decompressor * restrict d,
	 const void * restrict in, size_t in_nbytes,
	 void * restrict out, size_t out_nbytes_avail,
	 size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret);

static volatile decompress_func_t decompress_impl = dispatch;

/* Choose the fastest implementation at runtime */
static enum libdeflate_result
dispatch(struct libdeflate_decompressor * restrict d,
	 const void * restrict in, size_t in_nbytes,
	 void * restrict out, size_t out_nbytes_avail,
	 size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret)
{
	decompress_func_t f = arch_select_decompress_func();

	if (f == NULL)
		f = DEFAULT_IMPL;

	decompress_impl = f;
	return (*f)(d, in, in_nbytes, out, out_nbytes_avail,
		    actual_in_nbytes_ret, actual_out_nbytes_ret);
}
#else
#  define decompress_impl DEFAULT_IMPL /* only one implementation, use it */
#endif


/*
 * This is the main DEFLATE decompression routine.  See libdeflate.h for the
 * documentation.
 *
 * Note that the real code is in decompress_template.h.  The part here just
 * handles calling the appropriate implementation depending on the CPU features
 * at runtime.
 */
LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI
libdeflate_deflate_decompress_ex(struct libdeflate_decompressor * restrict d,
				 const void * restrict in, size_t in_nbytes,
				 void * restrict out, size_t out_nbytes_avail,
				 size_t *actual_in_nbytes_ret,
				 size_t *actual_out_nbytes_ret)
{
	return decompress_impl(d, in, in_nbytes, out, out_nbytes_avail,
			       actual_in_nbytes_ret, actual_out_nbytes_ret);
}

LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI
libdeflate_deflate_decompress(struct libdeflate_decompressor * restrict d,
			      const void * restrict in, size_t in_nbytes,
			      void * restrict out, size_t out_nbytes_avail,
			      size_t *actual_out_nbytes_ret)
{
	return libdeflate_deflate_decompress_ex(d, in, in_nbytes,
						out, out_nbytes_avail,
						NULL, actual_out_nbytes_ret);
}

LIBDEFLATEEXPORT struct libdeflate_decompressor * LIBDEFLATEAPI
libdeflate_alloc_decompressor(void)
{
	/*
	 * Note that only certain parts of the decompressor actually must be
	 * initialized here:
	 *
	 * - 'static_codes_loaded' must be initialized to false.
	 *
	 * - The first half of the main portion of each decode table must be
	 *   initialized to any value, to avoid reading from uninitialized
	 *   memory during table expansion in build_decode_table().  (Although,
	 *   this is really just to avoid warnings with dynamic tools like
	 *   valgrind, since build_decode_table() is guaranteed to initialize
	 *   all entries eventually anyway.)
	 *
	 * But for simplicity, we currently just zero the whole decompressor.
	 */
	struct libdeflate_decompressor *d = libdeflate_malloc(sizeof(*d));

	if (d == NULL)
		return NULL;
	memset(d, 0, sizeof(*d));
	return d;
}

LIBDEFLATEEXPORT void LIBDEFLATEAPI
libdeflate_free_decompressor(struct libdeflate_decompressor *d)
{
	libdeflate_free(d);
}