1 /*
2  * Reimplementation of Deflate (RFC1951) compression. Adapted from
3  * the version in PuTTY, and extended to write dynamic Huffman
4  * trees and choose block boundaries usefully.
5  */
6 
7 /*
8  * TODO:
9  *
10  *  - Feature: could do with forms of flush other than SYNC_FLUSH.
11  *    I'm not sure exactly how those work when you don't know in
12  *    advance that your next block will be static (as we did in
13  *    PuTTY). And remember the 9-bit limitation of zlib.
14  *     + also, zlib has FULL_FLUSH which clears the LZ77 state as
15  * 	 well, for random access.
16  *
17  *  - Compression quality: chooseblock() appears to be computing
18  *    wildly inaccurate block size estimates. Possible resolutions:
19  *     + find and fix some trivial bug I haven't spotted yet
20  *     + abandon the entropic approximation and go with trial
21  * 	 Huffman runs
22  *
23  *  - Compression quality: see if increasing SYMLIMIT causes
24  *    dynamic blocks to start being consistently smaller than it.
25  *     + actually we seem to be there already, but check on a
26  * 	 larger corpus.
27  *
28  *  - Compression quality: we ought to be able to fall right back
29  *    to actual uncompressed blocks if really necessary, though
30  *    it's not clear what the criterion for doing so would be.
31  */
32 
33 /*
34  * This software is copyright 2000-2006 Simon Tatham.
35  *
36  * Permission is hereby granted, free of charge, to any person
37  * obtaining a copy of this software and associated documentation
38  * files (the "Software"), to deal in the Software without
39  * restriction, including without limitation the rights to use,
40  * copy, modify, merge, publish, distribute, sublicense, and/or
41  * sell copies of the Software, and to permit persons to whom the
42  * Software is furnished to do so, subject to the following
43  * conditions:
44  *
45  * The above copyright notice and this permission notice shall be
46  * included in all copies or substantial portions of the Software.
47  *
48  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
49  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
50  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
51  * NONINFRINGEMENT.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE
52  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
53  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
54  * IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
55  * THE SOFTWARE.
56  */
57 
58 #include <stdio.h>
59 #include <stddef.h>
60 #include <string.h>
61 #include <stdlib.h>
62 #include <assert.h>
63 
64 #include "halibut.h"
65 #include "huffman.h"
66 #include "lz77.h"
67 #include "deflate.h"
68 
69 /* ----------------------------------------------------------------------
70  * This file can be compiled in a number of modes.
71  *
72  * With -DSTANDALONE, it builds a self-contained deflate tool which
73  * can compress, decompress, and also analyse a deflated file to
74  * print out the sequence of literals and copy commands it
75  * contains.
76  *
77  * With -DTESTMODE, it builds a test application which is given a
78  * file on standard input, both compresses and decompresses it, and
79  * outputs the re-decompressed result so it can be conveniently
80  * diffed against the original. Define -DTESTDBG as well for lots
81  * of diagnostics.
82  */
83 
84 #if defined TESTDBG
85 /* gcc-specific diagnostic macro */
86 #define debug_int(x...) ( fprintf(stderr, x) )
87 #define debug(x) ( debug_int x )
88 #else
89 #define debug(x) ((void)0)
90 #endif
91 
92 #ifdef STANDALONE
93 #define ANALYSIS
94 #endif
95 
96 #ifdef ANALYSIS
97 int analyse_level = 0;
98 #endif
99 
100 /* ----------------------------------------------------------------------
101  * Deflate functionality common to both compression and decompression.
102  */
103 
104 #define DWINSIZE 32768
105 
106 static const unsigned char lenlenmap[] = {
107     16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
108 };
109 
110 /*
111  * Given a sequence of Huffman code lengths, compute the actual codes
112  * in the final form suitable for feeding to outbits (i.e. already
113  * bit-mirrored). Returns the same as compute_huffman_codes.
114  */
deflate_hufcodes(const unsigned char * lengths,int * codes,int nsyms)115 static int deflate_hufcodes(const unsigned char *lengths,
116                             int *codes, int nsyms)
117 {
118     int maxlen = compute_huffman_codes(lengths, codes, nsyms);
119     int i, j;
120 
121     for (i = 0; i < nsyms; i++) {
122         int code = codes[i];
123 	codes[i] = 0;
124 	for (j = 0; j < lengths[i]; j++) {
125 	    codes[i] = (codes[i] << 1) | (code & 1);
126 	    code >>= 1;
127 	}
128     }
129 
130     return maxlen;
131 }
132 
133 /*
134  * Adler32 checksum function.
135  */
adler32_update(unsigned long s,const unsigned char * data,int len)136 static unsigned long adler32_update(unsigned long s,
137 				    const unsigned char *data, int len)
138 {
139     unsigned s1 = s & 0xFFFF, s2 = (s >> 16) & 0xFFFF;
140     int i;
141 
142     for (i = 0; i < len; i++) {
143 	s1 += data[i];
144 	s2 += s1;
145 	if (!(i & 0xFFF)) {
146 	    s1 %= 65521;
147 	    s2 %= 65521;
148 	}
149     }
150 
151     return ((s2 % 65521) << 16) | (s1 % 65521);
152 }
153 
154 /*
155  * CRC32 checksum function.
156  */
157 
crc32_update(unsigned long crcword,const unsigned char * data,int len)158 static unsigned long crc32_update(unsigned long crcword,
159 				  const unsigned char *data, int len)
160 {
161     static const unsigned long crc32_table[256] = {
162 	0x00000000L, 0x77073096L, 0xEE0E612CL, 0x990951BAL,
163 	0x076DC419L, 0x706AF48FL, 0xE963A535L, 0x9E6495A3L,
164 	0x0EDB8832L, 0x79DCB8A4L, 0xE0D5E91EL, 0x97D2D988L,
165 	0x09B64C2BL, 0x7EB17CBDL, 0xE7B82D07L, 0x90BF1D91L,
166 	0x1DB71064L, 0x6AB020F2L, 0xF3B97148L, 0x84BE41DEL,
167 	0x1ADAD47DL, 0x6DDDE4EBL, 0xF4D4B551L, 0x83D385C7L,
168 	0x136C9856L, 0x646BA8C0L, 0xFD62F97AL, 0x8A65C9ECL,
169 	0x14015C4FL, 0x63066CD9L, 0xFA0F3D63L, 0x8D080DF5L,
170 	0x3B6E20C8L, 0x4C69105EL, 0xD56041E4L, 0xA2677172L,
171 	0x3C03E4D1L, 0x4B04D447L, 0xD20D85FDL, 0xA50AB56BL,
172 	0x35B5A8FAL, 0x42B2986CL, 0xDBBBC9D6L, 0xACBCF940L,
173 	0x32D86CE3L, 0x45DF5C75L, 0xDCD60DCFL, 0xABD13D59L,
174 	0x26D930ACL, 0x51DE003AL, 0xC8D75180L, 0xBFD06116L,
175 	0x21B4F4B5L, 0x56B3C423L, 0xCFBA9599L, 0xB8BDA50FL,
176 	0x2802B89EL, 0x5F058808L, 0xC60CD9B2L, 0xB10BE924L,
177 	0x2F6F7C87L, 0x58684C11L, 0xC1611DABL, 0xB6662D3DL,
178 	0x76DC4190L, 0x01DB7106L, 0x98D220BCL, 0xEFD5102AL,
179 	0x71B18589L, 0x06B6B51FL, 0x9FBFE4A5L, 0xE8B8D433L,
180 	0x7807C9A2L, 0x0F00F934L, 0x9609A88EL, 0xE10E9818L,
181 	0x7F6A0DBBL, 0x086D3D2DL, 0x91646C97L, 0xE6635C01L,
182 	0x6B6B51F4L, 0x1C6C6162L, 0x856530D8L, 0xF262004EL,
183 	0x6C0695EDL, 0x1B01A57BL, 0x8208F4C1L, 0xF50FC457L,
184 	0x65B0D9C6L, 0x12B7E950L, 0x8BBEB8EAL, 0xFCB9887CL,
185 	0x62DD1DDFL, 0x15DA2D49L, 0x8CD37CF3L, 0xFBD44C65L,
186 	0x4DB26158L, 0x3AB551CEL, 0xA3BC0074L, 0xD4BB30E2L,
187 	0x4ADFA541L, 0x3DD895D7L, 0xA4D1C46DL, 0xD3D6F4FBL,
188 	0x4369E96AL, 0x346ED9FCL, 0xAD678846L, 0xDA60B8D0L,
189 	0x44042D73L, 0x33031DE5L, 0xAA0A4C5FL, 0xDD0D7CC9L,
190 	0x5005713CL, 0x270241AAL, 0xBE0B1010L, 0xC90C2086L,
191 	0x5768B525L, 0x206F85B3L, 0xB966D409L, 0xCE61E49FL,
192 	0x5EDEF90EL, 0x29D9C998L, 0xB0D09822L, 0xC7D7A8B4L,
193 	0x59B33D17L, 0x2EB40D81L, 0xB7BD5C3BL, 0xC0BA6CADL,
194 	0xEDB88320L, 0x9ABFB3B6L, 0x03B6E20CL, 0x74B1D29AL,
195 	0xEAD54739L, 0x9DD277AFL, 0x04DB2615L, 0x73DC1683L,
196 	0xE3630B12L, 0x94643B84L, 0x0D6D6A3EL, 0x7A6A5AA8L,
197 	0xE40ECF0BL, 0x9309FF9DL, 0x0A00AE27L, 0x7D079EB1L,
198 	0xF00F9344L, 0x8708A3D2L, 0x1E01F268L, 0x6906C2FEL,
199 	0xF762575DL, 0x806567CBL, 0x196C3671L, 0x6E6B06E7L,
200 	0xFED41B76L, 0x89D32BE0L, 0x10DA7A5AL, 0x67DD4ACCL,
201 	0xF9B9DF6FL, 0x8EBEEFF9L, 0x17B7BE43L, 0x60B08ED5L,
202 	0xD6D6A3E8L, 0xA1D1937EL, 0x38D8C2C4L, 0x4FDFF252L,
203 	0xD1BB67F1L, 0xA6BC5767L, 0x3FB506DDL, 0x48B2364BL,
204 	0xD80D2BDAL, 0xAF0A1B4CL, 0x36034AF6L, 0x41047A60L,
205 	0xDF60EFC3L, 0xA867DF55L, 0x316E8EEFL, 0x4669BE79L,
206 	0xCB61B38CL, 0xBC66831AL, 0x256FD2A0L, 0x5268E236L,
207 	0xCC0C7795L, 0xBB0B4703L, 0x220216B9L, 0x5505262FL,
208 	0xC5BA3BBEL, 0xB2BD0B28L, 0x2BB45A92L, 0x5CB36A04L,
209 	0xC2D7FFA7L, 0xB5D0CF31L, 0x2CD99E8BL, 0x5BDEAE1DL,
210 	0x9B64C2B0L, 0xEC63F226L, 0x756AA39CL, 0x026D930AL,
211 	0x9C0906A9L, 0xEB0E363FL, 0x72076785L, 0x05005713L,
212 	0x95BF4A82L, 0xE2B87A14L, 0x7BB12BAEL, 0x0CB61B38L,
213 	0x92D28E9BL, 0xE5D5BE0DL, 0x7CDCEFB7L, 0x0BDBDF21L,
214 	0x86D3D2D4L, 0xF1D4E242L, 0x68DDB3F8L, 0x1FDA836EL,
215 	0x81BE16CDL, 0xF6B9265BL, 0x6FB077E1L, 0x18B74777L,
216 	0x88085AE6L, 0xFF0F6A70L, 0x66063BCAL, 0x11010B5CL,
217 	0x8F659EFFL, 0xF862AE69L, 0x616BFFD3L, 0x166CCF45L,
218 	0xA00AE278L, 0xD70DD2EEL, 0x4E048354L, 0x3903B3C2L,
219 	0xA7672661L, 0xD06016F7L, 0x4969474DL, 0x3E6E77DBL,
220 	0xAED16A4AL, 0xD9D65ADCL, 0x40DF0B66L, 0x37D83BF0L,
221 	0xA9BCAE53L, 0xDEBB9EC5L, 0x47B2CF7FL, 0x30B5FFE9L,
222 	0xBDBDF21CL, 0xCABAC28AL, 0x53B39330L, 0x24B4A3A6L,
223 	0xBAD03605L, 0xCDD70693L, 0x54DE5729L, 0x23D967BFL,
224 	0xB3667A2EL, 0xC4614AB8L, 0x5D681B02L, 0x2A6F2B94L,
225 	0xB40BBE37L, 0xC30C8EA1L, 0x5A05DF1BL, 0x2D02EF8DL
226     };
227     crcword ^= 0xFFFFFFFFL;
228     while (len--) {
229 	unsigned long newbyte = *data++;
230 	newbyte ^= crcword & 0xFFL;
231 	crcword = (crcword >> 8) ^ crc32_table[newbyte];
232     }
233     return crcword ^ 0xFFFFFFFFL;
234 }
235 
236 typedef struct {
237     short code, extrabits;
238     int min, max;
239 } coderecord;
240 
241 static const coderecord lencodes[] = {
242     {257, 0, 3, 3},
243     {258, 0, 4, 4},
244     {259, 0, 5, 5},
245     {260, 0, 6, 6},
246     {261, 0, 7, 7},
247     {262, 0, 8, 8},
248     {263, 0, 9, 9},
249     {264, 0, 10, 10},
250     {265, 1, 11, 12},
251     {266, 1, 13, 14},
252     {267, 1, 15, 16},
253     {268, 1, 17, 18},
254     {269, 2, 19, 22},
255     {270, 2, 23, 26},
256     {271, 2, 27, 30},
257     {272, 2, 31, 34},
258     {273, 3, 35, 42},
259     {274, 3, 43, 50},
260     {275, 3, 51, 58},
261     {276, 3, 59, 66},
262     {277, 4, 67, 82},
263     {278, 4, 83, 98},
264     {279, 4, 99, 114},
265     {280, 4, 115, 130},
266     {281, 5, 131, 162},
267     {282, 5, 163, 194},
268     {283, 5, 195, 226},
269     {284, 5, 227, 257},
270     {285, 0, 258, 258},
271 };
272 
273 static const coderecord distcodes[] = {
274     {0, 0, 1, 1},
275     {1, 0, 2, 2},
276     {2, 0, 3, 3},
277     {3, 0, 4, 4},
278     {4, 1, 5, 6},
279     {5, 1, 7, 8},
280     {6, 2, 9, 12},
281     {7, 2, 13, 16},
282     {8, 3, 17, 24},
283     {9, 3, 25, 32},
284     {10, 4, 33, 48},
285     {11, 4, 49, 64},
286     {12, 5, 65, 96},
287     {13, 5, 97, 128},
288     {14, 6, 129, 192},
289     {15, 6, 193, 256},
290     {16, 7, 257, 384},
291     {17, 7, 385, 512},
292     {18, 8, 513, 768},
293     {19, 8, 769, 1024},
294     {20, 9, 1025, 1536},
295     {21, 9, 1537, 2048},
296     {22, 10, 2049, 3072},
297     {23, 10, 3073, 4096},
298     {24, 11, 4097, 6144},
299     {25, 11, 6145, 8192},
300     {26, 12, 8193, 12288},
301     {27, 12, 12289, 16384},
302     {28, 13, 16385, 24576},
303     {29, 13, 24577, 32768},
304 };
305 
306 /* ----------------------------------------------------------------------
307  * Deflate compression.
308  */
309 
310 #define SYMLIMIT 65536
311 #define SYMPFX_LITLEN    0x00000000U
312 #define SYMPFX_DIST      0x40000000U
313 #define SYMPFX_EXTRABITS 0x80000000U
314 #define SYMPFX_CODELEN   0xC0000000U
315 #define SYMPFX_MASK      0xC0000000U
316 
317 #define SYM_EXTRABITS_MASK 0x3C000000U
318 #define SYM_EXTRABITS_SHIFT 26
319 
320 struct huftrees {
321     unsigned char *len_litlen;
322     int *code_litlen;
323     unsigned char *len_dist;
324     int *code_dist;
325     unsigned char *len_codelen;
326     int *code_codelen;
327 };
328 
329 struct deflate_compress_ctx {
330     struct LZ77Context *lzc;
331     unsigned char *outbuf;
332     int outlen, outsize;
333     unsigned long outbits;
334     int noutbits;
335     int firstblock;
336     unsigned long *syms;
337     int symstart, nsyms;
338     int type;
339     unsigned long checksum;
340     unsigned long datasize;
341     int lastblock;
342     int finished;
343     unsigned char static_len1[288], static_len2[30];
344     int static_code1[288], static_code2[30];
345     struct huftrees sht;
346 #ifdef STATISTICS
347     unsigned long bitcount;
348 #endif
349 };
350 
outbits(deflate_compress_ctx * out,unsigned long bits,int nbits)351 static void outbits(deflate_compress_ctx *out,
352 		    unsigned long bits, int nbits)
353 {
354     assert(out->noutbits + nbits <= 32);
355     out->outbits |= bits << out->noutbits;
356     out->noutbits += nbits;
357     while (out->noutbits >= 8) {
358 	if (out->outlen >= out->outsize) {
359 	    out->outsize = out->outlen + 64;
360 	    out->outbuf = sresize(out->outbuf, out->outsize, unsigned char);
361 	}
362 	out->outbuf[out->outlen++] = (unsigned char) (out->outbits & 0xFF);
363 	out->outbits >>= 8;
364 	out->noutbits -= 8;
365     }
366 #ifdef STATISTICS
367     out->bitcount += nbits;
368 #endif
369 }
370 
371 /*
372  * Compute the bit length of a symbol, given the three Huffman
373  * trees.
374  */
symsize(unsigned sym,const struct huftrees * trees)375 static int symsize(unsigned sym, const struct huftrees *trees)
376 {
377     unsigned basesym = sym &~ SYMPFX_MASK;
378 
379     switch (sym & SYMPFX_MASK) {
380       case SYMPFX_LITLEN:
381 	return trees->len_litlen[basesym];
382       case SYMPFX_DIST:
383 	return trees->len_dist[basesym];
384       case SYMPFX_CODELEN:
385 	return trees->len_codelen[basesym];
386       default /*case SYMPFX_EXTRABITS*/:
387 	return basesym >> SYM_EXTRABITS_SHIFT;
388     }
389 }
390 
391 /*
392  * Write out a single symbol, given the three Huffman trees.
393  */
writesym(deflate_compress_ctx * out,unsigned sym,const struct huftrees * trees)394 static void writesym(deflate_compress_ctx *out,
395 		     unsigned sym, const struct huftrees *trees)
396 {
397     unsigned basesym = sym &~ SYMPFX_MASK;
398     int i;
399 
400     switch (sym & SYMPFX_MASK) {
401       case SYMPFX_LITLEN:
402 	debug(("send: litlen %d\n", basesym));
403 	outbits(out, trees->code_litlen[basesym], trees->len_litlen[basesym]);
404 	break;
405       case SYMPFX_DIST:
406 	debug(("send: dist %d\n", basesym));
407 	outbits(out, trees->code_dist[basesym], trees->len_dist[basesym]);
408 	break;
409       case SYMPFX_CODELEN:
410 	debug(("send: codelen %d\n", basesym));
411 	outbits(out, trees->code_codelen[basesym],trees->len_codelen[basesym]);
412 	break;
413       case SYMPFX_EXTRABITS:
414 	i = basesym >> SYM_EXTRABITS_SHIFT;
415 	basesym &= ~SYM_EXTRABITS_MASK;
416 	debug(("send: extrabits %d/%d\n", basesym, i));
417 	outbits(out, basesym, i);
418 	break;
419     }
420 }
421 
422 /*
423  * outblock() must output _either_ a dynamic block of length
424  * `dynamic_len', _or_ a static block of length `static_len', but
425  * it gets to choose which.
426  */
outblock(deflate_compress_ctx * out,int dynamic_len,int static_len)427 static void outblock(deflate_compress_ctx *out,
428 		     int dynamic_len, int static_len)
429 {
430     int freqs1[286], freqs2[30], freqs3[19];
431     unsigned char len1[286], len2[30], len3[19];
432     int code1[286], code2[30], code3[19];
433     int hlit, hdist, hclen, bfinal, btype;
434     int treesrc[286 + 30];
435     int treesyms[286 + 30];
436     int codelen[19];
437     int i, ntreesrc, ntreesyms;
438     int dynamic, blklen;
439     struct huftrees dht;
440     const struct huftrees *ht;
441 #ifdef STATISTICS
442     unsigned long bitcount_before;
443 #endif
444 
445     dht.len_litlen = len1;
446     dht.len_dist = len2;
447     dht.len_codelen = len3;
448     dht.code_litlen = code1;
449     dht.code_dist = code2;
450     dht.code_codelen = code3;
451 
452     /*
453      * We make our choice of block to output by doing all the
454      * detailed work to determine the exact length of each possible
455      * block. Then we choose the one which has fewest output bits
456      * per symbol.
457      */
458 
459     /*
460      * First build the two main Huffman trees for the dynamic
461      * block.
462      */
463 
464     /*
465      * Count up the frequency tables.
466      */
467     memset(freqs1, 0, sizeof(freqs1));
468     memset(freqs2, 0, sizeof(freqs2));
469     freqs1[256] = 1;	       /* we're bound to need one EOB */
470     for (i = 0; i < dynamic_len; i++) {
471 	unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT];
472 
473 	/*
474 	 * Increment the occurrence counter for this symbol, if
475 	 * it's in one of the Huffman alphabets and isn't extra
476 	 * bits.
477 	 */
478 	if ((sym & SYMPFX_MASK) == SYMPFX_LITLEN) {
479 	    sym &= ~SYMPFX_MASK;
480 	    assert(sym < lenof(freqs1));
481 	    freqs1[sym]++;
482 	} else if ((sym & SYMPFX_MASK) == SYMPFX_DIST) {
483 	    sym &= ~SYMPFX_MASK;
484 	    assert(sym < lenof(freqs2));
485 	    freqs2[sym]++;
486 	}
487     }
488     build_huffman_tree(freqs1, len1, lenof(freqs1), 15);
489     build_huffman_tree(freqs2, len2, lenof(freqs2), 15);
490     deflate_hufcodes(len1, code1, lenof(freqs1));
491     deflate_hufcodes(len2, code2, lenof(freqs2));
492 
493     /*
494      * Determine HLIT and HDIST.
495      */
496     for (hlit = 286; hlit > 257 && len1[hlit-1] == 0; hlit--);
497     for (hdist = 30; hdist > 1 && len2[hdist-1] == 0; hdist--);
498 
499     /*
500      * Write out the list of symbols used to transmit the
501      * trees.
502      */
503     ntreesrc = 0;
504     for (i = 0; i < hlit; i++)
505 	treesrc[ntreesrc++] = len1[i];
506     for (i = 0; i < hdist; i++)
507 	treesrc[ntreesrc++] = len2[i];
508     ntreesyms = 0;
509     for (i = 0; i < ntreesrc ;) {
510 	int j = 1;
511 	int k;
512 
513 	/* Find length of run of the same length code. */
514 	while (i+j < ntreesrc && treesrc[i+j] == treesrc[i])
515 	    j++;
516 
517 	/* Encode that run as economically as we can. */
518 	k = j;
519 	if (treesrc[i] == 0) {
520 	    /*
521 	     * Zero code length: we can output run codes for
522 	     * 3-138 zeroes. So if we have fewer than 3 zeroes,
523 	     * we just output literals. Otherwise, we output
524 	     * nothing but run codes, and tweak their lengths
525 	     * to make sure we aren't left with under 3 at the
526 	     * end.
527 	     */
528 	    if (k < 3) {
529 		while (k--)
530 		    treesyms[ntreesyms++] = 0 | SYMPFX_CODELEN;
531 	    } else {
532 		while (k > 0) {
533 		    int rpt = (k < 138 ? k : 138);
534 		    if (rpt > k-3 && rpt < k)
535 			rpt = k-3;
536 		    assert(rpt >= 3 && rpt <= 138);
537 		    if (rpt < 11) {
538 			treesyms[ntreesyms++] = 17 | SYMPFX_CODELEN;
539 			treesyms[ntreesyms++] =
540 			    (SYMPFX_EXTRABITS | (rpt - 3) |
541 			     (3 << SYM_EXTRABITS_SHIFT));
542 		    } else {
543 			treesyms[ntreesyms++] = 18 | SYMPFX_CODELEN;
544 			treesyms[ntreesyms++] =
545 			    (SYMPFX_EXTRABITS | (rpt - 11) |
546 			     (7 << SYM_EXTRABITS_SHIFT));
547 		    }
548 		    k -= rpt;
549 		}
550 	    }
551 	} else {
552 	    /*
553 	     * Non-zero code length: we must output the first
554 	     * one explicitly, then we can output a copy code
555 	     * for 3-6 repeats. So if we have fewer than 4
556 	     * repeats, we _just_ output literals. Otherwise,
557 	     * we output one literal plus at least one copy
558 	     * code, and tweak the copy codes to make sure we
559 	     * aren't left with under 3 at the end.
560 	     */
561 	    assert(treesrc[i] < 16);
562 	    treesyms[ntreesyms++] = treesrc[i] | SYMPFX_CODELEN;
563 	    k--;
564 	    if (k < 3) {
565 		while (k--)
566 		    treesyms[ntreesyms++] = treesrc[i] | SYMPFX_CODELEN;
567 	    } else {
568 		while (k > 0) {
569 		    int rpt = (k < 6 ? k : 6);
570 		    if (rpt > k-3 && rpt < k)
571 			rpt = k-3;
572 		    assert(rpt >= 3 && rpt <= 6);
573 		    treesyms[ntreesyms++] = 16 | SYMPFX_CODELEN;
574 		    treesyms[ntreesyms++] = (SYMPFX_EXTRABITS | (rpt - 3) |
575 					     (2 << SYM_EXTRABITS_SHIFT));
576 		    k -= rpt;
577 		}
578 	    }
579 	}
580 
581 	i += j;
582     }
583     assert((unsigned)ntreesyms < lenof(treesyms));
584 
585     /*
586      * Count up the frequency table for the tree-transmission
587      * symbols, and build the auxiliary Huffman tree for that.
588      */
589     memset(freqs3, 0, sizeof(freqs3));
590     for (i = 0; i < ntreesyms; i++) {
591 	unsigned sym = treesyms[i];
592 
593 	/*
594 	 * Increment the occurrence counter for this symbol, if
595 	 * it's the Huffman alphabet and isn't extra bits.
596 	 */
597 	if ((sym & SYMPFX_MASK) == SYMPFX_CODELEN) {
598 	    sym &= ~SYMPFX_MASK;
599 	    assert(sym < lenof(freqs3));
600 	    freqs3[sym]++;
601 	}
602     }
603     build_huffman_tree(freqs3, len3, lenof(freqs3), 7);
604     deflate_hufcodes(len3, code3, lenof(freqs3));
605 
606     /*
607      * Reorder the code length codes into transmission order, and
608      * determine HCLEN.
609      */
610     for (i = 0; i < 19; i++)
611 	codelen[i] = len3[lenlenmap[i]];
612     for (hclen = 19; hclen > 4 && codelen[hclen-1] == 0; hclen--)
613         /* empty loop body */;
614 
615     /*
616      * Now work out the exact size of both the dynamic and the
617      * static block, in bits.
618      */
619     {
620 	int ssize, dsize;
621 
622 	/*
623 	 * First the dynamic block.
624 	 */
625 	dsize = 3 + 5 + 5 + 4;	       /* 3-bit header, HLIT, HDIST, HCLEN */
626 	dsize += 3 * hclen;	       /* code-length-alphabet code lengths */
627 	/* Code lengths */
628 	for (i = 0; i < ntreesyms; i++)
629 	    dsize += symsize(treesyms[i], &dht);
630 	/* The actual block data */
631 	for (i = 0; i < dynamic_len; i++) {
632 	    unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT];
633 	    dsize += symsize(sym, &dht);
634 	}
635 	/* And the end-of-data symbol. */
636 	dsize += symsize(SYMPFX_LITLEN | 256, &dht);
637 
638 	/*
639 	 * Now the static block.
640 	 */
641 	ssize = 3;		       /* 3-bit block header */
642 	/* The actual block data */
643 	for (i = 0; i < static_len; i++) {
644 	    unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT];
645 	    ssize += symsize(sym, &out->sht);
646 	}
647 	/* And the end-of-data symbol. */
648 	ssize += symsize(SYMPFX_LITLEN | 256, &out->sht);
649 
650 	/*
651 	 * Compare the two and decide which to output. We break
652 	 * exact ties in favour of the static block, because of the
653 	 * special case in which that block has zero length.
654 	 */
655 	dynamic = ((double)ssize * dynamic_len > (double)dsize * static_len);
656 	ht = dynamic ? &dht : &out->sht;
657 	blklen = dynamic ? dynamic_len : static_len;
658     }
659 
660     /*
661      * Actually transmit the block.
662      */
663 
664     /* 3-bit block header */
665     bfinal = (out->lastblock ? 1 : 0);
666     btype = dynamic ? 2 : 1;
667     debug(("send: bfinal=%d btype=%d\n", bfinal, btype));
668     outbits(out, bfinal, 1);
669     outbits(out, btype, 2);
670 
671 #ifdef STATISTICS
672     bitcount_before = out->bitcount;
673 #endif
674 
675     if (dynamic) {
676 	/* HLIT, HDIST and HCLEN */
677 	debug(("send: hlit=%d hdist=%d hclen=%d\n", hlit, hdist, hclen));
678 	outbits(out, hlit - 257, 5);
679 	outbits(out, hdist - 1, 5);
680 	outbits(out, hclen - 4, 4);
681 
682 	/* Code lengths for the auxiliary tree */
683 	for (i = 0; i < hclen; i++) {
684 	    debug(("send: lenlen %d\n", codelen[i]));
685 	    outbits(out, codelen[i], 3);
686 	}
687 
688 	/* Code lengths for the literal/length and distance trees */
689 	for (i = 0; i < ntreesyms; i++)
690 	    writesym(out, treesyms[i], ht);
691 #ifdef STATISTICS
692 	fprintf(stderr, "total tree size %lu bits\n",
693 		out->bitcount - bitcount_before);
694 #endif
695     }
696 
697     /* Output the actual symbols from the buffer */
698     for (i = 0; i < blklen; i++) {
699 	unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT];
700 	writesym(out, sym, ht);
701     }
702 
703     /* Output the end-of-data symbol */
704     writesym(out, SYMPFX_LITLEN | 256, ht);
705 
706     /*
707      * Remove all the just-output symbols from the symbol buffer by
708      * adjusting symstart and nsyms.
709      */
710     out->symstart = (out->symstart + blklen) % SYMLIMIT;
711     out->nsyms -= blklen;
712 }
713 
714 /*
715  * Give the approximate log-base-2 of an input integer, measured in
716  * 8ths of a bit. (I.e. this computes an integer approximation to
717  * 8*logbase2(x).)
718  */
approxlog2(unsigned x)719 static int approxlog2(unsigned x)
720 {
721     int ret = 31*8;
722 
723     /*
724      * Binary-search to get the top bit of x up to bit 31.
725      */
726     if (x < 0x00010000U) x <<= 16, ret -= 16*8;
727     if (x < 0x01000000U) x <<=  8, ret -=  8*8;
728     if (x < 0x10000000U) x <<=  4, ret -=  4*8;
729     if (x < 0x40000000U) x <<=  2, ret -=  2*8;
730     if (x < 0x80000000U) x <<=  1, ret -=  1*8;
731 
732     /*
733      * Now we know the logarithm we want is in [ret,ret+1).
734      * Determine the bottom three bits by checking against
735      * threshold values.
736      *
737      * (Each of these threshold values is 0x80000000 times an odd
738      * power of 2^(1/16). Therefore, this function rounds to
739      * nearest.)
740      */
741     if (x <= 0xAD583EEAU) {
742 	if (x <= 0x91C3D373U)
743 	    ret += (x <= 0x85AAC367U ? 0 : 1);
744 	else
745 	    ret += (x <= 0x9EF53260U ? 2 : 3);
746     } else {
747 	if (x <= 0xCE248C15U)
748 	    ret += (x <= 0xBD08A39FU ? 4 : 5);
749 	else
750 	    ret += (x <= 0xE0CCDEECU ? 6 : x <= 0xF5257D15L ? 7 : 8);
751     }
752 
753     return ret;
754 }
755 
chooseblock(deflate_compress_ctx * out)756 static void chooseblock(deflate_compress_ctx *out)
757 {
758     int freqs1[286], freqs2[30];
759     int i, len, bestlen, longestlen = 0;
760     int total1, total2;
761     int bestvfm;
762 
763     memset(freqs1, 0, sizeof(freqs1));
764     memset(freqs2, 0, sizeof(freqs2));
765     freqs1[256] = 1;		       /* we're bound to need one EOB */
766     total1 = 1;
767     total2 = 0;
768 
769     /*
770      * Iterate over all possible block lengths, computing the
771      * entropic coding approximation to the final length at every
772      * stage. We divide the result by the number of symbols
773      * encoded, to determine the `value for money' (overall
774      * bits-per-symbol count) of a block of that length.
775      */
776     bestlen = -1;
777     bestvfm = 0;
778 
779     len = 300 * 8;	      /* very approximate size of the Huffman trees */
780 
781     for (i = 0; i < out->nsyms; i++) {
782 	unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT];
783 
784 	if (i > 0 && (sym & SYMPFX_MASK) == SYMPFX_LITLEN) {
785 	    /*
786 	     * This is a viable point at which to end the block.
787 	     * Compute the value for money.
788 	     */
789 	    int vfm = i * 32768 / len;      /* symbols encoded per bit */
790 
791 	    if (bestlen < 0 || vfm > bestvfm) {
792 		bestlen = i;
793 		bestvfm = vfm;
794 	    }
795 
796 	    longestlen = i;
797 	}
798 
799 	/*
800 	 * Increment the occurrence counter for this symbol, if
801 	 * it's in one of the Huffman alphabets and isn't extra
802 	 * bits.
803 	 */
804 	if ((sym & SYMPFX_MASK) == SYMPFX_LITLEN) {
805 	    sym &= ~SYMPFX_MASK;
806 	    assert(sym < lenof(freqs1));
807 	    len += freqs1[sym] * approxlog2(freqs1[sym]);
808 	    len -= total1 * approxlog2(total1);
809 	    freqs1[sym]++;
810 	    total1++;
811 	    len -= freqs1[sym] * approxlog2(freqs1[sym]);
812 	    len += total1 * approxlog2(total1);
813 	} else if ((sym & SYMPFX_MASK) == SYMPFX_DIST) {
814 	    sym &= ~SYMPFX_MASK;
815 	    assert(sym < lenof(freqs2));
816 	    len += freqs2[sym] * approxlog2(freqs2[sym]);
817 	    len -= total2 * approxlog2(total2);
818 	    freqs2[sym]++;
819 	    total2++;
820 	    len -= freqs2[sym] * approxlog2(freqs2[sym]);
821 	    len += total2 * approxlog2(total2);
822 	} else if ((sym & SYMPFX_MASK) == SYMPFX_EXTRABITS) {
823 	    len += 8 * ((sym &~ SYMPFX_MASK) >> SYM_EXTRABITS_SHIFT);
824 	}
825     }
826 
827     assert(bestlen > 0);
828 
829     outblock(out, bestlen, longestlen);
830 }
831 
832 /*
833  * Force the current symbol buffer to be flushed out as a single
834  * block.
835  */
flushblock(deflate_compress_ctx * out)836 static void flushblock(deflate_compress_ctx *out)
837 {
838     /*
839      * No need to check that out->nsyms is a valid block length: we
840      * know it has to be, because flushblock() is called in between
841      * two matches/literals.
842      */
843     outblock(out, out->nsyms, out->nsyms);
844     assert(out->nsyms == 0);
845 }
846 
847 /*
848  * Place a symbol into the symbols buffer.
849  */
outsym(deflate_compress_ctx * out,unsigned long sym)850 static void outsym(deflate_compress_ctx *out, unsigned long sym)
851 {
852     assert(out->nsyms < SYMLIMIT);
853     out->syms[(out->symstart + out->nsyms++) % SYMLIMIT] = sym;
854 
855     if (out->nsyms == SYMLIMIT)
856 	chooseblock(out);
857 }
858 
literal(struct LZ77Context * ectx,unsigned char c)859 static void literal(struct LZ77Context *ectx, unsigned char c)
860 {
861     deflate_compress_ctx *out = (deflate_compress_ctx *) ectx->userdata;
862 
863     outsym(out, SYMPFX_LITLEN | c);
864 }
865 
match(struct LZ77Context * ectx,int distance,int len)866 static void match(struct LZ77Context *ectx, int distance, int len)
867 {
868     const coderecord *d, *l;
869     int i, j, k;
870     deflate_compress_ctx *out = (deflate_compress_ctx *) ectx->userdata;
871 
872     while (len > 0) {
873         int thislen;
874 
875         /*
876          * We can transmit matches of lengths 3 through 258
877          * inclusive. So if len exceeds 258, we must transmit in
878          * several steps, with 258 or less in each step.
879          *
880          * Specifically: if len >= 261, we can transmit 258 and be
881          * sure of having at least 3 left for the next step. And if
882          * len <= 258, we can just transmit len. But if len == 259
883          * or 260, we must transmit len-3.
884          */
885         thislen = (len > 260 ? 258 : len <= 258 ? len : len - 3);
886         len -= thislen;
887 
888         /*
889          * Binary-search to find which length code we're
890          * transmitting.
891          */
892         i = -1;
893         j = sizeof(lencodes) / sizeof(*lencodes);
894         while (1) {
895             assert(j - i >= 2);
896             k = (j + i) / 2;
897             if (thislen < lencodes[k].min)
898                 j = k;
899             else if (thislen > lencodes[k].max)
900                 i = k;
901             else {
902                 l = &lencodes[k];
903                 break;                 /* found it! */
904             }
905         }
906 
907         /*
908          * Transmit the length code.
909          */
910         outsym(out, SYMPFX_LITLEN | l->code);
911 
912         /*
913          * Transmit the extra bits.
914          */
915         if (l->extrabits) {
916             outsym(out, (SYMPFX_EXTRABITS | (thislen - l->min) |
917                          (l->extrabits << SYM_EXTRABITS_SHIFT)));
918         }
919 
920         /*
921          * Binary-search to find which distance code we're
922          * transmitting.
923          */
924         i = -1;
925         j = sizeof(distcodes) / sizeof(*distcodes);
926         while (1) {
927             assert(j - i >= 2);
928             k = (j + i) / 2;
929             if (distance < distcodes[k].min)
930                 j = k;
931             else if (distance > distcodes[k].max)
932                 i = k;
933             else {
934                 d = &distcodes[k];
935                 break;                 /* found it! */
936             }
937         }
938 
939         /*
940          * Write the distance code.
941          */
942         outsym(out, SYMPFX_DIST | d->code);
943 
944         /*
945          * Transmit the extra bits.
946          */
947         if (d->extrabits) {
948             outsym(out, (SYMPFX_EXTRABITS | (distance - d->min) |
949                          (d->extrabits << SYM_EXTRABITS_SHIFT)));
950         }
951     }
952 }
953 
deflate_compress_new(int type)954 deflate_compress_ctx *deflate_compress_new(int type)
955 {
956     deflate_compress_ctx *out;
957     struct LZ77Context *ectx = snew(struct LZ77Context);
958 
959     lz77_init(ectx, DWINSIZE);
960     ectx->literal = literal;
961     ectx->match = match;
962 
963     out = snew(deflate_compress_ctx);
964     out->type = type;
965     out->outbits = out->noutbits = 0;
966     out->firstblock = TRUE;
967 #ifdef STATISTICS
968     out->bitcount = 0;
969 #endif
970 
971     out->syms = snewn(SYMLIMIT, unsigned long);
972     out->symstart = out->nsyms = 0;
973 
974     out->checksum = (type == DEFLATE_TYPE_ZLIB ? 1 : 0);
975     out->datasize = 0;
976     out->lastblock = FALSE;
977     out->finished = FALSE;
978 
979     /*
980      * Build the static Huffman tables now, so we'll have them
981      * available every time outblock() is called.
982      */
983     {
984 	int i;
985 
986 	for (i = 0; i < (int)lenof(out->static_len1); i++)
987 	    out->static_len1[i] = (i < 144 ? 8 :
988 				   i < 256 ? 9 :
989 				   i < 280 ? 7 : 8);
990 	for (i = 0; i < (int)lenof(out->static_len2); i++)
991 	    out->static_len2[i] = 5;
992     }
993     deflate_hufcodes(out->static_len1, out->static_code1,
994                      lenof(out->static_code1));
995     deflate_hufcodes(out->static_len2, out->static_code2,
996                      lenof(out->static_code2));
997     out->sht.len_litlen = out->static_len1;
998     out->sht.len_dist = out->static_len2;
999     out->sht.len_codelen = NULL;
1000     out->sht.code_litlen = out->static_code1;
1001     out->sht.code_dist = out->static_code2;
1002     out->sht.code_codelen = NULL;
1003 
1004     ectx->userdata = out;
1005     out->lzc = ectx;
1006 
1007     return out;
1008 }
1009 
deflate_compress_free(deflate_compress_ctx * out)1010 void deflate_compress_free(deflate_compress_ctx *out)
1011 {
1012     struct LZ77Context *ectx = out->lzc;
1013 
1014     sfree(out->syms);
1015     lz77_cleanup(ectx);
1016     sfree(ectx);
1017     sfree(out);
1018 }
1019 
deflate_compress_data(deflate_compress_ctx * out,const void * vblock,int len,int flushtype,void ** outblock,int * outlen)1020 void deflate_compress_data(deflate_compress_ctx *out,
1021 			   const void *vblock, int len, int flushtype,
1022 			   void **outblock, int *outlen)
1023 {
1024     struct LZ77Context *ectx = out->lzc;
1025     const unsigned char *block = (const unsigned char *)vblock;
1026 
1027     assert(!out->finished);
1028 
1029     out->outbuf = NULL;
1030     out->outlen = out->outsize = 0;
1031 
1032     /*
1033      * If this is the first block, output the header.
1034      */
1035     if (out->firstblock) {
1036 	switch (out->type) {
1037 	  case DEFLATE_TYPE_BARE:
1038 	    break;		       /* no header */
1039 	  case DEFLATE_TYPE_ZLIB:
1040 	    /*
1041 	     * zlib (RFC1950) header bytes: 78 9C. (Deflate
1042 	     * compression, 32K window size, default algorithm.)
1043 	     */
1044 	    outbits(out, 0x9C78, 16);
1045 	    break;
1046 	  case DEFLATE_TYPE_GZIP:
1047 	    /*
1048 	     * Minimal gzip (RFC1952) header:
1049 	     *
1050 	     *  - basic header of 1F 8B
1051 	     *  - compression method byte (8 = deflate)
1052 	     *  - flags byte (zero: we use no optional features)
1053 	     *  - modification time (zero: no time stamp available)
1054 	     * 	- extra flags byte (2: we use maximum compression
1055 	     * 	  always)
1056 	     *  - operating system byte (255: we do not specify)
1057 	     */
1058 	    outbits(out, 0x00088B1F, 32);   /* header, CM, flags */
1059 	    outbits(out, 0, 32);       /* mtime */
1060 	    outbits(out, 0xFF02, 16);  /* xflags, OS */
1061 	    break;
1062 	}
1063 	out->firstblock = FALSE;
1064     }
1065 
1066     /*
1067      * Feed our data to the LZ77 compression phase.
1068      */
1069     lz77_compress(ectx, block, len, TRUE);
1070 
1071     /*
1072      * Update checksums and counters.
1073      */
1074     switch (out->type) {
1075       case DEFLATE_TYPE_ZLIB:
1076 	out->checksum = adler32_update(out->checksum, block, len);
1077 	break;
1078       case DEFLATE_TYPE_GZIP:
1079 	out->checksum = crc32_update(out->checksum, block, len);
1080 	break;
1081     }
1082     out->datasize += len;
1083 
1084     switch (flushtype) {
1085 	/*
1086 	 * FIXME: what other flush types are available and useful?
1087 	 * In PuTTY, it was clear that we generally wanted to be in
1088 	 * a static block so it was safe to open one. Here, we
1089 	 * probably prefer to be _outside_ a block if we can. Think
1090 	 * about this.
1091 	 */
1092       case DEFLATE_NO_FLUSH:
1093 	break;			       /* don't flush any data at all (duh) */
1094       case DEFLATE_SYNC_FLUSH:
1095 	/*
1096 	 * Close the current block.
1097 	 */
1098 	flushblock(out);
1099 
1100 	/*
1101 	 * Then output an empty _uncompressed_ block: send 000,
1102 	 * then sync to byte boundary, then send bytes 00 00 FF
1103 	 * FF.
1104 	 */
1105 	outbits(out, 0, 3);
1106 	if (out->noutbits)
1107 	    outbits(out, 0, 8 - out->noutbits);
1108 	outbits(out, 0, 16);
1109 	outbits(out, 0xFFFF, 16);
1110 	break;
1111       case DEFLATE_END_OF_DATA:
1112 	/*
1113 	 * Output a block with BFINAL set.
1114 	 */
1115 	out->lastblock = TRUE;
1116 	flushblock(out);
1117 
1118 	/*
1119 	 * Sync to byte boundary, flushing out the final byte.
1120 	 */
1121 	if (out->noutbits)
1122 	    outbits(out, 0, 8 - out->noutbits);
1123 
1124 	/*
1125 	 * Format-specific trailer data.
1126 	 */
1127 	switch (out->type) {
1128 	  case DEFLATE_TYPE_ZLIB:
1129 	    /*
1130 	     * Just write out the Adler32 checksum.
1131 	     */
1132 	    outbits(out, (out->checksum >> 24) & 0xFF, 8);
1133 	    outbits(out, (out->checksum >> 16) & 0xFF, 8);
1134 	    outbits(out, (out->checksum >>  8) & 0xFF, 8);
1135 	    outbits(out, (out->checksum >>  0) & 0xFF, 8);
1136 	    break;
1137 	  case DEFLATE_TYPE_GZIP:
1138 	    /*
1139 	     * Write out the CRC32 checksum and the data length.
1140 	     */
1141 	    outbits(out, out->checksum, 32);
1142 	    outbits(out, out->datasize, 32);
1143 	    break;
1144 	}
1145 
1146 	out->finished = TRUE;
1147 	break;
1148     }
1149 
1150     /*
1151      * Return any data that we've generated.
1152      */
1153     *outblock = (void *)out->outbuf;
1154     *outlen = out->outlen;
1155 }
1156 
1157 /* ----------------------------------------------------------------------
1158  * Deflate decompression.
1159  */
1160 
1161 /*
1162  * The way we work the Huffman decode is to have a table lookup on
1163  * the first N bits of the input stream (in the order they arrive,
1164  * of course, i.e. the first bit of the Huffman code is in bit 0).
1165  * Each table entry lists the number of bits to consume, plus
1166  * either an output code or a pointer to a secondary table.
1167  */
1168 struct table;
1169 struct tableentry;
1170 
1171 struct tableentry {
1172     unsigned char nbits;
1173     short code;
1174     struct table *nexttable;
1175 };
1176 
1177 struct table {
1178     int mask;			       /* mask applied to input bit stream */
1179     struct tableentry *table;
1180 };
1181 
1182 #define MAXSYMS 288
1183 
1184 /*
1185  * Build a single-level decode table for elements
1186  * [minlength,maxlength) of the provided code/length tables, and
1187  * recurse to build subtables.
1188  */
mkonetab(int * codes,unsigned char * lengths,int nsyms,int pfx,int pfxbits,int bits)1189 static struct table *mkonetab(int *codes, unsigned char *lengths, int nsyms,
1190 			      int pfx, int pfxbits, int bits)
1191 {
1192     struct table *tab = snew(struct table);
1193     int pfxmask = (1 << pfxbits) - 1;
1194     int nbits, i, j, code;
1195 
1196     tab->table = snewn(1 << bits, struct tableentry);
1197     tab->mask = (1 << bits) - 1;
1198 
1199     for (code = 0; code <= tab->mask; code++) {
1200 	tab->table[code].code = -1;
1201 	tab->table[code].nbits = 0;
1202 	tab->table[code].nexttable = NULL;
1203     }
1204 
1205     for (i = 0; i < nsyms; i++) {
1206 	if (lengths[i] <= pfxbits || (codes[i] & pfxmask) != pfx)
1207 	    continue;
1208 	code = (codes[i] >> pfxbits) & tab->mask;
1209 	for (j = code; j <= tab->mask; j += 1 << (lengths[i] - pfxbits)) {
1210 	    tab->table[j].code = i;
1211 	    nbits = lengths[i] - pfxbits;
1212 	    if (tab->table[j].nbits < nbits)
1213 		tab->table[j].nbits = nbits;
1214 	}
1215     }
1216     for (code = 0; code <= tab->mask; code++) {
1217 	if (tab->table[code].nbits <= bits)
1218 	    continue;
1219 	/* Generate a subtable. */
1220 	tab->table[code].code = -1;
1221 	nbits = tab->table[code].nbits - bits;
1222 	if (nbits > 7)
1223 	    nbits = 7;
1224 	tab->table[code].nbits = bits;
1225 	tab->table[code].nexttable = mkonetab(codes, lengths, nsyms,
1226 					      pfx | (code << pfxbits),
1227 					      pfxbits + bits, nbits);
1228     }
1229 
1230     return tab;
1231 }
1232 
1233 /*
1234  * Build a decode table, given a set of Huffman tree lengths.
1235  */
mktable(unsigned char * lengths,int nlengths,const char * alphabet,int * error)1236 static struct table *mktable(unsigned char *lengths, int nlengths,
1237 #ifdef ANALYSIS
1238 			     const char *alphabet,
1239 #endif
1240 			     int *error)
1241 {
1242     int codes[MAXSYMS];
1243     int maxlen;
1244 
1245 #ifdef ANALYSIS
1246     if (alphabet && analyse_level > 1) {
1247 	int i, col = 0;
1248 	printf("code lengths for %s alphabet:\n", alphabet);
1249 	for (i = 0; i < nlengths; i++) {
1250 	    col += printf("%3d", lengths[i]);
1251 	    if (col > 72) {
1252 		putchar('\n');
1253 		col = 0;
1254 	    }
1255 	}
1256 	if (col > 0)
1257 	    putchar('\n');
1258     }
1259 #endif
1260 
1261     maxlen = deflate_hufcodes(lengths, codes, nlengths);
1262 
1263     if (maxlen < 0) {
1264 	*error = (maxlen == -1 ? DEFLATE_ERR_LARGE_HUFTABLE :
1265 		  DEFLATE_ERR_SMALL_HUFTABLE);
1266 	return NULL;
1267     }
1268 
1269     /*
1270      * Now we have the complete list of Huffman codes. Build a
1271      * table.
1272      */
1273     return mkonetab(codes, lengths, nlengths, 0, 0, maxlen < 9 ? maxlen : 9);
1274 }
1275 
freetable(struct table ** ztab)1276 static int freetable(struct table **ztab)
1277 {
1278     struct table *tab;
1279     int code;
1280 
1281     if (ztab == NULL)
1282 	return -1;
1283 
1284     if (*ztab == NULL)
1285 	return 0;
1286 
1287     tab = *ztab;
1288 
1289     for (code = 0; code <= tab->mask; code++)
1290 	if (tab->table[code].nexttable != NULL)
1291 	    freetable(&tab->table[code].nexttable);
1292 
1293     sfree(tab->table);
1294     tab->table = NULL;
1295 
1296     sfree(tab);
1297     *ztab = NULL;
1298 
1299     return (0);
1300 }
1301 
1302 struct deflate_decompress_ctx {
1303     struct table *staticlentable, *staticdisttable;
1304     struct table *currlentable, *currdisttable, *lenlentable;
1305     enum {
1306 	ZLIBSTART,
1307 	GZIPSTART, GZIPMETHFLAGS, GZIPIGNORE1, GZIPIGNORE2, GZIPIGNORE3,
1308 	GZIPEXTRA, GZIPFNAME, GZIPCOMMENT,
1309 	OUTSIDEBLK, TREES_HDR, TREES_LENLEN, TREES_LEN, TREES_LENREP,
1310 	INBLK, GOTLENSYM, GOTLEN, GOTDISTSYM,
1311 	UNCOMP_LEN, UNCOMP_NLEN, UNCOMP_DATA,
1312 	END,
1313 	ADLER1, ADLER2,
1314 	CRC1, CRC2, ILEN1, ILEN2,
1315 	FINALSPIN
1316     } state;
1317     int sym, hlit, hdist, hclen, lenptr, lenextrabits, lenaddon, len,
1318 	lenrep, lastblock;
1319     int uncomplen;
1320     unsigned char lenlen[19];
1321     unsigned char lengths[286 + 32];
1322     unsigned long bits;
1323     int nbits;
1324     unsigned char window[DWINSIZE];
1325     int winpos;
1326     unsigned char *outblk;
1327     int outlen, outsize;
1328     int type;
1329     unsigned long checksum;
1330     unsigned long bytesout;
1331     int gzflags, gzextralen;
1332 #ifdef ANALYSIS
1333     int bytesread;
1334     int bitcount_before;
1335 #define BITCOUNT(dctx) ( (dctx)->bytesread * 8 - (dctx)->nbits )
1336 #endif
1337 };
1338 
deflate_decompress_new(int type)1339 deflate_decompress_ctx *deflate_decompress_new(int type)
1340 {
1341     deflate_decompress_ctx *dctx = snew(deflate_decompress_ctx);
1342     unsigned char lengths[288];
1343 
1344     memset(lengths, 8, 144);
1345     memset(lengths + 144, 9, 256 - 144);
1346     memset(lengths + 256, 7, 280 - 256);
1347     memset(lengths + 280, 8, 288 - 280);
1348     dctx->staticlentable = mktable(lengths, 288,
1349 #ifdef ANALYSIS
1350 				   NULL,
1351 #endif
1352 				   NULL);
1353     assert(dctx->staticlentable);
1354     memset(lengths, 5, 32);
1355     dctx->staticdisttable = mktable(lengths, 32,
1356 #ifdef ANALYSIS
1357 				    NULL,
1358 #endif
1359 				    NULL);
1360     assert(dctx->staticdisttable);
1361     dctx->state = (type == DEFLATE_TYPE_ZLIB ? ZLIBSTART :
1362 		   type == DEFLATE_TYPE_GZIP ? GZIPSTART :
1363 		   OUTSIDEBLK);
1364     dctx->currlentable = dctx->currdisttable = dctx->lenlentable = NULL;
1365     dctx->bits = 0;
1366     dctx->nbits = 0;
1367     dctx->winpos = 0;
1368     dctx->type = type;
1369     dctx->lastblock = FALSE;
1370     dctx->checksum = (type == DEFLATE_TYPE_ZLIB ? 1 : 0);
1371     dctx->bytesout = 0;
1372     dctx->gzflags = dctx->gzextralen = 0;
1373 #ifdef ANALYSIS
1374     dctx->bytesread = dctx->bitcount_before = 0;
1375 #endif
1376 
1377     return dctx;
1378 }
1379 
deflate_decompress_free(deflate_decompress_ctx * dctx)1380 void deflate_decompress_free(deflate_decompress_ctx *dctx)
1381 {
1382     if (dctx->currlentable && dctx->currlentable != dctx->staticlentable)
1383 	freetable(&dctx->currlentable);
1384     if (dctx->currdisttable && dctx->currdisttable != dctx->staticdisttable)
1385 	freetable(&dctx->currdisttable);
1386     if (dctx->lenlentable)
1387 	freetable(&dctx->lenlentable);
1388     freetable(&dctx->staticlentable);
1389     freetable(&dctx->staticdisttable);
1390     sfree(dctx);
1391 }
1392 
huflookup(unsigned long * bitsp,int * nbitsp,struct table * tab)1393 static int huflookup(unsigned long *bitsp, int *nbitsp, struct table *tab)
1394 {
1395     unsigned long bits = *bitsp;
1396     int nbits = *nbitsp;
1397     while (1) {
1398 	struct tableentry *ent;
1399 	ent = &tab->table[bits & tab->mask];
1400 	if (ent->nbits > nbits)
1401 	    return -1;		       /* not enough data */
1402 	bits >>= ent->nbits;
1403 	nbits -= ent->nbits;
1404 	if (ent->code == -1)
1405 	    tab = ent->nexttable;
1406 	else {
1407 	    *bitsp = bits;
1408 	    *nbitsp = nbits;
1409 	    return ent->code;
1410 	}
1411 
1412 	/*
1413 	 * If we reach here with `tab' null, it can only be because
1414 	 * there was a missing entry in the Huffman table. This
1415 	 * should never occur even with invalid input data, because
1416 	 * we enforce at mktable time that the Huffman codes should
1417 	 * precisely cover the code space; so we can enforce this
1418 	 * by assertion.
1419 	 */
1420 	assert(tab);
1421     }
1422 }
1423 
emit_char(deflate_decompress_ctx * dctx,int c)1424 static void emit_char(deflate_decompress_ctx *dctx, int c)
1425 {
1426     dctx->window[dctx->winpos] = c;
1427     dctx->winpos = (dctx->winpos + 1) & (DWINSIZE - 1);
1428     if (dctx->outlen >= dctx->outsize) {
1429 	dctx->outsize = dctx->outlen * 3 / 2 + 512;
1430 	dctx->outblk = sresize(dctx->outblk, dctx->outsize, unsigned char);
1431     }
1432     if (dctx->type == DEFLATE_TYPE_ZLIB) {
1433 	unsigned char uc = c;
1434 	dctx->checksum = adler32_update(dctx->checksum, &uc, 1);
1435     } else if (dctx->type == DEFLATE_TYPE_GZIP) {
1436 	unsigned char uc = c;
1437 	dctx->checksum = crc32_update(dctx->checksum, &uc, 1);
1438     }
1439     dctx->outblk[dctx->outlen++] = c;
1440     dctx->bytesout++;
1441 }
1442 
1443 #define EATBITS(n) ( dctx->nbits -= (n), dctx->bits >>= (n) )
1444 
deflate_decompress_data(deflate_decompress_ctx * dctx,const void * vblock,int len,void ** outblock,int * outlen)1445 int deflate_decompress_data(deflate_decompress_ctx *dctx,
1446 			    const void *vblock, int len,
1447 			    void **outblock, int *outlen)
1448 {
1449     const coderecord *rec;
1450     const unsigned char *block = (const unsigned char *)vblock;
1451     int code, bfinal, btype, rep, dist, nlen, header;
1452     unsigned long cksum;
1453     int error = 0;
1454 
1455     if (len == 0) {
1456 	*outblock = NULL;
1457 	*outlen = 0;
1458 	if (dctx->state != FINALSPIN)
1459 	    return DEFLATE_ERR_UNEXPECTED_EOF;
1460 	else
1461 	    return 0;
1462     }
1463 
1464     dctx->outblk = NULL;
1465     dctx->outsize = 0;
1466     dctx->outlen = 0;
1467 
1468     while (len > 0 || dctx->nbits > 0) {
1469 	while (dctx->nbits < 24 && len > 0) {
1470 	    dctx->bits |= (*block++) << dctx->nbits;
1471 	    dctx->nbits += 8;
1472 	    len--;
1473 #ifdef ANALYSIS
1474 	    dctx->bytesread++;
1475 #endif
1476 	}
1477 	switch (dctx->state) {
1478 	  case ZLIBSTART:
1479 	    /* Expect 16-bit zlib header. */
1480 	    if (dctx->nbits < 16)
1481 		goto finished;	       /* done all we can */
1482 
1483             /*
1484              * The header is stored as a big-endian 16-bit integer,
1485              * in contrast to the general little-endian policy in
1486              * the rest of the format :-(
1487              */
1488             header = (((dctx->bits & 0xFF00) >> 8) |
1489                       ((dctx->bits & 0x00FF) << 8));
1490             EATBITS(16);
1491 
1492             /*
1493              * Check the header:
1494              *
1495              *  - bits 8-11 should be 1000 (Deflate/RFC1951)
1496              *  - bits 12-15 should be at most 0111 (window size)
1497              *  - bit 5 should be zero (no dictionary present)
1498              *  - we don't care about bits 6-7 (compression rate)
1499              *  - bits 0-4 should be set up to make the whole thing
1500              *    a multiple of 31 (checksum).
1501              */
1502 	    if ((header & 0xF000) >  0x7000 ||
1503                 (header & 0x0020) != 0x0000 ||
1504                 (header % 31) != 0) {
1505 		error = DEFLATE_ERR_ZLIB_HEADER;
1506                 goto finished;
1507 	    }
1508             if ((header & 0x0F00) != 0x0800) {
1509 		error = DEFLATE_ERR_ZLIB_WRONGCOMP;
1510                 goto finished;
1511 	    }
1512 	    dctx->state = OUTSIDEBLK;
1513 	    break;
1514 	  case GZIPSTART:
1515 	    /* Expect 16-bit gzip header. */
1516 	    if (dctx->nbits < 16)
1517 		goto finished;
1518 	    header = dctx->bits & 0xFFFF;
1519 	    EATBITS(16);
1520 	    if (header != 0x8B1F) {
1521 		error = DEFLATE_ERR_GZIP_HEADER;
1522 		goto finished;
1523 	    }
1524 	    dctx->state = GZIPMETHFLAGS;
1525 	    break;
1526 	  case GZIPMETHFLAGS:
1527 	    /* Expect gzip compression method and flags bytes. */
1528 	    if (dctx->nbits < 16)
1529 		goto finished;
1530 	    header = dctx->bits & 0xFF;
1531 	    EATBITS(8);
1532 	    if (header != 8) {
1533 		error = DEFLATE_ERR_GZIP_WRONGCOMP;
1534 		goto finished;
1535 	    }
1536 	    dctx->gzflags = dctx->bits & 0xFF;
1537 	    if (dctx->gzflags & 2) {
1538 		/*
1539 		 * The FHCRC flag is slightly confusing. RFC1952
1540 		 * documents it as indicating the presence of a
1541 		 * two-byte CRC16 of the gzip header, occurring
1542 		 * just before the beginning of the Deflate stream.
1543 		 * However, gzip itself (as of 1.3.5) appears to
1544 		 * believe it indicates that the current gzip
1545 		 * `member' is not the final one, i.e. that the
1546 		 * stream is composed of multiple gzip members
1547 		 * concatenated together, and furthermore gzip will
1548 		 * refuse to decode any file that has it set.
1549 		 *
1550 		 * For this reason, I label it as `disputed' and
1551 		 * also refuse to decode anything that has it set.
1552 		 * I don't expect this to be a problem in practice.
1553 		 */
1554 		error = DEFLATE_ERR_GZIP_FHCRC;
1555 		goto finished;
1556 	    }
1557 	    EATBITS(8);
1558 	    dctx->state = GZIPIGNORE1;
1559 	    break;
1560 	  case GZIPIGNORE1:
1561 	  case GZIPIGNORE2:
1562 	  case GZIPIGNORE3:
1563 	    /* Expect two bytes of gzip timestamp/XFL/OS, which we ignore. */
1564 	    if (dctx->nbits < 16)
1565 		goto finished;
1566 	    EATBITS(16);
1567 	    if (dctx->state == GZIPIGNORE3) {
1568 		dctx->state = GZIPEXTRA;
1569 	    } else
1570 		dctx->state++;	       /* maps IGNORE1 -> IGNORE2 -> IGNORE3 */
1571 	    break;
1572 	  case GZIPEXTRA:
1573 	    if (dctx->gzflags & 4) {
1574 		/* Expect two bytes of extra-length count, then that many
1575 		 * extra bytes of header data, all of which we ignore. */
1576 		if (!dctx->gzextralen) {
1577 		    if (dctx->nbits < 16)
1578 			goto finished;
1579 		    dctx->gzextralen = dctx->bits & 0xFFFF;
1580 		    EATBITS(16);
1581 		    break;
1582 		} else if (dctx->gzextralen > 0) {
1583 		    if (dctx->nbits < 8)
1584 			goto finished;
1585 		    EATBITS(8);
1586 		    if (--dctx->gzextralen > 0)
1587 			break;
1588 		}
1589 	    }
1590 	    dctx->state = GZIPFNAME;
1591 	    break;
1592 	  case GZIPFNAME:
1593 	    if (dctx->gzflags & 8) {
1594 		/*
1595 		 * Expect a NUL-terminated filename.
1596 		 */
1597 		if (dctx->nbits < 8)
1598 		    goto finished;
1599 		code = dctx->bits & 0xFF;
1600 		EATBITS(8);
1601 	    } else
1602 		code = 0;
1603 	    if (code == 0)
1604 		dctx->state = GZIPCOMMENT;
1605 	    break;
1606 	  case GZIPCOMMENT:
1607 	    if (dctx->gzflags & 16) {
1608 		/*
1609 		 * Expect a NUL-terminated filename.
1610 		 */
1611 		if (dctx->nbits < 8)
1612 		    goto finished;
1613 		code = dctx->bits & 0xFF;
1614 		EATBITS(8);
1615 	    } else
1616 		code = 0;
1617 	    if (code == 0)
1618 		dctx->state = OUTSIDEBLK;
1619 	    break;
1620 	  case OUTSIDEBLK:
1621 	    /* Expect 3-bit block header. */
1622 	    if (dctx->nbits < 3)
1623 		goto finished;	       /* done all we can */
1624 	    bfinal = dctx->bits & 1;
1625 	    if (bfinal)
1626 		dctx->lastblock = TRUE;
1627 	    EATBITS(1);
1628 	    btype = dctx->bits & 3;
1629 	    EATBITS(2);
1630 	    if (btype == 0) {
1631 		int to_eat = dctx->nbits & 7;
1632 		dctx->state = UNCOMP_LEN;
1633 		EATBITS(to_eat);       /* align to byte boundary */
1634 	    } else if (btype == 1) {
1635 		dctx->currlentable = dctx->staticlentable;
1636 		dctx->currdisttable = dctx->staticdisttable;
1637 		dctx->state = INBLK;
1638 	    } else if (btype == 2) {
1639 		dctx->state = TREES_HDR;
1640 	    }
1641 	    debug(("recv: bfinal=%d btype=%d\n", bfinal, btype));
1642 #ifdef ANALYSIS
1643 	    if (analyse_level > 1) {
1644 		static const char *const btypes[] = {
1645 		    "uncompressed", "static", "dynamic", "type 3 (unknown)"
1646 		};
1647 		printf("new block, %sfinal, %s\n",
1648 		       bfinal ? "" : "not ",
1649 		       btypes[btype]);
1650 	    }
1651 #endif
1652 	    break;
1653 	  case TREES_HDR:
1654 	    /*
1655 	     * Dynamic block header. Five bits of HLIT, five of
1656 	     * HDIST, four of HCLEN.
1657 	     */
1658 	    if (dctx->nbits < 5 + 5 + 4)
1659 		goto finished;	       /* done all we can */
1660 	    dctx->hlit = 257 + (dctx->bits & 31);
1661 	    EATBITS(5);
1662 	    dctx->hdist = 1 + (dctx->bits & 31);
1663 	    EATBITS(5);
1664 	    dctx->hclen = 4 + (dctx->bits & 15);
1665 	    EATBITS(4);
1666 	    debug(("recv: hlit=%d hdist=%d hclen=%d\n", dctx->hlit,
1667 		   dctx->hdist, dctx->hclen));
1668 #ifdef ANALYSIS
1669 	    if (analyse_level > 1)
1670 		printf("hlit=%d, hdist=%d, hclen=%d\n",
1671 		        dctx->hlit, dctx->hdist, dctx->hclen);
1672 #endif
1673 	    dctx->lenptr = 0;
1674 	    dctx->state = TREES_LENLEN;
1675 	    memset(dctx->lenlen, 0, sizeof(dctx->lenlen));
1676 	    break;
1677 	  case TREES_LENLEN:
1678 	    if (dctx->nbits < 3)
1679 		goto finished;
1680 	    while (dctx->lenptr < dctx->hclen && dctx->nbits >= 3) {
1681 		dctx->lenlen[lenlenmap[dctx->lenptr++]] =
1682 		    (unsigned char) (dctx->bits & 7);
1683 		debug(("recv: lenlen %d\n", (unsigned char) (dctx->bits & 7)));
1684 		EATBITS(3);
1685 	    }
1686 	    if (dctx->lenptr == dctx->hclen) {
1687 		dctx->lenlentable = mktable(dctx->lenlen, 19,
1688 #ifdef ANALYSIS
1689 					    "code length",
1690 #endif
1691 					    &error);
1692 		if (!dctx->lenlentable)
1693 		    goto finished;     /* error code set up by mktable */
1694 		dctx->state = TREES_LEN;
1695 		dctx->lenptr = 0;
1696 	    }
1697 	    break;
1698 	  case TREES_LEN:
1699 	    if (dctx->lenptr >= dctx->hlit + dctx->hdist) {
1700 		dctx->currlentable = mktable(dctx->lengths, dctx->hlit,
1701 #ifdef ANALYSIS
1702 					     "literal/length",
1703 #endif
1704 					     &error);
1705 		if (!dctx->currlentable)
1706 		    goto finished;     /* error code set up by mktable */
1707                 if (dctx->hdist == 1 && dctx->lengths[dctx->hlit] == 0) {
1708                     /*
1709                      * Special case: if the code length list for the
1710                      * backward-distance table contains a single zero
1711                      * entry, it means this block will never encode a
1712                      * backward distance at all (i.e. it's all
1713                      * literals).
1714                      */
1715                     dctx->currdisttable = NULL;
1716                 } else {
1717                     dctx->currdisttable = mktable(dctx->lengths + dctx->hlit,
1718                                                   dctx->hdist,
1719 #ifdef ANALYSIS
1720                                                   "distance",
1721 #endif
1722                                                   &error);
1723                     if (!dctx->currdisttable)
1724                         goto finished;     /* error code set up by mktable */
1725                 }
1726 		freetable(&dctx->lenlentable);
1727 		dctx->lenlentable = NULL;
1728 		dctx->state = INBLK;
1729 		break;
1730 	    }
1731 	    code = huflookup(&dctx->bits, &dctx->nbits, dctx->lenlentable);
1732 	    debug(("recv: codelen %d\n", code));
1733 	    if (code == -1)
1734 		goto finished;
1735 	    if (code < 16) {
1736 #ifdef ANALYSIS
1737 		if (analyse_level > 1)
1738 		    printf("code-length %d\n", code);
1739 #endif
1740 		dctx->lengths[dctx->lenptr++] = code;
1741 	    } else {
1742 		dctx->lenextrabits = (code == 16 ? 2 : code == 17 ? 3 : 7);
1743 		dctx->lenaddon = (code == 18 ? 11 : 3);
1744 		dctx->lenrep = (code == 16 && dctx->lenptr > 0 ?
1745 				dctx->lengths[dctx->lenptr - 1] : 0);
1746 		dctx->state = TREES_LENREP;
1747 	    }
1748 	    break;
1749 	  case TREES_LENREP:
1750 	    if (dctx->nbits < dctx->lenextrabits)
1751 		goto finished;
1752 	    rep =
1753 		dctx->lenaddon +
1754 		(dctx->bits & ((1 << dctx->lenextrabits) - 1));
1755 	    EATBITS(dctx->lenextrabits);
1756 	    if (dctx->lenextrabits)
1757 		debug(("recv: codelen-extrabits %d/%d\n", rep - dctx->lenaddon,
1758 		       dctx->lenextrabits));
1759 #ifdef ANALYSIS
1760 	    if (analyse_level > 1)
1761 		printf("code-length-repeat: %d copies of %d\n", rep,
1762 		       dctx->lenrep);
1763 #endif
1764 	    while (rep > 0 && dctx->lenptr < dctx->hlit + dctx->hdist) {
1765 		dctx->lengths[dctx->lenptr] = dctx->lenrep;
1766 		dctx->lenptr++;
1767 		rep--;
1768 	    }
1769 	    dctx->state = TREES_LEN;
1770 	    break;
1771 	  case INBLK:
1772 #ifdef ANALYSIS
1773 	    dctx->bitcount_before = BITCOUNT(dctx);
1774 #endif
1775 	    code = huflookup(&dctx->bits, &dctx->nbits, dctx->currlentable);
1776 	    debug(("recv: litlen %d\n", code));
1777 	    if (code == -1)
1778 		goto finished;
1779 	    if (code < 256) {
1780 #ifdef ANALYSIS
1781 		if (analyse_level > 0)
1782 		    printf("%lu: literal %d [%d]\n", dctx->bytesout, code,
1783 			   BITCOUNT(dctx) - dctx->bitcount_before);
1784 #endif
1785 		emit_char(dctx, code);
1786 	    } else if (code == 256) {
1787 		if (dctx->lastblock)
1788 		    dctx->state = END;
1789 		else
1790 		    dctx->state = OUTSIDEBLK;
1791 		if (dctx->currlentable != dctx->staticlentable) {
1792 		    freetable(&dctx->currlentable);
1793 		    dctx->currlentable = NULL;
1794 		}
1795 		if (dctx->currdisttable &&
1796                     dctx->currdisttable != dctx->staticdisttable) {
1797 		    freetable(&dctx->currdisttable);
1798 		    dctx->currdisttable = NULL;
1799 		}
1800 	    } else if (code < 286) {   /* static tree can give >285; ignore */
1801 		dctx->state = GOTLENSYM;
1802 		dctx->sym = code;
1803 	    }
1804 	    break;
1805 	  case GOTLENSYM:
1806 	    rec = &lencodes[dctx->sym - 257];
1807 	    if (dctx->nbits < rec->extrabits)
1808 		goto finished;
1809 	    dctx->len =
1810 		rec->min + (dctx->bits & ((1 << rec->extrabits) - 1));
1811 	    if (rec->extrabits)
1812 		debug(("recv: litlen-extrabits %d/%d\n",
1813 		       dctx->len - rec->min, rec->extrabits));
1814 	    EATBITS(rec->extrabits);
1815 	    dctx->state = GOTLEN;
1816 	    break;
1817 	  case GOTLEN:
1818             if (!dctx->currdisttable) {
1819 		error = DEFLATE_ERR_NODISTTABLE;
1820                 goto finished;
1821             }
1822 	    code = huflookup(&dctx->bits, &dctx->nbits, dctx->currdisttable);
1823 	    debug(("recv: dist %d\n", code));
1824 	    if (code == -1)
1825 		goto finished;
1826             if (code >= 30) {
1827 		error = DEFLATE_ERR_BADDISTCODE;
1828                 goto finished;
1829             }
1830 	    dctx->state = GOTDISTSYM;
1831 	    dctx->sym = code;
1832 	    break;
1833 	  case GOTDISTSYM:
1834 	    rec = &distcodes[dctx->sym];
1835 	    if (dctx->nbits < rec->extrabits)
1836 		goto finished;
1837 	    dist = rec->min + (dctx->bits & ((1 << rec->extrabits) - 1));
1838 	    if (rec->extrabits)
1839 		debug(("recv: dist-extrabits %d/%d\n",
1840 		       dist - rec->min, rec->extrabits));
1841 	    EATBITS(rec->extrabits);
1842 	    dctx->state = INBLK;
1843 #ifdef ANALYSIS
1844 	    if (analyse_level > 0)
1845 		printf("%lu: copy len=%d dist=%d [%d]\n", dctx->bytesout,
1846 		       dctx->len, dist,
1847 		       BITCOUNT(dctx) - dctx->bitcount_before);
1848 #endif
1849 	    while (dctx->len--)
1850 		emit_char(dctx, dctx->window[(dctx->winpos - dist) &
1851 					     (DWINSIZE - 1)]);
1852 	    break;
1853 	  case UNCOMP_LEN:
1854 	    /*
1855 	     * Uncompressed block. We expect to see a 16-bit LEN.
1856 	     */
1857 	    if (dctx->nbits < 16)
1858 		goto finished;
1859 	    dctx->uncomplen = dctx->bits & 0xFFFF;
1860 	    EATBITS(16);
1861 	    dctx->state = UNCOMP_NLEN;
1862 	    break;
1863 	  case UNCOMP_NLEN:
1864 	    /*
1865 	     * Uncompressed block. We expect to see a 16-bit NLEN,
1866 	     * which should be the one's complement of the previous
1867 	     * LEN.
1868 	     */
1869 	    if (dctx->nbits < 16)
1870 		goto finished;
1871 	    nlen = dctx->bits & 0xFFFF;
1872 	    EATBITS(16);
1873 	    if (dctx->uncomplen != (nlen ^ 0xFFFF)) {
1874                 error = DEFLATE_ERR_UNCOMP_HDR;
1875                 goto finished;
1876             }
1877 	    if (dctx->uncomplen == 0) {/* block is empty */
1878 		if (dctx->lastblock)
1879 		    dctx->state = END;
1880 		else
1881 		    dctx->state = OUTSIDEBLK;
1882 	    } else
1883 		dctx->state = UNCOMP_DATA;
1884 	    break;
1885 	  case UNCOMP_DATA:
1886 	    if (dctx->nbits < 8)
1887 		goto finished;
1888 #ifdef ANALYSIS
1889 	    if (analyse_level > 0)
1890 		printf("%lu: uncompressed %d [8]\n", dctx->bytesout,
1891 		       (int)(dctx->bits & 0xFF));
1892 #endif
1893 	    emit_char(dctx, dctx->bits & 0xFF);
1894 	    EATBITS(8);
1895 	    if (--dctx->uncomplen == 0) {	/* end of uncompressed block */
1896 		if (dctx->lastblock)
1897 		    dctx->state = END;
1898 		else
1899 		    dctx->state = OUTSIDEBLK;
1900 	    }
1901 	    break;
1902 	  case END:
1903 	    /*
1904 	     * End of compressed data. We align to a byte boundary,
1905 	     * and then look for format-specific trailer data.
1906 	     */
1907 	    {
1908 		int to_eat = dctx->nbits & 7;
1909 		EATBITS(to_eat);
1910 	    }
1911 	    if (dctx->type == DEFLATE_TYPE_ZLIB)
1912 		dctx->state = ADLER1;
1913 	    else if (dctx->type == DEFLATE_TYPE_GZIP)
1914 		dctx->state = CRC1;
1915 	    else
1916 		dctx->state = FINALSPIN;
1917 	    break;
1918 	  case ADLER1:
1919 	    if (dctx->nbits < 16)
1920 		goto finished;
1921 	    cksum = (dctx->bits & 0xFF) << 8;
1922 	    EATBITS(8);
1923 	    cksum |= (dctx->bits & 0xFF);
1924 	    EATBITS(8);
1925 	    if (cksum != ((dctx->checksum >> 16) & 0xFFFF)) {
1926 		error = DEFLATE_ERR_CHECKSUM;
1927 		goto finished;
1928 	    }
1929 	    dctx->state = ADLER2;
1930 	    break;
1931 	  case ADLER2:
1932 	    if (dctx->nbits < 16)
1933 		goto finished;
1934 	    cksum = (dctx->bits & 0xFF) << 8;
1935 	    EATBITS(8);
1936 	    cksum |= (dctx->bits & 0xFF);
1937 	    EATBITS(8);
1938 	    if (cksum != (dctx->checksum & 0xFFFF)) {
1939 		error = DEFLATE_ERR_CHECKSUM;
1940 		goto finished;
1941 	    }
1942 	    dctx->state = FINALSPIN;
1943 	    break;
1944 	  case CRC1:
1945 	    if (dctx->nbits < 16)
1946 		goto finished;
1947 	    cksum = dctx->bits & 0xFFFF;
1948 	    EATBITS(16);
1949 	    if (cksum != (dctx->checksum & 0xFFFF)) {
1950 		error = DEFLATE_ERR_CHECKSUM;
1951 		goto finished;
1952 	    }
1953 	    dctx->state = CRC2;
1954 	    break;
1955 	  case CRC2:
1956 	    if (dctx->nbits < 16)
1957 		goto finished;
1958 	    cksum = dctx->bits & 0xFFFF;
1959 	    EATBITS(16);
1960 	    if (cksum != ((dctx->checksum >> 16) & 0xFFFF)) {
1961 		error = DEFLATE_ERR_CHECKSUM;
1962 		goto finished;
1963 	    }
1964 	    dctx->state = ILEN1;
1965 	    break;
1966 	  case ILEN1:
1967 	    if (dctx->nbits < 16)
1968 		goto finished;
1969 	    cksum = dctx->bits & 0xFFFF;
1970 	    EATBITS(16);
1971 	    if (cksum != (dctx->bytesout & 0xFFFF)) {
1972 		error = DEFLATE_ERR_INLEN;
1973 		goto finished;
1974 	    }
1975 	    dctx->state = ILEN2;
1976 	    break;
1977 	  case ILEN2:
1978 	    if (dctx->nbits < 16)
1979 		goto finished;
1980 	    cksum = dctx->bits & 0xFFFF;
1981 	    EATBITS(16);
1982 	    if (cksum != ((dctx->bytesout >> 16) & 0xFFFF)) {
1983 		error = DEFLATE_ERR_INLEN;
1984 		goto finished;
1985 	    }
1986 	    dctx->state = FINALSPIN;
1987 	    break;
1988 	  case FINALSPIN:
1989 	    /* Just ignore any trailing garbage on the data stream. */
1990 	    /* (We could alternatively throw an error here, if we wanted
1991 	     * to detect and complain about trailing garbage.) */
1992 	    EATBITS(dctx->nbits);
1993 	    break;
1994 	}
1995     }
1996 
1997     finished:
1998     *outblock = dctx->outblk;
1999     *outlen = dctx->outlen;
2000     return error;
2001 }
2002 
2003 #define A(code,str) str
2004 const char *const deflate_error_msg[DEFLATE_NUM_ERRORS] = {
2005     DEFLATE_ERRORLIST(A)
2006 };
2007 #undef A
2008 
2009 #define A(code,str) #code
2010 const char *const deflate_error_sym[DEFLATE_NUM_ERRORS] = {
2011     DEFLATE_ERRORLIST(A)
2012 };
2013 #undef A
2014 
2015 #if defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(_WIN64)
2016 #define WINDOWS_IO
2017 #endif
2018 
2019 #if defined(WINDOWS_IO) && (defined(STANDALONE) || defined(TESTMODE))
2020 #include <fcntl.h>
2021 #include <io.h>
2022 #endif
2023 
2024 #ifdef STANDALONE
2025 
main(int argc,char ** argv)2026 int main(int argc, char **argv)
2027 {
2028     unsigned char buf[65536];
2029     void *outbuf;
2030     int ret, err, outlen;
2031     deflate_decompress_ctx *dhandle;
2032     deflate_compress_ctx *chandle;
2033     int type = DEFLATE_TYPE_ZLIB, opts = TRUE;
2034     int compress = FALSE, decompress = FALSE;
2035     int got_arg = FALSE;
2036     char *filename = NULL;
2037     FILE *fp;
2038 
2039     while (--argc) {
2040         char *p = *++argv;
2041 
2042 	got_arg = TRUE;
2043 
2044         if (p[0] == '-' && opts) {
2045             if (!strcmp(p, "-b"))
2046                 type = DEFLATE_TYPE_BARE;
2047             else if (!strcmp(p, "-g"))
2048                 type = DEFLATE_TYPE_GZIP;
2049             else if (!strcmp(p, "-c"))
2050                 compress = TRUE;
2051             else if (!strcmp(p, "-d"))
2052                 decompress = TRUE;
2053             else if (!strcmp(p, "-a"))
2054                 analyse_level++, decompress = TRUE;
2055             else if (!strcmp(p, "--"))
2056                 opts = FALSE;          /* next thing is filename */
2057             else {
2058                 fprintf(stderr, "unknown command line option '%s'\n", p);
2059                 return 1;
2060             }
2061         } else if (!filename) {
2062             filename = p;
2063         } else {
2064             fprintf(stderr, "can only handle one filename\n");
2065             return 1;
2066         }
2067     }
2068 
2069     if (!compress && !decompress) {
2070 	fprintf(stderr, "usage: deflate [ -c | -d | -a ] [ -b | -g ]"
2071 		" [filename]\n");
2072 	return (got_arg ? 1 : 0);
2073     }
2074 
2075     if (compress && decompress) {
2076 	fprintf(stderr, "please do not specify both compression and"
2077 		" decompression\n");
2078 	return (got_arg ? 1 : 0);
2079     }
2080 
2081     if (compress) {
2082 	chandle = deflate_compress_new(type);
2083 	dhandle = NULL;
2084     } else {
2085 	dhandle = deflate_decompress_new(type);
2086 	chandle = NULL;
2087     }
2088 
2089     if (filename)
2090         fp = fopen(filename, "rb");
2091     else
2092         fp = stdin;
2093 
2094     if (!fp) {
2095         assert(filename);
2096         fprintf(stderr, "unable to open '%s'\n", filename);
2097         return 1;
2098     }
2099 
2100 #ifdef WINDOWS_IO
2101     if(_setmode(_fileno(stdout), _O_BINARY ) == -1)
2102     {
2103         fprintf(stderr, "Can't set stdout to binary mode\n");
2104         return 1;
2105     }
2106 #endif
2107 
2108     do {
2109 	ret = fread(buf, 1, sizeof(buf), fp);
2110 	outbuf = NULL;
2111 	if (dhandle) {
2112 	    if (ret > 0)
2113 		err = deflate_decompress_data(dhandle, buf, ret,
2114 					      (void **)&outbuf, &outlen);
2115 	    else
2116 		err = deflate_decompress_data(dhandle, NULL, 0,
2117 					      (void **)&outbuf, &outlen);
2118 	} else {
2119 	    if (ret > 0)
2120 		deflate_compress_data(chandle, buf, ret, DEFLATE_NO_FLUSH,
2121 				      (void **)&outbuf, &outlen);
2122 	    else
2123 		deflate_compress_data(chandle, buf, ret, DEFLATE_END_OF_DATA,
2124 				      (void **)&outbuf, &outlen);
2125 	    err = 0;
2126 	}
2127         if (outbuf) {
2128             if (!analyse_level && outlen)
2129                 fwrite(outbuf, 1, outlen, stdout);
2130             sfree(outbuf);
2131         }
2132 	if (err > 0) {
2133             fprintf(stderr, "decoding error: %s\n", deflate_error_msg[err]);
2134             return 1;
2135         }
2136     } while (ret > 0);
2137 
2138     if (dhandle)
2139 	deflate_decompress_free(dhandle);
2140     if (chandle)
2141 	deflate_compress_free(chandle);
2142 
2143     if (filename)
2144         fclose(fp);
2145 
2146     return 0;
2147 }
2148 
2149 #endif
2150 
2151 #ifdef TESTMODE
2152 
main(int argc,char ** argv)2153 int main(int argc, char **argv)
2154 {
2155     char *filename = NULL;
2156     FILE *fp;
2157     deflate_compress_ctx *chandle;
2158     deflate_decompress_ctx *dhandle;
2159     unsigned char buf[65536], *outbuf, *outbuf2;
2160     int ret, err, outlen, outlen2;
2161     int dlen = 0, clen = 0;
2162     int opts = TRUE;
2163 
2164     while (--argc) {
2165         char *p = *++argv;
2166 
2167         if (p[0] == '-' && opts) {
2168             if (!strcmp(p, "--"))
2169                 opts = FALSE;          /* next thing is filename */
2170             else {
2171                 fprintf(stderr, "unknown command line option '%s'\n", p);
2172                 return 1;
2173             }
2174         } else if (!filename) {
2175             filename = p;
2176         } else {
2177             fprintf(stderr, "can only handle one filename\n");
2178             return 1;
2179         }
2180     }
2181 
2182     if (filename)
2183         fp = fopen(filename, "rb");
2184     else
2185         fp = stdin;
2186 
2187     if (!fp) {
2188         assert(filename);
2189         fprintf(stderr, "unable to open '%s'\n", filename);
2190         return 1;
2191     }
2192 
2193     chandle = deflate_compress_new(DEFLATE_TYPE_ZLIB);
2194     dhandle = deflate_decompress_new(DEFLATE_TYPE_ZLIB);
2195 
2196 #ifdef WINDOWS_IO
2197     if(_setmode(_fileno(stdout), _O_BINARY ) == -1)
2198     {
2199         fprintf(stderr, "Can't set stdout to binary mode\n");
2200         return 1;
2201     }
2202 #endif
2203 
2204     do {
2205 	ret = fread(buf, 1, sizeof(buf), fp);
2206 	if (ret <= 0) {
2207 	    deflate_compress_data(chandle, NULL, 0, DEFLATE_END_OF_DATA,
2208 				  (void **)&outbuf, &outlen);
2209 	} else {
2210 	    dlen += ret;
2211 	    deflate_compress_data(chandle, buf, ret, DEFLATE_NO_FLUSH,
2212 				  (void **)&outbuf, &outlen);
2213 	}
2214 	if (outbuf) {
2215 	    clen += outlen;
2216 	    err = deflate_decompress_data(dhandle, outbuf, outlen,
2217 					  (void **)&outbuf2, &outlen2);
2218 	    sfree(outbuf);
2219 	    if (outbuf2) {
2220 		if (outlen2)
2221 		    fwrite(outbuf2, 1, outlen2, stdout);
2222 		sfree(outbuf2);
2223 	    }
2224 	    if (!err && ret <= 0) {
2225 		/*
2226 		 * signal EOF
2227 		 */
2228 		err = deflate_decompress_data(dhandle, NULL, 0,
2229 					      (void **)&outbuf2, &outlen2);
2230 		assert(outbuf2 == NULL);
2231 	    }
2232 	    if (err) {
2233 		fprintf(stderr, "decoding error: %s\n",
2234 			deflate_error_msg[err]);
2235 		return 1;
2236 	    }
2237 	}
2238     } while (ret > 0);
2239 
2240     fprintf(stderr, "%d plaintext -> %d compressed\n", dlen, clen);
2241 
2242     return 0;
2243 }
2244 
2245 #endif
2246