1 /*
2 * Reimplementation of Deflate (RFC1951) compression. Adapted from
3 * the version in PuTTY, and extended to write dynamic Huffman
4 * trees and choose block boundaries usefully.
5 */
6
7 /*
8 * TODO:
9 *
10 * - Feature: could do with forms of flush other than SYNC_FLUSH.
11 * I'm not sure exactly how those work when you don't know in
12 * advance that your next block will be static (as we did in
13 * PuTTY). And remember the 9-bit limitation of zlib.
14 * + also, zlib has FULL_FLUSH which clears the LZ77 state as
15 * well, for random access.
16 *
17 * - Compression quality: chooseblock() appears to be computing
18 * wildly inaccurate block size estimates. Possible resolutions:
19 * + find and fix some trivial bug I haven't spotted yet
20 * + abandon the entropic approximation and go with trial
21 * Huffman runs
22 *
23 * - Compression quality: see if increasing SYMLIMIT causes
24 * dynamic blocks to start being consistently smaller than it.
25 * + actually we seem to be there already, but check on a
26 * larger corpus.
27 *
28 * - Compression quality: we ought to be able to fall right back
29 * to actual uncompressed blocks if really necessary, though
30 * it's not clear what the criterion for doing so would be.
31 */
32
33 /*
34 * This software is copyright 2000-2006 Simon Tatham.
35 *
36 * Permission is hereby granted, free of charge, to any person
37 * obtaining a copy of this software and associated documentation
38 * files (the "Software"), to deal in the Software without
39 * restriction, including without limitation the rights to use,
40 * copy, modify, merge, publish, distribute, sublicense, and/or
41 * sell copies of the Software, and to permit persons to whom the
42 * Software is furnished to do so, subject to the following
43 * conditions:
44 *
45 * The above copyright notice and this permission notice shall be
46 * included in all copies or substantial portions of the Software.
47 *
48 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
49 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
50 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
51 * NONINFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE
52 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
53 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
54 * IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
55 * THE SOFTWARE.
56 */
57
58 #include <stdio.h>
59 #include <stddef.h>
60 #include <string.h>
61 #include <stdlib.h>
62 #include <assert.h>
63
64 #include "halibut.h"
65 #include "huffman.h"
66 #include "lz77.h"
67 #include "deflate.h"
68
69 /* ----------------------------------------------------------------------
70 * This file can be compiled in a number of modes.
71 *
72 * With -DSTANDALONE, it builds a self-contained deflate tool which
73 * can compress, decompress, and also analyse a deflated file to
74 * print out the sequence of literals and copy commands it
75 * contains.
76 *
77 * With -DTESTMODE, it builds a test application which is given a
78 * file on standard input, both compresses and decompresses it, and
79 * outputs the re-decompressed result so it can be conveniently
80 * diffed against the original. Define -DTESTDBG as well for lots
81 * of diagnostics.
82 */
83
84 #if defined TESTDBG
85 /* gcc-specific diagnostic macro */
86 #define debug_int(x...) ( fprintf(stderr, x) )
87 #define debug(x) ( debug_int x )
88 #else
89 #define debug(x) ((void)0)
90 #endif
91
92 #ifdef STANDALONE
93 #define ANALYSIS
94 #endif
95
96 #ifdef ANALYSIS
97 int analyse_level = 0;
98 #endif
99
100 /* ----------------------------------------------------------------------
101 * Deflate functionality common to both compression and decompression.
102 */
103
104 #define DWINSIZE 32768
105
106 static const unsigned char lenlenmap[] = {
107 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
108 };
109
110 /*
111 * Given a sequence of Huffman code lengths, compute the actual codes
112 * in the final form suitable for feeding to outbits (i.e. already
113 * bit-mirrored). Returns the same as compute_huffman_codes.
114 */
deflate_hufcodes(const unsigned char * lengths,int * codes,int nsyms)115 static int deflate_hufcodes(const unsigned char *lengths,
116 int *codes, int nsyms)
117 {
118 int maxlen = compute_huffman_codes(lengths, codes, nsyms);
119 int i, j;
120
121 for (i = 0; i < nsyms; i++) {
122 int code = codes[i];
123 codes[i] = 0;
124 for (j = 0; j < lengths[i]; j++) {
125 codes[i] = (codes[i] << 1) | (code & 1);
126 code >>= 1;
127 }
128 }
129
130 return maxlen;
131 }
132
133 /*
134 * Adler32 checksum function.
135 */
adler32_update(unsigned long s,const unsigned char * data,int len)136 static unsigned long adler32_update(unsigned long s,
137 const unsigned char *data, int len)
138 {
139 unsigned s1 = s & 0xFFFF, s2 = (s >> 16) & 0xFFFF;
140 int i;
141
142 for (i = 0; i < len; i++) {
143 s1 += data[i];
144 s2 += s1;
145 if (!(i & 0xFFF)) {
146 s1 %= 65521;
147 s2 %= 65521;
148 }
149 }
150
151 return ((s2 % 65521) << 16) | (s1 % 65521);
152 }
153
154 /*
155 * CRC32 checksum function.
156 */
157
crc32_update(unsigned long crcword,const unsigned char * data,int len)158 static unsigned long crc32_update(unsigned long crcword,
159 const unsigned char *data, int len)
160 {
161 static const unsigned long crc32_table[256] = {
162 0x00000000L, 0x77073096L, 0xEE0E612CL, 0x990951BAL,
163 0x076DC419L, 0x706AF48FL, 0xE963A535L, 0x9E6495A3L,
164 0x0EDB8832L, 0x79DCB8A4L, 0xE0D5E91EL, 0x97D2D988L,
165 0x09B64C2BL, 0x7EB17CBDL, 0xE7B82D07L, 0x90BF1D91L,
166 0x1DB71064L, 0x6AB020F2L, 0xF3B97148L, 0x84BE41DEL,
167 0x1ADAD47DL, 0x6DDDE4EBL, 0xF4D4B551L, 0x83D385C7L,
168 0x136C9856L, 0x646BA8C0L, 0xFD62F97AL, 0x8A65C9ECL,
169 0x14015C4FL, 0x63066CD9L, 0xFA0F3D63L, 0x8D080DF5L,
170 0x3B6E20C8L, 0x4C69105EL, 0xD56041E4L, 0xA2677172L,
171 0x3C03E4D1L, 0x4B04D447L, 0xD20D85FDL, 0xA50AB56BL,
172 0x35B5A8FAL, 0x42B2986CL, 0xDBBBC9D6L, 0xACBCF940L,
173 0x32D86CE3L, 0x45DF5C75L, 0xDCD60DCFL, 0xABD13D59L,
174 0x26D930ACL, 0x51DE003AL, 0xC8D75180L, 0xBFD06116L,
175 0x21B4F4B5L, 0x56B3C423L, 0xCFBA9599L, 0xB8BDA50FL,
176 0x2802B89EL, 0x5F058808L, 0xC60CD9B2L, 0xB10BE924L,
177 0x2F6F7C87L, 0x58684C11L, 0xC1611DABL, 0xB6662D3DL,
178 0x76DC4190L, 0x01DB7106L, 0x98D220BCL, 0xEFD5102AL,
179 0x71B18589L, 0x06B6B51FL, 0x9FBFE4A5L, 0xE8B8D433L,
180 0x7807C9A2L, 0x0F00F934L, 0x9609A88EL, 0xE10E9818L,
181 0x7F6A0DBBL, 0x086D3D2DL, 0x91646C97L, 0xE6635C01L,
182 0x6B6B51F4L, 0x1C6C6162L, 0x856530D8L, 0xF262004EL,
183 0x6C0695EDL, 0x1B01A57BL, 0x8208F4C1L, 0xF50FC457L,
184 0x65B0D9C6L, 0x12B7E950L, 0x8BBEB8EAL, 0xFCB9887CL,
185 0x62DD1DDFL, 0x15DA2D49L, 0x8CD37CF3L, 0xFBD44C65L,
186 0x4DB26158L, 0x3AB551CEL, 0xA3BC0074L, 0xD4BB30E2L,
187 0x4ADFA541L, 0x3DD895D7L, 0xA4D1C46DL, 0xD3D6F4FBL,
188 0x4369E96AL, 0x346ED9FCL, 0xAD678846L, 0xDA60B8D0L,
189 0x44042D73L, 0x33031DE5L, 0xAA0A4C5FL, 0xDD0D7CC9L,
190 0x5005713CL, 0x270241AAL, 0xBE0B1010L, 0xC90C2086L,
191 0x5768B525L, 0x206F85B3L, 0xB966D409L, 0xCE61E49FL,
192 0x5EDEF90EL, 0x29D9C998L, 0xB0D09822L, 0xC7D7A8B4L,
193 0x59B33D17L, 0x2EB40D81L, 0xB7BD5C3BL, 0xC0BA6CADL,
194 0xEDB88320L, 0x9ABFB3B6L, 0x03B6E20CL, 0x74B1D29AL,
195 0xEAD54739L, 0x9DD277AFL, 0x04DB2615L, 0x73DC1683L,
196 0xE3630B12L, 0x94643B84L, 0x0D6D6A3EL, 0x7A6A5AA8L,
197 0xE40ECF0BL, 0x9309FF9DL, 0x0A00AE27L, 0x7D079EB1L,
198 0xF00F9344L, 0x8708A3D2L, 0x1E01F268L, 0x6906C2FEL,
199 0xF762575DL, 0x806567CBL, 0x196C3671L, 0x6E6B06E7L,
200 0xFED41B76L, 0x89D32BE0L, 0x10DA7A5AL, 0x67DD4ACCL,
201 0xF9B9DF6FL, 0x8EBEEFF9L, 0x17B7BE43L, 0x60B08ED5L,
202 0xD6D6A3E8L, 0xA1D1937EL, 0x38D8C2C4L, 0x4FDFF252L,
203 0xD1BB67F1L, 0xA6BC5767L, 0x3FB506DDL, 0x48B2364BL,
204 0xD80D2BDAL, 0xAF0A1B4CL, 0x36034AF6L, 0x41047A60L,
205 0xDF60EFC3L, 0xA867DF55L, 0x316E8EEFL, 0x4669BE79L,
206 0xCB61B38CL, 0xBC66831AL, 0x256FD2A0L, 0x5268E236L,
207 0xCC0C7795L, 0xBB0B4703L, 0x220216B9L, 0x5505262FL,
208 0xC5BA3BBEL, 0xB2BD0B28L, 0x2BB45A92L, 0x5CB36A04L,
209 0xC2D7FFA7L, 0xB5D0CF31L, 0x2CD99E8BL, 0x5BDEAE1DL,
210 0x9B64C2B0L, 0xEC63F226L, 0x756AA39CL, 0x026D930AL,
211 0x9C0906A9L, 0xEB0E363FL, 0x72076785L, 0x05005713L,
212 0x95BF4A82L, 0xE2B87A14L, 0x7BB12BAEL, 0x0CB61B38L,
213 0x92D28E9BL, 0xE5D5BE0DL, 0x7CDCEFB7L, 0x0BDBDF21L,
214 0x86D3D2D4L, 0xF1D4E242L, 0x68DDB3F8L, 0x1FDA836EL,
215 0x81BE16CDL, 0xF6B9265BL, 0x6FB077E1L, 0x18B74777L,
216 0x88085AE6L, 0xFF0F6A70L, 0x66063BCAL, 0x11010B5CL,
217 0x8F659EFFL, 0xF862AE69L, 0x616BFFD3L, 0x166CCF45L,
218 0xA00AE278L, 0xD70DD2EEL, 0x4E048354L, 0x3903B3C2L,
219 0xA7672661L, 0xD06016F7L, 0x4969474DL, 0x3E6E77DBL,
220 0xAED16A4AL, 0xD9D65ADCL, 0x40DF0B66L, 0x37D83BF0L,
221 0xA9BCAE53L, 0xDEBB9EC5L, 0x47B2CF7FL, 0x30B5FFE9L,
222 0xBDBDF21CL, 0xCABAC28AL, 0x53B39330L, 0x24B4A3A6L,
223 0xBAD03605L, 0xCDD70693L, 0x54DE5729L, 0x23D967BFL,
224 0xB3667A2EL, 0xC4614AB8L, 0x5D681B02L, 0x2A6F2B94L,
225 0xB40BBE37L, 0xC30C8EA1L, 0x5A05DF1BL, 0x2D02EF8DL
226 };
227 crcword ^= 0xFFFFFFFFL;
228 while (len--) {
229 unsigned long newbyte = *data++;
230 newbyte ^= crcword & 0xFFL;
231 crcword = (crcword >> 8) ^ crc32_table[newbyte];
232 }
233 return crcword ^ 0xFFFFFFFFL;
234 }
235
236 typedef struct {
237 short code, extrabits;
238 int min, max;
239 } coderecord;
240
241 static const coderecord lencodes[] = {
242 {257, 0, 3, 3},
243 {258, 0, 4, 4},
244 {259, 0, 5, 5},
245 {260, 0, 6, 6},
246 {261, 0, 7, 7},
247 {262, 0, 8, 8},
248 {263, 0, 9, 9},
249 {264, 0, 10, 10},
250 {265, 1, 11, 12},
251 {266, 1, 13, 14},
252 {267, 1, 15, 16},
253 {268, 1, 17, 18},
254 {269, 2, 19, 22},
255 {270, 2, 23, 26},
256 {271, 2, 27, 30},
257 {272, 2, 31, 34},
258 {273, 3, 35, 42},
259 {274, 3, 43, 50},
260 {275, 3, 51, 58},
261 {276, 3, 59, 66},
262 {277, 4, 67, 82},
263 {278, 4, 83, 98},
264 {279, 4, 99, 114},
265 {280, 4, 115, 130},
266 {281, 5, 131, 162},
267 {282, 5, 163, 194},
268 {283, 5, 195, 226},
269 {284, 5, 227, 257},
270 {285, 0, 258, 258},
271 };
272
273 static const coderecord distcodes[] = {
274 {0, 0, 1, 1},
275 {1, 0, 2, 2},
276 {2, 0, 3, 3},
277 {3, 0, 4, 4},
278 {4, 1, 5, 6},
279 {5, 1, 7, 8},
280 {6, 2, 9, 12},
281 {7, 2, 13, 16},
282 {8, 3, 17, 24},
283 {9, 3, 25, 32},
284 {10, 4, 33, 48},
285 {11, 4, 49, 64},
286 {12, 5, 65, 96},
287 {13, 5, 97, 128},
288 {14, 6, 129, 192},
289 {15, 6, 193, 256},
290 {16, 7, 257, 384},
291 {17, 7, 385, 512},
292 {18, 8, 513, 768},
293 {19, 8, 769, 1024},
294 {20, 9, 1025, 1536},
295 {21, 9, 1537, 2048},
296 {22, 10, 2049, 3072},
297 {23, 10, 3073, 4096},
298 {24, 11, 4097, 6144},
299 {25, 11, 6145, 8192},
300 {26, 12, 8193, 12288},
301 {27, 12, 12289, 16384},
302 {28, 13, 16385, 24576},
303 {29, 13, 24577, 32768},
304 };
305
306 /* ----------------------------------------------------------------------
307 * Deflate compression.
308 */
309
310 #define SYMLIMIT 65536
311 #define SYMPFX_LITLEN 0x00000000U
312 #define SYMPFX_DIST 0x40000000U
313 #define SYMPFX_EXTRABITS 0x80000000U
314 #define SYMPFX_CODELEN 0xC0000000U
315 #define SYMPFX_MASK 0xC0000000U
316
317 #define SYM_EXTRABITS_MASK 0x3C000000U
318 #define SYM_EXTRABITS_SHIFT 26
319
320 struct huftrees {
321 unsigned char *len_litlen;
322 int *code_litlen;
323 unsigned char *len_dist;
324 int *code_dist;
325 unsigned char *len_codelen;
326 int *code_codelen;
327 };
328
329 struct deflate_compress_ctx {
330 struct LZ77Context *lzc;
331 unsigned char *outbuf;
332 int outlen, outsize;
333 unsigned long outbits;
334 int noutbits;
335 int firstblock;
336 unsigned long *syms;
337 int symstart, nsyms;
338 int type;
339 unsigned long checksum;
340 unsigned long datasize;
341 int lastblock;
342 int finished;
343 unsigned char static_len1[288], static_len2[30];
344 int static_code1[288], static_code2[30];
345 struct huftrees sht;
346 #ifdef STATISTICS
347 unsigned long bitcount;
348 #endif
349 };
350
outbits(deflate_compress_ctx * out,unsigned long bits,int nbits)351 static void outbits(deflate_compress_ctx *out,
352 unsigned long bits, int nbits)
353 {
354 assert(out->noutbits + nbits <= 32);
355 out->outbits |= bits << out->noutbits;
356 out->noutbits += nbits;
357 while (out->noutbits >= 8) {
358 if (out->outlen >= out->outsize) {
359 out->outsize = out->outlen + 64;
360 out->outbuf = sresize(out->outbuf, out->outsize, unsigned char);
361 }
362 out->outbuf[out->outlen++] = (unsigned char) (out->outbits & 0xFF);
363 out->outbits >>= 8;
364 out->noutbits -= 8;
365 }
366 #ifdef STATISTICS
367 out->bitcount += nbits;
368 #endif
369 }
370
371 /*
372 * Compute the bit length of a symbol, given the three Huffman
373 * trees.
374 */
symsize(unsigned sym,const struct huftrees * trees)375 static int symsize(unsigned sym, const struct huftrees *trees)
376 {
377 unsigned basesym = sym &~ SYMPFX_MASK;
378
379 switch (sym & SYMPFX_MASK) {
380 case SYMPFX_LITLEN:
381 return trees->len_litlen[basesym];
382 case SYMPFX_DIST:
383 return trees->len_dist[basesym];
384 case SYMPFX_CODELEN:
385 return trees->len_codelen[basesym];
386 default /*case SYMPFX_EXTRABITS*/:
387 return basesym >> SYM_EXTRABITS_SHIFT;
388 }
389 }
390
391 /*
392 * Write out a single symbol, given the three Huffman trees.
393 */
writesym(deflate_compress_ctx * out,unsigned sym,const struct huftrees * trees)394 static void writesym(deflate_compress_ctx *out,
395 unsigned sym, const struct huftrees *trees)
396 {
397 unsigned basesym = sym &~ SYMPFX_MASK;
398 int i;
399
400 switch (sym & SYMPFX_MASK) {
401 case SYMPFX_LITLEN:
402 debug(("send: litlen %d\n", basesym));
403 outbits(out, trees->code_litlen[basesym], trees->len_litlen[basesym]);
404 break;
405 case SYMPFX_DIST:
406 debug(("send: dist %d\n", basesym));
407 outbits(out, trees->code_dist[basesym], trees->len_dist[basesym]);
408 break;
409 case SYMPFX_CODELEN:
410 debug(("send: codelen %d\n", basesym));
411 outbits(out, trees->code_codelen[basesym],trees->len_codelen[basesym]);
412 break;
413 case SYMPFX_EXTRABITS:
414 i = basesym >> SYM_EXTRABITS_SHIFT;
415 basesym &= ~SYM_EXTRABITS_MASK;
416 debug(("send: extrabits %d/%d\n", basesym, i));
417 outbits(out, basesym, i);
418 break;
419 }
420 }
421
422 /*
423 * outblock() must output _either_ a dynamic block of length
424 * `dynamic_len', _or_ a static block of length `static_len', but
425 * it gets to choose which.
426 */
outblock(deflate_compress_ctx * out,int dynamic_len,int static_len)427 static void outblock(deflate_compress_ctx *out,
428 int dynamic_len, int static_len)
429 {
430 int freqs1[286], freqs2[30], freqs3[19];
431 unsigned char len1[286], len2[30], len3[19];
432 int code1[286], code2[30], code3[19];
433 int hlit, hdist, hclen, bfinal, btype;
434 int treesrc[286 + 30];
435 int treesyms[286 + 30];
436 int codelen[19];
437 int i, ntreesrc, ntreesyms;
438 int dynamic, blklen;
439 struct huftrees dht;
440 const struct huftrees *ht;
441 #ifdef STATISTICS
442 unsigned long bitcount_before;
443 #endif
444
445 dht.len_litlen = len1;
446 dht.len_dist = len2;
447 dht.len_codelen = len3;
448 dht.code_litlen = code1;
449 dht.code_dist = code2;
450 dht.code_codelen = code3;
451
452 /*
453 * We make our choice of block to output by doing all the
454 * detailed work to determine the exact length of each possible
455 * block. Then we choose the one which has fewest output bits
456 * per symbol.
457 */
458
459 /*
460 * First build the two main Huffman trees for the dynamic
461 * block.
462 */
463
464 /*
465 * Count up the frequency tables.
466 */
467 memset(freqs1, 0, sizeof(freqs1));
468 memset(freqs2, 0, sizeof(freqs2));
469 freqs1[256] = 1; /* we're bound to need one EOB */
470 for (i = 0; i < dynamic_len; i++) {
471 unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT];
472
473 /*
474 * Increment the occurrence counter for this symbol, if
475 * it's in one of the Huffman alphabets and isn't extra
476 * bits.
477 */
478 if ((sym & SYMPFX_MASK) == SYMPFX_LITLEN) {
479 sym &= ~SYMPFX_MASK;
480 assert(sym < lenof(freqs1));
481 freqs1[sym]++;
482 } else if ((sym & SYMPFX_MASK) == SYMPFX_DIST) {
483 sym &= ~SYMPFX_MASK;
484 assert(sym < lenof(freqs2));
485 freqs2[sym]++;
486 }
487 }
488 build_huffman_tree(freqs1, len1, lenof(freqs1), 15);
489 build_huffman_tree(freqs2, len2, lenof(freqs2), 15);
490 deflate_hufcodes(len1, code1, lenof(freqs1));
491 deflate_hufcodes(len2, code2, lenof(freqs2));
492
493 /*
494 * Determine HLIT and HDIST.
495 */
496 for (hlit = 286; hlit > 257 && len1[hlit-1] == 0; hlit--);
497 for (hdist = 30; hdist > 1 && len2[hdist-1] == 0; hdist--);
498
499 /*
500 * Write out the list of symbols used to transmit the
501 * trees.
502 */
503 ntreesrc = 0;
504 for (i = 0; i < hlit; i++)
505 treesrc[ntreesrc++] = len1[i];
506 for (i = 0; i < hdist; i++)
507 treesrc[ntreesrc++] = len2[i];
508 ntreesyms = 0;
509 for (i = 0; i < ntreesrc ;) {
510 int j = 1;
511 int k;
512
513 /* Find length of run of the same length code. */
514 while (i+j < ntreesrc && treesrc[i+j] == treesrc[i])
515 j++;
516
517 /* Encode that run as economically as we can. */
518 k = j;
519 if (treesrc[i] == 0) {
520 /*
521 * Zero code length: we can output run codes for
522 * 3-138 zeroes. So if we have fewer than 3 zeroes,
523 * we just output literals. Otherwise, we output
524 * nothing but run codes, and tweak their lengths
525 * to make sure we aren't left with under 3 at the
526 * end.
527 */
528 if (k < 3) {
529 while (k--)
530 treesyms[ntreesyms++] = 0 | SYMPFX_CODELEN;
531 } else {
532 while (k > 0) {
533 int rpt = (k < 138 ? k : 138);
534 if (rpt > k-3 && rpt < k)
535 rpt = k-3;
536 assert(rpt >= 3 && rpt <= 138);
537 if (rpt < 11) {
538 treesyms[ntreesyms++] = 17 | SYMPFX_CODELEN;
539 treesyms[ntreesyms++] =
540 (SYMPFX_EXTRABITS | (rpt - 3) |
541 (3 << SYM_EXTRABITS_SHIFT));
542 } else {
543 treesyms[ntreesyms++] = 18 | SYMPFX_CODELEN;
544 treesyms[ntreesyms++] =
545 (SYMPFX_EXTRABITS | (rpt - 11) |
546 (7 << SYM_EXTRABITS_SHIFT));
547 }
548 k -= rpt;
549 }
550 }
551 } else {
552 /*
553 * Non-zero code length: we must output the first
554 * one explicitly, then we can output a copy code
555 * for 3-6 repeats. So if we have fewer than 4
556 * repeats, we _just_ output literals. Otherwise,
557 * we output one literal plus at least one copy
558 * code, and tweak the copy codes to make sure we
559 * aren't left with under 3 at the end.
560 */
561 assert(treesrc[i] < 16);
562 treesyms[ntreesyms++] = treesrc[i] | SYMPFX_CODELEN;
563 k--;
564 if (k < 3) {
565 while (k--)
566 treesyms[ntreesyms++] = treesrc[i] | SYMPFX_CODELEN;
567 } else {
568 while (k > 0) {
569 int rpt = (k < 6 ? k : 6);
570 if (rpt > k-3 && rpt < k)
571 rpt = k-3;
572 assert(rpt >= 3 && rpt <= 6);
573 treesyms[ntreesyms++] = 16 | SYMPFX_CODELEN;
574 treesyms[ntreesyms++] = (SYMPFX_EXTRABITS | (rpt - 3) |
575 (2 << SYM_EXTRABITS_SHIFT));
576 k -= rpt;
577 }
578 }
579 }
580
581 i += j;
582 }
583 assert((unsigned)ntreesyms < lenof(treesyms));
584
585 /*
586 * Count up the frequency table for the tree-transmission
587 * symbols, and build the auxiliary Huffman tree for that.
588 */
589 memset(freqs3, 0, sizeof(freqs3));
590 for (i = 0; i < ntreesyms; i++) {
591 unsigned sym = treesyms[i];
592
593 /*
594 * Increment the occurrence counter for this symbol, if
595 * it's the Huffman alphabet and isn't extra bits.
596 */
597 if ((sym & SYMPFX_MASK) == SYMPFX_CODELEN) {
598 sym &= ~SYMPFX_MASK;
599 assert(sym < lenof(freqs3));
600 freqs3[sym]++;
601 }
602 }
603 build_huffman_tree(freqs3, len3, lenof(freqs3), 7);
604 deflate_hufcodes(len3, code3, lenof(freqs3));
605
606 /*
607 * Reorder the code length codes into transmission order, and
608 * determine HCLEN.
609 */
610 for (i = 0; i < 19; i++)
611 codelen[i] = len3[lenlenmap[i]];
612 for (hclen = 19; hclen > 4 && codelen[hclen-1] == 0; hclen--)
613 /* empty loop body */;
614
615 /*
616 * Now work out the exact size of both the dynamic and the
617 * static block, in bits.
618 */
619 {
620 int ssize, dsize;
621
622 /*
623 * First the dynamic block.
624 */
625 dsize = 3 + 5 + 5 + 4; /* 3-bit header, HLIT, HDIST, HCLEN */
626 dsize += 3 * hclen; /* code-length-alphabet code lengths */
627 /* Code lengths */
628 for (i = 0; i < ntreesyms; i++)
629 dsize += symsize(treesyms[i], &dht);
630 /* The actual block data */
631 for (i = 0; i < dynamic_len; i++) {
632 unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT];
633 dsize += symsize(sym, &dht);
634 }
635 /* And the end-of-data symbol. */
636 dsize += symsize(SYMPFX_LITLEN | 256, &dht);
637
638 /*
639 * Now the static block.
640 */
641 ssize = 3; /* 3-bit block header */
642 /* The actual block data */
643 for (i = 0; i < static_len; i++) {
644 unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT];
645 ssize += symsize(sym, &out->sht);
646 }
647 /* And the end-of-data symbol. */
648 ssize += symsize(SYMPFX_LITLEN | 256, &out->sht);
649
650 /*
651 * Compare the two and decide which to output. We break
652 * exact ties in favour of the static block, because of the
653 * special case in which that block has zero length.
654 */
655 dynamic = ((double)ssize * dynamic_len > (double)dsize * static_len);
656 ht = dynamic ? &dht : &out->sht;
657 blklen = dynamic ? dynamic_len : static_len;
658 }
659
660 /*
661 * Actually transmit the block.
662 */
663
664 /* 3-bit block header */
665 bfinal = (out->lastblock ? 1 : 0);
666 btype = dynamic ? 2 : 1;
667 debug(("send: bfinal=%d btype=%d\n", bfinal, btype));
668 outbits(out, bfinal, 1);
669 outbits(out, btype, 2);
670
671 #ifdef STATISTICS
672 bitcount_before = out->bitcount;
673 #endif
674
675 if (dynamic) {
676 /* HLIT, HDIST and HCLEN */
677 debug(("send: hlit=%d hdist=%d hclen=%d\n", hlit, hdist, hclen));
678 outbits(out, hlit - 257, 5);
679 outbits(out, hdist - 1, 5);
680 outbits(out, hclen - 4, 4);
681
682 /* Code lengths for the auxiliary tree */
683 for (i = 0; i < hclen; i++) {
684 debug(("send: lenlen %d\n", codelen[i]));
685 outbits(out, codelen[i], 3);
686 }
687
688 /* Code lengths for the literal/length and distance trees */
689 for (i = 0; i < ntreesyms; i++)
690 writesym(out, treesyms[i], ht);
691 #ifdef STATISTICS
692 fprintf(stderr, "total tree size %lu bits\n",
693 out->bitcount - bitcount_before);
694 #endif
695 }
696
697 /* Output the actual symbols from the buffer */
698 for (i = 0; i < blklen; i++) {
699 unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT];
700 writesym(out, sym, ht);
701 }
702
703 /* Output the end-of-data symbol */
704 writesym(out, SYMPFX_LITLEN | 256, ht);
705
706 /*
707 * Remove all the just-output symbols from the symbol buffer by
708 * adjusting symstart and nsyms.
709 */
710 out->symstart = (out->symstart + blklen) % SYMLIMIT;
711 out->nsyms -= blklen;
712 }
713
714 /*
715 * Give the approximate log-base-2 of an input integer, measured in
716 * 8ths of a bit. (I.e. this computes an integer approximation to
717 * 8*logbase2(x).)
718 */
approxlog2(unsigned x)719 static int approxlog2(unsigned x)
720 {
721 int ret = 31*8;
722
723 /*
724 * Binary-search to get the top bit of x up to bit 31.
725 */
726 if (x < 0x00010000U) x <<= 16, ret -= 16*8;
727 if (x < 0x01000000U) x <<= 8, ret -= 8*8;
728 if (x < 0x10000000U) x <<= 4, ret -= 4*8;
729 if (x < 0x40000000U) x <<= 2, ret -= 2*8;
730 if (x < 0x80000000U) x <<= 1, ret -= 1*8;
731
732 /*
733 * Now we know the logarithm we want is in [ret,ret+1).
734 * Determine the bottom three bits by checking against
735 * threshold values.
736 *
737 * (Each of these threshold values is 0x80000000 times an odd
738 * power of 2^(1/16). Therefore, this function rounds to
739 * nearest.)
740 */
741 if (x <= 0xAD583EEAU) {
742 if (x <= 0x91C3D373U)
743 ret += (x <= 0x85AAC367U ? 0 : 1);
744 else
745 ret += (x <= 0x9EF53260U ? 2 : 3);
746 } else {
747 if (x <= 0xCE248C15U)
748 ret += (x <= 0xBD08A39FU ? 4 : 5);
749 else
750 ret += (x <= 0xE0CCDEECU ? 6 : x <= 0xF5257D15L ? 7 : 8);
751 }
752
753 return ret;
754 }
755
chooseblock(deflate_compress_ctx * out)756 static void chooseblock(deflate_compress_ctx *out)
757 {
758 int freqs1[286], freqs2[30];
759 int i, len, bestlen, longestlen = 0;
760 int total1, total2;
761 int bestvfm;
762
763 memset(freqs1, 0, sizeof(freqs1));
764 memset(freqs2, 0, sizeof(freqs2));
765 freqs1[256] = 1; /* we're bound to need one EOB */
766 total1 = 1;
767 total2 = 0;
768
769 /*
770 * Iterate over all possible block lengths, computing the
771 * entropic coding approximation to the final length at every
772 * stage. We divide the result by the number of symbols
773 * encoded, to determine the `value for money' (overall
774 * bits-per-symbol count) of a block of that length.
775 */
776 bestlen = -1;
777 bestvfm = 0;
778
779 len = 300 * 8; /* very approximate size of the Huffman trees */
780
781 for (i = 0; i < out->nsyms; i++) {
782 unsigned sym = out->syms[(out->symstart + i) % SYMLIMIT];
783
784 if (i > 0 && (sym & SYMPFX_MASK) == SYMPFX_LITLEN) {
785 /*
786 * This is a viable point at which to end the block.
787 * Compute the value for money.
788 */
789 int vfm = i * 32768 / len; /* symbols encoded per bit */
790
791 if (bestlen < 0 || vfm > bestvfm) {
792 bestlen = i;
793 bestvfm = vfm;
794 }
795
796 longestlen = i;
797 }
798
799 /*
800 * Increment the occurrence counter for this symbol, if
801 * it's in one of the Huffman alphabets and isn't extra
802 * bits.
803 */
804 if ((sym & SYMPFX_MASK) == SYMPFX_LITLEN) {
805 sym &= ~SYMPFX_MASK;
806 assert(sym < lenof(freqs1));
807 len += freqs1[sym] * approxlog2(freqs1[sym]);
808 len -= total1 * approxlog2(total1);
809 freqs1[sym]++;
810 total1++;
811 len -= freqs1[sym] * approxlog2(freqs1[sym]);
812 len += total1 * approxlog2(total1);
813 } else if ((sym & SYMPFX_MASK) == SYMPFX_DIST) {
814 sym &= ~SYMPFX_MASK;
815 assert(sym < lenof(freqs2));
816 len += freqs2[sym] * approxlog2(freqs2[sym]);
817 len -= total2 * approxlog2(total2);
818 freqs2[sym]++;
819 total2++;
820 len -= freqs2[sym] * approxlog2(freqs2[sym]);
821 len += total2 * approxlog2(total2);
822 } else if ((sym & SYMPFX_MASK) == SYMPFX_EXTRABITS) {
823 len += 8 * ((sym &~ SYMPFX_MASK) >> SYM_EXTRABITS_SHIFT);
824 }
825 }
826
827 assert(bestlen > 0);
828
829 outblock(out, bestlen, longestlen);
830 }
831
832 /*
833 * Force the current symbol buffer to be flushed out as a single
834 * block.
835 */
flushblock(deflate_compress_ctx * out)836 static void flushblock(deflate_compress_ctx *out)
837 {
838 /*
839 * No need to check that out->nsyms is a valid block length: we
840 * know it has to be, because flushblock() is called in between
841 * two matches/literals.
842 */
843 outblock(out, out->nsyms, out->nsyms);
844 assert(out->nsyms == 0);
845 }
846
847 /*
848 * Place a symbol into the symbols buffer.
849 */
outsym(deflate_compress_ctx * out,unsigned long sym)850 static void outsym(deflate_compress_ctx *out, unsigned long sym)
851 {
852 assert(out->nsyms < SYMLIMIT);
853 out->syms[(out->symstart + out->nsyms++) % SYMLIMIT] = sym;
854
855 if (out->nsyms == SYMLIMIT)
856 chooseblock(out);
857 }
858
literal(struct LZ77Context * ectx,unsigned char c)859 static void literal(struct LZ77Context *ectx, unsigned char c)
860 {
861 deflate_compress_ctx *out = (deflate_compress_ctx *) ectx->userdata;
862
863 outsym(out, SYMPFX_LITLEN | c);
864 }
865
match(struct LZ77Context * ectx,int distance,int len)866 static void match(struct LZ77Context *ectx, int distance, int len)
867 {
868 const coderecord *d, *l;
869 int i, j, k;
870 deflate_compress_ctx *out = (deflate_compress_ctx *) ectx->userdata;
871
872 while (len > 0) {
873 int thislen;
874
875 /*
876 * We can transmit matches of lengths 3 through 258
877 * inclusive. So if len exceeds 258, we must transmit in
878 * several steps, with 258 or less in each step.
879 *
880 * Specifically: if len >= 261, we can transmit 258 and be
881 * sure of having at least 3 left for the next step. And if
882 * len <= 258, we can just transmit len. But if len == 259
883 * or 260, we must transmit len-3.
884 */
885 thislen = (len > 260 ? 258 : len <= 258 ? len : len - 3);
886 len -= thislen;
887
888 /*
889 * Binary-search to find which length code we're
890 * transmitting.
891 */
892 i = -1;
893 j = sizeof(lencodes) / sizeof(*lencodes);
894 while (1) {
895 assert(j - i >= 2);
896 k = (j + i) / 2;
897 if (thislen < lencodes[k].min)
898 j = k;
899 else if (thislen > lencodes[k].max)
900 i = k;
901 else {
902 l = &lencodes[k];
903 break; /* found it! */
904 }
905 }
906
907 /*
908 * Transmit the length code.
909 */
910 outsym(out, SYMPFX_LITLEN | l->code);
911
912 /*
913 * Transmit the extra bits.
914 */
915 if (l->extrabits) {
916 outsym(out, (SYMPFX_EXTRABITS | (thislen - l->min) |
917 (l->extrabits << SYM_EXTRABITS_SHIFT)));
918 }
919
920 /*
921 * Binary-search to find which distance code we're
922 * transmitting.
923 */
924 i = -1;
925 j = sizeof(distcodes) / sizeof(*distcodes);
926 while (1) {
927 assert(j - i >= 2);
928 k = (j + i) / 2;
929 if (distance < distcodes[k].min)
930 j = k;
931 else if (distance > distcodes[k].max)
932 i = k;
933 else {
934 d = &distcodes[k];
935 break; /* found it! */
936 }
937 }
938
939 /*
940 * Write the distance code.
941 */
942 outsym(out, SYMPFX_DIST | d->code);
943
944 /*
945 * Transmit the extra bits.
946 */
947 if (d->extrabits) {
948 outsym(out, (SYMPFX_EXTRABITS | (distance - d->min) |
949 (d->extrabits << SYM_EXTRABITS_SHIFT)));
950 }
951 }
952 }
953
deflate_compress_new(int type)954 deflate_compress_ctx *deflate_compress_new(int type)
955 {
956 deflate_compress_ctx *out;
957 struct LZ77Context *ectx = snew(struct LZ77Context);
958
959 lz77_init(ectx, DWINSIZE);
960 ectx->literal = literal;
961 ectx->match = match;
962
963 out = snew(deflate_compress_ctx);
964 out->type = type;
965 out->outbits = out->noutbits = 0;
966 out->firstblock = TRUE;
967 #ifdef STATISTICS
968 out->bitcount = 0;
969 #endif
970
971 out->syms = snewn(SYMLIMIT, unsigned long);
972 out->symstart = out->nsyms = 0;
973
974 out->checksum = (type == DEFLATE_TYPE_ZLIB ? 1 : 0);
975 out->datasize = 0;
976 out->lastblock = FALSE;
977 out->finished = FALSE;
978
979 /*
980 * Build the static Huffman tables now, so we'll have them
981 * available every time outblock() is called.
982 */
983 {
984 int i;
985
986 for (i = 0; i < (int)lenof(out->static_len1); i++)
987 out->static_len1[i] = (i < 144 ? 8 :
988 i < 256 ? 9 :
989 i < 280 ? 7 : 8);
990 for (i = 0; i < (int)lenof(out->static_len2); i++)
991 out->static_len2[i] = 5;
992 }
993 deflate_hufcodes(out->static_len1, out->static_code1,
994 lenof(out->static_code1));
995 deflate_hufcodes(out->static_len2, out->static_code2,
996 lenof(out->static_code2));
997 out->sht.len_litlen = out->static_len1;
998 out->sht.len_dist = out->static_len2;
999 out->sht.len_codelen = NULL;
1000 out->sht.code_litlen = out->static_code1;
1001 out->sht.code_dist = out->static_code2;
1002 out->sht.code_codelen = NULL;
1003
1004 ectx->userdata = out;
1005 out->lzc = ectx;
1006
1007 return out;
1008 }
1009
deflate_compress_free(deflate_compress_ctx * out)1010 void deflate_compress_free(deflate_compress_ctx *out)
1011 {
1012 struct LZ77Context *ectx = out->lzc;
1013
1014 sfree(out->syms);
1015 lz77_cleanup(ectx);
1016 sfree(ectx);
1017 sfree(out);
1018 }
1019
deflate_compress_data(deflate_compress_ctx * out,const void * vblock,int len,int flushtype,void ** outblock,int * outlen)1020 void deflate_compress_data(deflate_compress_ctx *out,
1021 const void *vblock, int len, int flushtype,
1022 void **outblock, int *outlen)
1023 {
1024 struct LZ77Context *ectx = out->lzc;
1025 const unsigned char *block = (const unsigned char *)vblock;
1026
1027 assert(!out->finished);
1028
1029 out->outbuf = NULL;
1030 out->outlen = out->outsize = 0;
1031
1032 /*
1033 * If this is the first block, output the header.
1034 */
1035 if (out->firstblock) {
1036 switch (out->type) {
1037 case DEFLATE_TYPE_BARE:
1038 break; /* no header */
1039 case DEFLATE_TYPE_ZLIB:
1040 /*
1041 * zlib (RFC1950) header bytes: 78 9C. (Deflate
1042 * compression, 32K window size, default algorithm.)
1043 */
1044 outbits(out, 0x9C78, 16);
1045 break;
1046 case DEFLATE_TYPE_GZIP:
1047 /*
1048 * Minimal gzip (RFC1952) header:
1049 *
1050 * - basic header of 1F 8B
1051 * - compression method byte (8 = deflate)
1052 * - flags byte (zero: we use no optional features)
1053 * - modification time (zero: no time stamp available)
1054 * - extra flags byte (2: we use maximum compression
1055 * always)
1056 * - operating system byte (255: we do not specify)
1057 */
1058 outbits(out, 0x00088B1F, 32); /* header, CM, flags */
1059 outbits(out, 0, 32); /* mtime */
1060 outbits(out, 0xFF02, 16); /* xflags, OS */
1061 break;
1062 }
1063 out->firstblock = FALSE;
1064 }
1065
1066 /*
1067 * Feed our data to the LZ77 compression phase.
1068 */
1069 lz77_compress(ectx, block, len, TRUE);
1070
1071 /*
1072 * Update checksums and counters.
1073 */
1074 switch (out->type) {
1075 case DEFLATE_TYPE_ZLIB:
1076 out->checksum = adler32_update(out->checksum, block, len);
1077 break;
1078 case DEFLATE_TYPE_GZIP:
1079 out->checksum = crc32_update(out->checksum, block, len);
1080 break;
1081 }
1082 out->datasize += len;
1083
1084 switch (flushtype) {
1085 /*
1086 * FIXME: what other flush types are available and useful?
1087 * In PuTTY, it was clear that we generally wanted to be in
1088 * a static block so it was safe to open one. Here, we
1089 * probably prefer to be _outside_ a block if we can. Think
1090 * about this.
1091 */
1092 case DEFLATE_NO_FLUSH:
1093 break; /* don't flush any data at all (duh) */
1094 case DEFLATE_SYNC_FLUSH:
1095 /*
1096 * Close the current block.
1097 */
1098 flushblock(out);
1099
1100 /*
1101 * Then output an empty _uncompressed_ block: send 000,
1102 * then sync to byte boundary, then send bytes 00 00 FF
1103 * FF.
1104 */
1105 outbits(out, 0, 3);
1106 if (out->noutbits)
1107 outbits(out, 0, 8 - out->noutbits);
1108 outbits(out, 0, 16);
1109 outbits(out, 0xFFFF, 16);
1110 break;
1111 case DEFLATE_END_OF_DATA:
1112 /*
1113 * Output a block with BFINAL set.
1114 */
1115 out->lastblock = TRUE;
1116 flushblock(out);
1117
1118 /*
1119 * Sync to byte boundary, flushing out the final byte.
1120 */
1121 if (out->noutbits)
1122 outbits(out, 0, 8 - out->noutbits);
1123
1124 /*
1125 * Format-specific trailer data.
1126 */
1127 switch (out->type) {
1128 case DEFLATE_TYPE_ZLIB:
1129 /*
1130 * Just write out the Adler32 checksum.
1131 */
1132 outbits(out, (out->checksum >> 24) & 0xFF, 8);
1133 outbits(out, (out->checksum >> 16) & 0xFF, 8);
1134 outbits(out, (out->checksum >> 8) & 0xFF, 8);
1135 outbits(out, (out->checksum >> 0) & 0xFF, 8);
1136 break;
1137 case DEFLATE_TYPE_GZIP:
1138 /*
1139 * Write out the CRC32 checksum and the data length.
1140 */
1141 outbits(out, out->checksum, 32);
1142 outbits(out, out->datasize, 32);
1143 break;
1144 }
1145
1146 out->finished = TRUE;
1147 break;
1148 }
1149
1150 /*
1151 * Return any data that we've generated.
1152 */
1153 *outblock = (void *)out->outbuf;
1154 *outlen = out->outlen;
1155 }
1156
1157 /* ----------------------------------------------------------------------
1158 * Deflate decompression.
1159 */
1160
1161 /*
1162 * The way we work the Huffman decode is to have a table lookup on
1163 * the first N bits of the input stream (in the order they arrive,
1164 * of course, i.e. the first bit of the Huffman code is in bit 0).
1165 * Each table entry lists the number of bits to consume, plus
1166 * either an output code or a pointer to a secondary table.
1167 */
1168 struct table;
1169 struct tableentry;
1170
1171 struct tableentry {
1172 unsigned char nbits;
1173 short code;
1174 struct table *nexttable;
1175 };
1176
1177 struct table {
1178 int mask; /* mask applied to input bit stream */
1179 struct tableentry *table;
1180 };
1181
1182 #define MAXSYMS 288
1183
1184 /*
1185 * Build a single-level decode table for elements
1186 * [minlength,maxlength) of the provided code/length tables, and
1187 * recurse to build subtables.
1188 */
mkonetab(int * codes,unsigned char * lengths,int nsyms,int pfx,int pfxbits,int bits)1189 static struct table *mkonetab(int *codes, unsigned char *lengths, int nsyms,
1190 int pfx, int pfxbits, int bits)
1191 {
1192 struct table *tab = snew(struct table);
1193 int pfxmask = (1 << pfxbits) - 1;
1194 int nbits, i, j, code;
1195
1196 tab->table = snewn(1 << bits, struct tableentry);
1197 tab->mask = (1 << bits) - 1;
1198
1199 for (code = 0; code <= tab->mask; code++) {
1200 tab->table[code].code = -1;
1201 tab->table[code].nbits = 0;
1202 tab->table[code].nexttable = NULL;
1203 }
1204
1205 for (i = 0; i < nsyms; i++) {
1206 if (lengths[i] <= pfxbits || (codes[i] & pfxmask) != pfx)
1207 continue;
1208 code = (codes[i] >> pfxbits) & tab->mask;
1209 for (j = code; j <= tab->mask; j += 1 << (lengths[i] - pfxbits)) {
1210 tab->table[j].code = i;
1211 nbits = lengths[i] - pfxbits;
1212 if (tab->table[j].nbits < nbits)
1213 tab->table[j].nbits = nbits;
1214 }
1215 }
1216 for (code = 0; code <= tab->mask; code++) {
1217 if (tab->table[code].nbits <= bits)
1218 continue;
1219 /* Generate a subtable. */
1220 tab->table[code].code = -1;
1221 nbits = tab->table[code].nbits - bits;
1222 if (nbits > 7)
1223 nbits = 7;
1224 tab->table[code].nbits = bits;
1225 tab->table[code].nexttable = mkonetab(codes, lengths, nsyms,
1226 pfx | (code << pfxbits),
1227 pfxbits + bits, nbits);
1228 }
1229
1230 return tab;
1231 }
1232
1233 /*
1234 * Build a decode table, given a set of Huffman tree lengths.
1235 */
mktable(unsigned char * lengths,int nlengths,const char * alphabet,int * error)1236 static struct table *mktable(unsigned char *lengths, int nlengths,
1237 #ifdef ANALYSIS
1238 const char *alphabet,
1239 #endif
1240 int *error)
1241 {
1242 int codes[MAXSYMS];
1243 int maxlen;
1244
1245 #ifdef ANALYSIS
1246 if (alphabet && analyse_level > 1) {
1247 int i, col = 0;
1248 printf("code lengths for %s alphabet:\n", alphabet);
1249 for (i = 0; i < nlengths; i++) {
1250 col += printf("%3d", lengths[i]);
1251 if (col > 72) {
1252 putchar('\n');
1253 col = 0;
1254 }
1255 }
1256 if (col > 0)
1257 putchar('\n');
1258 }
1259 #endif
1260
1261 maxlen = deflate_hufcodes(lengths, codes, nlengths);
1262
1263 if (maxlen < 0) {
1264 *error = (maxlen == -1 ? DEFLATE_ERR_LARGE_HUFTABLE :
1265 DEFLATE_ERR_SMALL_HUFTABLE);
1266 return NULL;
1267 }
1268
1269 /*
1270 * Now we have the complete list of Huffman codes. Build a
1271 * table.
1272 */
1273 return mkonetab(codes, lengths, nlengths, 0, 0, maxlen < 9 ? maxlen : 9);
1274 }
1275
freetable(struct table ** ztab)1276 static int freetable(struct table **ztab)
1277 {
1278 struct table *tab;
1279 int code;
1280
1281 if (ztab == NULL)
1282 return -1;
1283
1284 if (*ztab == NULL)
1285 return 0;
1286
1287 tab = *ztab;
1288
1289 for (code = 0; code <= tab->mask; code++)
1290 if (tab->table[code].nexttable != NULL)
1291 freetable(&tab->table[code].nexttable);
1292
1293 sfree(tab->table);
1294 tab->table = NULL;
1295
1296 sfree(tab);
1297 *ztab = NULL;
1298
1299 return (0);
1300 }
1301
1302 struct deflate_decompress_ctx {
1303 struct table *staticlentable, *staticdisttable;
1304 struct table *currlentable, *currdisttable, *lenlentable;
1305 enum {
1306 ZLIBSTART,
1307 GZIPSTART, GZIPMETHFLAGS, GZIPIGNORE1, GZIPIGNORE2, GZIPIGNORE3,
1308 GZIPEXTRA, GZIPFNAME, GZIPCOMMENT,
1309 OUTSIDEBLK, TREES_HDR, TREES_LENLEN, TREES_LEN, TREES_LENREP,
1310 INBLK, GOTLENSYM, GOTLEN, GOTDISTSYM,
1311 UNCOMP_LEN, UNCOMP_NLEN, UNCOMP_DATA,
1312 END,
1313 ADLER1, ADLER2,
1314 CRC1, CRC2, ILEN1, ILEN2,
1315 FINALSPIN
1316 } state;
1317 int sym, hlit, hdist, hclen, lenptr, lenextrabits, lenaddon, len,
1318 lenrep, lastblock;
1319 int uncomplen;
1320 unsigned char lenlen[19];
1321 unsigned char lengths[286 + 32];
1322 unsigned long bits;
1323 int nbits;
1324 unsigned char window[DWINSIZE];
1325 int winpos;
1326 unsigned char *outblk;
1327 int outlen, outsize;
1328 int type;
1329 unsigned long checksum;
1330 unsigned long bytesout;
1331 int gzflags, gzextralen;
1332 #ifdef ANALYSIS
1333 int bytesread;
1334 int bitcount_before;
1335 #define BITCOUNT(dctx) ( (dctx)->bytesread * 8 - (dctx)->nbits )
1336 #endif
1337 };
1338
deflate_decompress_new(int type)1339 deflate_decompress_ctx *deflate_decompress_new(int type)
1340 {
1341 deflate_decompress_ctx *dctx = snew(deflate_decompress_ctx);
1342 unsigned char lengths[288];
1343
1344 memset(lengths, 8, 144);
1345 memset(lengths + 144, 9, 256 - 144);
1346 memset(lengths + 256, 7, 280 - 256);
1347 memset(lengths + 280, 8, 288 - 280);
1348 dctx->staticlentable = mktable(lengths, 288,
1349 #ifdef ANALYSIS
1350 NULL,
1351 #endif
1352 NULL);
1353 assert(dctx->staticlentable);
1354 memset(lengths, 5, 32);
1355 dctx->staticdisttable = mktable(lengths, 32,
1356 #ifdef ANALYSIS
1357 NULL,
1358 #endif
1359 NULL);
1360 assert(dctx->staticdisttable);
1361 dctx->state = (type == DEFLATE_TYPE_ZLIB ? ZLIBSTART :
1362 type == DEFLATE_TYPE_GZIP ? GZIPSTART :
1363 OUTSIDEBLK);
1364 dctx->currlentable = dctx->currdisttable = dctx->lenlentable = NULL;
1365 dctx->bits = 0;
1366 dctx->nbits = 0;
1367 dctx->winpos = 0;
1368 dctx->type = type;
1369 dctx->lastblock = FALSE;
1370 dctx->checksum = (type == DEFLATE_TYPE_ZLIB ? 1 : 0);
1371 dctx->bytesout = 0;
1372 dctx->gzflags = dctx->gzextralen = 0;
1373 #ifdef ANALYSIS
1374 dctx->bytesread = dctx->bitcount_before = 0;
1375 #endif
1376
1377 return dctx;
1378 }
1379
deflate_decompress_free(deflate_decompress_ctx * dctx)1380 void deflate_decompress_free(deflate_decompress_ctx *dctx)
1381 {
1382 if (dctx->currlentable && dctx->currlentable != dctx->staticlentable)
1383 freetable(&dctx->currlentable);
1384 if (dctx->currdisttable && dctx->currdisttable != dctx->staticdisttable)
1385 freetable(&dctx->currdisttable);
1386 if (dctx->lenlentable)
1387 freetable(&dctx->lenlentable);
1388 freetable(&dctx->staticlentable);
1389 freetable(&dctx->staticdisttable);
1390 sfree(dctx);
1391 }
1392
huflookup(unsigned long * bitsp,int * nbitsp,struct table * tab)1393 static int huflookup(unsigned long *bitsp, int *nbitsp, struct table *tab)
1394 {
1395 unsigned long bits = *bitsp;
1396 int nbits = *nbitsp;
1397 while (1) {
1398 struct tableentry *ent;
1399 ent = &tab->table[bits & tab->mask];
1400 if (ent->nbits > nbits)
1401 return -1; /* not enough data */
1402 bits >>= ent->nbits;
1403 nbits -= ent->nbits;
1404 if (ent->code == -1)
1405 tab = ent->nexttable;
1406 else {
1407 *bitsp = bits;
1408 *nbitsp = nbits;
1409 return ent->code;
1410 }
1411
1412 /*
1413 * If we reach here with `tab' null, it can only be because
1414 * there was a missing entry in the Huffman table. This
1415 * should never occur even with invalid input data, because
1416 * we enforce at mktable time that the Huffman codes should
1417 * precisely cover the code space; so we can enforce this
1418 * by assertion.
1419 */
1420 assert(tab);
1421 }
1422 }
1423
emit_char(deflate_decompress_ctx * dctx,int c)1424 static void emit_char(deflate_decompress_ctx *dctx, int c)
1425 {
1426 dctx->window[dctx->winpos] = c;
1427 dctx->winpos = (dctx->winpos + 1) & (DWINSIZE - 1);
1428 if (dctx->outlen >= dctx->outsize) {
1429 dctx->outsize = dctx->outlen * 3 / 2 + 512;
1430 dctx->outblk = sresize(dctx->outblk, dctx->outsize, unsigned char);
1431 }
1432 if (dctx->type == DEFLATE_TYPE_ZLIB) {
1433 unsigned char uc = c;
1434 dctx->checksum = adler32_update(dctx->checksum, &uc, 1);
1435 } else if (dctx->type == DEFLATE_TYPE_GZIP) {
1436 unsigned char uc = c;
1437 dctx->checksum = crc32_update(dctx->checksum, &uc, 1);
1438 }
1439 dctx->outblk[dctx->outlen++] = c;
1440 dctx->bytesout++;
1441 }
1442
1443 #define EATBITS(n) ( dctx->nbits -= (n), dctx->bits >>= (n) )
1444
deflate_decompress_data(deflate_decompress_ctx * dctx,const void * vblock,int len,void ** outblock,int * outlen)1445 int deflate_decompress_data(deflate_decompress_ctx *dctx,
1446 const void *vblock, int len,
1447 void **outblock, int *outlen)
1448 {
1449 const coderecord *rec;
1450 const unsigned char *block = (const unsigned char *)vblock;
1451 int code, bfinal, btype, rep, dist, nlen, header;
1452 unsigned long cksum;
1453 int error = 0;
1454
1455 if (len == 0) {
1456 *outblock = NULL;
1457 *outlen = 0;
1458 if (dctx->state != FINALSPIN)
1459 return DEFLATE_ERR_UNEXPECTED_EOF;
1460 else
1461 return 0;
1462 }
1463
1464 dctx->outblk = NULL;
1465 dctx->outsize = 0;
1466 dctx->outlen = 0;
1467
1468 while (len > 0 || dctx->nbits > 0) {
1469 while (dctx->nbits < 24 && len > 0) {
1470 dctx->bits |= (*block++) << dctx->nbits;
1471 dctx->nbits += 8;
1472 len--;
1473 #ifdef ANALYSIS
1474 dctx->bytesread++;
1475 #endif
1476 }
1477 switch (dctx->state) {
1478 case ZLIBSTART:
1479 /* Expect 16-bit zlib header. */
1480 if (dctx->nbits < 16)
1481 goto finished; /* done all we can */
1482
1483 /*
1484 * The header is stored as a big-endian 16-bit integer,
1485 * in contrast to the general little-endian policy in
1486 * the rest of the format :-(
1487 */
1488 header = (((dctx->bits & 0xFF00) >> 8) |
1489 ((dctx->bits & 0x00FF) << 8));
1490 EATBITS(16);
1491
1492 /*
1493 * Check the header:
1494 *
1495 * - bits 8-11 should be 1000 (Deflate/RFC1951)
1496 * - bits 12-15 should be at most 0111 (window size)
1497 * - bit 5 should be zero (no dictionary present)
1498 * - we don't care about bits 6-7 (compression rate)
1499 * - bits 0-4 should be set up to make the whole thing
1500 * a multiple of 31 (checksum).
1501 */
1502 if ((header & 0xF000) > 0x7000 ||
1503 (header & 0x0020) != 0x0000 ||
1504 (header % 31) != 0) {
1505 error = DEFLATE_ERR_ZLIB_HEADER;
1506 goto finished;
1507 }
1508 if ((header & 0x0F00) != 0x0800) {
1509 error = DEFLATE_ERR_ZLIB_WRONGCOMP;
1510 goto finished;
1511 }
1512 dctx->state = OUTSIDEBLK;
1513 break;
1514 case GZIPSTART:
1515 /* Expect 16-bit gzip header. */
1516 if (dctx->nbits < 16)
1517 goto finished;
1518 header = dctx->bits & 0xFFFF;
1519 EATBITS(16);
1520 if (header != 0x8B1F) {
1521 error = DEFLATE_ERR_GZIP_HEADER;
1522 goto finished;
1523 }
1524 dctx->state = GZIPMETHFLAGS;
1525 break;
1526 case GZIPMETHFLAGS:
1527 /* Expect gzip compression method and flags bytes. */
1528 if (dctx->nbits < 16)
1529 goto finished;
1530 header = dctx->bits & 0xFF;
1531 EATBITS(8);
1532 if (header != 8) {
1533 error = DEFLATE_ERR_GZIP_WRONGCOMP;
1534 goto finished;
1535 }
1536 dctx->gzflags = dctx->bits & 0xFF;
1537 if (dctx->gzflags & 2) {
1538 /*
1539 * The FHCRC flag is slightly confusing. RFC1952
1540 * documents it as indicating the presence of a
1541 * two-byte CRC16 of the gzip header, occurring
1542 * just before the beginning of the Deflate stream.
1543 * However, gzip itself (as of 1.3.5) appears to
1544 * believe it indicates that the current gzip
1545 * `member' is not the final one, i.e. that the
1546 * stream is composed of multiple gzip members
1547 * concatenated together, and furthermore gzip will
1548 * refuse to decode any file that has it set.
1549 *
1550 * For this reason, I label it as `disputed' and
1551 * also refuse to decode anything that has it set.
1552 * I don't expect this to be a problem in practice.
1553 */
1554 error = DEFLATE_ERR_GZIP_FHCRC;
1555 goto finished;
1556 }
1557 EATBITS(8);
1558 dctx->state = GZIPIGNORE1;
1559 break;
1560 case GZIPIGNORE1:
1561 case GZIPIGNORE2:
1562 case GZIPIGNORE3:
1563 /* Expect two bytes of gzip timestamp/XFL/OS, which we ignore. */
1564 if (dctx->nbits < 16)
1565 goto finished;
1566 EATBITS(16);
1567 if (dctx->state == GZIPIGNORE3) {
1568 dctx->state = GZIPEXTRA;
1569 } else
1570 dctx->state++; /* maps IGNORE1 -> IGNORE2 -> IGNORE3 */
1571 break;
1572 case GZIPEXTRA:
1573 if (dctx->gzflags & 4) {
1574 /* Expect two bytes of extra-length count, then that many
1575 * extra bytes of header data, all of which we ignore. */
1576 if (!dctx->gzextralen) {
1577 if (dctx->nbits < 16)
1578 goto finished;
1579 dctx->gzextralen = dctx->bits & 0xFFFF;
1580 EATBITS(16);
1581 break;
1582 } else if (dctx->gzextralen > 0) {
1583 if (dctx->nbits < 8)
1584 goto finished;
1585 EATBITS(8);
1586 if (--dctx->gzextralen > 0)
1587 break;
1588 }
1589 }
1590 dctx->state = GZIPFNAME;
1591 break;
1592 case GZIPFNAME:
1593 if (dctx->gzflags & 8) {
1594 /*
1595 * Expect a NUL-terminated filename.
1596 */
1597 if (dctx->nbits < 8)
1598 goto finished;
1599 code = dctx->bits & 0xFF;
1600 EATBITS(8);
1601 } else
1602 code = 0;
1603 if (code == 0)
1604 dctx->state = GZIPCOMMENT;
1605 break;
1606 case GZIPCOMMENT:
1607 if (dctx->gzflags & 16) {
1608 /*
1609 * Expect a NUL-terminated filename.
1610 */
1611 if (dctx->nbits < 8)
1612 goto finished;
1613 code = dctx->bits & 0xFF;
1614 EATBITS(8);
1615 } else
1616 code = 0;
1617 if (code == 0)
1618 dctx->state = OUTSIDEBLK;
1619 break;
1620 case OUTSIDEBLK:
1621 /* Expect 3-bit block header. */
1622 if (dctx->nbits < 3)
1623 goto finished; /* done all we can */
1624 bfinal = dctx->bits & 1;
1625 if (bfinal)
1626 dctx->lastblock = TRUE;
1627 EATBITS(1);
1628 btype = dctx->bits & 3;
1629 EATBITS(2);
1630 if (btype == 0) {
1631 int to_eat = dctx->nbits & 7;
1632 dctx->state = UNCOMP_LEN;
1633 EATBITS(to_eat); /* align to byte boundary */
1634 } else if (btype == 1) {
1635 dctx->currlentable = dctx->staticlentable;
1636 dctx->currdisttable = dctx->staticdisttable;
1637 dctx->state = INBLK;
1638 } else if (btype == 2) {
1639 dctx->state = TREES_HDR;
1640 }
1641 debug(("recv: bfinal=%d btype=%d\n", bfinal, btype));
1642 #ifdef ANALYSIS
1643 if (analyse_level > 1) {
1644 static const char *const btypes[] = {
1645 "uncompressed", "static", "dynamic", "type 3 (unknown)"
1646 };
1647 printf("new block, %sfinal, %s\n",
1648 bfinal ? "" : "not ",
1649 btypes[btype]);
1650 }
1651 #endif
1652 break;
1653 case TREES_HDR:
1654 /*
1655 * Dynamic block header. Five bits of HLIT, five of
1656 * HDIST, four of HCLEN.
1657 */
1658 if (dctx->nbits < 5 + 5 + 4)
1659 goto finished; /* done all we can */
1660 dctx->hlit = 257 + (dctx->bits & 31);
1661 EATBITS(5);
1662 dctx->hdist = 1 + (dctx->bits & 31);
1663 EATBITS(5);
1664 dctx->hclen = 4 + (dctx->bits & 15);
1665 EATBITS(4);
1666 debug(("recv: hlit=%d hdist=%d hclen=%d\n", dctx->hlit,
1667 dctx->hdist, dctx->hclen));
1668 #ifdef ANALYSIS
1669 if (analyse_level > 1)
1670 printf("hlit=%d, hdist=%d, hclen=%d\n",
1671 dctx->hlit, dctx->hdist, dctx->hclen);
1672 #endif
1673 dctx->lenptr = 0;
1674 dctx->state = TREES_LENLEN;
1675 memset(dctx->lenlen, 0, sizeof(dctx->lenlen));
1676 break;
1677 case TREES_LENLEN:
1678 if (dctx->nbits < 3)
1679 goto finished;
1680 while (dctx->lenptr < dctx->hclen && dctx->nbits >= 3) {
1681 dctx->lenlen[lenlenmap[dctx->lenptr++]] =
1682 (unsigned char) (dctx->bits & 7);
1683 debug(("recv: lenlen %d\n", (unsigned char) (dctx->bits & 7)));
1684 EATBITS(3);
1685 }
1686 if (dctx->lenptr == dctx->hclen) {
1687 dctx->lenlentable = mktable(dctx->lenlen, 19,
1688 #ifdef ANALYSIS
1689 "code length",
1690 #endif
1691 &error);
1692 if (!dctx->lenlentable)
1693 goto finished; /* error code set up by mktable */
1694 dctx->state = TREES_LEN;
1695 dctx->lenptr = 0;
1696 }
1697 break;
1698 case TREES_LEN:
1699 if (dctx->lenptr >= dctx->hlit + dctx->hdist) {
1700 dctx->currlentable = mktable(dctx->lengths, dctx->hlit,
1701 #ifdef ANALYSIS
1702 "literal/length",
1703 #endif
1704 &error);
1705 if (!dctx->currlentable)
1706 goto finished; /* error code set up by mktable */
1707 if (dctx->hdist == 1 && dctx->lengths[dctx->hlit] == 0) {
1708 /*
1709 * Special case: if the code length list for the
1710 * backward-distance table contains a single zero
1711 * entry, it means this block will never encode a
1712 * backward distance at all (i.e. it's all
1713 * literals).
1714 */
1715 dctx->currdisttable = NULL;
1716 } else {
1717 dctx->currdisttable = mktable(dctx->lengths + dctx->hlit,
1718 dctx->hdist,
1719 #ifdef ANALYSIS
1720 "distance",
1721 #endif
1722 &error);
1723 if (!dctx->currdisttable)
1724 goto finished; /* error code set up by mktable */
1725 }
1726 freetable(&dctx->lenlentable);
1727 dctx->lenlentable = NULL;
1728 dctx->state = INBLK;
1729 break;
1730 }
1731 code = huflookup(&dctx->bits, &dctx->nbits, dctx->lenlentable);
1732 debug(("recv: codelen %d\n", code));
1733 if (code == -1)
1734 goto finished;
1735 if (code < 16) {
1736 #ifdef ANALYSIS
1737 if (analyse_level > 1)
1738 printf("code-length %d\n", code);
1739 #endif
1740 dctx->lengths[dctx->lenptr++] = code;
1741 } else {
1742 dctx->lenextrabits = (code == 16 ? 2 : code == 17 ? 3 : 7);
1743 dctx->lenaddon = (code == 18 ? 11 : 3);
1744 dctx->lenrep = (code == 16 && dctx->lenptr > 0 ?
1745 dctx->lengths[dctx->lenptr - 1] : 0);
1746 dctx->state = TREES_LENREP;
1747 }
1748 break;
1749 case TREES_LENREP:
1750 if (dctx->nbits < dctx->lenextrabits)
1751 goto finished;
1752 rep =
1753 dctx->lenaddon +
1754 (dctx->bits & ((1 << dctx->lenextrabits) - 1));
1755 EATBITS(dctx->lenextrabits);
1756 if (dctx->lenextrabits)
1757 debug(("recv: codelen-extrabits %d/%d\n", rep - dctx->lenaddon,
1758 dctx->lenextrabits));
1759 #ifdef ANALYSIS
1760 if (analyse_level > 1)
1761 printf("code-length-repeat: %d copies of %d\n", rep,
1762 dctx->lenrep);
1763 #endif
1764 while (rep > 0 && dctx->lenptr < dctx->hlit + dctx->hdist) {
1765 dctx->lengths[dctx->lenptr] = dctx->lenrep;
1766 dctx->lenptr++;
1767 rep--;
1768 }
1769 dctx->state = TREES_LEN;
1770 break;
1771 case INBLK:
1772 #ifdef ANALYSIS
1773 dctx->bitcount_before = BITCOUNT(dctx);
1774 #endif
1775 code = huflookup(&dctx->bits, &dctx->nbits, dctx->currlentable);
1776 debug(("recv: litlen %d\n", code));
1777 if (code == -1)
1778 goto finished;
1779 if (code < 256) {
1780 #ifdef ANALYSIS
1781 if (analyse_level > 0)
1782 printf("%lu: literal %d [%d]\n", dctx->bytesout, code,
1783 BITCOUNT(dctx) - dctx->bitcount_before);
1784 #endif
1785 emit_char(dctx, code);
1786 } else if (code == 256) {
1787 if (dctx->lastblock)
1788 dctx->state = END;
1789 else
1790 dctx->state = OUTSIDEBLK;
1791 if (dctx->currlentable != dctx->staticlentable) {
1792 freetable(&dctx->currlentable);
1793 dctx->currlentable = NULL;
1794 }
1795 if (dctx->currdisttable &&
1796 dctx->currdisttable != dctx->staticdisttable) {
1797 freetable(&dctx->currdisttable);
1798 dctx->currdisttable = NULL;
1799 }
1800 } else if (code < 286) { /* static tree can give >285; ignore */
1801 dctx->state = GOTLENSYM;
1802 dctx->sym = code;
1803 }
1804 break;
1805 case GOTLENSYM:
1806 rec = &lencodes[dctx->sym - 257];
1807 if (dctx->nbits < rec->extrabits)
1808 goto finished;
1809 dctx->len =
1810 rec->min + (dctx->bits & ((1 << rec->extrabits) - 1));
1811 if (rec->extrabits)
1812 debug(("recv: litlen-extrabits %d/%d\n",
1813 dctx->len - rec->min, rec->extrabits));
1814 EATBITS(rec->extrabits);
1815 dctx->state = GOTLEN;
1816 break;
1817 case GOTLEN:
1818 if (!dctx->currdisttable) {
1819 error = DEFLATE_ERR_NODISTTABLE;
1820 goto finished;
1821 }
1822 code = huflookup(&dctx->bits, &dctx->nbits, dctx->currdisttable);
1823 debug(("recv: dist %d\n", code));
1824 if (code == -1)
1825 goto finished;
1826 if (code >= 30) {
1827 error = DEFLATE_ERR_BADDISTCODE;
1828 goto finished;
1829 }
1830 dctx->state = GOTDISTSYM;
1831 dctx->sym = code;
1832 break;
1833 case GOTDISTSYM:
1834 rec = &distcodes[dctx->sym];
1835 if (dctx->nbits < rec->extrabits)
1836 goto finished;
1837 dist = rec->min + (dctx->bits & ((1 << rec->extrabits) - 1));
1838 if (rec->extrabits)
1839 debug(("recv: dist-extrabits %d/%d\n",
1840 dist - rec->min, rec->extrabits));
1841 EATBITS(rec->extrabits);
1842 dctx->state = INBLK;
1843 #ifdef ANALYSIS
1844 if (analyse_level > 0)
1845 printf("%lu: copy len=%d dist=%d [%d]\n", dctx->bytesout,
1846 dctx->len, dist,
1847 BITCOUNT(dctx) - dctx->bitcount_before);
1848 #endif
1849 while (dctx->len--)
1850 emit_char(dctx, dctx->window[(dctx->winpos - dist) &
1851 (DWINSIZE - 1)]);
1852 break;
1853 case UNCOMP_LEN:
1854 /*
1855 * Uncompressed block. We expect to see a 16-bit LEN.
1856 */
1857 if (dctx->nbits < 16)
1858 goto finished;
1859 dctx->uncomplen = dctx->bits & 0xFFFF;
1860 EATBITS(16);
1861 dctx->state = UNCOMP_NLEN;
1862 break;
1863 case UNCOMP_NLEN:
1864 /*
1865 * Uncompressed block. We expect to see a 16-bit NLEN,
1866 * which should be the one's complement of the previous
1867 * LEN.
1868 */
1869 if (dctx->nbits < 16)
1870 goto finished;
1871 nlen = dctx->bits & 0xFFFF;
1872 EATBITS(16);
1873 if (dctx->uncomplen != (nlen ^ 0xFFFF)) {
1874 error = DEFLATE_ERR_UNCOMP_HDR;
1875 goto finished;
1876 }
1877 if (dctx->uncomplen == 0) {/* block is empty */
1878 if (dctx->lastblock)
1879 dctx->state = END;
1880 else
1881 dctx->state = OUTSIDEBLK;
1882 } else
1883 dctx->state = UNCOMP_DATA;
1884 break;
1885 case UNCOMP_DATA:
1886 if (dctx->nbits < 8)
1887 goto finished;
1888 #ifdef ANALYSIS
1889 if (analyse_level > 0)
1890 printf("%lu: uncompressed %d [8]\n", dctx->bytesout,
1891 (int)(dctx->bits & 0xFF));
1892 #endif
1893 emit_char(dctx, dctx->bits & 0xFF);
1894 EATBITS(8);
1895 if (--dctx->uncomplen == 0) { /* end of uncompressed block */
1896 if (dctx->lastblock)
1897 dctx->state = END;
1898 else
1899 dctx->state = OUTSIDEBLK;
1900 }
1901 break;
1902 case END:
1903 /*
1904 * End of compressed data. We align to a byte boundary,
1905 * and then look for format-specific trailer data.
1906 */
1907 {
1908 int to_eat = dctx->nbits & 7;
1909 EATBITS(to_eat);
1910 }
1911 if (dctx->type == DEFLATE_TYPE_ZLIB)
1912 dctx->state = ADLER1;
1913 else if (dctx->type == DEFLATE_TYPE_GZIP)
1914 dctx->state = CRC1;
1915 else
1916 dctx->state = FINALSPIN;
1917 break;
1918 case ADLER1:
1919 if (dctx->nbits < 16)
1920 goto finished;
1921 cksum = (dctx->bits & 0xFF) << 8;
1922 EATBITS(8);
1923 cksum |= (dctx->bits & 0xFF);
1924 EATBITS(8);
1925 if (cksum != ((dctx->checksum >> 16) & 0xFFFF)) {
1926 error = DEFLATE_ERR_CHECKSUM;
1927 goto finished;
1928 }
1929 dctx->state = ADLER2;
1930 break;
1931 case ADLER2:
1932 if (dctx->nbits < 16)
1933 goto finished;
1934 cksum = (dctx->bits & 0xFF) << 8;
1935 EATBITS(8);
1936 cksum |= (dctx->bits & 0xFF);
1937 EATBITS(8);
1938 if (cksum != (dctx->checksum & 0xFFFF)) {
1939 error = DEFLATE_ERR_CHECKSUM;
1940 goto finished;
1941 }
1942 dctx->state = FINALSPIN;
1943 break;
1944 case CRC1:
1945 if (dctx->nbits < 16)
1946 goto finished;
1947 cksum = dctx->bits & 0xFFFF;
1948 EATBITS(16);
1949 if (cksum != (dctx->checksum & 0xFFFF)) {
1950 error = DEFLATE_ERR_CHECKSUM;
1951 goto finished;
1952 }
1953 dctx->state = CRC2;
1954 break;
1955 case CRC2:
1956 if (dctx->nbits < 16)
1957 goto finished;
1958 cksum = dctx->bits & 0xFFFF;
1959 EATBITS(16);
1960 if (cksum != ((dctx->checksum >> 16) & 0xFFFF)) {
1961 error = DEFLATE_ERR_CHECKSUM;
1962 goto finished;
1963 }
1964 dctx->state = ILEN1;
1965 break;
1966 case ILEN1:
1967 if (dctx->nbits < 16)
1968 goto finished;
1969 cksum = dctx->bits & 0xFFFF;
1970 EATBITS(16);
1971 if (cksum != (dctx->bytesout & 0xFFFF)) {
1972 error = DEFLATE_ERR_INLEN;
1973 goto finished;
1974 }
1975 dctx->state = ILEN2;
1976 break;
1977 case ILEN2:
1978 if (dctx->nbits < 16)
1979 goto finished;
1980 cksum = dctx->bits & 0xFFFF;
1981 EATBITS(16);
1982 if (cksum != ((dctx->bytesout >> 16) & 0xFFFF)) {
1983 error = DEFLATE_ERR_INLEN;
1984 goto finished;
1985 }
1986 dctx->state = FINALSPIN;
1987 break;
1988 case FINALSPIN:
1989 /* Just ignore any trailing garbage on the data stream. */
1990 /* (We could alternatively throw an error here, if we wanted
1991 * to detect and complain about trailing garbage.) */
1992 EATBITS(dctx->nbits);
1993 break;
1994 }
1995 }
1996
1997 finished:
1998 *outblock = dctx->outblk;
1999 *outlen = dctx->outlen;
2000 return error;
2001 }
2002
2003 #define A(code,str) str
2004 const char *const deflate_error_msg[DEFLATE_NUM_ERRORS] = {
2005 DEFLATE_ERRORLIST(A)
2006 };
2007 #undef A
2008
2009 #define A(code,str) #code
2010 const char *const deflate_error_sym[DEFLATE_NUM_ERRORS] = {
2011 DEFLATE_ERRORLIST(A)
2012 };
2013 #undef A
2014
2015 #if defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(_WIN64)
2016 #define WINDOWS_IO
2017 #endif
2018
2019 #if defined(WINDOWS_IO) && (defined(STANDALONE) || defined(TESTMODE))
2020 #include <fcntl.h>
2021 #include <io.h>
2022 #endif
2023
2024 #ifdef STANDALONE
2025
main(int argc,char ** argv)2026 int main(int argc, char **argv)
2027 {
2028 unsigned char buf[65536];
2029 void *outbuf;
2030 int ret, err, outlen;
2031 deflate_decompress_ctx *dhandle;
2032 deflate_compress_ctx *chandle;
2033 int type = DEFLATE_TYPE_ZLIB, opts = TRUE;
2034 int compress = FALSE, decompress = FALSE;
2035 int got_arg = FALSE;
2036 char *filename = NULL;
2037 FILE *fp;
2038
2039 while (--argc) {
2040 char *p = *++argv;
2041
2042 got_arg = TRUE;
2043
2044 if (p[0] == '-' && opts) {
2045 if (!strcmp(p, "-b"))
2046 type = DEFLATE_TYPE_BARE;
2047 else if (!strcmp(p, "-g"))
2048 type = DEFLATE_TYPE_GZIP;
2049 else if (!strcmp(p, "-c"))
2050 compress = TRUE;
2051 else if (!strcmp(p, "-d"))
2052 decompress = TRUE;
2053 else if (!strcmp(p, "-a"))
2054 analyse_level++, decompress = TRUE;
2055 else if (!strcmp(p, "--"))
2056 opts = FALSE; /* next thing is filename */
2057 else {
2058 fprintf(stderr, "unknown command line option '%s'\n", p);
2059 return 1;
2060 }
2061 } else if (!filename) {
2062 filename = p;
2063 } else {
2064 fprintf(stderr, "can only handle one filename\n");
2065 return 1;
2066 }
2067 }
2068
2069 if (!compress && !decompress) {
2070 fprintf(stderr, "usage: deflate [ -c | -d | -a ] [ -b | -g ]"
2071 " [filename]\n");
2072 return (got_arg ? 1 : 0);
2073 }
2074
2075 if (compress && decompress) {
2076 fprintf(stderr, "please do not specify both compression and"
2077 " decompression\n");
2078 return (got_arg ? 1 : 0);
2079 }
2080
2081 if (compress) {
2082 chandle = deflate_compress_new(type);
2083 dhandle = NULL;
2084 } else {
2085 dhandle = deflate_decompress_new(type);
2086 chandle = NULL;
2087 }
2088
2089 if (filename)
2090 fp = fopen(filename, "rb");
2091 else
2092 fp = stdin;
2093
2094 if (!fp) {
2095 assert(filename);
2096 fprintf(stderr, "unable to open '%s'\n", filename);
2097 return 1;
2098 }
2099
2100 #ifdef WINDOWS_IO
2101 if(_setmode(_fileno(stdout), _O_BINARY ) == -1)
2102 {
2103 fprintf(stderr, "Can't set stdout to binary mode\n");
2104 return 1;
2105 }
2106 #endif
2107
2108 do {
2109 ret = fread(buf, 1, sizeof(buf), fp);
2110 outbuf = NULL;
2111 if (dhandle) {
2112 if (ret > 0)
2113 err = deflate_decompress_data(dhandle, buf, ret,
2114 (void **)&outbuf, &outlen);
2115 else
2116 err = deflate_decompress_data(dhandle, NULL, 0,
2117 (void **)&outbuf, &outlen);
2118 } else {
2119 if (ret > 0)
2120 deflate_compress_data(chandle, buf, ret, DEFLATE_NO_FLUSH,
2121 (void **)&outbuf, &outlen);
2122 else
2123 deflate_compress_data(chandle, buf, ret, DEFLATE_END_OF_DATA,
2124 (void **)&outbuf, &outlen);
2125 err = 0;
2126 }
2127 if (outbuf) {
2128 if (!analyse_level && outlen)
2129 fwrite(outbuf, 1, outlen, stdout);
2130 sfree(outbuf);
2131 }
2132 if (err > 0) {
2133 fprintf(stderr, "decoding error: %s\n", deflate_error_msg[err]);
2134 return 1;
2135 }
2136 } while (ret > 0);
2137
2138 if (dhandle)
2139 deflate_decompress_free(dhandle);
2140 if (chandle)
2141 deflate_compress_free(chandle);
2142
2143 if (filename)
2144 fclose(fp);
2145
2146 return 0;
2147 }
2148
2149 #endif
2150
2151 #ifdef TESTMODE
2152
main(int argc,char ** argv)2153 int main(int argc, char **argv)
2154 {
2155 char *filename = NULL;
2156 FILE *fp;
2157 deflate_compress_ctx *chandle;
2158 deflate_decompress_ctx *dhandle;
2159 unsigned char buf[65536], *outbuf, *outbuf2;
2160 int ret, err, outlen, outlen2;
2161 int dlen = 0, clen = 0;
2162 int opts = TRUE;
2163
2164 while (--argc) {
2165 char *p = *++argv;
2166
2167 if (p[0] == '-' && opts) {
2168 if (!strcmp(p, "--"))
2169 opts = FALSE; /* next thing is filename */
2170 else {
2171 fprintf(stderr, "unknown command line option '%s'\n", p);
2172 return 1;
2173 }
2174 } else if (!filename) {
2175 filename = p;
2176 } else {
2177 fprintf(stderr, "can only handle one filename\n");
2178 return 1;
2179 }
2180 }
2181
2182 if (filename)
2183 fp = fopen(filename, "rb");
2184 else
2185 fp = stdin;
2186
2187 if (!fp) {
2188 assert(filename);
2189 fprintf(stderr, "unable to open '%s'\n", filename);
2190 return 1;
2191 }
2192
2193 chandle = deflate_compress_new(DEFLATE_TYPE_ZLIB);
2194 dhandle = deflate_decompress_new(DEFLATE_TYPE_ZLIB);
2195
2196 #ifdef WINDOWS_IO
2197 if(_setmode(_fileno(stdout), _O_BINARY ) == -1)
2198 {
2199 fprintf(stderr, "Can't set stdout to binary mode\n");
2200 return 1;
2201 }
2202 #endif
2203
2204 do {
2205 ret = fread(buf, 1, sizeof(buf), fp);
2206 if (ret <= 0) {
2207 deflate_compress_data(chandle, NULL, 0, DEFLATE_END_OF_DATA,
2208 (void **)&outbuf, &outlen);
2209 } else {
2210 dlen += ret;
2211 deflate_compress_data(chandle, buf, ret, DEFLATE_NO_FLUSH,
2212 (void **)&outbuf, &outlen);
2213 }
2214 if (outbuf) {
2215 clen += outlen;
2216 err = deflate_decompress_data(dhandle, outbuf, outlen,
2217 (void **)&outbuf2, &outlen2);
2218 sfree(outbuf);
2219 if (outbuf2) {
2220 if (outlen2)
2221 fwrite(outbuf2, 1, outlen2, stdout);
2222 sfree(outbuf2);
2223 }
2224 if (!err && ret <= 0) {
2225 /*
2226 * signal EOF
2227 */
2228 err = deflate_decompress_data(dhandle, NULL, 0,
2229 (void **)&outbuf2, &outlen2);
2230 assert(outbuf2 == NULL);
2231 }
2232 if (err) {
2233 fprintf(stderr, "decoding error: %s\n",
2234 deflate_error_msg[err]);
2235 return 1;
2236 }
2237 }
2238 } while (ret > 0);
2239
2240 fprintf(stderr, "%d plaintext -> %d compressed\n", dlen, clen);
2241
2242 return 0;
2243 }
2244
2245 #endif
2246