1 /*
2     TiMidity++ -- MIDI to WAVE converter and player
3     Copyright (C) 1999-2002 Masanao Izumo <mo@goice.co.jp>
4     Copyright (C) 1995 Tuukka Toivonen <tt@cgs.fi>
5 
6     This program is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10 
11     This program is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15 
16     You should have received a copy of the GNU General Public License
17     along with this program; if not, write to the Free Software
18     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19 */
20 
21 #ifdef HAVE_CONFIG_H
22 #include "config.h"
23 #endif /* HAVE_CONFIG_H */
24 /* deflate.c -- compress data using the deflation algorithm
25  * Copyright (C) 1992-1993 Jean-loup Gailly
26  * This is free software; you can redistribute it and/or modify it under the
27  * terms of the GNU General Public License, see the file COPYING.
28  */
29 
30 /*
31  *  PURPOSE
32  *
33  *	Identify new text as repetitions of old text within a fixed-
34  *	length sliding window trailing behind the new text.
35  *
36  *  DISCUSSION
37  *
38  *	The "deflation" process depends on being able to identify portions
39  *	of the input text which are identical to earlier input (within a
40  *	sliding window trailing behind the input currently being processed).
41  *
42  *	The most straightforward technique turns out to be the fastest for
43  *	most input files: try all possible matches and select the longest.
44  *	The key feature of this algorithm is that insertions into the string
45  *	dictionary are very simple and thus fast, and deletions are avoided
46  *	completely. Insertions are performed at each input character, whereas
47  *	string matches are performed only when the previous match ends. So it
48  *	is preferable to spend more time in matches to allow very fast string
49  *	insertions and avoid deletions. The matching algorithm for small
50  *	strings is inspired from that of Rabin & Karp. A brute force approach
51  *	is used to find longer strings when a small match has been found.
52  *	A similar algorithm is used in comic (by Jan-Mark Wams) and freeze
53  *	(by Leonid Broukhis).
54  *	   A previous version of this file used a more sophisticated algorithm
55  *	(by Fiala and Greene) which is guaranteed to run in linear amortized
56  *	time, but has a larger average cost, uses more memory and is patented.
57  *	However the F&G algorithm may be faster for some highly redundant
58  *	files if the parameter max_chain_length (described below) is too large.
59  *
60  *  ACKNOWLEDGEMENTS
61  *
62  *	The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
63  *	I found it in 'freeze' written by Leonid Broukhis.
64  *	Thanks to many info-zippers for bug reports and testing.
65  *
66  *  REFERENCES
67  *
68  *	APPNOTE.TXT documentation file in PKZIP 1.93a distribution.
69  *
70  *	A description of the Rabin and Karp algorithm is given in the book
71  *	   "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
72  *
73  *	Fiala,E.R., and Greene,D.H.
74  *	   Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595
75  *
76  *  INTERFACE
77  *
78  *	void lm_init (void)
79  *	    Initialize the "longest match" routines for a new file
80  *
81  *	ulg deflate (void)
82  *	    Processes a new input file and return its compressed length. Sets
83  *	    the compressed length, crc, deflate flags and internal file
84  *	    attributes.
85  */
86 
87 #include <stdio.h>
88 #include <stdlib.h>
89 #ifndef NO_STRING_H
90 #include <string.h>
91 #else
92 #include <strings.h>
93 #endif
94 #include <ctype.h>
95 
96 #define FULL_SEARCH
97 /* #define UNALIGNED_OK */
98 /* #define DEBUG */
99 #include "timidity.h"
100 #include "common.h"
101 #include "mblock.h"
102 #include "zip.h"
103 
104 #define local static
105 
106 #define MIN_MATCH  3
107 #define MAX_MATCH  258
108 /* The minimum and maximum match lengths */
109 
110 #define BITS 16
111 
112 /* Compile with MEDIUM_MEM to reduce the memory requirements or
113  * with SMALL_MEM to use as little memory as possible. Use BIG_MEM if the
114  * entire input file can be held in memory (not possible on 16 bit systems).
115  * Warning: defining these symbols affects HASH_BITS (see below) and thus
116  * affects the compression ratio. The compressed output
117  * is still correct, and might even be smaller in some cases.
118  */
119 #ifdef SMALL_MEM
120 #  define LIT_BUFSIZE  0x2000
121 #  define HASH_BITS  13	 /* Number of bits used to hash strings */
122 #else
123 #ifdef MEDIUM_MEM
124 #  define LIT_BUFSIZE  0x4000
125 #  define HASH_BITS  14
126 #else
127 #  define LIT_BUFSIZE  0x8000
128 #  define HASH_BITS  15
129 /* For portability to 16 bit machines, do not use values above 15. */
130 #endif
131 #endif
132 
133 #if LIT_BUFSIZE > INBUFSIZ
134     error cannot overlay l_buf and inbuf
135 #endif
136 #if (WSIZE<<1) > (1<<BITS)
137    error: cannot overlay window with tab_suffix and prev with tab_prefix0
138 #endif
139 #if HASH_BITS > BITS-1
140    error: cannot overlay head with tab_prefix1
141 #endif
142 
143 #define DIST_BUFSIZE  LIT_BUFSIZE
144 #define HASH_SIZE (unsigned)(1<<HASH_BITS)
145 #define HASH_MASK (HASH_SIZE-1)
146 #define WMASK	  (WSIZE-1)
147 /* HASH_SIZE and WSIZE must be powers of two */
148 
149 #define NIL 0		/* Tail of hash chains */
150 #define EQUAL 0		/* result of memcmp for equal strings */
151 
152 #define TOO_FAR 4096
153 /* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
154 
155 #define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
156 /* Minimum amount of lookahead, except at the end of the input file.
157  * See deflate.c for comments about the MIN_MATCH+1.
158  */
159 
160 #define MAX_DIST  (WSIZE-MIN_LOOKAHEAD)
161 /* In order to simplify the code, particularly on 16 bit machines, match
162  * distances are limited to MAX_DIST instead of WSIZE.
163  */
164 
165 #define SMALLEST 1
166 /* Index within the heap array of least frequent node in the Huffman tree */
167 #define MAX_BITS 15 /* All codes must not exceed MAX_BITS bits */
168 #define MAX_BL_BITS 7 /* Bit length codes must not exceed MAX_BL_BITS bits */
169 #define LENGTH_CODES 29
170 /* number of length codes, not counting the special END_BLOCK code */
171 #define LITERALS  256 /* number of literal bytes 0..255 */
172 #define END_BLOCK 256 /* end of block literal code */
173 #define L_CODES (LITERALS+1+LENGTH_CODES)
174 /* number of Literal or Length codes, including the END_BLOCK code */
175 #define D_CODES	  30 /* number of distance codes */
176 #define BL_CODES  19 /* number of codes used to transfer the bit lengths */
177 #define REP_3_6	  16
178 /* repeat previous bit length 3-6 times (2 bits of repeat count) */
179 #define REPZ_3_10 17
180 /* repeat a zero length 3-10 times  (3 bits of repeat count) */
181 #define REPZ_11_138 18
182 /* repeat a zero length 11-138 times  (7 bits of repeat count) */
183 #define HEAP_SIZE (2*L_CODES+1)		/* maximum heap size */
184 
185 #define H_SHIFT	 ((HASH_BITS+MIN_MATCH-1)/MIN_MATCH)
186 /* Number of bits by which ins_h and del_h must be shifted at each
187  * input step. It must be such that after MIN_MATCH steps, the oldest
188  * byte no longer takes part in the hash key, that is:
189  *   H_SHIFT * MIN_MATCH >= HASH_BITS
190  */
191 
192 /* Data structure describing a single value and its code string. */
193 typedef struct ct_data {
194     union {
195 	ush  freq;	 /* frequency count */
196 	ush  code;	 /* bit string */
197     } fc;
198     union {
199 	ush  dad;	 /* father node in Huffman tree */
200 	ush  len;	 /* length of bit string */
201     } dl;
202 } ct_data;
203 #define Freq fc.freq
204 #define Code fc.code
205 #define Dad  dl.dad
206 #define Len  dl.len
207 
208 typedef struct tree_desc {
209     ct_data near *dyn_tree;	 /* the dynamic tree */
210     ct_data near *static_tree;	 /* corresponding static tree or NULL */
211     int	    near *extra_bits;	 /* extra bits for each code or NULL */
212     int	    extra_base;		 /* base index for extra_bits */
213     int	    elems;		 /* max number of elements in the tree */
214     int	    max_length;		 /* max bit length for the codes */
215     int	    max_code;		 /* largest code with non zero frequency */
216 } tree_desc;
217 
218 local int near extra_lbits[LENGTH_CODES] /* extra bits for each length code */
219    = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
220 local int near extra_dbits[D_CODES] /* extra bits for each distance code */
221    = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
222 local int near extra_blbits[BL_CODES]/* extra bits for each bit length code */
223    = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7};
224 
225 local uch near bl_order[BL_CODES]
226    = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
227 /* The lengths of the bit length codes are sent in order of decreasing
228  * probability, to avoid transmitting the lengths for unused bit length codes.
229  */
230 
231 /* Values for max_lazy_match, good_match and max_chain_length, depending on
232  * the desired pack level (0..9). The values given below have been tuned to
233  * exclude worst case performance for pathological files. Better values may be
234  * found for specific files.
235  */
236 local struct
237 {
238    ush good_length; /* reduce lazy search above this match length */
239    ush max_lazy;    /* do not perform lazy search above this match length */
240    ush nice_length; /* quit search above this match length */
241    ush max_chain;
242 } configuration_table[10] = {
243 /*	good lazy nice chain */
244 /* 0 */ {0,    0,  0,	 0},  /* store only */
245 /* 1 */ {4,    4,  8,	 4},  /* maximum speed, no lazy matches */
246 /* 2 */ {4,    5, 16,	 8},
247 /* 3 */ {4,    6, 32,	32},
248 
249 /* 4 */ {4,    4, 16,	16},  /* lazy matches */
250 /* 5 */ {8,   16, 32,	32},
251 /* 6 */ {8,   16, 128, 128},
252 /* 7 */ {8,   32, 128, 256},
253 /* 8 */ {32, 128, 258, 1024},
254 /* 9 */ {32, 258, 258, 4096}}; /* maximum compression */
255 
256 struct deflate_buff_queue
257 {
258     struct deflate_buff_queue *next;
259     unsigned len;
260     uch *ptr;
261 };
262 local struct deflate_buff_queue *free_queue = NULL;
263 
reuse_queue(struct deflate_buff_queue * p)264 local void reuse_queue(struct deflate_buff_queue *p)
265 {
266     p->next = free_queue;
267     free_queue = p;
268 }
new_queue(void)269 local struct deflate_buff_queue *new_queue(void)
270 {
271     struct deflate_buff_queue *p;
272 
273     if(free_queue)
274     {
275 	p = free_queue;
276 	free_queue = free_queue->next;
277     }
278     else
279 	p = (struct deflate_buff_queue *)
280 	    safe_malloc(sizeof(struct deflate_buff_queue) + OUTBUFSIZ);
281     p->next = NULL;
282     p->len = 0;
283     p->ptr = (uch *)p + sizeof(struct deflate_buff_queue);
284 
285     return p;
286 }
287 
288 struct _DeflateHandler
289 {
290     void *user_val;
291     long (* read_func)(char *buf, long size, void *user_val);
292 
293     int initflag;
294     struct deflate_buff_queue *qhead;
295     struct deflate_buff_queue *qtail;
296     uch outbuf[OUTBUFSIZ];
297     unsigned outcnt, outoff;
298     int complete;
299 
300 #define window_size ((ulg)2*WSIZE)
301     uch window[window_size];
302     ush d_buf[DIST_BUFSIZE];		 /* buffer for distances */
303     uch l_buf[INBUFSIZ + INBUF_EXTRA]; /* buffer for literals or lengths */
304     ush prev[1L<<BITS];
305     unsigned short bi_buf;
306     int bi_valid;
307 
308     long block_start;
309 /* window position at the beginning of the current output block. Gets
310  * negative when the window is moved backwards.
311  */
312 
313     unsigned ins_h;		/* hash index of string to be inserted */
314 
315     unsigned hash_head;		/* head of hash chain */
316     unsigned prev_match;	/* previous match */
317     int match_available;	/* set if previous match exists */
318     unsigned match_length;	/* length of best match */
319     unsigned int near prev_length;
320 /* Length of the best match at previous step. Matches not greater than this
321  * are discarded. This is used in the lazy match evaluation.
322  */
323 
324     unsigned near strstart;	/* start of string to insert */
325     unsigned near match_start;	/* start of matching string */
326     int		  eofile;	/* flag set at end of input file */
327     unsigned	  lookahead;	/* number of valid bytes ahead in window */
328 
329     unsigned near max_chain_length;
330 /* To speed up deflation, hash chains are never searched beyond this length.
331  * A higher limit improves compression ratio but degrades the speed.
332  */
333 
334     unsigned int max_lazy_match;
335 /* Attempt to find a better match only when the current match is strictly
336  * smaller than this value. This mechanism is used only for compression
337  * levels >= 4.
338  */
339 
340     int compr_level;		/* compression level (1..9) */
341 
342     unsigned near good_match;
343 /* Use a faster search when the previous match is longer than this */
344 
345 #ifndef FULL_SEARCH
346     int near nice_match; /* Stop searching when current match exceeds this */
347 #endif
348 
349     ct_data near dyn_ltree[HEAP_SIZE];	 /* literal and length tree */
350     ct_data near dyn_dtree[2*D_CODES+1]; /* distance tree */
351     ct_data near static_ltree[L_CODES+2];
352 /* The static literal tree. Since the bit lengths are imposed, there is no
353  * need for the L_CODES extra codes used during heap construction. However
354  * The codes 286 and 287 are needed to build a canonical tree (see ct_init
355  * below).
356  */
357 
358     ct_data near static_dtree[D_CODES];
359 /* The static distance tree. (Actually a trivial tree since all codes use
360  * 5 bits.)
361  */
362 
363     ct_data near bl_tree[2*BL_CODES+1];/* Huffman tree for the bit lengths */
364 
365     tree_desc near l_desc;
366     tree_desc near d_desc;
367     tree_desc near bl_desc;
368 
369     ush near bl_count[MAX_BITS+1];
370 /* number of codes at each bit length for an optimal tree */
371 
372     int near heap[2*L_CODES+1];	/* heap used to build the Huffman trees */
373     int heap_len;		/* number of elements in the heap */
374     int heap_max;		/* element of largest frequency */
375 /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
376  * The same heap array is used to build all trees.
377  */
378 
379     uch near depth[2*L_CODES+1];
380 /* Depth of each subtree used as tie breaker for trees of equal frequency */
381 
382     uch length_code[MAX_MATCH-MIN_MATCH+1];
383 /* length code for each normalized match length (0 == MIN_MATCH) */
384 
385     uch dist_code[512];
386 /* distance codes. The first 256 values correspond to the distances
387  * 3 .. 258, the last 256 values correspond to the top 8 bits of
388  * the 15 bit distances.
389  */
390 
391     int near base_length[LENGTH_CODES];
392 /* First normalized length for each code (0 = MIN_MATCH) */
393 
394     int near base_dist[D_CODES];
395 /* First normalized distance for each code (0 = distance of 1) */
396 
397     uch near flag_buf[(LIT_BUFSIZE/8)];
398 /* flag_buf is a bit array distinguishing literals from lengths in
399  * l_buf, thus indicating the presence or absence of a distance.
400  */
401 
402     unsigned last_lit;	/* running index in l_buf */
403     unsigned last_dist;	/* running index in d_buf */
404     unsigned last_flags;/* running index in flag_buf */
405     uch flags;		/* current flags not yet saved in flag_buf */
406     uch flag_bit;	/* current bit used in flags */
407     ulg opt_len;	/* bit length of current block with optimal trees */
408     ulg static_len;	/* bit length of current block with static trees */
409 };
410 
411 local void lm_init(DeflateHandler);
412 local int  longest_match(DeflateHandler,unsigned cur_match);
413 local void fill_window(DeflateHandler);
414 local void deflate_fast(DeflateHandler);
415 local void deflate_better(DeflateHandler);
416 local long qcopy(DeflateHandler encoder, char *buff, long buff_size);
417 local void ct_init(DeflateHandler);
418 local void init_block(DeflateHandler);
419 local void pqdownheap(DeflateHandler,ct_data near *, int);
420 local void gen_bitlen(DeflateHandler,tree_desc near *);
421 local void gen_codes(DeflateHandler,ct_data near *, int);
422 local void build_tree(DeflateHandler,tree_desc near *);
423 local void scan_tree(DeflateHandler,ct_data near *, int);
424 local void send_tree(DeflateHandler,ct_data near *, int);
425 local int  build_bl_tree(DeflateHandler);
426 local void send_all_trees(DeflateHandler,int,int,int);
427 local void flush_block(DeflateHandler,int);
428 local int  ct_tally(DeflateHandler,int,int);
429 local void compress_block(DeflateHandler,ct_data near *, ct_data near *);
430 local void send_bits(DeflateHandler,int,int);
431 local unsigned bi_reverse(unsigned, int);
432 local void bi_windup(DeflateHandler);
433 local void qoutbuf(DeflateHandler);
434 
435 #ifdef DEBUG
error(char * m)436 local void error(char *m)
437 {
438     fprintf(stderr, "%s\n", m);
439     exit(1);
440 }
441 #define Assert(cond,msg) {if(!(cond)) error(msg);}
442 local int verbose = 0;		/* verbose */
443 local void check_match (DeflateHandler,unsigned, unsigned, int);
444 #else
445 #define Assert(cond,msg)
446 #endif
447 
448 #ifndef MAX
449 #define MAX(a,b) (a >= b ? a : b)
450 #endif /* MAX */
451 
452 #define head(i) ((encoder->prev+WSIZE)[i])
453 
454 /* put_byte is used for the compressed output, put_ubyte for the
455  * uncompressed output. However unlzw() uses window for its
456  * suffix table instead of its output buffer, so it does not use put_ubyte
457  * (to be cleaned up).
458  */
459 #define put_byte(c) {encoder->outbuf[encoder->outoff + encoder->outcnt++] = \
460   (uch)(c); if(encoder->outoff + encoder->outcnt == OUTBUFSIZ) \
461   qoutbuf(encoder);}
462 
463 /* Output a 16 bit value, lsb first */
464 #define put_short(w) \
465 { if(encoder->outoff + encoder->outcnt < OUTBUFSIZ - 2) { \
466     encoder->outbuf[encoder->outoff+encoder->outcnt++] = (uch) ((w) & 0xff); \
467     encoder->outbuf[encoder->outoff+encoder->outcnt++] = (uch) ((ush)(w) >> 8); \
468   } else { put_byte((uch)((w) & 0xff));	 put_byte((uch)((ush)(w) >> 8)); }}
469 /* ===========================================================================
470  * Update a hash value with the given input byte
471  * IN  assertion: all calls to to UPDATE_HASH are made with consecutive
472  *    input characters, so that a running hash key can be computed from the
473  *    previous key instead of complete recalculation each time.
474  */
475 #define UPDATE_HASH(h,c) (h = (((h)<<H_SHIFT) ^ (c)) & HASH_MASK)
476 
477 /* ===========================================================================
478  * Insert string s in the dictionary and set match_head to the previous head
479  * of the hash chain (the most recent string with same hash key). Return
480  * the previous length of the hash chain.
481  * IN  assertion: all calls to to INSERT_STRING are made with consecutive
482  *    input characters and the first MIN_MATCH bytes of s are valid
483  *    (except for the last MIN_MATCH-1 bytes of the input file).
484  */
485 #define INSERT_STRING(s, match_head) \
486    (UPDATE_HASH(encoder->ins_h, encoder->window[(s) + MIN_MATCH-1]), \
487     encoder->prev[(s) & WMASK] =match_head = head(encoder->ins_h), \
488     head(encoder->ins_h) = (s))
489 
490 #define SEND_CODE(c, tree) send_bits(encoder, (tree)[c].Code, (tree)[c].Len)
491 /* Send a code of the given tree. c and tree must not have side effects */
492 
493 #define D_CODE(dist) ((dist)<256 ? encoder->dist_code[dist] : encoder->dist_code[256+((dist)>>7)])
494 /* Mapping from a distance to a distance code. dist is the distance - 1 and
495  * must not have side effects. dist_code[256] and dist_code[257] are never
496  * used.
497  */
498 
499 /* ===========================================================================
500  * Compares to subtrees, using the tree depth as tie breaker when
501  * the subtrees have equal frequency. This minimizes the worst case length.
502  */
503 #define SMALLER(tree, n, m) \
504    ((tree)[n].Freq < (tree)[m].Freq || \
505    ((tree)[n].Freq == (tree)[m].Freq && encoder->depth[n] <= encoder->depth[m]))
506 
507 
508 /* ===========================================================================
509  * Initialize the "longest match" routines for a new file
510  */
lm_init(DeflateHandler encoder)511 local void lm_init(DeflateHandler encoder)
512 {
513     register unsigned j;
514 
515     /* Initialize the hash table. */
516 #if defined(MAXSEG_64K) && HASH_BITS == 15
517     for(j = 0; j < HASH_SIZE; j++) head(j) = NIL;
518 #else
519     memset((char*)&head(0), 0, HASH_SIZE*sizeof(head(0)));
520 #endif
521     /* prev will be initialized on the fly */
522 
523     /* Set the default configuration parameters:
524      */
525     encoder->max_lazy_match   = configuration_table[encoder->compr_level].max_lazy;
526     encoder->good_match	      = configuration_table[encoder->compr_level].good_length;
527 #ifndef FULL_SEARCH
528     encoder->nice_match	      = configuration_table[encoder->compr_level].nice_length;
529 #endif
530     encoder->max_chain_length = configuration_table[encoder->compr_level].max_chain;
531 
532     encoder->strstart = 0;
533     encoder->block_start = 0L;
534 
535     encoder->lookahead =
536 	encoder->read_func((char*)encoder->window,
537 			   (long)(sizeof(int)<=2 ? (unsigned)WSIZE : 2*WSIZE),
538 			   encoder->user_val);
539 
540     if(encoder->lookahead == 0 || encoder->lookahead == (unsigned)EOF) {
541 	encoder->eofile = 1;
542 	encoder->lookahead = 0;
543 	return;
544     }
545     encoder->eofile = 0;
546     /* Make sure that we always have enough lookahead. This is important
547      * if input comes from a device such as a tty.
548      */
549     while(encoder->lookahead < MIN_LOOKAHEAD && !encoder->eofile)
550 	fill_window(encoder);
551 
552     encoder->ins_h = 0;
553     for(j=0; j<MIN_MATCH-1; j++)
554 	UPDATE_HASH(encoder->ins_h, encoder->window[j]);
555     /* If lookahead < MIN_MATCH, ins_h is garbage, but this is
556      * not important since only literal bytes will be emitted.
557      */
558 }
559 
560 /* ===========================================================================
561  * Set match_start to the longest match starting at the given string and
562  * return its length. Matches shorter or equal to prev_length are discarded,
563  * in which case the result is equal to prev_length and match_start is
564  * garbage.
565  * IN assertions: cur_match is the head of the hash chain for the current
566  *   string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
567  */
longest_match(DeflateHandler encoder,unsigned cur_match)568 local int longest_match(DeflateHandler encoder, unsigned cur_match)
569 {
570     unsigned chain_length = encoder->max_chain_length;
571     /* max hash chain length */
572 
573     register uch *scan = encoder->window + encoder->strstart;
574     /* current string */
575 
576     register uch *match;			/* matched string */
577     register int len;				/* length of current match */
578     int best_len = encoder->prev_length;	 /* best match length so far */
579 
580     unsigned limit = (encoder->strstart > (unsigned)MAX_DIST ?
581 		      encoder->strstart - (unsigned)MAX_DIST : NIL);
582     /* Stop when cur_match becomes <= limit. To simplify the code,
583      * we prevent matches with the string of window index 0.
584      */
585 
586 #if HASH_BITS < 8 || MAX_MATCH != 258
587    error: Code too clever
588 #endif
589 
590 #ifdef UNALIGNED_OK
591     /* Compare two bytes at a time. Note: this is not always beneficial.
592      * Try with and without -DUNALIGNED_OK to check.
593      */
594     register uch *strend = encoder->window + encoder->strstart + MAX_MATCH - 1;
595     register ush scan_start = *(ush*)scan;
596     register ush scan_end   = *(ush*)(scan+best_len-1);
597 #else
598     register uch *strend    = encoder->window + encoder->strstart + MAX_MATCH;
599     register uch scan_end1  = scan[best_len-1];
600     register uch scan_end   = scan[best_len];
601 #endif
602 
603     /* Do not waste too much time if we already have a good match: */
604     if(encoder->prev_length >= encoder->good_match) {
605 	chain_length >>= 2;
606     }
607     Assert(encoder->strstart <= window_size-MIN_LOOKAHEAD, "insufficient lookahead");
608 
609     do {
610 	Assert(cur_match < encoder->strstart, "no future");
611 	match = encoder->window + cur_match;
612 
613 	/* Skip to next match if the match length cannot increase
614 	 * or if the match length is less than 2:
615 	 */
616 #if (defined(UNALIGNED_OK) && MAX_MATCH == 258)
617 	/* This code assumes sizeof(unsigned short) == 2. Do not use
618 	 * UNALIGNED_OK if your compiler uses a different size.
619 	 */
620 	if(*(ush*)(match+best_len-1) != scan_end ||
621 	   *(ush*)match != scan_start) continue;
622 
623 	/* It is not necessary to compare scan[2] and match[2] since they are
624 	 * always equal when the other bytes match, given that the hash keys
625 	 * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at
626 	 * strstart+3, +5, ... up to strstart+257. We check for insufficient
627 	 * lookahead only every 4th comparison; the 128th check will be made
628 	 * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is
629 	 * necessary to put more guard bytes at the end of the window, or
630 	 * to check more often for insufficient lookahead.
631 	 */
632 	scan++, match++;
633 	do {
634 	} while(*(ush*)(scan+=2) == *(ush*)(match+=2) &&
635 		*(ush*)(scan+=2) == *(ush*)(match+=2) &&
636 		*(ush*)(scan+=2) == *(ush*)(match+=2) &&
637 		*(ush*)(scan+=2) == *(ush*)(match+=2) &&
638 		scan < strend);
639 	/* The funny "do {}" generates better code on most compilers */
640 
641 	/* Here, scan <= window+strstart+257 */
642 	Assert(scan <= encoder->window+(unsigned)(window_size-1), "wild scan");
643 	if(*scan == *match) scan++;
644 
645 	len = (MAX_MATCH - 1) - (int)(strend-scan);
646 	scan = strend - (MAX_MATCH-1);
647 
648 #else /* UNALIGNED_OK */
649 
650 	if(match[best_len]   != scan_end  ||
651 	   match[best_len-1] != scan_end1 ||
652 	   *match	      != *scan	   ||
653 	   *++match	      != scan[1])      continue;
654 
655 	/* The check at best_len-1 can be removed because it will be made
656 	 * again later. (This heuristic is not always a win.)
657 	 * It is not necessary to compare scan[2] and match[2] since they
658 	 * are always equal when the other bytes match, given that
659 	 * the hash keys are equal and that HASH_BITS >= 8.
660 	 */
661 	scan += 2, match++;
662 
663 	/* We check for insufficient lookahead only every 8th comparison;
664 	 * the 256th check will be made at strstart+258.
665 	 */
666 	do {
667 	} while(*++scan == *++match && *++scan == *++match &&
668 		*++scan == *++match && *++scan == *++match &&
669 		*++scan == *++match && *++scan == *++match &&
670 		*++scan == *++match && *++scan == *++match &&
671 		scan < strend);
672 
673 	len = MAX_MATCH - (int)(strend - scan);
674 	scan = strend - MAX_MATCH;
675 
676 #endif /* UNALIGNED_OK */
677 
678 	if(len > best_len) {
679 	    encoder->match_start = cur_match;
680 	    best_len = len;
681 #ifdef FULL_SEARCH
682 	    if(len >= MAX_MATCH) break;
683 #else
684 	    if(len >= encoder->nice_match) break;
685 #endif /* FULL_SEARCH */
686 
687 
688 #ifdef UNALIGNED_OK
689 	    scan_end = *(ush*)(scan+best_len-1);
690 #else
691 	    scan_end1  = scan[best_len-1];
692 	    scan_end   = scan[best_len];
693 #endif
694 	}
695     } while((cur_match = encoder->prev[cur_match & WMASK]) > limit
696 	    && --chain_length != 0);
697 
698     return best_len;
699 }
700 
701 #ifdef DEBUG
702 /* ===========================================================================
703  * Check that the match at match_start is indeed a match.
704  */
check_match(DeflateHandler encoder,unsigned start,unsigned match,int length)705 local void check_match(DeflateHandler encoder,
706 		       unsigned start, unsigned match, int length)
707 {
708     /* check that the match is indeed a match */
709     if(memcmp((char*)encoder->window + match,
710 	      (char*)encoder->window + start, length) != EQUAL) {
711 	fprintf(stderr,
712 	    " start %d, match %d, length %d\n",
713 	    start, match, length);
714 	error("invalid match");
715     }
716     if(verbose > 1) {
717 	fprintf(stderr,"\\[%d,%d]", start-match, length);
718 	do { putc(encoder->window[start++], stderr); } while(--length != 0);
719     }
720 }
721 #else
722 #  define check_match(encoder, start, match, length)
723 #endif
724 
725 /* ===========================================================================
726  * Fill the window when the lookahead becomes insufficient.
727  * Updates strstart and lookahead, and sets eofile if end of input file.
728  * IN assertion: lookahead < MIN_LOOKAHEAD && strstart + lookahead > 0
729  * OUT assertions: at least one byte has been read, or eofile is set;
730  *    file reads are performed for at least two bytes (required for the
731  *    translate_eol option).
732  */
fill_window(DeflateHandler encoder)733 local void fill_window(DeflateHandler encoder)
734 {
735     register unsigned n, m;
736     unsigned more = (unsigned)(window_size - (ulg)encoder->lookahead - (ulg)encoder->strstart);
737     /* Amount of free space at the end of the window. */
738 
739     /* If the window is almost full and there is insufficient lookahead,
740      * move the upper half to the lower one to make room in the upper half.
741      */
742     if(more == (unsigned)EOF) {
743 	/* Very unlikely, but possible on 16 bit machine if strstart == 0
744 	 * and lookahead == 1 (input done one byte at time)
745 	 */
746 	more--;
747     } else if(encoder->strstart >= WSIZE+MAX_DIST) {
748 	/* By the IN assertion, the window is not empty so we can't confuse
749 	 * more == 0 with more == 64K on a 16 bit machine.
750 	 */
751 	Assert(window_size == (ulg)2*WSIZE, "no sliding with BIG_MEM");
752 
753 	memcpy((char*)encoder->window, (char*)encoder->window+WSIZE, (unsigned)WSIZE);
754 	encoder->match_start -= WSIZE;
755 	encoder->strstart    -= WSIZE; /* we now have strstart >= MAX_DIST: */
756 	encoder->block_start -= (long) WSIZE;
757 
758 	for(n = 0; n < HASH_SIZE; n++) {
759 	    m = head(n);
760 	    head(n) = (ush)(m >= WSIZE ? m-WSIZE : NIL);
761 	}
762 	for(n = 0; n < WSIZE; n++) {
763 	    m = encoder->prev[n];
764 	    encoder->prev[n] = (ush)(m >= WSIZE ? m-WSIZE : NIL);
765 	    /* If n is not on any hash chain, prev[n] is garbage but
766 	     * its value will never be used.
767 	     */
768 	}
769 	more += WSIZE;
770     }
771     /* At this point, more >= 2 */
772     if(!encoder->eofile) {
773 	n = encoder->read_func((char*)encoder->window+encoder->strstart
774 			       + encoder->lookahead,
775 			       (long)more,
776 			       encoder->user_val);
777 	if(n == 0 || n == (unsigned)EOF) {
778 	    encoder->eofile = 1;
779 	} else {
780 	    encoder->lookahead += n;
781 	}
782     }
783 }
784 
785 /* ===========================================================================
786  * Processes a new input file and return its compressed length. This
787  * function does not perform lazy evaluationof matches and inserts
788  * new strings in the dictionary only for unmatched strings or for short
789  * matches. It is used only for the fast compression options.
790  */
deflate_fast(DeflateHandler encoder)791 local void deflate_fast(DeflateHandler encoder)
792 {
793 /*
794     encoder->prev_length = MIN_MATCH-1;
795     encoder->match_length = 0;
796 */
797     while(encoder->lookahead != 0 && encoder->qhead == NULL) {
798 	int flush; /* set if current block must be flushed */
799 
800 	/* Insert the string window[strstart .. strstart+2] in the
801 	 * dictionary, and set hash_head to the head of the hash chain:
802 	 */
803 	INSERT_STRING(encoder->strstart, encoder->hash_head);
804 
805 	/* Find the longest match, discarding those <= prev_length.
806 	 * At this point we have always match_length < MIN_MATCH
807 	 */
808 	if(encoder->hash_head != NIL &&
809 	   encoder->strstart - encoder->hash_head <= MAX_DIST) {
810 	    /* To simplify the code, we prevent matches with the string
811 	     * of window index 0 (in particular we have to avoid a match
812 	     * of the string with itself at the start of the input file).
813 	     */
814 	    encoder->match_length = longest_match(encoder, encoder->hash_head);
815 	    /* longest_match() sets match_start */
816 	    if(encoder->match_length > encoder->lookahead)
817 		encoder->match_length = encoder->lookahead;
818 	}
819 	if(encoder->match_length >= MIN_MATCH) {
820 	    check_match(encoder, encoder->strstart,
821 			encoder->match_start, encoder->match_length);
822 
823 	    flush = ct_tally(encoder, encoder->strstart - encoder->match_start,
824 			     encoder->match_length - MIN_MATCH);
825 
826 	    encoder->lookahead -= encoder->match_length;
827 
828 	    /* Insert new strings in the hash table only if the match length
829 	     * is not too large. This saves time but degrades compression.
830 	     */
831 	    if(encoder->match_length <= encoder->max_lazy_match) {
832 		encoder->match_length--; /* string at strstart already in hash table */
833 		do {
834 		    encoder->strstart++;
835 		    INSERT_STRING(encoder->strstart, encoder->hash_head);
836 		    /* strstart never exceeds WSIZE-MAX_MATCH, so there are
837 		     * always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH
838 		     * these bytes are garbage, but it does not matter since
839 		     * the next lookahead bytes will be emitted as literals.
840 		     */
841 		} while(--encoder->match_length != 0);
842 		encoder->strstart++;
843 	    } else {
844 		encoder->strstart += encoder->match_length;
845 		encoder->match_length = 0;
846 		encoder->ins_h = encoder->window[encoder->strstart];
847 		UPDATE_HASH(encoder->ins_h,
848 			    encoder->window[encoder->strstart + 1]);
849 #if MIN_MATCH != 3
850 		Call UPDATE_HASH() MIN_MATCH-3 more times
851 #endif
852 	    }
853 	} else {
854 	    /* No match, output a literal byte */
855 	    Tracevv((stderr,"%c",encoder->window[encoder->strstart]));
856 	    flush = ct_tally (encoder, 0, encoder->window[encoder->strstart]);
857 	    encoder->lookahead--;
858 	    encoder->strstart++;
859 	}
860 	if(flush)
861 	{
862 	    flush_block(encoder, 0);
863 	    encoder->block_start = (long)encoder->strstart;
864 	}
865 
866 	/* Make sure that we always have enough lookahead, except
867 	 * at the end of the input file. We need MAX_MATCH bytes
868 	 * for the next match, plus MIN_MATCH bytes to insert the
869 	 * string following the next match.
870 	 */
871 	while(encoder->lookahead < MIN_LOOKAHEAD && !encoder->eofile)
872 	    fill_window(encoder);
873     }
874 }
875 
deflate_better(DeflateHandler encoder)876 local void deflate_better(DeflateHandler encoder) {
877     /* Process the input block. */
878     while(encoder->lookahead != 0 && encoder->qhead == NULL) {
879 	/* Insert the string window[strstart .. strstart+2] in the
880 	 * dictionary, and set hash_head to the head of the hash chain:
881 	 */
882 	INSERT_STRING(encoder->strstart, encoder->hash_head);
883 
884 	/* Find the longest match, discarding those <= prev_length.
885 	 */
886 	encoder->prev_length = encoder->match_length;
887 	encoder->prev_match = encoder->match_start;
888 	encoder->match_length = MIN_MATCH-1;
889 
890 	if(encoder->hash_head != NIL &&
891 	   encoder->prev_length < encoder->max_lazy_match &&
892 	   encoder->strstart - encoder->hash_head <= MAX_DIST) {
893 	    /* To simplify the code, we prevent matches with the string
894 	     * of window index 0 (in particular we have to avoid a match
895 	     * of the string with itself at the start of the input file).
896 	     */
897 	    encoder->match_length = longest_match(encoder, encoder->hash_head);
898 	    /* longest_match() sets match_start */
899 	    if(encoder->match_length > encoder->lookahead)
900 		encoder->match_length = encoder->lookahead;
901 
902 	    /* Ignore a length 3 match if it is too distant: */
903 	    if(encoder->match_length == MIN_MATCH &&
904 		encoder->strstart - encoder->match_start > TOO_FAR){
905 		/* If prev_match is also MIN_MATCH, match_start is garbage
906 		 * but we will ignore the current match anyway.
907 		 */
908 		encoder->match_length--;
909 	    }
910 	}
911 	/* If there was a match at the previous step and the current
912 	 * match is not better, output the previous match:
913 	 */
914 	if(encoder->prev_length >= MIN_MATCH &&
915 	   encoder->match_length <= encoder->prev_length) {
916 	    int flush; /* set if current block must be flushed */
917 
918 	    check_match(encoder, encoder->strstart-1,
919 			encoder->prev_match, encoder->prev_length);
920 
921 	    flush = ct_tally(encoder, encoder->strstart-1-encoder->prev_match,
922 			     encoder->prev_length - MIN_MATCH);
923 
924 	    /* Insert in hash table all strings up to the end of the match.
925 	     * strstart-1 and strstart are already inserted.
926 	     */
927 	    encoder->lookahead -= encoder->prev_length-1;
928 	    encoder->prev_length -= 2;
929 	    do {
930 		encoder->strstart++;
931 		INSERT_STRING(encoder->strstart, encoder->hash_head);
932 		/* strstart never exceeds WSIZE-MAX_MATCH, so there are
933 		 * always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH
934 		 * these bytes are garbage, but it does not matter since the
935 		 * next lookahead bytes will always be emitted as literals.
936 		 */
937 	    } while(--encoder->prev_length != 0);
938 	    encoder->match_available = 0;
939 	    encoder->match_length = MIN_MATCH-1;
940 	    encoder->strstart++;
941 	    if(flush) {
942 		flush_block(encoder, 0);
943 		encoder->block_start = (long)encoder->strstart;
944 	    }
945 
946 	} else if(encoder->match_available) {
947 	    /* If there was no match at the previous position, output a
948 	     * single literal. If there was a match but the current match
949 	     * is longer, truncate the previous match to a single literal.
950 	     */
951 	    Tracevv((stderr,"%c",encoder->window[encoder->strstart-1]));
952 	    if(ct_tally (encoder, 0, encoder->window[encoder->strstart-1])) {
953 		flush_block(encoder, 0);
954 		encoder->block_start = (long)encoder->strstart;
955 	    }
956 	    encoder->strstart++;
957 	    encoder->lookahead--;
958 	} else {
959 	    /* There is no previous match to compare with, wait for
960 	     * the next step to decide.
961 	     */
962 	    encoder->match_available = 1;
963 	    encoder->strstart++;
964 	    encoder->lookahead--;
965 	}
966 
967 	/* Make sure that we always have enough lookahead, except
968 	 * at the end of the input file. We need MAX_MATCH bytes
969 	 * for the next match, plus MIN_MATCH bytes to insert the
970 	 * string following the next match.
971 	 */
972 	while(encoder->lookahead < MIN_LOOKAHEAD && !encoder->eofile)
973 	    fill_window(encoder);
974     }
975 }
976 
977 /*ARGSUSED*/
default_read_func(char * buf,long size,void * v)978 static long default_read_func(char *buf, long size, void *v)
979 {
980     return (long)fread(buf, 1, size, stdin);
981 }
982 
open_deflate_handler(long (* read_func)(char * buf,long size,void * user_val),void * user_val,int level)983 DeflateHandler open_deflate_handler(
984     long (* read_func)(char *buf, long size, void *user_val),
985     void *user_val,
986     int level)
987 {
988     DeflateHandler encoder;
989 
990     if(level < 1 || level > 9)
991 	return NULL; /* error("bad compression level"); */
992 
993     encoder = (DeflateHandler)safe_malloc(sizeof(struct _DeflateHandler));
994     if(encoder == NULL)
995 	return NULL;
996     memset(encoder, 0, sizeof(struct _DeflateHandler));
997     encoder->compr_level = level;
998     if(read_func == NULL)
999 	encoder->read_func = default_read_func;
1000     else
1001 	encoder->read_func = read_func;
1002     encoder->user_val = user_val;
1003 
1004     return encoder;
1005 }
1006 
close_deflate_handler(DeflateHandler encoder)1007 void close_deflate_handler(DeflateHandler encoder)
1008 {
1009     free(encoder);
1010 }
1011 
init_deflate(DeflateHandler encoder)1012 local void init_deflate(DeflateHandler encoder)
1013 {
1014     if(encoder->eofile)
1015 	return;
1016     encoder->bi_buf = 0;
1017     encoder->bi_valid = 0;
1018     ct_init(encoder);
1019     lm_init(encoder);
1020 
1021     encoder->qhead = NULL;
1022     encoder->outcnt = 0;
1023 
1024     if(encoder->compr_level <= 3)
1025     {
1026 	encoder->prev_length = MIN_MATCH - 1;
1027 	encoder->match_length = 0;
1028     }
1029     else
1030     {
1031 	encoder->match_length = MIN_MATCH - 1;
1032 	encoder->match_available = 0;
1033     }
1034 
1035     encoder->complete = 0;
1036 }
1037 
1038 /* ===========================================================================
1039  * Same as above, but achieves better compression. We use a lazy
1040  * evaluation for matches: a match is finally adopted only if there is
1041  * no better match at the next window position.
1042  */
zip_deflate(DeflateHandler encoder,char * buff,long buff_size)1043 long zip_deflate(DeflateHandler encoder, char *buff, long buff_size)
1044 {
1045     long n;
1046 
1047     if(!encoder->initflag)
1048     {
1049 	init_deflate(encoder);
1050 	encoder->initflag = 1;
1051 	if(encoder->lookahead == 0) { /* empty */
1052 	    encoder->complete = 1;
1053 	    return 0;
1054 	}
1055     }
1056 
1057     if((n = qcopy(encoder, buff, buff_size)) == buff_size)
1058 	return buff_size;
1059 
1060     if(encoder->complete)
1061 	return n;
1062 
1063     if(encoder->compr_level <= 3) /* optimized for speed */
1064 	deflate_fast(encoder);
1065     else
1066 	deflate_better(encoder);
1067     if(encoder->lookahead == 0)
1068     {
1069 	if(encoder->match_available)
1070 	    ct_tally(encoder, 0, encoder->window[encoder->strstart - 1]);
1071 	flush_block(encoder, 1);
1072 	encoder->complete = 1;
1073     }
1074     return n + qcopy(encoder, buff + n, buff_size - n);
1075 }
1076 
qcopy(DeflateHandler encoder,char * buff,long buff_size)1077 local long qcopy(DeflateHandler encoder, char *buff, long buff_size)
1078 {
1079   struct deflate_buff_queue *q;
1080     long n, i;
1081 
1082     n = 0;
1083     q = encoder->qhead;
1084     while(q != NULL && n < buff_size)
1085     {
1086 	i = buff_size - n;
1087 	if(i > q->len)
1088 	    i = q->len;
1089 	memcpy(buff + n, q->ptr, i);
1090 	q->ptr += i;
1091 	q->len -= i;
1092 	n += i;
1093 
1094 	if(q->len == 0)
1095 	{
1096 	    struct deflate_buff_queue *p;
1097 	    p = q;
1098 	    q = q->next;
1099 	    reuse_queue(p);
1100 	}
1101     }
1102     encoder->qhead = q;
1103     if(n == buff_size)
1104       return n;
1105 
1106     if(encoder->outoff < encoder->outcnt)
1107     {
1108 	i = buff_size - n;
1109 	if(i > encoder->outcnt - encoder->outoff)
1110 	    i = encoder->outcnt - encoder->outoff;
1111 	memcpy(buff + n, encoder->outbuf + encoder->outoff, i);
1112 	encoder->outoff += i;
1113 	n += i;
1114 	if(encoder->outcnt == encoder->outoff)
1115 	    encoder->outcnt = encoder->outoff = 0;
1116     }
1117     return n;
1118 }
1119 
1120 
1121 /* ===========================================================================
1122  * Allocate the match buffer, initialize the various tables and save the
1123  * location of the internal file attribute (ascii/binary) and method
1124  * (DEFLATE/STORE).
1125  */
ct_init(DeflateHandler encoder)1126 local void ct_init(DeflateHandler encoder)
1127 {
1128     int n;	/* iterates over tree elements */
1129     int bits;	/* bit counter */
1130     int length;	/* length value */
1131     int code;	/* code value */
1132     int dist;	/* distance index */
1133 
1134     if(encoder->static_dtree[0].Len != 0) return; /* ct_init already called */
1135 
1136     encoder->l_desc.dyn_tree	= encoder->dyn_ltree;
1137     encoder->l_desc.static_tree = encoder->static_ltree;
1138     encoder->l_desc.extra_bits	= extra_lbits;
1139     encoder->l_desc.extra_base	= LITERALS + 1;
1140     encoder->l_desc.elems	= L_CODES;
1141     encoder->l_desc.max_length	= MAX_BITS;
1142     encoder->l_desc.max_code	= 0;
1143 
1144     encoder->d_desc.dyn_tree	= encoder->dyn_dtree;
1145     encoder->d_desc.static_tree = encoder->static_dtree;
1146     encoder->d_desc.extra_bits	= extra_dbits;
1147     encoder->d_desc.extra_base	= 0;
1148     encoder->d_desc.elems	= D_CODES;
1149     encoder->d_desc.max_length	= MAX_BITS;
1150     encoder->d_desc.max_code	= 0;
1151 
1152     encoder->bl_desc.dyn_tree	 = encoder->bl_tree;
1153     encoder->bl_desc.static_tree = NULL;
1154     encoder->bl_desc.extra_bits	 = extra_blbits;
1155     encoder->bl_desc.extra_base	 = 0;
1156     encoder->bl_desc.elems	 = BL_CODES;
1157     encoder->bl_desc.max_length	 = MAX_BL_BITS;
1158     encoder->bl_desc.max_code	 = 0;
1159 
1160     /* Initialize the mapping length (0..255) -> length code (0..28) */
1161     length = 0;
1162     for(code = 0; code < LENGTH_CODES-1; code++) {
1163 	encoder->base_length[code] = length;
1164 	for(n = 0; n < (1<<extra_lbits[code]); n++) {
1165 	    encoder->length_code[length++] = (uch)code;
1166 	}
1167     }
1168     Assert (length == 256, "ct_init: length != 256");
1169     /* Note that the length 255 (match length 258) can be represented
1170      * in two different ways: code 284 + 5 bits or code 285, so we
1171      * overwrite length_code[255] to use the best encoding:
1172      */
1173     encoder->length_code[length-1] = (uch)code;
1174 
1175     /* Initialize the mapping dist (0..32K) -> dist code (0..29) */
1176     dist = 0;
1177     for(code = 0 ; code < 16; code++) {
1178 	encoder->base_dist[code] = dist;
1179 	for(n = 0; n < (1<<extra_dbits[code]); n++) {
1180 	    encoder->dist_code[dist++] = (uch)code;
1181 	}
1182     }
1183     Assert (dist == 256, "ct_init: dist != 256");
1184     dist >>= 7; /* from now on, all distances are divided by 128 */
1185     for( ; code < D_CODES; code++) {
1186 	encoder->base_dist[code] = dist << 7;
1187 	for(n = 0; n < (1<<(extra_dbits[code]-7)); n++) {
1188 	    encoder->dist_code[256 + dist++] = (uch)code;
1189 	}
1190     }
1191     Assert (dist == 256, "ct_init: 256+dist != 512");
1192 
1193     /* Construct the codes of the static literal tree */
1194     for(bits = 0; bits <= MAX_BITS; bits++) encoder->bl_count[bits] = 0;
1195     n = 0;
1196     while(n <= 143) encoder->static_ltree[n++].Len = 8, encoder->bl_count[8]++;
1197     while(n <= 255) encoder->static_ltree[n++].Len = 9, encoder->bl_count[9]++;
1198     while(n <= 279) encoder->static_ltree[n++].Len = 7, encoder->bl_count[7]++;
1199     while(n <= 287) encoder->static_ltree[n++].Len = 8, encoder->bl_count[8]++;
1200     /* Codes 286 and 287 do not exist, but we must include them in the
1201      * tree construction to get a canonical Huffman tree (longest code
1202      * all ones)
1203      */
1204     gen_codes(encoder, (ct_data near *)encoder->static_ltree, L_CODES+1);
1205 
1206     /* The static distance tree is trivial: */
1207     for(n = 0; n < D_CODES; n++) {
1208 	encoder->static_dtree[n].Len = 5;
1209 	encoder->static_dtree[n].Code = bi_reverse(n, 5);
1210     }
1211 
1212     /* Initialize the first block of the first file: */
1213     init_block(encoder);
1214 }
1215 
1216 /* ===========================================================================
1217  * Initialize a new block.
1218  */
init_block(DeflateHandler encoder)1219 local void init_block(DeflateHandler encoder)
1220 {
1221     int n; /* iterates over tree elements */
1222 
1223     /* Initialize the trees. */
1224     for(n = 0; n < L_CODES;  n++) encoder->dyn_ltree[n].Freq = 0;
1225     for(n = 0; n < D_CODES;  n++) encoder->dyn_dtree[n].Freq = 0;
1226     for(n = 0; n < BL_CODES; n++) encoder->bl_tree[n].Freq = 0;
1227 
1228     encoder->dyn_ltree[END_BLOCK].Freq = 1;
1229     encoder->opt_len = encoder->static_len = 0L;
1230     encoder->last_lit = encoder->last_dist = encoder->last_flags = 0;
1231     encoder->flags = 0;
1232     encoder->flag_bit = 1;
1233 }
1234 
1235 /* ===========================================================================
1236  * Restore the heap property by moving down the tree starting at node k,
1237  * exchanging a node with the smallest of its two sons if necessary, stopping
1238  * when the heap property is re-established (each father smaller than its
1239  * two sons).
1240  */
pqdownheap(DeflateHandler encoder,ct_data near * tree,int k)1241 local void pqdownheap(
1242     DeflateHandler encoder,
1243     ct_data near *tree,	/* the tree to restore */
1244     int k)		/* node to move down */
1245 {
1246     int v = encoder->heap[k];
1247     int j = k << 1;  /* left son of k */
1248     while(j <= encoder->heap_len) {
1249 	/* Set j to the smallest of the two sons: */
1250 	if(j < encoder->heap_len &&
1251 	   SMALLER(tree, encoder->heap[j+1], encoder->heap[j]))
1252 	    j++;
1253 
1254 	/* Exit if v is smaller than both sons */
1255 	if(SMALLER(tree, v, encoder->heap[j]))
1256 	    break;
1257 
1258 	/* Exchange v with the smallest son */
1259 	encoder->heap[k] = encoder->heap[j];
1260 	k = j;
1261 
1262 	/* And continue down the tree, setting j to the left son of k */
1263 	j <<= 1;
1264     }
1265     encoder->heap[k] = v;
1266 }
1267 
1268 /* ===========================================================================
1269  * Compute the optimal bit lengths for a tree and update the total bit length
1270  * for the current block.
1271  * IN assertion: the fields freq and dad are set, heap[heap_max] and
1272  *    above are the tree nodes sorted by increasing frequency.
1273  * OUT assertions: the field len is set to the optimal bit length, the
1274  *     array bl_count contains the frequencies for each bit length.
1275  *     The length opt_len is updated; static_len is also updated if stree is
1276  *     not null.
1277  */
gen_bitlen(DeflateHandler encoder,tree_desc near * desc)1278 local void gen_bitlen(
1279     DeflateHandler encoder,
1280     tree_desc near *desc) /* the tree descriptor */
1281 {
1282     ct_data near *tree	= desc->dyn_tree;
1283     int near *extra	= desc->extra_bits;
1284     int base		= desc->extra_base;
1285     int max_code	= desc->max_code;
1286     int max_length	= desc->max_length;
1287     ct_data near *stree = desc->static_tree;
1288     int h;		/* heap index */
1289     int n, m;		/* iterate over the tree elements */
1290     int bits;		/* bit length */
1291     int xbits;		/* extra bits */
1292     ush f;		/* frequency */
1293     int overflow = 0;	/* number of elements with bit length too large */
1294 
1295     for(bits = 0; bits <= MAX_BITS; bits++)
1296 	encoder->bl_count[bits] = 0;
1297 
1298     /* In a first pass, compute the optimal bit lengths (which may
1299      * overflow in the case of the bit length tree).
1300      */
1301     tree[encoder->heap[encoder->heap_max]].Len = 0; /* root of the heap */
1302 
1303     for(h = encoder->heap_max+1; h < HEAP_SIZE; h++) {
1304 	n = encoder->heap[h];
1305 	bits = tree[tree[n].Dad].Len + 1;
1306 	if(bits > max_length)
1307 	    bits = max_length, overflow++;
1308 	tree[n].Len = (ush)bits;
1309 	/* We overwrite tree[n].Dad which is no longer needed */
1310 
1311 	if(n > max_code)
1312 	    continue; /* not a leaf node */
1313 
1314 	encoder->bl_count[bits]++;
1315 	xbits = 0;
1316 	if(n >= base)
1317 	    xbits = extra[n-base];
1318 	f = tree[n].Freq;
1319 	encoder->opt_len += (ulg)f * (bits + xbits);
1320 	if(stree)
1321 	    encoder->static_len += (ulg)f * (stree[n].Len + xbits);
1322     }
1323     if(overflow == 0) return;
1324 
1325     Trace((stderr,"\nbit length overflow\n"));
1326     /* This happens for example on obj2 and pic of the Calgary corpus */
1327 
1328     /* Find the first bit length which could increase: */
1329     do {
1330 	bits = max_length-1;
1331 	while(encoder->bl_count[bits] == 0) bits--;
1332 	encoder->bl_count[bits]--;	/* move one leaf down the tree */
1333 	encoder->bl_count[bits+1] += 2; /* move one overflow item as its brother */
1334 	encoder->bl_count[max_length]--;
1335 	/* The brother of the overflow item also moves one step up,
1336 	 * but this does not affect bl_count[max_length]
1337 	 */
1338 	overflow -= 2;
1339     } while(overflow > 0);
1340 
1341     /* Now recompute all bit lengths, scanning in increasing frequency.
1342      * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
1343      * lengths instead of fixing only the wrong ones. This idea is taken
1344      * from 'ar' written by Haruhiko Okumura.)
1345      */
1346     for(bits = max_length; bits != 0; bits--) {
1347 	n = encoder->bl_count[bits];
1348 	while(n != 0) {
1349 	    m = encoder->heap[--h];
1350 	    if(m > max_code) continue;
1351 	    if(tree[m].Len != (unsigned) bits) {
1352 		Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits));
1353 		encoder->opt_len +=
1354 		    ((long)bits-(long)tree[m].Len)*(long)tree[m].Freq;
1355 		tree[m].Len = (ush)bits;
1356 	    }
1357 	    n--;
1358 	}
1359     }
1360 }
1361 
1362 /* ===========================================================================
1363  * Generate the codes for a given tree and bit counts (which need not be
1364  * optimal).
1365  * IN assertion: the array bl_count contains the bit length statistics for
1366  * the given tree and the field len is set for all tree elements.
1367  * OUT assertion: the field code is set for all tree elements of non
1368  *     zero code length.
1369  */
gen_codes(DeflateHandler encoder,ct_data near * tree,int max_code)1370 local void gen_codes(
1371     DeflateHandler encoder,
1372     ct_data near *tree,	/* the tree to decorate */
1373     int max_code)	/* largest code with non zero frequency */
1374 {
1375     ush next_code[MAX_BITS+1];	/* next code value for each bit length */
1376     ush code = 0;		/* running code value */
1377     int bits;			/* bit index */
1378     int n;			/* code index */
1379 
1380     /* The distribution counts are first used to generate the code values
1381      * without bit reversal.
1382      */
1383     for(bits = 1; bits <= MAX_BITS; bits++) {
1384 	next_code[bits] = code = (code + encoder->bl_count[bits-1]) << 1;
1385     }
1386     /* Check that the bit counts in bl_count are consistent. The last code
1387      * must be all ones.
1388      */
1389     Assert (code + encoder->bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1,
1390 	    "inconsistent bit counts");
1391     Tracev((stderr,"\ngen_codes: max_code %d ", max_code));
1392 
1393     for(n = 0;	 n <= max_code; n++) {
1394 	int len = tree[n].Len;
1395 	if(len == 0)
1396 	    continue;
1397 	/* Now reverse the bits */
1398 	tree[n].Code = bi_reverse(next_code[len]++, len);
1399 
1400 	Tracec(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
1401 	     n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1));
1402     }
1403 }
1404 
1405 /* ===========================================================================
1406  * Construct one Huffman tree and assigns the code bit strings and lengths.
1407  * Update the total bit length for the current block.
1408  * IN assertion: the field freq is set for all tree elements.
1409  * OUT assertions: the fields len and code are set to the optimal bit length
1410  *     and corresponding code. The length opt_len is updated; static_len is
1411  *     also updated if stree is not null. The field max_code is set.
1412  */
build_tree(DeflateHandler encoder,tree_desc near * desc)1413 local void build_tree(
1414     DeflateHandler encoder,
1415     tree_desc near *desc) /* the tree descriptor */
1416 {
1417     ct_data near *tree	= desc->dyn_tree;
1418     ct_data near *stree	= desc->static_tree;
1419     int elems		= desc->elems;
1420     int n, m;		/* iterate over heap elements */
1421     int max_code = -1;	/* largest code with non zero frequency */
1422     int node = elems;	/* next internal node of the tree */
1423 
1424     /* Construct the initial heap, with least frequent element in
1425      * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
1426      * heap[0] is not used.
1427      */
1428     encoder->heap_len = 0;
1429     encoder->heap_max = HEAP_SIZE;
1430 
1431     for(n = 0; n < elems; n++) {
1432 	if(tree[n].Freq != 0) {
1433 	    encoder->heap[++encoder->heap_len] = max_code = n;
1434 	    encoder->depth[n] = 0;
1435 	} else {
1436 	    tree[n].Len = 0;
1437 	}
1438     }
1439 
1440     /* The pkzip format requires that at least one distance code exists,
1441      * and that at least one bit should be sent even if there is only one
1442      * possible code. So to avoid special checks later on we force at least
1443      * two codes of non zero frequency.
1444      */
1445     while(encoder->heap_len < 2) {
1446 	int new = encoder->heap[++encoder->heap_len] =
1447 	    (max_code < 2 ? ++max_code : 0);
1448 	tree[new].Freq = 1;
1449 	encoder->depth[new] = 0;
1450 	encoder->opt_len--;
1451 	if(stree)
1452 	    encoder->static_len -= stree[new].Len;
1453 	/* new is 0 or 1 so it does not have extra bits */
1454     }
1455     desc->max_code = max_code;
1456 
1457     /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
1458      * establish sub-heaps of increasing lengths:
1459      */
1460     for(n = encoder->heap_len/2; n >= 1; n--)
1461 	pqdownheap(encoder, tree, n);
1462 
1463     /* Construct the Huffman tree by repeatedly combining the least two
1464      * frequent nodes.
1465      */
1466     do {
1467 	n = encoder->heap[SMALLEST];
1468 	encoder->heap[SMALLEST] = encoder->heap[encoder->heap_len--];
1469 	pqdownheap(encoder, tree, SMALLEST);
1470 
1471 	m = encoder->heap[SMALLEST];  /* m = node of next least frequency */
1472 
1473 	/* keep the nodes sorted by frequency */
1474 	encoder->heap[--encoder->heap_max] = n;
1475 	encoder->heap[--encoder->heap_max] = m;
1476 
1477 	/* Create a new node father of n and m */
1478 	tree[node].Freq = tree[n].Freq + tree[m].Freq;
1479 	encoder->depth[node] =
1480 	    (uch)(MAX(encoder->depth[n], encoder->depth[m]) + 1);
1481 	tree[n].Dad = tree[m].Dad = (ush)node;
1482 
1483 	/* and insert the new node in the heap */
1484 	encoder->heap[SMALLEST] = node++;
1485 	pqdownheap(encoder, tree, SMALLEST);
1486 
1487     } while(encoder->heap_len >= 2);
1488 
1489     encoder->heap[--encoder->heap_max] = encoder->heap[SMALLEST];
1490 
1491     /* At this point, the fields freq and dad are set. We can now
1492      * generate the bit lengths.
1493      */
1494     gen_bitlen(encoder, (tree_desc near *)desc);
1495 
1496     /* The field len is now set, we can generate the bit codes */
1497     gen_codes (encoder, (ct_data near *)tree, max_code);
1498 }
1499 
1500 /* ===========================================================================
1501  * Scan a literal or distance tree to determine the frequencies of the codes
1502  * in the bit length tree. Updates opt_len to take into account the repeat
1503  * counts. (The contribution of the bit length codes will be added later
1504  * during the construction of bl_tree.)
1505  */
scan_tree(DeflateHandler encoder,ct_data near * tree,int max_code)1506 local void scan_tree(
1507     DeflateHandler encoder,
1508     ct_data near *tree,	/* the tree to be scanned */
1509     int max_code)	/* and its largest code of non zero frequency */
1510 {
1511     int n;			/* iterates over all tree elements */
1512     int prevlen = -1;		/* last emitted length */
1513     int curlen;			/* length of current code */
1514     int nextlen = tree[0].Len;	/* length of next code */
1515     int count = 0;		/* repeat count of the current code */
1516     int max_count = 7;		/* max repeat count */
1517     int min_count = 4;		/* min repeat count */
1518 
1519     if(nextlen == 0)
1520 	max_count = 138, min_count = 3;
1521     tree[max_code+1].Len = (ush)0xffff; /* guard */
1522 
1523     for(n = 0; n <= max_code; n++) {
1524 	curlen = nextlen; nextlen = tree[n+1].Len;
1525 	if(++count < max_count && curlen == nextlen) {
1526 	    continue;
1527 	} else if(count < min_count) {
1528 	    encoder->bl_tree[curlen].Freq += count;
1529 	} else if(curlen != 0) {
1530 	    if(curlen != prevlen)
1531 		encoder->bl_tree[curlen].Freq++;
1532 	    encoder->bl_tree[REP_3_6].Freq++;
1533 	} else if(count <= 10) {
1534 	    encoder->bl_tree[REPZ_3_10].Freq++;
1535 	} else {
1536 	    encoder->bl_tree[REPZ_11_138].Freq++;
1537 	}
1538 	count = 0; prevlen = curlen;
1539 	if(nextlen == 0) {
1540 	    max_count = 138, min_count = 3;
1541 	} else if(curlen == nextlen) {
1542 	    max_count = 6, min_count = 3;
1543 	} else {
1544 	    max_count = 7, min_count = 4;
1545 	}
1546     }
1547 }
1548 
1549 /* ===========================================================================
1550  * Send a literal or distance tree in compressed form, using the codes in
1551  * bl_tree.
1552  */
send_tree(DeflateHandler encoder,ct_data near * tree,int max_code)1553 local void send_tree(
1554     DeflateHandler encoder,
1555     ct_data near *tree,	/* the tree to be scanned */
1556     int max_code)	/* and its largest code of non zero frequency */
1557 {
1558     int n;			/* iterates over all tree elements */
1559     int prevlen = -1;		/* last emitted length */
1560     int curlen;			/* length of current code */
1561     int nextlen = tree[0].Len;	/* length of next code */
1562     int count = 0;		/* repeat count of the current code */
1563     int max_count = 7;		/* max repeat count */
1564     int min_count = 4;		/* min repeat count */
1565 
1566     /* tree[max_code+1].Len = -1; */  /* guard already set */
1567     if(nextlen == 0) max_count = 138, min_count = 3;
1568 
1569     for(n = 0; n <= max_code; n++) {
1570 	curlen = nextlen; nextlen = tree[n+1].Len;
1571 	if(++count < max_count && curlen == nextlen) {
1572 	    continue;
1573 	} else if(count < min_count) {
1574 	    do { SEND_CODE(curlen, encoder->bl_tree); } while(--count != 0);
1575 
1576 	} else if(curlen != 0) {
1577 	    if(curlen != prevlen) {
1578 		SEND_CODE(curlen, encoder->bl_tree);
1579 		count--;
1580 	    }
1581 	    Assert(count >= 3 && count <= 6, " 3_6?");
1582 	    SEND_CODE(REP_3_6, encoder->bl_tree);
1583 	    send_bits(encoder, count-3, 2);
1584 
1585 	} else if(count <= 10) {
1586 	    SEND_CODE(REPZ_3_10, encoder->bl_tree);
1587 	    send_bits(encoder, count-3, 3);
1588 
1589 	} else {
1590 	    SEND_CODE(REPZ_11_138, encoder->bl_tree);
1591 	    send_bits(encoder, count-11, 7);
1592 	}
1593 	count = 0; prevlen = curlen;
1594 	if(nextlen == 0) {
1595 	    max_count = 138, min_count = 3;
1596 	} else if(curlen == nextlen) {
1597 	    max_count = 6, min_count = 3;
1598 	} else {
1599 	    max_count = 7, min_count = 4;
1600 	}
1601     }
1602 }
1603 
1604 /* ===========================================================================
1605  * Construct the Huffman tree for the bit lengths and return the index in
1606  * bl_order of the last bit length code to send.
1607  */
build_bl_tree(DeflateHandler encoder)1608 local int build_bl_tree(DeflateHandler encoder)
1609 {
1610     int max_blindex;  /* index of last bit length code of non zero freq */
1611 
1612     /* Determine the bit length frequencies for literal and distance trees */
1613     scan_tree(encoder,
1614 	      (ct_data near *)encoder->dyn_ltree, encoder->l_desc.max_code);
1615     scan_tree(encoder,
1616 	      (ct_data near *)encoder->dyn_dtree, encoder->d_desc.max_code);
1617 
1618     /* Build the bit length tree: */
1619     build_tree(encoder, (tree_desc near *)(&encoder->bl_desc));
1620     /* opt_len now includes the length of the tree representations, except
1621      * the lengths of the bit lengths codes and the 5+5+4 bits for the counts.
1622      */
1623 
1624     /* Determine the number of bit length codes to send. The pkzip format
1625      * requires that at least 4 bit length codes be sent. (appnote.txt says
1626      * 3 but the actual value used is 4.)
1627      */
1628     for(max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) {
1629 	if(encoder->bl_tree[bl_order[max_blindex]].Len != 0) break;
1630     }
1631     /* Update opt_len to include the bit length tree and counts */
1632     encoder->opt_len += 3*(max_blindex+1) + 5+5+4;
1633     Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld",
1634 	    encoder->opt_len, encoder->static_len));
1635 
1636     return max_blindex;
1637 }
1638 
1639 /* ===========================================================================
1640  * Send the header for a block using dynamic Huffman trees: the counts, the
1641  * lengths of the bit length codes, the literal tree and the distance tree.
1642  * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
1643  */
send_all_trees(DeflateHandler encoder,int lcodes,int dcodes,int blcodes)1644 local void send_all_trees(
1645     DeflateHandler encoder,
1646     int lcodes, int dcodes, int blcodes) /* number of codes for each tree */
1647 {
1648     int rank; /* index in bl_order */
1649 
1650     Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes");
1651     Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES,
1652 	    "too many codes");
1653     Tracev((stderr, "\nbl counts: "));
1654     send_bits(encoder, lcodes-257, 5); /* not +255 as stated in appnote.txt */
1655     send_bits(encoder, dcodes-1,   5);
1656     send_bits(encoder, blcodes-4,  4); /* not -3 as stated in appnote.txt */
1657     for(rank = 0; rank < blcodes; rank++) {
1658 	Tracev((stderr, "\nbl code %2d ", bl_order[rank]));
1659 	send_bits(encoder, encoder->bl_tree[bl_order[rank]].Len, 3);
1660     }
1661 
1662     /* send the literal tree */
1663     send_tree(encoder, (ct_data near *)encoder->dyn_ltree,lcodes-1);
1664 
1665     /* send the distance tree */
1666     send_tree(encoder, (ct_data near *)encoder->dyn_dtree,dcodes-1);
1667 }
1668 
1669 /* ===========================================================================
1670  * Determine the best encoding for the current block: dynamic trees, static
1671  * trees or store, and output the encoded block to the zip file.
1672  */
flush_block(DeflateHandler encoder,int eof)1673 local void flush_block(
1674     DeflateHandler encoder,
1675     int eof) /* true if this is the last block for a file */
1676 {
1677     ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
1678     int max_blindex;	/* index of last bit length code of non zero freq */
1679     ulg stored_len;	/* length of input block */
1680 
1681     stored_len = (ulg)(encoder->strstart - encoder->block_start);
1682     encoder->flag_buf[encoder->last_flags] = encoder->flags; /* Save the flags for the last 8 items */
1683 
1684     /* Construct the literal and distance trees */
1685     build_tree(encoder, (tree_desc near *)(&encoder->l_desc));
1686     Tracev((stderr, "\nlit data: dyn %ld, stat %ld",
1687 	    encoder->opt_len, encoder->static_len));
1688 
1689     build_tree(encoder, (tree_desc near *)(&encoder->d_desc));
1690     Tracev((stderr, "\ndist data: dyn %ld, stat %ld",
1691 	    encoder->opt_len, encoder->static_len));
1692     /* At this point, opt_len and static_len are the total bit lengths of
1693      * the compressed block data, excluding the tree representations.
1694      */
1695 
1696     /* Build the bit length tree for the above two trees, and get the index
1697      * in bl_order of the last bit length code to send.
1698      */
1699     max_blindex = build_bl_tree(encoder);
1700 
1701     /* Determine the best encoding. Compute first the block length in bytes */
1702     opt_lenb	= (encoder->opt_len   +3+7)>>3;
1703     static_lenb = (encoder->static_len+3+7)>>3;
1704 
1705     Trace((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u dist %u ",
1706 	   opt_lenb, encoder->opt_len,
1707 	   static_lenb, encoder->static_len, stored_len,
1708 	   encoder->last_lit, encoder->last_dist));
1709 
1710     if(static_lenb <= opt_lenb)
1711 	opt_lenb = static_lenb;
1712     if(stored_len + 4 <= opt_lenb /* 4: two words for the lengths */
1713        && encoder->block_start >= 0L) {
1714 	unsigned int i;
1715 	uch *p;
1716 
1717 	/* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
1718 	 * Otherwise we can't have processed more than WSIZE input bytes since
1719 	 * the last block flush, because compression would have been
1720 	 * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
1721 	 * transform a block into a stored block.
1722 	 */
1723 	send_bits(encoder, (STORED_BLOCK<<1)+eof, 3);  /* send block type */
1724 	bi_windup(encoder);		 /* align on byte boundary */
1725 	put_short((ush)stored_len);
1726 	put_short((ush)~stored_len);
1727 
1728 	/* copy block */
1729 	p = &encoder->window[(unsigned)encoder->block_start];
1730 	for(i = 0; i < stored_len; i++)
1731 	    put_byte(p[i]);
1732     } else if(static_lenb == opt_lenb) {
1733 	send_bits(encoder, (STATIC_TREES<<1)+eof, 3);
1734 	compress_block(encoder,
1735 		       (ct_data near *)encoder->static_ltree,
1736 		       (ct_data near *)encoder->static_dtree);
1737     } else {
1738 	send_bits(encoder, (DYN_TREES<<1)+eof, 3);
1739 	send_all_trees(encoder,
1740 		       encoder->l_desc.max_code+1,
1741 		       encoder->d_desc.max_code+1,
1742 		       max_blindex+1);
1743 	compress_block(encoder,
1744 		       (ct_data near *)encoder->dyn_ltree,
1745 		       (ct_data near *)encoder->dyn_dtree);
1746     }
1747 
1748     init_block(encoder);
1749 
1750     if(eof)
1751 	bi_windup(encoder);
1752 }
1753 
1754 /* ===========================================================================
1755  * Save the match info and tally the frequency counts. Return true if
1756  * the current block must be flushed.
1757  */
ct_tally(DeflateHandler encoder,int dist,int lc)1758 local int ct_tally(
1759     DeflateHandler encoder,
1760     int dist,	/* distance of matched string */
1761     int lc)	/* match length-MIN_MATCH or unmatched char (if dist==0) */
1762 {
1763     encoder->l_buf[encoder->last_lit++] = (uch)lc;
1764     if(dist == 0) {
1765 	/* lc is the unmatched char */
1766 	encoder->dyn_ltree[lc].Freq++;
1767     } else {
1768 	/* Here, lc is the match length - MIN_MATCH */
1769 	dist--;		    /* dist = match distance - 1 */
1770 	Assert((ush)dist < (ush)MAX_DIST &&
1771 	       (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) &&
1772 	       (ush)D_CODE(dist) < (ush)D_CODES,  "ct_tally: bad match");
1773 
1774 	encoder->dyn_ltree[encoder->length_code[lc]+LITERALS+1].Freq++;
1775 	encoder->dyn_dtree[D_CODE(dist)].Freq++;
1776 
1777 	encoder->d_buf[encoder->last_dist++] = (ush)dist;
1778 	encoder->flags |= encoder->flag_bit;
1779     }
1780     encoder->flag_bit <<= 1;
1781 
1782     /* Output the flags if they fill a byte: */
1783     if((encoder->last_lit & 7) == 0) {
1784 	encoder->flag_buf[encoder->last_flags++] = encoder->flags;
1785 	encoder->flags = 0;
1786 	encoder->flag_bit = 1;
1787     }
1788     /* Try to guess if it is profitable to stop the current block here */
1789     if(encoder->compr_level > 2 && (encoder->last_lit & 0xfff) == 0) {
1790 	/* Compute an upper bound for the compressed length */
1791 	ulg out_length = (ulg)encoder->last_lit*8L;
1792 	ulg in_length = (ulg)encoder->strstart - encoder->block_start;
1793 	int dcode;
1794 
1795 	for(dcode = 0; dcode < D_CODES; dcode++) {
1796 	    out_length +=
1797 		(ulg)encoder->dyn_dtree[dcode].Freq *
1798 		    (5L + extra_dbits[dcode]);
1799 	}
1800 	out_length >>= 3;
1801 	Trace((stderr,"\nlast_lit %u, last_dist %u, in %ld, out ~%ld(%ld%%) ",
1802 	       encoder->last_lit, encoder->last_dist, in_length, out_length,
1803 	       100L - out_length*100L/in_length));
1804 	if(encoder->last_dist < encoder->last_lit/2 &&
1805 	    out_length < in_length/2)
1806 	    return 1;
1807     }
1808     return (encoder->last_lit == LIT_BUFSIZE-1 ||
1809 	    encoder->last_dist == DIST_BUFSIZE);
1810     /* We avoid equality with LIT_BUFSIZE because of wraparound at 64K
1811      * on 16 bit machines and because stored blocks are restricted to
1812      * 64K-1 bytes.
1813      */
1814 }
1815 
1816 /* ===========================================================================
1817  * Send the block data compressed using the given Huffman trees
1818  */
compress_block(DeflateHandler encoder,ct_data near * ltree,ct_data near * dtree)1819 local void compress_block(
1820     DeflateHandler encoder,
1821     ct_data near *ltree, /* literal tree */
1822     ct_data near *dtree) /* distance tree */
1823 {
1824     unsigned dist;	/* distance of matched string */
1825     int lc;		/* match length or unmatched char (if dist == 0) */
1826     unsigned lx = 0;	/* running index in l_buf */
1827     unsigned dx = 0;	/* running index in d_buf */
1828     unsigned fx = 0;	/* running index in flag_buf */
1829     uch flag = 0;	/* current flags */
1830     unsigned code;	/* the code to send */
1831     int extra;		/* number of extra bits to send */
1832 
1833     if(encoder->last_lit != 0) do {
1834 	if((lx & 7) == 0)
1835 	    flag = encoder->flag_buf[fx++];
1836 	lc = encoder->l_buf[lx++];
1837 	if((flag & 1) == 0) {
1838 	    SEND_CODE(lc, ltree); /* send a literal byte */
1839 	    Tracecv(isgraph(lc), (stderr," '%c' ", lc));
1840 	} else {
1841 	    /* Here, lc is the match length - MIN_MATCH */
1842 	    code = encoder->length_code[lc];
1843 	    SEND_CODE(code+LITERALS+1, ltree); /* send the length code */
1844 	    extra = extra_lbits[code];
1845 	    if(extra != 0) {
1846 		lc -= encoder->base_length[code];
1847 		send_bits(encoder, lc, extra); /* send the extra length bits */
1848 	    }
1849 	    dist = encoder->d_buf[dx++];
1850 	    /* Here, dist is the match distance - 1 */
1851 	    code = D_CODE(dist);
1852 	    Assert (code < D_CODES, "bad d_code");
1853 
1854 	    SEND_CODE(code, dtree);	  /* send the distance code */
1855 	    extra = extra_dbits[code];
1856 	    if(extra != 0) {
1857 		dist -= encoder->base_dist[code];
1858 		send_bits(encoder, dist, extra);   /* send the extra distance bits */
1859 	    }
1860 	} /* literal or match pair ? */
1861 	flag >>= 1;
1862     } while(lx < encoder->last_lit);
1863 
1864     SEND_CODE(END_BLOCK, ltree);
1865 }
1866 
1867 /* ===========================================================================
1868  * Send a value on a given number of bits.
1869  * IN assertion: length <= 16 and value fits in length bits.
1870  */
1871 #define Buf_size (8 * sizeof(ush)) /* bit size of bi_buf */
send_bits(DeflateHandler encoder,int value,int length)1872 local void send_bits(
1873     DeflateHandler encoder,
1874     int value,	/* value to send */
1875     int length)	/* number of bits */
1876 {
1877     /* If not enough room in bi_buf, use (valid) bits from bi_buf and
1878      * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid))
1879      * unused bits in value.
1880      */
1881     if(encoder->bi_valid > Buf_size - length) {
1882 	encoder->bi_buf |= (value << encoder->bi_valid);
1883 	put_short(encoder->bi_buf);
1884 	encoder->bi_buf = (ush)value >> (Buf_size - encoder->bi_valid);
1885 	encoder->bi_valid += length - Buf_size;
1886     } else {
1887 	encoder->bi_buf |= value << encoder->bi_valid;
1888 	encoder->bi_valid += length;
1889     }
1890 }
1891 
1892 /* ===========================================================================
1893  * Reverse the first len bits of a code, using straightforward code (a faster
1894  * method would use a table)
1895  * IN assertion: 1 <= len <= 15
1896  */
bi_reverse(unsigned code,int len)1897 local unsigned bi_reverse(
1898     unsigned code,	/* the value to invert */
1899     int len)		/* its bit length */
1900 {
1901     register unsigned res = 0;
1902     do {
1903 	res |= code & 1;
1904 	code >>= 1, res <<= 1;
1905     } while(--len > 0);
1906     return res >> 1;
1907 }
1908 
1909 /* ===========================================================================
1910  * Write out any remaining bits in an incomplete byte.
1911  */
bi_windup(DeflateHandler encoder)1912 local void bi_windup(DeflateHandler encoder)
1913 {
1914     if(encoder->bi_valid > 8) {
1915 	put_short(encoder->bi_buf);
1916     } else if(encoder->bi_valid > 0) {
1917 	put_byte(encoder->bi_buf);
1918     }
1919     encoder->bi_buf = 0;
1920     encoder->bi_valid = 0;
1921 }
1922 
qoutbuf(DeflateHandler encoder)1923 local void qoutbuf(DeflateHandler encoder)
1924 {
1925     if(encoder->outcnt != 0)
1926     {
1927 	struct deflate_buff_queue *q;
1928 	q = new_queue();
1929 	if(encoder->qhead == NULL)
1930 	    encoder->qhead = encoder->qtail = q;
1931 	else
1932 	    encoder->qtail = encoder->qtail->next = q;
1933 	q->len = encoder->outcnt - encoder->outoff;
1934 	memcpy(q->ptr, encoder->outbuf + encoder->outoff, q->len);
1935 	encoder->outcnt = encoder->outoff = 0;
1936     }
1937 }
1938