1 /*
2 TiMidity++ -- MIDI to WAVE converter and player
3 Copyright (C) 1999-2002 Masanao Izumo <mo@goice.co.jp>
4 Copyright (C) 1995 Tuukka Toivonen <tt@cgs.fi>
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20
21 #ifdef HAVE_CONFIG_H
22 #include "config.h"
23 #endif /* HAVE_CONFIG_H */
24 /* deflate.c -- compress data using the deflation algorithm
25 * Copyright (C) 1992-1993 Jean-loup Gailly
26 * This is free software; you can redistribute it and/or modify it under the
27 * terms of the GNU General Public License, see the file COPYING.
28 */
29
30 /*
31 * PURPOSE
32 *
33 * Identify new text as repetitions of old text within a fixed-
34 * length sliding window trailing behind the new text.
35 *
36 * DISCUSSION
37 *
38 * The "deflation" process depends on being able to identify portions
39 * of the input text which are identical to earlier input (within a
40 * sliding window trailing behind the input currently being processed).
41 *
42 * The most straightforward technique turns out to be the fastest for
43 * most input files: try all possible matches and select the longest.
44 * The key feature of this algorithm is that insertions into the string
45 * dictionary are very simple and thus fast, and deletions are avoided
46 * completely. Insertions are performed at each input character, whereas
47 * string matches are performed only when the previous match ends. So it
48 * is preferable to spend more time in matches to allow very fast string
49 * insertions and avoid deletions. The matching algorithm for small
50 * strings is inspired from that of Rabin & Karp. A brute force approach
51 * is used to find longer strings when a small match has been found.
52 * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze
53 * (by Leonid Broukhis).
54 * A previous version of this file used a more sophisticated algorithm
55 * (by Fiala and Greene) which is guaranteed to run in linear amortized
56 * time, but has a larger average cost, uses more memory and is patented.
57 * However the F&G algorithm may be faster for some highly redundant
58 * files if the parameter max_chain_length (described below) is too large.
59 *
60 * ACKNOWLEDGEMENTS
61 *
62 * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
63 * I found it in 'freeze' written by Leonid Broukhis.
64 * Thanks to many info-zippers for bug reports and testing.
65 *
66 * REFERENCES
67 *
68 * APPNOTE.TXT documentation file in PKZIP 1.93a distribution.
69 *
70 * A description of the Rabin and Karp algorithm is given in the book
71 * "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
72 *
73 * Fiala,E.R., and Greene,D.H.
74 * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595
75 *
76 * INTERFACE
77 *
78 * void lm_init (void)
79 * Initialize the "longest match" routines for a new file
80 *
81 * ulg deflate (void)
82 * Processes a new input file and return its compressed length. Sets
83 * the compressed length, crc, deflate flags and internal file
84 * attributes.
85 */
86
87 #include <stdio.h>
88 #include <stdlib.h>
89 #ifndef NO_STRING_H
90 #include <string.h>
91 #else
92 #include <strings.h>
93 #endif
94 #include <ctype.h>
95
96 #define FULL_SEARCH
97 /* #define UNALIGNED_OK */
98 /* #define DEBUG */
99 #include "timidity.h"
100 #include "common.h"
101 #include "mblock.h"
102 #include "zip.h"
103
104 #define local static
105
106 #define MIN_MATCH 3
107 #define MAX_MATCH 258
108 /* The minimum and maximum match lengths */
109
110 #define BITS 16
111
112 /* Compile with MEDIUM_MEM to reduce the memory requirements or
113 * with SMALL_MEM to use as little memory as possible. Use BIG_MEM if the
114 * entire input file can be held in memory (not possible on 16 bit systems).
115 * Warning: defining these symbols affects HASH_BITS (see below) and thus
116 * affects the compression ratio. The compressed output
117 * is still correct, and might even be smaller in some cases.
118 */
119 #ifdef SMALL_MEM
120 # define LIT_BUFSIZE 0x2000
121 # define HASH_BITS 13 /* Number of bits used to hash strings */
122 #else
123 #ifdef MEDIUM_MEM
124 # define LIT_BUFSIZE 0x4000
125 # define HASH_BITS 14
126 #else
127 # define LIT_BUFSIZE 0x8000
128 # define HASH_BITS 15
129 /* For portability to 16 bit machines, do not use values above 15. */
130 #endif
131 #endif
132
133 #if LIT_BUFSIZE > INBUFSIZ
134 error cannot overlay l_buf and inbuf
135 #endif
136 #if (WSIZE<<1) > (1<<BITS)
137 error: cannot overlay window with tab_suffix and prev with tab_prefix0
138 #endif
139 #if HASH_BITS > BITS-1
140 error: cannot overlay head with tab_prefix1
141 #endif
142
143 #define DIST_BUFSIZE LIT_BUFSIZE
144 #define HASH_SIZE (unsigned)(1<<HASH_BITS)
145 #define HASH_MASK (HASH_SIZE-1)
146 #define WMASK (WSIZE-1)
147 /* HASH_SIZE and WSIZE must be powers of two */
148
149 #define NIL 0 /* Tail of hash chains */
150 #define EQUAL 0 /* result of memcmp for equal strings */
151
152 #define TOO_FAR 4096
153 /* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
154
155 #define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
156 /* Minimum amount of lookahead, except at the end of the input file.
157 * See deflate.c for comments about the MIN_MATCH+1.
158 */
159
160 #define MAX_DIST (WSIZE-MIN_LOOKAHEAD)
161 /* In order to simplify the code, particularly on 16 bit machines, match
162 * distances are limited to MAX_DIST instead of WSIZE.
163 */
164
165 #define SMALLEST 1
166 /* Index within the heap array of least frequent node in the Huffman tree */
167 #define MAX_BITS 15 /* All codes must not exceed MAX_BITS bits */
168 #define MAX_BL_BITS 7 /* Bit length codes must not exceed MAX_BL_BITS bits */
169 #define LENGTH_CODES 29
170 /* number of length codes, not counting the special END_BLOCK code */
171 #define LITERALS 256 /* number of literal bytes 0..255 */
172 #define END_BLOCK 256 /* end of block literal code */
173 #define L_CODES (LITERALS+1+LENGTH_CODES)
174 /* number of Literal or Length codes, including the END_BLOCK code */
175 #define D_CODES 30 /* number of distance codes */
176 #define BL_CODES 19 /* number of codes used to transfer the bit lengths */
177 #define REP_3_6 16
178 /* repeat previous bit length 3-6 times (2 bits of repeat count) */
179 #define REPZ_3_10 17
180 /* repeat a zero length 3-10 times (3 bits of repeat count) */
181 #define REPZ_11_138 18
182 /* repeat a zero length 11-138 times (7 bits of repeat count) */
183 #define HEAP_SIZE (2*L_CODES+1) /* maximum heap size */
184
185 #define H_SHIFT ((HASH_BITS+MIN_MATCH-1)/MIN_MATCH)
186 /* Number of bits by which ins_h and del_h must be shifted at each
187 * input step. It must be such that after MIN_MATCH steps, the oldest
188 * byte no longer takes part in the hash key, that is:
189 * H_SHIFT * MIN_MATCH >= HASH_BITS
190 */
191
192 /* Data structure describing a single value and its code string. */
193 typedef struct ct_data {
194 union {
195 ush freq; /* frequency count */
196 ush code; /* bit string */
197 } fc;
198 union {
199 ush dad; /* father node in Huffman tree */
200 ush len; /* length of bit string */
201 } dl;
202 } ct_data;
203 #define Freq fc.freq
204 #define Code fc.code
205 #define Dad dl.dad
206 #define Len dl.len
207
208 typedef struct tree_desc {
209 ct_data near *dyn_tree; /* the dynamic tree */
210 ct_data near *static_tree; /* corresponding static tree or NULL */
211 int near *extra_bits; /* extra bits for each code or NULL */
212 int extra_base; /* base index for extra_bits */
213 int elems; /* max number of elements in the tree */
214 int max_length; /* max bit length for the codes */
215 int max_code; /* largest code with non zero frequency */
216 } tree_desc;
217
218 local int near extra_lbits[LENGTH_CODES] /* extra bits for each length code */
219 = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
220 local int near extra_dbits[D_CODES] /* extra bits for each distance code */
221 = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
222 local int near extra_blbits[BL_CODES]/* extra bits for each bit length code */
223 = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7};
224
225 local uch near bl_order[BL_CODES]
226 = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
227 /* The lengths of the bit length codes are sent in order of decreasing
228 * probability, to avoid transmitting the lengths for unused bit length codes.
229 */
230
231 /* Values for max_lazy_match, good_match and max_chain_length, depending on
232 * the desired pack level (0..9). The values given below have been tuned to
233 * exclude worst case performance for pathological files. Better values may be
234 * found for specific files.
235 */
236 local struct
237 {
238 ush good_length; /* reduce lazy search above this match length */
239 ush max_lazy; /* do not perform lazy search above this match length */
240 ush nice_length; /* quit search above this match length */
241 ush max_chain;
242 } configuration_table[10] = {
243 /* good lazy nice chain */
244 /* 0 */ {0, 0, 0, 0}, /* store only */
245 /* 1 */ {4, 4, 8, 4}, /* maximum speed, no lazy matches */
246 /* 2 */ {4, 5, 16, 8},
247 /* 3 */ {4, 6, 32, 32},
248
249 /* 4 */ {4, 4, 16, 16}, /* lazy matches */
250 /* 5 */ {8, 16, 32, 32},
251 /* 6 */ {8, 16, 128, 128},
252 /* 7 */ {8, 32, 128, 256},
253 /* 8 */ {32, 128, 258, 1024},
254 /* 9 */ {32, 258, 258, 4096}}; /* maximum compression */
255
256 struct deflate_buff_queue
257 {
258 struct deflate_buff_queue *next;
259 unsigned len;
260 uch *ptr;
261 };
262 local struct deflate_buff_queue *free_queue = NULL;
263
reuse_queue(struct deflate_buff_queue * p)264 local void reuse_queue(struct deflate_buff_queue *p)
265 {
266 p->next = free_queue;
267 free_queue = p;
268 }
new_queue(void)269 local struct deflate_buff_queue *new_queue(void)
270 {
271 struct deflate_buff_queue *p;
272
273 if(free_queue)
274 {
275 p = free_queue;
276 free_queue = free_queue->next;
277 }
278 else
279 p = (struct deflate_buff_queue *)
280 safe_malloc(sizeof(struct deflate_buff_queue) + OUTBUFSIZ);
281 p->next = NULL;
282 p->len = 0;
283 p->ptr = (uch *)p + sizeof(struct deflate_buff_queue);
284
285 return p;
286 }
287
288 struct _DeflateHandler
289 {
290 void *user_val;
291 long (* read_func)(char *buf, long size, void *user_val);
292
293 int initflag;
294 struct deflate_buff_queue *qhead;
295 struct deflate_buff_queue *qtail;
296 uch outbuf[OUTBUFSIZ];
297 unsigned outcnt, outoff;
298 int complete;
299
300 #define window_size ((ulg)2*WSIZE)
301 uch window[window_size];
302 ush d_buf[DIST_BUFSIZE]; /* buffer for distances */
303 uch l_buf[INBUFSIZ + INBUF_EXTRA]; /* buffer for literals or lengths */
304 ush prev[1L<<BITS];
305 unsigned short bi_buf;
306 int bi_valid;
307
308 long block_start;
309 /* window position at the beginning of the current output block. Gets
310 * negative when the window is moved backwards.
311 */
312
313 unsigned ins_h; /* hash index of string to be inserted */
314
315 unsigned hash_head; /* head of hash chain */
316 unsigned prev_match; /* previous match */
317 int match_available; /* set if previous match exists */
318 unsigned match_length; /* length of best match */
319 unsigned int near prev_length;
320 /* Length of the best match at previous step. Matches not greater than this
321 * are discarded. This is used in the lazy match evaluation.
322 */
323
324 unsigned near strstart; /* start of string to insert */
325 unsigned near match_start; /* start of matching string */
326 int eofile; /* flag set at end of input file */
327 unsigned lookahead; /* number of valid bytes ahead in window */
328
329 unsigned near max_chain_length;
330 /* To speed up deflation, hash chains are never searched beyond this length.
331 * A higher limit improves compression ratio but degrades the speed.
332 */
333
334 unsigned int max_lazy_match;
335 /* Attempt to find a better match only when the current match is strictly
336 * smaller than this value. This mechanism is used only for compression
337 * levels >= 4.
338 */
339
340 int compr_level; /* compression level (1..9) */
341
342 unsigned near good_match;
343 /* Use a faster search when the previous match is longer than this */
344
345 #ifndef FULL_SEARCH
346 int near nice_match; /* Stop searching when current match exceeds this */
347 #endif
348
349 ct_data near dyn_ltree[HEAP_SIZE]; /* literal and length tree */
350 ct_data near dyn_dtree[2*D_CODES+1]; /* distance tree */
351 ct_data near static_ltree[L_CODES+2];
352 /* The static literal tree. Since the bit lengths are imposed, there is no
353 * need for the L_CODES extra codes used during heap construction. However
354 * The codes 286 and 287 are needed to build a canonical tree (see ct_init
355 * below).
356 */
357
358 ct_data near static_dtree[D_CODES];
359 /* The static distance tree. (Actually a trivial tree since all codes use
360 * 5 bits.)
361 */
362
363 ct_data near bl_tree[2*BL_CODES+1];/* Huffman tree for the bit lengths */
364
365 tree_desc near l_desc;
366 tree_desc near d_desc;
367 tree_desc near bl_desc;
368
369 ush near bl_count[MAX_BITS+1];
370 /* number of codes at each bit length for an optimal tree */
371
372 int near heap[2*L_CODES+1]; /* heap used to build the Huffman trees */
373 int heap_len; /* number of elements in the heap */
374 int heap_max; /* element of largest frequency */
375 /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
376 * The same heap array is used to build all trees.
377 */
378
379 uch near depth[2*L_CODES+1];
380 /* Depth of each subtree used as tie breaker for trees of equal frequency */
381
382 uch length_code[MAX_MATCH-MIN_MATCH+1];
383 /* length code for each normalized match length (0 == MIN_MATCH) */
384
385 uch dist_code[512];
386 /* distance codes. The first 256 values correspond to the distances
387 * 3 .. 258, the last 256 values correspond to the top 8 bits of
388 * the 15 bit distances.
389 */
390
391 int near base_length[LENGTH_CODES];
392 /* First normalized length for each code (0 = MIN_MATCH) */
393
394 int near base_dist[D_CODES];
395 /* First normalized distance for each code (0 = distance of 1) */
396
397 uch near flag_buf[(LIT_BUFSIZE/8)];
398 /* flag_buf is a bit array distinguishing literals from lengths in
399 * l_buf, thus indicating the presence or absence of a distance.
400 */
401
402 unsigned last_lit; /* running index in l_buf */
403 unsigned last_dist; /* running index in d_buf */
404 unsigned last_flags;/* running index in flag_buf */
405 uch flags; /* current flags not yet saved in flag_buf */
406 uch flag_bit; /* current bit used in flags */
407 ulg opt_len; /* bit length of current block with optimal trees */
408 ulg static_len; /* bit length of current block with static trees */
409 };
410
411 local void lm_init(DeflateHandler);
412 local int longest_match(DeflateHandler,unsigned cur_match);
413 local void fill_window(DeflateHandler);
414 local void deflate_fast(DeflateHandler);
415 local void deflate_better(DeflateHandler);
416 local long qcopy(DeflateHandler encoder, char *buff, long buff_size);
417 local void ct_init(DeflateHandler);
418 local void init_block(DeflateHandler);
419 local void pqdownheap(DeflateHandler,ct_data near *, int);
420 local void gen_bitlen(DeflateHandler,tree_desc near *);
421 local void gen_codes(DeflateHandler,ct_data near *, int);
422 local void build_tree(DeflateHandler,tree_desc near *);
423 local void scan_tree(DeflateHandler,ct_data near *, int);
424 local void send_tree(DeflateHandler,ct_data near *, int);
425 local int build_bl_tree(DeflateHandler);
426 local void send_all_trees(DeflateHandler,int,int,int);
427 local void flush_block(DeflateHandler,int);
428 local int ct_tally(DeflateHandler,int,int);
429 local void compress_block(DeflateHandler,ct_data near *, ct_data near *);
430 local void send_bits(DeflateHandler,int,int);
431 local unsigned bi_reverse(unsigned, int);
432 local void bi_windup(DeflateHandler);
433 local void qoutbuf(DeflateHandler);
434
435 #ifdef DEBUG
error(char * m)436 local void error(char *m)
437 {
438 fprintf(stderr, "%s\n", m);
439 exit(1);
440 }
441 #define Assert(cond,msg) {if(!(cond)) error(msg);}
442 local int verbose = 0; /* verbose */
443 local void check_match (DeflateHandler,unsigned, unsigned, int);
444 #else
445 #define Assert(cond,msg)
446 #endif
447
448 #ifndef MAX
449 #define MAX(a,b) (a >= b ? a : b)
450 #endif /* MAX */
451
452 #define head(i) ((encoder->prev+WSIZE)[i])
453
454 /* put_byte is used for the compressed output, put_ubyte for the
455 * uncompressed output. However unlzw() uses window for its
456 * suffix table instead of its output buffer, so it does not use put_ubyte
457 * (to be cleaned up).
458 */
459 #define put_byte(c) {encoder->outbuf[encoder->outoff + encoder->outcnt++] = \
460 (uch)(c); if(encoder->outoff + encoder->outcnt == OUTBUFSIZ) \
461 qoutbuf(encoder);}
462
463 /* Output a 16 bit value, lsb first */
464 #define put_short(w) \
465 { if(encoder->outoff + encoder->outcnt < OUTBUFSIZ - 2) { \
466 encoder->outbuf[encoder->outoff+encoder->outcnt++] = (uch) ((w) & 0xff); \
467 encoder->outbuf[encoder->outoff+encoder->outcnt++] = (uch) ((ush)(w) >> 8); \
468 } else { put_byte((uch)((w) & 0xff)); put_byte((uch)((ush)(w) >> 8)); }}
469 /* ===========================================================================
470 * Update a hash value with the given input byte
471 * IN assertion: all calls to to UPDATE_HASH are made with consecutive
472 * input characters, so that a running hash key can be computed from the
473 * previous key instead of complete recalculation each time.
474 */
475 #define UPDATE_HASH(h,c) (h = (((h)<<H_SHIFT) ^ (c)) & HASH_MASK)
476
477 /* ===========================================================================
478 * Insert string s in the dictionary and set match_head to the previous head
479 * of the hash chain (the most recent string with same hash key). Return
480 * the previous length of the hash chain.
481 * IN assertion: all calls to to INSERT_STRING are made with consecutive
482 * input characters and the first MIN_MATCH bytes of s are valid
483 * (except for the last MIN_MATCH-1 bytes of the input file).
484 */
485 #define INSERT_STRING(s, match_head) \
486 (UPDATE_HASH(encoder->ins_h, encoder->window[(s) + MIN_MATCH-1]), \
487 encoder->prev[(s) & WMASK] =match_head = head(encoder->ins_h), \
488 head(encoder->ins_h) = (s))
489
490 #define SEND_CODE(c, tree) send_bits(encoder, (tree)[c].Code, (tree)[c].Len)
491 /* Send a code of the given tree. c and tree must not have side effects */
492
493 #define D_CODE(dist) ((dist)<256 ? encoder->dist_code[dist] : encoder->dist_code[256+((dist)>>7)])
494 /* Mapping from a distance to a distance code. dist is the distance - 1 and
495 * must not have side effects. dist_code[256] and dist_code[257] are never
496 * used.
497 */
498
499 /* ===========================================================================
500 * Compares to subtrees, using the tree depth as tie breaker when
501 * the subtrees have equal frequency. This minimizes the worst case length.
502 */
503 #define SMALLER(tree, n, m) \
504 ((tree)[n].Freq < (tree)[m].Freq || \
505 ((tree)[n].Freq == (tree)[m].Freq && encoder->depth[n] <= encoder->depth[m]))
506
507
508 /* ===========================================================================
509 * Initialize the "longest match" routines for a new file
510 */
lm_init(DeflateHandler encoder)511 local void lm_init(DeflateHandler encoder)
512 {
513 register unsigned j;
514
515 /* Initialize the hash table. */
516 #if defined(MAXSEG_64K) && HASH_BITS == 15
517 for(j = 0; j < HASH_SIZE; j++) head(j) = NIL;
518 #else
519 memset((char*)&head(0), 0, HASH_SIZE*sizeof(head(0)));
520 #endif
521 /* prev will be initialized on the fly */
522
523 /* Set the default configuration parameters:
524 */
525 encoder->max_lazy_match = configuration_table[encoder->compr_level].max_lazy;
526 encoder->good_match = configuration_table[encoder->compr_level].good_length;
527 #ifndef FULL_SEARCH
528 encoder->nice_match = configuration_table[encoder->compr_level].nice_length;
529 #endif
530 encoder->max_chain_length = configuration_table[encoder->compr_level].max_chain;
531
532 encoder->strstart = 0;
533 encoder->block_start = 0L;
534
535 encoder->lookahead =
536 encoder->read_func((char*)encoder->window,
537 (long)(sizeof(int)<=2 ? (unsigned)WSIZE : 2*WSIZE),
538 encoder->user_val);
539
540 if(encoder->lookahead == 0 || encoder->lookahead == (unsigned)EOF) {
541 encoder->eofile = 1;
542 encoder->lookahead = 0;
543 return;
544 }
545 encoder->eofile = 0;
546 /* Make sure that we always have enough lookahead. This is important
547 * if input comes from a device such as a tty.
548 */
549 while(encoder->lookahead < MIN_LOOKAHEAD && !encoder->eofile)
550 fill_window(encoder);
551
552 encoder->ins_h = 0;
553 for(j=0; j<MIN_MATCH-1; j++)
554 UPDATE_HASH(encoder->ins_h, encoder->window[j]);
555 /* If lookahead < MIN_MATCH, ins_h is garbage, but this is
556 * not important since only literal bytes will be emitted.
557 */
558 }
559
560 /* ===========================================================================
561 * Set match_start to the longest match starting at the given string and
562 * return its length. Matches shorter or equal to prev_length are discarded,
563 * in which case the result is equal to prev_length and match_start is
564 * garbage.
565 * IN assertions: cur_match is the head of the hash chain for the current
566 * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
567 */
longest_match(DeflateHandler encoder,unsigned cur_match)568 local int longest_match(DeflateHandler encoder, unsigned cur_match)
569 {
570 unsigned chain_length = encoder->max_chain_length;
571 /* max hash chain length */
572
573 register uch *scan = encoder->window + encoder->strstart;
574 /* current string */
575
576 register uch *match; /* matched string */
577 register int len; /* length of current match */
578 int best_len = encoder->prev_length; /* best match length so far */
579
580 unsigned limit = (encoder->strstart > (unsigned)MAX_DIST ?
581 encoder->strstart - (unsigned)MAX_DIST : NIL);
582 /* Stop when cur_match becomes <= limit. To simplify the code,
583 * we prevent matches with the string of window index 0.
584 */
585
586 #if HASH_BITS < 8 || MAX_MATCH != 258
587 error: Code too clever
588 #endif
589
590 #ifdef UNALIGNED_OK
591 /* Compare two bytes at a time. Note: this is not always beneficial.
592 * Try with and without -DUNALIGNED_OK to check.
593 */
594 register uch *strend = encoder->window + encoder->strstart + MAX_MATCH - 1;
595 register ush scan_start = *(ush*)scan;
596 register ush scan_end = *(ush*)(scan+best_len-1);
597 #else
598 register uch *strend = encoder->window + encoder->strstart + MAX_MATCH;
599 register uch scan_end1 = scan[best_len-1];
600 register uch scan_end = scan[best_len];
601 #endif
602
603 /* Do not waste too much time if we already have a good match: */
604 if(encoder->prev_length >= encoder->good_match) {
605 chain_length >>= 2;
606 }
607 Assert(encoder->strstart <= window_size-MIN_LOOKAHEAD, "insufficient lookahead");
608
609 do {
610 Assert(cur_match < encoder->strstart, "no future");
611 match = encoder->window + cur_match;
612
613 /* Skip to next match if the match length cannot increase
614 * or if the match length is less than 2:
615 */
616 #if (defined(UNALIGNED_OK) && MAX_MATCH == 258)
617 /* This code assumes sizeof(unsigned short) == 2. Do not use
618 * UNALIGNED_OK if your compiler uses a different size.
619 */
620 if(*(ush*)(match+best_len-1) != scan_end ||
621 *(ush*)match != scan_start) continue;
622
623 /* It is not necessary to compare scan[2] and match[2] since they are
624 * always equal when the other bytes match, given that the hash keys
625 * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at
626 * strstart+3, +5, ... up to strstart+257. We check for insufficient
627 * lookahead only every 4th comparison; the 128th check will be made
628 * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is
629 * necessary to put more guard bytes at the end of the window, or
630 * to check more often for insufficient lookahead.
631 */
632 scan++, match++;
633 do {
634 } while(*(ush*)(scan+=2) == *(ush*)(match+=2) &&
635 *(ush*)(scan+=2) == *(ush*)(match+=2) &&
636 *(ush*)(scan+=2) == *(ush*)(match+=2) &&
637 *(ush*)(scan+=2) == *(ush*)(match+=2) &&
638 scan < strend);
639 /* The funny "do {}" generates better code on most compilers */
640
641 /* Here, scan <= window+strstart+257 */
642 Assert(scan <= encoder->window+(unsigned)(window_size-1), "wild scan");
643 if(*scan == *match) scan++;
644
645 len = (MAX_MATCH - 1) - (int)(strend-scan);
646 scan = strend - (MAX_MATCH-1);
647
648 #else /* UNALIGNED_OK */
649
650 if(match[best_len] != scan_end ||
651 match[best_len-1] != scan_end1 ||
652 *match != *scan ||
653 *++match != scan[1]) continue;
654
655 /* The check at best_len-1 can be removed because it will be made
656 * again later. (This heuristic is not always a win.)
657 * It is not necessary to compare scan[2] and match[2] since they
658 * are always equal when the other bytes match, given that
659 * the hash keys are equal and that HASH_BITS >= 8.
660 */
661 scan += 2, match++;
662
663 /* We check for insufficient lookahead only every 8th comparison;
664 * the 256th check will be made at strstart+258.
665 */
666 do {
667 } while(*++scan == *++match && *++scan == *++match &&
668 *++scan == *++match && *++scan == *++match &&
669 *++scan == *++match && *++scan == *++match &&
670 *++scan == *++match && *++scan == *++match &&
671 scan < strend);
672
673 len = MAX_MATCH - (int)(strend - scan);
674 scan = strend - MAX_MATCH;
675
676 #endif /* UNALIGNED_OK */
677
678 if(len > best_len) {
679 encoder->match_start = cur_match;
680 best_len = len;
681 #ifdef FULL_SEARCH
682 if(len >= MAX_MATCH) break;
683 #else
684 if(len >= encoder->nice_match) break;
685 #endif /* FULL_SEARCH */
686
687
688 #ifdef UNALIGNED_OK
689 scan_end = *(ush*)(scan+best_len-1);
690 #else
691 scan_end1 = scan[best_len-1];
692 scan_end = scan[best_len];
693 #endif
694 }
695 } while((cur_match = encoder->prev[cur_match & WMASK]) > limit
696 && --chain_length != 0);
697
698 return best_len;
699 }
700
701 #ifdef DEBUG
702 /* ===========================================================================
703 * Check that the match at match_start is indeed a match.
704 */
check_match(DeflateHandler encoder,unsigned start,unsigned match,int length)705 local void check_match(DeflateHandler encoder,
706 unsigned start, unsigned match, int length)
707 {
708 /* check that the match is indeed a match */
709 if(memcmp((char*)encoder->window + match,
710 (char*)encoder->window + start, length) != EQUAL) {
711 fprintf(stderr,
712 " start %d, match %d, length %d\n",
713 start, match, length);
714 error("invalid match");
715 }
716 if(verbose > 1) {
717 fprintf(stderr,"\\[%d,%d]", start-match, length);
718 do { putc(encoder->window[start++], stderr); } while(--length != 0);
719 }
720 }
721 #else
722 # define check_match(encoder, start, match, length)
723 #endif
724
725 /* ===========================================================================
726 * Fill the window when the lookahead becomes insufficient.
727 * Updates strstart and lookahead, and sets eofile if end of input file.
728 * IN assertion: lookahead < MIN_LOOKAHEAD && strstart + lookahead > 0
729 * OUT assertions: at least one byte has been read, or eofile is set;
730 * file reads are performed for at least two bytes (required for the
731 * translate_eol option).
732 */
fill_window(DeflateHandler encoder)733 local void fill_window(DeflateHandler encoder)
734 {
735 register unsigned n, m;
736 unsigned more = (unsigned)(window_size - (ulg)encoder->lookahead - (ulg)encoder->strstart);
737 /* Amount of free space at the end of the window. */
738
739 /* If the window is almost full and there is insufficient lookahead,
740 * move the upper half to the lower one to make room in the upper half.
741 */
742 if(more == (unsigned)EOF) {
743 /* Very unlikely, but possible on 16 bit machine if strstart == 0
744 * and lookahead == 1 (input done one byte at time)
745 */
746 more--;
747 } else if(encoder->strstart >= WSIZE+MAX_DIST) {
748 /* By the IN assertion, the window is not empty so we can't confuse
749 * more == 0 with more == 64K on a 16 bit machine.
750 */
751 Assert(window_size == (ulg)2*WSIZE, "no sliding with BIG_MEM");
752
753 memcpy((char*)encoder->window, (char*)encoder->window+WSIZE, (unsigned)WSIZE);
754 encoder->match_start -= WSIZE;
755 encoder->strstart -= WSIZE; /* we now have strstart >= MAX_DIST: */
756 encoder->block_start -= (long) WSIZE;
757
758 for(n = 0; n < HASH_SIZE; n++) {
759 m = head(n);
760 head(n) = (ush)(m >= WSIZE ? m-WSIZE : NIL);
761 }
762 for(n = 0; n < WSIZE; n++) {
763 m = encoder->prev[n];
764 encoder->prev[n] = (ush)(m >= WSIZE ? m-WSIZE : NIL);
765 /* If n is not on any hash chain, prev[n] is garbage but
766 * its value will never be used.
767 */
768 }
769 more += WSIZE;
770 }
771 /* At this point, more >= 2 */
772 if(!encoder->eofile) {
773 n = encoder->read_func((char*)encoder->window+encoder->strstart
774 + encoder->lookahead,
775 (long)more,
776 encoder->user_val);
777 if(n == 0 || n == (unsigned)EOF) {
778 encoder->eofile = 1;
779 } else {
780 encoder->lookahead += n;
781 }
782 }
783 }
784
785 /* ===========================================================================
786 * Processes a new input file and return its compressed length. This
787 * function does not perform lazy evaluationof matches and inserts
788 * new strings in the dictionary only for unmatched strings or for short
789 * matches. It is used only for the fast compression options.
790 */
deflate_fast(DeflateHandler encoder)791 local void deflate_fast(DeflateHandler encoder)
792 {
793 /*
794 encoder->prev_length = MIN_MATCH-1;
795 encoder->match_length = 0;
796 */
797 while(encoder->lookahead != 0 && encoder->qhead == NULL) {
798 int flush; /* set if current block must be flushed */
799
800 /* Insert the string window[strstart .. strstart+2] in the
801 * dictionary, and set hash_head to the head of the hash chain:
802 */
803 INSERT_STRING(encoder->strstart, encoder->hash_head);
804
805 /* Find the longest match, discarding those <= prev_length.
806 * At this point we have always match_length < MIN_MATCH
807 */
808 if(encoder->hash_head != NIL &&
809 encoder->strstart - encoder->hash_head <= MAX_DIST) {
810 /* To simplify the code, we prevent matches with the string
811 * of window index 0 (in particular we have to avoid a match
812 * of the string with itself at the start of the input file).
813 */
814 encoder->match_length = longest_match(encoder, encoder->hash_head);
815 /* longest_match() sets match_start */
816 if(encoder->match_length > encoder->lookahead)
817 encoder->match_length = encoder->lookahead;
818 }
819 if(encoder->match_length >= MIN_MATCH) {
820 check_match(encoder, encoder->strstart,
821 encoder->match_start, encoder->match_length);
822
823 flush = ct_tally(encoder, encoder->strstart - encoder->match_start,
824 encoder->match_length - MIN_MATCH);
825
826 encoder->lookahead -= encoder->match_length;
827
828 /* Insert new strings in the hash table only if the match length
829 * is not too large. This saves time but degrades compression.
830 */
831 if(encoder->match_length <= encoder->max_lazy_match) {
832 encoder->match_length--; /* string at strstart already in hash table */
833 do {
834 encoder->strstart++;
835 INSERT_STRING(encoder->strstart, encoder->hash_head);
836 /* strstart never exceeds WSIZE-MAX_MATCH, so there are
837 * always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH
838 * these bytes are garbage, but it does not matter since
839 * the next lookahead bytes will be emitted as literals.
840 */
841 } while(--encoder->match_length != 0);
842 encoder->strstart++;
843 } else {
844 encoder->strstart += encoder->match_length;
845 encoder->match_length = 0;
846 encoder->ins_h = encoder->window[encoder->strstart];
847 UPDATE_HASH(encoder->ins_h,
848 encoder->window[encoder->strstart + 1]);
849 #if MIN_MATCH != 3
850 Call UPDATE_HASH() MIN_MATCH-3 more times
851 #endif
852 }
853 } else {
854 /* No match, output a literal byte */
855 Tracevv((stderr,"%c",encoder->window[encoder->strstart]));
856 flush = ct_tally (encoder, 0, encoder->window[encoder->strstart]);
857 encoder->lookahead--;
858 encoder->strstart++;
859 }
860 if(flush)
861 {
862 flush_block(encoder, 0);
863 encoder->block_start = (long)encoder->strstart;
864 }
865
866 /* Make sure that we always have enough lookahead, except
867 * at the end of the input file. We need MAX_MATCH bytes
868 * for the next match, plus MIN_MATCH bytes to insert the
869 * string following the next match.
870 */
871 while(encoder->lookahead < MIN_LOOKAHEAD && !encoder->eofile)
872 fill_window(encoder);
873 }
874 }
875
deflate_better(DeflateHandler encoder)876 local void deflate_better(DeflateHandler encoder) {
877 /* Process the input block. */
878 while(encoder->lookahead != 0 && encoder->qhead == NULL) {
879 /* Insert the string window[strstart .. strstart+2] in the
880 * dictionary, and set hash_head to the head of the hash chain:
881 */
882 INSERT_STRING(encoder->strstart, encoder->hash_head);
883
884 /* Find the longest match, discarding those <= prev_length.
885 */
886 encoder->prev_length = encoder->match_length;
887 encoder->prev_match = encoder->match_start;
888 encoder->match_length = MIN_MATCH-1;
889
890 if(encoder->hash_head != NIL &&
891 encoder->prev_length < encoder->max_lazy_match &&
892 encoder->strstart - encoder->hash_head <= MAX_DIST) {
893 /* To simplify the code, we prevent matches with the string
894 * of window index 0 (in particular we have to avoid a match
895 * of the string with itself at the start of the input file).
896 */
897 encoder->match_length = longest_match(encoder, encoder->hash_head);
898 /* longest_match() sets match_start */
899 if(encoder->match_length > encoder->lookahead)
900 encoder->match_length = encoder->lookahead;
901
902 /* Ignore a length 3 match if it is too distant: */
903 if(encoder->match_length == MIN_MATCH &&
904 encoder->strstart - encoder->match_start > TOO_FAR){
905 /* If prev_match is also MIN_MATCH, match_start is garbage
906 * but we will ignore the current match anyway.
907 */
908 encoder->match_length--;
909 }
910 }
911 /* If there was a match at the previous step and the current
912 * match is not better, output the previous match:
913 */
914 if(encoder->prev_length >= MIN_MATCH &&
915 encoder->match_length <= encoder->prev_length) {
916 int flush; /* set if current block must be flushed */
917
918 check_match(encoder, encoder->strstart-1,
919 encoder->prev_match, encoder->prev_length);
920
921 flush = ct_tally(encoder, encoder->strstart-1-encoder->prev_match,
922 encoder->prev_length - MIN_MATCH);
923
924 /* Insert in hash table all strings up to the end of the match.
925 * strstart-1 and strstart are already inserted.
926 */
927 encoder->lookahead -= encoder->prev_length-1;
928 encoder->prev_length -= 2;
929 do {
930 encoder->strstart++;
931 INSERT_STRING(encoder->strstart, encoder->hash_head);
932 /* strstart never exceeds WSIZE-MAX_MATCH, so there are
933 * always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH
934 * these bytes are garbage, but it does not matter since the
935 * next lookahead bytes will always be emitted as literals.
936 */
937 } while(--encoder->prev_length != 0);
938 encoder->match_available = 0;
939 encoder->match_length = MIN_MATCH-1;
940 encoder->strstart++;
941 if(flush) {
942 flush_block(encoder, 0);
943 encoder->block_start = (long)encoder->strstart;
944 }
945
946 } else if(encoder->match_available) {
947 /* If there was no match at the previous position, output a
948 * single literal. If there was a match but the current match
949 * is longer, truncate the previous match to a single literal.
950 */
951 Tracevv((stderr,"%c",encoder->window[encoder->strstart-1]));
952 if(ct_tally (encoder, 0, encoder->window[encoder->strstart-1])) {
953 flush_block(encoder, 0);
954 encoder->block_start = (long)encoder->strstart;
955 }
956 encoder->strstart++;
957 encoder->lookahead--;
958 } else {
959 /* There is no previous match to compare with, wait for
960 * the next step to decide.
961 */
962 encoder->match_available = 1;
963 encoder->strstart++;
964 encoder->lookahead--;
965 }
966
967 /* Make sure that we always have enough lookahead, except
968 * at the end of the input file. We need MAX_MATCH bytes
969 * for the next match, plus MIN_MATCH bytes to insert the
970 * string following the next match.
971 */
972 while(encoder->lookahead < MIN_LOOKAHEAD && !encoder->eofile)
973 fill_window(encoder);
974 }
975 }
976
977 /*ARGSUSED*/
default_read_func(char * buf,long size,void * v)978 static long default_read_func(char *buf, long size, void *v)
979 {
980 return (long)fread(buf, 1, size, stdin);
981 }
982
open_deflate_handler(long (* read_func)(char * buf,long size,void * user_val),void * user_val,int level)983 DeflateHandler open_deflate_handler(
984 long (* read_func)(char *buf, long size, void *user_val),
985 void *user_val,
986 int level)
987 {
988 DeflateHandler encoder;
989
990 if(level < 1 || level > 9)
991 return NULL; /* error("bad compression level"); */
992
993 encoder = (DeflateHandler)safe_malloc(sizeof(struct _DeflateHandler));
994 if(encoder == NULL)
995 return NULL;
996 memset(encoder, 0, sizeof(struct _DeflateHandler));
997 encoder->compr_level = level;
998 if(read_func == NULL)
999 encoder->read_func = default_read_func;
1000 else
1001 encoder->read_func = read_func;
1002 encoder->user_val = user_val;
1003
1004 return encoder;
1005 }
1006
close_deflate_handler(DeflateHandler encoder)1007 void close_deflate_handler(DeflateHandler encoder)
1008 {
1009 free(encoder);
1010 }
1011
init_deflate(DeflateHandler encoder)1012 local void init_deflate(DeflateHandler encoder)
1013 {
1014 if(encoder->eofile)
1015 return;
1016 encoder->bi_buf = 0;
1017 encoder->bi_valid = 0;
1018 ct_init(encoder);
1019 lm_init(encoder);
1020
1021 encoder->qhead = NULL;
1022 encoder->outcnt = 0;
1023
1024 if(encoder->compr_level <= 3)
1025 {
1026 encoder->prev_length = MIN_MATCH - 1;
1027 encoder->match_length = 0;
1028 }
1029 else
1030 {
1031 encoder->match_length = MIN_MATCH - 1;
1032 encoder->match_available = 0;
1033 }
1034
1035 encoder->complete = 0;
1036 }
1037
1038 /* ===========================================================================
1039 * Same as above, but achieves better compression. We use a lazy
1040 * evaluation for matches: a match is finally adopted only if there is
1041 * no better match at the next window position.
1042 */
zip_deflate(DeflateHandler encoder,char * buff,long buff_size)1043 long zip_deflate(DeflateHandler encoder, char *buff, long buff_size)
1044 {
1045 long n;
1046
1047 if(!encoder->initflag)
1048 {
1049 init_deflate(encoder);
1050 encoder->initflag = 1;
1051 if(encoder->lookahead == 0) { /* empty */
1052 encoder->complete = 1;
1053 return 0;
1054 }
1055 }
1056
1057 if((n = qcopy(encoder, buff, buff_size)) == buff_size)
1058 return buff_size;
1059
1060 if(encoder->complete)
1061 return n;
1062
1063 if(encoder->compr_level <= 3) /* optimized for speed */
1064 deflate_fast(encoder);
1065 else
1066 deflate_better(encoder);
1067 if(encoder->lookahead == 0)
1068 {
1069 if(encoder->match_available)
1070 ct_tally(encoder, 0, encoder->window[encoder->strstart - 1]);
1071 flush_block(encoder, 1);
1072 encoder->complete = 1;
1073 }
1074 return n + qcopy(encoder, buff + n, buff_size - n);
1075 }
1076
qcopy(DeflateHandler encoder,char * buff,long buff_size)1077 local long qcopy(DeflateHandler encoder, char *buff, long buff_size)
1078 {
1079 struct deflate_buff_queue *q;
1080 long n, i;
1081
1082 n = 0;
1083 q = encoder->qhead;
1084 while(q != NULL && n < buff_size)
1085 {
1086 i = buff_size - n;
1087 if(i > q->len)
1088 i = q->len;
1089 memcpy(buff + n, q->ptr, i);
1090 q->ptr += i;
1091 q->len -= i;
1092 n += i;
1093
1094 if(q->len == 0)
1095 {
1096 struct deflate_buff_queue *p;
1097 p = q;
1098 q = q->next;
1099 reuse_queue(p);
1100 }
1101 }
1102 encoder->qhead = q;
1103 if(n == buff_size)
1104 return n;
1105
1106 if(encoder->outoff < encoder->outcnt)
1107 {
1108 i = buff_size - n;
1109 if(i > encoder->outcnt - encoder->outoff)
1110 i = encoder->outcnt - encoder->outoff;
1111 memcpy(buff + n, encoder->outbuf + encoder->outoff, i);
1112 encoder->outoff += i;
1113 n += i;
1114 if(encoder->outcnt == encoder->outoff)
1115 encoder->outcnt = encoder->outoff = 0;
1116 }
1117 return n;
1118 }
1119
1120
1121 /* ===========================================================================
1122 * Allocate the match buffer, initialize the various tables and save the
1123 * location of the internal file attribute (ascii/binary) and method
1124 * (DEFLATE/STORE).
1125 */
ct_init(DeflateHandler encoder)1126 local void ct_init(DeflateHandler encoder)
1127 {
1128 int n; /* iterates over tree elements */
1129 int bits; /* bit counter */
1130 int length; /* length value */
1131 int code; /* code value */
1132 int dist; /* distance index */
1133
1134 if(encoder->static_dtree[0].Len != 0) return; /* ct_init already called */
1135
1136 encoder->l_desc.dyn_tree = encoder->dyn_ltree;
1137 encoder->l_desc.static_tree = encoder->static_ltree;
1138 encoder->l_desc.extra_bits = extra_lbits;
1139 encoder->l_desc.extra_base = LITERALS + 1;
1140 encoder->l_desc.elems = L_CODES;
1141 encoder->l_desc.max_length = MAX_BITS;
1142 encoder->l_desc.max_code = 0;
1143
1144 encoder->d_desc.dyn_tree = encoder->dyn_dtree;
1145 encoder->d_desc.static_tree = encoder->static_dtree;
1146 encoder->d_desc.extra_bits = extra_dbits;
1147 encoder->d_desc.extra_base = 0;
1148 encoder->d_desc.elems = D_CODES;
1149 encoder->d_desc.max_length = MAX_BITS;
1150 encoder->d_desc.max_code = 0;
1151
1152 encoder->bl_desc.dyn_tree = encoder->bl_tree;
1153 encoder->bl_desc.static_tree = NULL;
1154 encoder->bl_desc.extra_bits = extra_blbits;
1155 encoder->bl_desc.extra_base = 0;
1156 encoder->bl_desc.elems = BL_CODES;
1157 encoder->bl_desc.max_length = MAX_BL_BITS;
1158 encoder->bl_desc.max_code = 0;
1159
1160 /* Initialize the mapping length (0..255) -> length code (0..28) */
1161 length = 0;
1162 for(code = 0; code < LENGTH_CODES-1; code++) {
1163 encoder->base_length[code] = length;
1164 for(n = 0; n < (1<<extra_lbits[code]); n++) {
1165 encoder->length_code[length++] = (uch)code;
1166 }
1167 }
1168 Assert (length == 256, "ct_init: length != 256");
1169 /* Note that the length 255 (match length 258) can be represented
1170 * in two different ways: code 284 + 5 bits or code 285, so we
1171 * overwrite length_code[255] to use the best encoding:
1172 */
1173 encoder->length_code[length-1] = (uch)code;
1174
1175 /* Initialize the mapping dist (0..32K) -> dist code (0..29) */
1176 dist = 0;
1177 for(code = 0 ; code < 16; code++) {
1178 encoder->base_dist[code] = dist;
1179 for(n = 0; n < (1<<extra_dbits[code]); n++) {
1180 encoder->dist_code[dist++] = (uch)code;
1181 }
1182 }
1183 Assert (dist == 256, "ct_init: dist != 256");
1184 dist >>= 7; /* from now on, all distances are divided by 128 */
1185 for( ; code < D_CODES; code++) {
1186 encoder->base_dist[code] = dist << 7;
1187 for(n = 0; n < (1<<(extra_dbits[code]-7)); n++) {
1188 encoder->dist_code[256 + dist++] = (uch)code;
1189 }
1190 }
1191 Assert (dist == 256, "ct_init: 256+dist != 512");
1192
1193 /* Construct the codes of the static literal tree */
1194 for(bits = 0; bits <= MAX_BITS; bits++) encoder->bl_count[bits] = 0;
1195 n = 0;
1196 while(n <= 143) encoder->static_ltree[n++].Len = 8, encoder->bl_count[8]++;
1197 while(n <= 255) encoder->static_ltree[n++].Len = 9, encoder->bl_count[9]++;
1198 while(n <= 279) encoder->static_ltree[n++].Len = 7, encoder->bl_count[7]++;
1199 while(n <= 287) encoder->static_ltree[n++].Len = 8, encoder->bl_count[8]++;
1200 /* Codes 286 and 287 do not exist, but we must include them in the
1201 * tree construction to get a canonical Huffman tree (longest code
1202 * all ones)
1203 */
1204 gen_codes(encoder, (ct_data near *)encoder->static_ltree, L_CODES+1);
1205
1206 /* The static distance tree is trivial: */
1207 for(n = 0; n < D_CODES; n++) {
1208 encoder->static_dtree[n].Len = 5;
1209 encoder->static_dtree[n].Code = bi_reverse(n, 5);
1210 }
1211
1212 /* Initialize the first block of the first file: */
1213 init_block(encoder);
1214 }
1215
1216 /* ===========================================================================
1217 * Initialize a new block.
1218 */
init_block(DeflateHandler encoder)1219 local void init_block(DeflateHandler encoder)
1220 {
1221 int n; /* iterates over tree elements */
1222
1223 /* Initialize the trees. */
1224 for(n = 0; n < L_CODES; n++) encoder->dyn_ltree[n].Freq = 0;
1225 for(n = 0; n < D_CODES; n++) encoder->dyn_dtree[n].Freq = 0;
1226 for(n = 0; n < BL_CODES; n++) encoder->bl_tree[n].Freq = 0;
1227
1228 encoder->dyn_ltree[END_BLOCK].Freq = 1;
1229 encoder->opt_len = encoder->static_len = 0L;
1230 encoder->last_lit = encoder->last_dist = encoder->last_flags = 0;
1231 encoder->flags = 0;
1232 encoder->flag_bit = 1;
1233 }
1234
1235 /* ===========================================================================
1236 * Restore the heap property by moving down the tree starting at node k,
1237 * exchanging a node with the smallest of its two sons if necessary, stopping
1238 * when the heap property is re-established (each father smaller than its
1239 * two sons).
1240 */
pqdownheap(DeflateHandler encoder,ct_data near * tree,int k)1241 local void pqdownheap(
1242 DeflateHandler encoder,
1243 ct_data near *tree, /* the tree to restore */
1244 int k) /* node to move down */
1245 {
1246 int v = encoder->heap[k];
1247 int j = k << 1; /* left son of k */
1248 while(j <= encoder->heap_len) {
1249 /* Set j to the smallest of the two sons: */
1250 if(j < encoder->heap_len &&
1251 SMALLER(tree, encoder->heap[j+1], encoder->heap[j]))
1252 j++;
1253
1254 /* Exit if v is smaller than both sons */
1255 if(SMALLER(tree, v, encoder->heap[j]))
1256 break;
1257
1258 /* Exchange v with the smallest son */
1259 encoder->heap[k] = encoder->heap[j];
1260 k = j;
1261
1262 /* And continue down the tree, setting j to the left son of k */
1263 j <<= 1;
1264 }
1265 encoder->heap[k] = v;
1266 }
1267
1268 /* ===========================================================================
1269 * Compute the optimal bit lengths for a tree and update the total bit length
1270 * for the current block.
1271 * IN assertion: the fields freq and dad are set, heap[heap_max] and
1272 * above are the tree nodes sorted by increasing frequency.
1273 * OUT assertions: the field len is set to the optimal bit length, the
1274 * array bl_count contains the frequencies for each bit length.
1275 * The length opt_len is updated; static_len is also updated if stree is
1276 * not null.
1277 */
gen_bitlen(DeflateHandler encoder,tree_desc near * desc)1278 local void gen_bitlen(
1279 DeflateHandler encoder,
1280 tree_desc near *desc) /* the tree descriptor */
1281 {
1282 ct_data near *tree = desc->dyn_tree;
1283 int near *extra = desc->extra_bits;
1284 int base = desc->extra_base;
1285 int max_code = desc->max_code;
1286 int max_length = desc->max_length;
1287 ct_data near *stree = desc->static_tree;
1288 int h; /* heap index */
1289 int n, m; /* iterate over the tree elements */
1290 int bits; /* bit length */
1291 int xbits; /* extra bits */
1292 ush f; /* frequency */
1293 int overflow = 0; /* number of elements with bit length too large */
1294
1295 for(bits = 0; bits <= MAX_BITS; bits++)
1296 encoder->bl_count[bits] = 0;
1297
1298 /* In a first pass, compute the optimal bit lengths (which may
1299 * overflow in the case of the bit length tree).
1300 */
1301 tree[encoder->heap[encoder->heap_max]].Len = 0; /* root of the heap */
1302
1303 for(h = encoder->heap_max+1; h < HEAP_SIZE; h++) {
1304 n = encoder->heap[h];
1305 bits = tree[tree[n].Dad].Len + 1;
1306 if(bits > max_length)
1307 bits = max_length, overflow++;
1308 tree[n].Len = (ush)bits;
1309 /* We overwrite tree[n].Dad which is no longer needed */
1310
1311 if(n > max_code)
1312 continue; /* not a leaf node */
1313
1314 encoder->bl_count[bits]++;
1315 xbits = 0;
1316 if(n >= base)
1317 xbits = extra[n-base];
1318 f = tree[n].Freq;
1319 encoder->opt_len += (ulg)f * (bits + xbits);
1320 if(stree)
1321 encoder->static_len += (ulg)f * (stree[n].Len + xbits);
1322 }
1323 if(overflow == 0) return;
1324
1325 Trace((stderr,"\nbit length overflow\n"));
1326 /* This happens for example on obj2 and pic of the Calgary corpus */
1327
1328 /* Find the first bit length which could increase: */
1329 do {
1330 bits = max_length-1;
1331 while(encoder->bl_count[bits] == 0) bits--;
1332 encoder->bl_count[bits]--; /* move one leaf down the tree */
1333 encoder->bl_count[bits+1] += 2; /* move one overflow item as its brother */
1334 encoder->bl_count[max_length]--;
1335 /* The brother of the overflow item also moves one step up,
1336 * but this does not affect bl_count[max_length]
1337 */
1338 overflow -= 2;
1339 } while(overflow > 0);
1340
1341 /* Now recompute all bit lengths, scanning in increasing frequency.
1342 * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
1343 * lengths instead of fixing only the wrong ones. This idea is taken
1344 * from 'ar' written by Haruhiko Okumura.)
1345 */
1346 for(bits = max_length; bits != 0; bits--) {
1347 n = encoder->bl_count[bits];
1348 while(n != 0) {
1349 m = encoder->heap[--h];
1350 if(m > max_code) continue;
1351 if(tree[m].Len != (unsigned) bits) {
1352 Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits));
1353 encoder->opt_len +=
1354 ((long)bits-(long)tree[m].Len)*(long)tree[m].Freq;
1355 tree[m].Len = (ush)bits;
1356 }
1357 n--;
1358 }
1359 }
1360 }
1361
1362 /* ===========================================================================
1363 * Generate the codes for a given tree and bit counts (which need not be
1364 * optimal).
1365 * IN assertion: the array bl_count contains the bit length statistics for
1366 * the given tree and the field len is set for all tree elements.
1367 * OUT assertion: the field code is set for all tree elements of non
1368 * zero code length.
1369 */
gen_codes(DeflateHandler encoder,ct_data near * tree,int max_code)1370 local void gen_codes(
1371 DeflateHandler encoder,
1372 ct_data near *tree, /* the tree to decorate */
1373 int max_code) /* largest code with non zero frequency */
1374 {
1375 ush next_code[MAX_BITS+1]; /* next code value for each bit length */
1376 ush code = 0; /* running code value */
1377 int bits; /* bit index */
1378 int n; /* code index */
1379
1380 /* The distribution counts are first used to generate the code values
1381 * without bit reversal.
1382 */
1383 for(bits = 1; bits <= MAX_BITS; bits++) {
1384 next_code[bits] = code = (code + encoder->bl_count[bits-1]) << 1;
1385 }
1386 /* Check that the bit counts in bl_count are consistent. The last code
1387 * must be all ones.
1388 */
1389 Assert (code + encoder->bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1,
1390 "inconsistent bit counts");
1391 Tracev((stderr,"\ngen_codes: max_code %d ", max_code));
1392
1393 for(n = 0; n <= max_code; n++) {
1394 int len = tree[n].Len;
1395 if(len == 0)
1396 continue;
1397 /* Now reverse the bits */
1398 tree[n].Code = bi_reverse(next_code[len]++, len);
1399
1400 Tracec(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
1401 n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1));
1402 }
1403 }
1404
1405 /* ===========================================================================
1406 * Construct one Huffman tree and assigns the code bit strings and lengths.
1407 * Update the total bit length for the current block.
1408 * IN assertion: the field freq is set for all tree elements.
1409 * OUT assertions: the fields len and code are set to the optimal bit length
1410 * and corresponding code. The length opt_len is updated; static_len is
1411 * also updated if stree is not null. The field max_code is set.
1412 */
build_tree(DeflateHandler encoder,tree_desc near * desc)1413 local void build_tree(
1414 DeflateHandler encoder,
1415 tree_desc near *desc) /* the tree descriptor */
1416 {
1417 ct_data near *tree = desc->dyn_tree;
1418 ct_data near *stree = desc->static_tree;
1419 int elems = desc->elems;
1420 int n, m; /* iterate over heap elements */
1421 int max_code = -1; /* largest code with non zero frequency */
1422 int node = elems; /* next internal node of the tree */
1423
1424 /* Construct the initial heap, with least frequent element in
1425 * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
1426 * heap[0] is not used.
1427 */
1428 encoder->heap_len = 0;
1429 encoder->heap_max = HEAP_SIZE;
1430
1431 for(n = 0; n < elems; n++) {
1432 if(tree[n].Freq != 0) {
1433 encoder->heap[++encoder->heap_len] = max_code = n;
1434 encoder->depth[n] = 0;
1435 } else {
1436 tree[n].Len = 0;
1437 }
1438 }
1439
1440 /* The pkzip format requires that at least one distance code exists,
1441 * and that at least one bit should be sent even if there is only one
1442 * possible code. So to avoid special checks later on we force at least
1443 * two codes of non zero frequency.
1444 */
1445 while(encoder->heap_len < 2) {
1446 int new = encoder->heap[++encoder->heap_len] =
1447 (max_code < 2 ? ++max_code : 0);
1448 tree[new].Freq = 1;
1449 encoder->depth[new] = 0;
1450 encoder->opt_len--;
1451 if(stree)
1452 encoder->static_len -= stree[new].Len;
1453 /* new is 0 or 1 so it does not have extra bits */
1454 }
1455 desc->max_code = max_code;
1456
1457 /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
1458 * establish sub-heaps of increasing lengths:
1459 */
1460 for(n = encoder->heap_len/2; n >= 1; n--)
1461 pqdownheap(encoder, tree, n);
1462
1463 /* Construct the Huffman tree by repeatedly combining the least two
1464 * frequent nodes.
1465 */
1466 do {
1467 n = encoder->heap[SMALLEST];
1468 encoder->heap[SMALLEST] = encoder->heap[encoder->heap_len--];
1469 pqdownheap(encoder, tree, SMALLEST);
1470
1471 m = encoder->heap[SMALLEST]; /* m = node of next least frequency */
1472
1473 /* keep the nodes sorted by frequency */
1474 encoder->heap[--encoder->heap_max] = n;
1475 encoder->heap[--encoder->heap_max] = m;
1476
1477 /* Create a new node father of n and m */
1478 tree[node].Freq = tree[n].Freq + tree[m].Freq;
1479 encoder->depth[node] =
1480 (uch)(MAX(encoder->depth[n], encoder->depth[m]) + 1);
1481 tree[n].Dad = tree[m].Dad = (ush)node;
1482
1483 /* and insert the new node in the heap */
1484 encoder->heap[SMALLEST] = node++;
1485 pqdownheap(encoder, tree, SMALLEST);
1486
1487 } while(encoder->heap_len >= 2);
1488
1489 encoder->heap[--encoder->heap_max] = encoder->heap[SMALLEST];
1490
1491 /* At this point, the fields freq and dad are set. We can now
1492 * generate the bit lengths.
1493 */
1494 gen_bitlen(encoder, (tree_desc near *)desc);
1495
1496 /* The field len is now set, we can generate the bit codes */
1497 gen_codes (encoder, (ct_data near *)tree, max_code);
1498 }
1499
1500 /* ===========================================================================
1501 * Scan a literal or distance tree to determine the frequencies of the codes
1502 * in the bit length tree. Updates opt_len to take into account the repeat
1503 * counts. (The contribution of the bit length codes will be added later
1504 * during the construction of bl_tree.)
1505 */
scan_tree(DeflateHandler encoder,ct_data near * tree,int max_code)1506 local void scan_tree(
1507 DeflateHandler encoder,
1508 ct_data near *tree, /* the tree to be scanned */
1509 int max_code) /* and its largest code of non zero frequency */
1510 {
1511 int n; /* iterates over all tree elements */
1512 int prevlen = -1; /* last emitted length */
1513 int curlen; /* length of current code */
1514 int nextlen = tree[0].Len; /* length of next code */
1515 int count = 0; /* repeat count of the current code */
1516 int max_count = 7; /* max repeat count */
1517 int min_count = 4; /* min repeat count */
1518
1519 if(nextlen == 0)
1520 max_count = 138, min_count = 3;
1521 tree[max_code+1].Len = (ush)0xffff; /* guard */
1522
1523 for(n = 0; n <= max_code; n++) {
1524 curlen = nextlen; nextlen = tree[n+1].Len;
1525 if(++count < max_count && curlen == nextlen) {
1526 continue;
1527 } else if(count < min_count) {
1528 encoder->bl_tree[curlen].Freq += count;
1529 } else if(curlen != 0) {
1530 if(curlen != prevlen)
1531 encoder->bl_tree[curlen].Freq++;
1532 encoder->bl_tree[REP_3_6].Freq++;
1533 } else if(count <= 10) {
1534 encoder->bl_tree[REPZ_3_10].Freq++;
1535 } else {
1536 encoder->bl_tree[REPZ_11_138].Freq++;
1537 }
1538 count = 0; prevlen = curlen;
1539 if(nextlen == 0) {
1540 max_count = 138, min_count = 3;
1541 } else if(curlen == nextlen) {
1542 max_count = 6, min_count = 3;
1543 } else {
1544 max_count = 7, min_count = 4;
1545 }
1546 }
1547 }
1548
1549 /* ===========================================================================
1550 * Send a literal or distance tree in compressed form, using the codes in
1551 * bl_tree.
1552 */
send_tree(DeflateHandler encoder,ct_data near * tree,int max_code)1553 local void send_tree(
1554 DeflateHandler encoder,
1555 ct_data near *tree, /* the tree to be scanned */
1556 int max_code) /* and its largest code of non zero frequency */
1557 {
1558 int n; /* iterates over all tree elements */
1559 int prevlen = -1; /* last emitted length */
1560 int curlen; /* length of current code */
1561 int nextlen = tree[0].Len; /* length of next code */
1562 int count = 0; /* repeat count of the current code */
1563 int max_count = 7; /* max repeat count */
1564 int min_count = 4; /* min repeat count */
1565
1566 /* tree[max_code+1].Len = -1; */ /* guard already set */
1567 if(nextlen == 0) max_count = 138, min_count = 3;
1568
1569 for(n = 0; n <= max_code; n++) {
1570 curlen = nextlen; nextlen = tree[n+1].Len;
1571 if(++count < max_count && curlen == nextlen) {
1572 continue;
1573 } else if(count < min_count) {
1574 do { SEND_CODE(curlen, encoder->bl_tree); } while(--count != 0);
1575
1576 } else if(curlen != 0) {
1577 if(curlen != prevlen) {
1578 SEND_CODE(curlen, encoder->bl_tree);
1579 count--;
1580 }
1581 Assert(count >= 3 && count <= 6, " 3_6?");
1582 SEND_CODE(REP_3_6, encoder->bl_tree);
1583 send_bits(encoder, count-3, 2);
1584
1585 } else if(count <= 10) {
1586 SEND_CODE(REPZ_3_10, encoder->bl_tree);
1587 send_bits(encoder, count-3, 3);
1588
1589 } else {
1590 SEND_CODE(REPZ_11_138, encoder->bl_tree);
1591 send_bits(encoder, count-11, 7);
1592 }
1593 count = 0; prevlen = curlen;
1594 if(nextlen == 0) {
1595 max_count = 138, min_count = 3;
1596 } else if(curlen == nextlen) {
1597 max_count = 6, min_count = 3;
1598 } else {
1599 max_count = 7, min_count = 4;
1600 }
1601 }
1602 }
1603
1604 /* ===========================================================================
1605 * Construct the Huffman tree for the bit lengths and return the index in
1606 * bl_order of the last bit length code to send.
1607 */
build_bl_tree(DeflateHandler encoder)1608 local int build_bl_tree(DeflateHandler encoder)
1609 {
1610 int max_blindex; /* index of last bit length code of non zero freq */
1611
1612 /* Determine the bit length frequencies for literal and distance trees */
1613 scan_tree(encoder,
1614 (ct_data near *)encoder->dyn_ltree, encoder->l_desc.max_code);
1615 scan_tree(encoder,
1616 (ct_data near *)encoder->dyn_dtree, encoder->d_desc.max_code);
1617
1618 /* Build the bit length tree: */
1619 build_tree(encoder, (tree_desc near *)(&encoder->bl_desc));
1620 /* opt_len now includes the length of the tree representations, except
1621 * the lengths of the bit lengths codes and the 5+5+4 bits for the counts.
1622 */
1623
1624 /* Determine the number of bit length codes to send. The pkzip format
1625 * requires that at least 4 bit length codes be sent. (appnote.txt says
1626 * 3 but the actual value used is 4.)
1627 */
1628 for(max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) {
1629 if(encoder->bl_tree[bl_order[max_blindex]].Len != 0) break;
1630 }
1631 /* Update opt_len to include the bit length tree and counts */
1632 encoder->opt_len += 3*(max_blindex+1) + 5+5+4;
1633 Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld",
1634 encoder->opt_len, encoder->static_len));
1635
1636 return max_blindex;
1637 }
1638
1639 /* ===========================================================================
1640 * Send the header for a block using dynamic Huffman trees: the counts, the
1641 * lengths of the bit length codes, the literal tree and the distance tree.
1642 * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
1643 */
send_all_trees(DeflateHandler encoder,int lcodes,int dcodes,int blcodes)1644 local void send_all_trees(
1645 DeflateHandler encoder,
1646 int lcodes, int dcodes, int blcodes) /* number of codes for each tree */
1647 {
1648 int rank; /* index in bl_order */
1649
1650 Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes");
1651 Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES,
1652 "too many codes");
1653 Tracev((stderr, "\nbl counts: "));
1654 send_bits(encoder, lcodes-257, 5); /* not +255 as stated in appnote.txt */
1655 send_bits(encoder, dcodes-1, 5);
1656 send_bits(encoder, blcodes-4, 4); /* not -3 as stated in appnote.txt */
1657 for(rank = 0; rank < blcodes; rank++) {
1658 Tracev((stderr, "\nbl code %2d ", bl_order[rank]));
1659 send_bits(encoder, encoder->bl_tree[bl_order[rank]].Len, 3);
1660 }
1661
1662 /* send the literal tree */
1663 send_tree(encoder, (ct_data near *)encoder->dyn_ltree,lcodes-1);
1664
1665 /* send the distance tree */
1666 send_tree(encoder, (ct_data near *)encoder->dyn_dtree,dcodes-1);
1667 }
1668
1669 /* ===========================================================================
1670 * Determine the best encoding for the current block: dynamic trees, static
1671 * trees or store, and output the encoded block to the zip file.
1672 */
flush_block(DeflateHandler encoder,int eof)1673 local void flush_block(
1674 DeflateHandler encoder,
1675 int eof) /* true if this is the last block for a file */
1676 {
1677 ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
1678 int max_blindex; /* index of last bit length code of non zero freq */
1679 ulg stored_len; /* length of input block */
1680
1681 stored_len = (ulg)(encoder->strstart - encoder->block_start);
1682 encoder->flag_buf[encoder->last_flags] = encoder->flags; /* Save the flags for the last 8 items */
1683
1684 /* Construct the literal and distance trees */
1685 build_tree(encoder, (tree_desc near *)(&encoder->l_desc));
1686 Tracev((stderr, "\nlit data: dyn %ld, stat %ld",
1687 encoder->opt_len, encoder->static_len));
1688
1689 build_tree(encoder, (tree_desc near *)(&encoder->d_desc));
1690 Tracev((stderr, "\ndist data: dyn %ld, stat %ld",
1691 encoder->opt_len, encoder->static_len));
1692 /* At this point, opt_len and static_len are the total bit lengths of
1693 * the compressed block data, excluding the tree representations.
1694 */
1695
1696 /* Build the bit length tree for the above two trees, and get the index
1697 * in bl_order of the last bit length code to send.
1698 */
1699 max_blindex = build_bl_tree(encoder);
1700
1701 /* Determine the best encoding. Compute first the block length in bytes */
1702 opt_lenb = (encoder->opt_len +3+7)>>3;
1703 static_lenb = (encoder->static_len+3+7)>>3;
1704
1705 Trace((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u dist %u ",
1706 opt_lenb, encoder->opt_len,
1707 static_lenb, encoder->static_len, stored_len,
1708 encoder->last_lit, encoder->last_dist));
1709
1710 if(static_lenb <= opt_lenb)
1711 opt_lenb = static_lenb;
1712 if(stored_len + 4 <= opt_lenb /* 4: two words for the lengths */
1713 && encoder->block_start >= 0L) {
1714 unsigned int i;
1715 uch *p;
1716
1717 /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
1718 * Otherwise we can't have processed more than WSIZE input bytes since
1719 * the last block flush, because compression would have been
1720 * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
1721 * transform a block into a stored block.
1722 */
1723 send_bits(encoder, (STORED_BLOCK<<1)+eof, 3); /* send block type */
1724 bi_windup(encoder); /* align on byte boundary */
1725 put_short((ush)stored_len);
1726 put_short((ush)~stored_len);
1727
1728 /* copy block */
1729 p = &encoder->window[(unsigned)encoder->block_start];
1730 for(i = 0; i < stored_len; i++)
1731 put_byte(p[i]);
1732 } else if(static_lenb == opt_lenb) {
1733 send_bits(encoder, (STATIC_TREES<<1)+eof, 3);
1734 compress_block(encoder,
1735 (ct_data near *)encoder->static_ltree,
1736 (ct_data near *)encoder->static_dtree);
1737 } else {
1738 send_bits(encoder, (DYN_TREES<<1)+eof, 3);
1739 send_all_trees(encoder,
1740 encoder->l_desc.max_code+1,
1741 encoder->d_desc.max_code+1,
1742 max_blindex+1);
1743 compress_block(encoder,
1744 (ct_data near *)encoder->dyn_ltree,
1745 (ct_data near *)encoder->dyn_dtree);
1746 }
1747
1748 init_block(encoder);
1749
1750 if(eof)
1751 bi_windup(encoder);
1752 }
1753
1754 /* ===========================================================================
1755 * Save the match info and tally the frequency counts. Return true if
1756 * the current block must be flushed.
1757 */
ct_tally(DeflateHandler encoder,int dist,int lc)1758 local int ct_tally(
1759 DeflateHandler encoder,
1760 int dist, /* distance of matched string */
1761 int lc) /* match length-MIN_MATCH or unmatched char (if dist==0) */
1762 {
1763 encoder->l_buf[encoder->last_lit++] = (uch)lc;
1764 if(dist == 0) {
1765 /* lc is the unmatched char */
1766 encoder->dyn_ltree[lc].Freq++;
1767 } else {
1768 /* Here, lc is the match length - MIN_MATCH */
1769 dist--; /* dist = match distance - 1 */
1770 Assert((ush)dist < (ush)MAX_DIST &&
1771 (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) &&
1772 (ush)D_CODE(dist) < (ush)D_CODES, "ct_tally: bad match");
1773
1774 encoder->dyn_ltree[encoder->length_code[lc]+LITERALS+1].Freq++;
1775 encoder->dyn_dtree[D_CODE(dist)].Freq++;
1776
1777 encoder->d_buf[encoder->last_dist++] = (ush)dist;
1778 encoder->flags |= encoder->flag_bit;
1779 }
1780 encoder->flag_bit <<= 1;
1781
1782 /* Output the flags if they fill a byte: */
1783 if((encoder->last_lit & 7) == 0) {
1784 encoder->flag_buf[encoder->last_flags++] = encoder->flags;
1785 encoder->flags = 0;
1786 encoder->flag_bit = 1;
1787 }
1788 /* Try to guess if it is profitable to stop the current block here */
1789 if(encoder->compr_level > 2 && (encoder->last_lit & 0xfff) == 0) {
1790 /* Compute an upper bound for the compressed length */
1791 ulg out_length = (ulg)encoder->last_lit*8L;
1792 ulg in_length = (ulg)encoder->strstart - encoder->block_start;
1793 int dcode;
1794
1795 for(dcode = 0; dcode < D_CODES; dcode++) {
1796 out_length +=
1797 (ulg)encoder->dyn_dtree[dcode].Freq *
1798 (5L + extra_dbits[dcode]);
1799 }
1800 out_length >>= 3;
1801 Trace((stderr,"\nlast_lit %u, last_dist %u, in %ld, out ~%ld(%ld%%) ",
1802 encoder->last_lit, encoder->last_dist, in_length, out_length,
1803 100L - out_length*100L/in_length));
1804 if(encoder->last_dist < encoder->last_lit/2 &&
1805 out_length < in_length/2)
1806 return 1;
1807 }
1808 return (encoder->last_lit == LIT_BUFSIZE-1 ||
1809 encoder->last_dist == DIST_BUFSIZE);
1810 /* We avoid equality with LIT_BUFSIZE because of wraparound at 64K
1811 * on 16 bit machines and because stored blocks are restricted to
1812 * 64K-1 bytes.
1813 */
1814 }
1815
1816 /* ===========================================================================
1817 * Send the block data compressed using the given Huffman trees
1818 */
compress_block(DeflateHandler encoder,ct_data near * ltree,ct_data near * dtree)1819 local void compress_block(
1820 DeflateHandler encoder,
1821 ct_data near *ltree, /* literal tree */
1822 ct_data near *dtree) /* distance tree */
1823 {
1824 unsigned dist; /* distance of matched string */
1825 int lc; /* match length or unmatched char (if dist == 0) */
1826 unsigned lx = 0; /* running index in l_buf */
1827 unsigned dx = 0; /* running index in d_buf */
1828 unsigned fx = 0; /* running index in flag_buf */
1829 uch flag = 0; /* current flags */
1830 unsigned code; /* the code to send */
1831 int extra; /* number of extra bits to send */
1832
1833 if(encoder->last_lit != 0) do {
1834 if((lx & 7) == 0)
1835 flag = encoder->flag_buf[fx++];
1836 lc = encoder->l_buf[lx++];
1837 if((flag & 1) == 0) {
1838 SEND_CODE(lc, ltree); /* send a literal byte */
1839 Tracecv(isgraph(lc), (stderr," '%c' ", lc));
1840 } else {
1841 /* Here, lc is the match length - MIN_MATCH */
1842 code = encoder->length_code[lc];
1843 SEND_CODE(code+LITERALS+1, ltree); /* send the length code */
1844 extra = extra_lbits[code];
1845 if(extra != 0) {
1846 lc -= encoder->base_length[code];
1847 send_bits(encoder, lc, extra); /* send the extra length bits */
1848 }
1849 dist = encoder->d_buf[dx++];
1850 /* Here, dist is the match distance - 1 */
1851 code = D_CODE(dist);
1852 Assert (code < D_CODES, "bad d_code");
1853
1854 SEND_CODE(code, dtree); /* send the distance code */
1855 extra = extra_dbits[code];
1856 if(extra != 0) {
1857 dist -= encoder->base_dist[code];
1858 send_bits(encoder, dist, extra); /* send the extra distance bits */
1859 }
1860 } /* literal or match pair ? */
1861 flag >>= 1;
1862 } while(lx < encoder->last_lit);
1863
1864 SEND_CODE(END_BLOCK, ltree);
1865 }
1866
1867 /* ===========================================================================
1868 * Send a value on a given number of bits.
1869 * IN assertion: length <= 16 and value fits in length bits.
1870 */
1871 #define Buf_size (8 * sizeof(ush)) /* bit size of bi_buf */
send_bits(DeflateHandler encoder,int value,int length)1872 local void send_bits(
1873 DeflateHandler encoder,
1874 int value, /* value to send */
1875 int length) /* number of bits */
1876 {
1877 /* If not enough room in bi_buf, use (valid) bits from bi_buf and
1878 * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid))
1879 * unused bits in value.
1880 */
1881 if(encoder->bi_valid > Buf_size - length) {
1882 encoder->bi_buf |= (value << encoder->bi_valid);
1883 put_short(encoder->bi_buf);
1884 encoder->bi_buf = (ush)value >> (Buf_size - encoder->bi_valid);
1885 encoder->bi_valid += length - Buf_size;
1886 } else {
1887 encoder->bi_buf |= value << encoder->bi_valid;
1888 encoder->bi_valid += length;
1889 }
1890 }
1891
1892 /* ===========================================================================
1893 * Reverse the first len bits of a code, using straightforward code (a faster
1894 * method would use a table)
1895 * IN assertion: 1 <= len <= 15
1896 */
bi_reverse(unsigned code,int len)1897 local unsigned bi_reverse(
1898 unsigned code, /* the value to invert */
1899 int len) /* its bit length */
1900 {
1901 register unsigned res = 0;
1902 do {
1903 res |= code & 1;
1904 code >>= 1, res <<= 1;
1905 } while(--len > 0);
1906 return res >> 1;
1907 }
1908
1909 /* ===========================================================================
1910 * Write out any remaining bits in an incomplete byte.
1911 */
bi_windup(DeflateHandler encoder)1912 local void bi_windup(DeflateHandler encoder)
1913 {
1914 if(encoder->bi_valid > 8) {
1915 put_short(encoder->bi_buf);
1916 } else if(encoder->bi_valid > 0) {
1917 put_byte(encoder->bi_buf);
1918 }
1919 encoder->bi_buf = 0;
1920 encoder->bi_valid = 0;
1921 }
1922
qoutbuf(DeflateHandler encoder)1923 local void qoutbuf(DeflateHandler encoder)
1924 {
1925 if(encoder->outcnt != 0)
1926 {
1927 struct deflate_buff_queue *q;
1928 q = new_queue();
1929 if(encoder->qhead == NULL)
1930 encoder->qhead = encoder->qtail = q;
1931 else
1932 encoder->qtail = encoder->qtail->next = q;
1933 q->len = encoder->outcnt - encoder->outoff;
1934 memcpy(q->ptr, encoder->outbuf + encoder->outoff, q->len);
1935 encoder->outcnt = encoder->outoff = 0;
1936 }
1937 }
1938