1 /*
2     File lzx_layer.c, part of lzxcomp library
3     Copyright (C) 2002 Matthew T. Russotto
4 
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU Lesser General Public License as published by
7     the Free Software Foundation; version 2.1 only
8 
9     This program is distributed in the hope that it will be useful,
10     but WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12     GNU Lesser General Public License for more details.
13 
14     You should have received a copy of the GNU Lesser General Public License
15     along with this program; if not, write to the Free Software
16     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17 */
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <stdint.h>
21 #include <string.h> /* for memset on Linux */
22 #include <assert.h>
23 #include <math.h>
24 #include "lzx_config.h"
25 #ifdef NONSLIDE
26 #include "lz_nonslide.h"
27 #else
28 #include "hash_slide.h"
29 #include "lz_slide.h"
30 #endif
31 #include "lzx_compress.h"
32 #include "lzx_constants.h"
33 
34 /* Debugging defines useful during development.  All add diagnostic output
35    at various points in the system */
36 
37 /*#define DEBUG_MATCHES       *//* When matches come in from the LZ engine */
38 /*#define DEBUG_MATCHES_2     *//* When matches are being output           */
39 /*#define DEBUG_HUFFMAN       *//* When huffman trees are built            */
40 /*#define DEBUG_ENTROPY       *//* In entropy calculation                  */
41 /*#define DEBUG_LZ            *//* Uncompressed input reconstructed from
42                                  LZ engine                               */
43 /*#define DEBUG_BITBUF        *//* Raw output to upper layer               */
44 /*#define DEBUG_EXTRA_BITS    *//* Savings due to extra bits huffman tree  */
45 /*#define DEBUG_POSITION_SLOT_LOOKUP */
46 /*#define DEBUG_TREE_COMPRESSION   *//* During RLE compression of trees    */
47 
48 /* number of position slots given window_size-5 */
49 /* as corrected by Caie */
50 short num_position_slots[] = {30, 32, 34, 36, 38, 42, 50};
51 unsigned long position_base[51];
52 u_char extra_bits[52];
53 double rloge2;
54 
55 typedef struct ih_elem {
56   int freq;
57   short sym;
58   short pathlength;
59   struct ih_elem *parent;
60   struct ih_elem *left;
61   struct ih_elem *right;
62 } ih_elem;
63 
64 typedef struct h_elem {
65   int freq;
66   short sym;
67   short pathlength;
68   struct ih_elem *parent;
69   unsigned short code;
70 } h_elem;
71 
72 typedef struct huff_entry {
73   short codelength;
74   unsigned short code;
75 } huff_entry;
76 
cmp_leaves(const void * in_a,const void * in_b)77 static int cmp_leaves(const void *in_a, const void *in_b)
78 {
79   const struct h_elem *a = in_a;
80   const struct h_elem *b = in_b;
81 
82   if (!a->freq && b->freq)
83     return 1;
84   if (a->freq && !b->freq)
85     return -1;
86 
87   if (a->freq == b->freq)
88     return a->sym - b->sym;
89 
90   return a->freq - b->freq;
91 }
92 
93 static int
cmp_pathlengths(const void * in_a,const void * in_b)94 cmp_pathlengths(const void *in_a, const void *in_b)
95 {
96   const struct h_elem *a = in_a;
97   const struct h_elem *b = in_b;
98 
99   if (a->pathlength == b->pathlength)
100 #if 0
101     return a->sym - b->sym;
102 #else
103   /* see note on canonical pathlengths */
104     return b->sym - a->sym;
105 #endif
106   return b->pathlength - a->pathlength;
107 }
108 
109 /* standard huffman building algorithm */
110 static void
build_huffman_tree(int nelem,int max_code_length,int * freq,huff_entry * tree)111 build_huffman_tree(int nelem, int max_code_length, int *freq, huff_entry *tree)
112 {
113   h_elem *leaves = malloc(nelem * sizeof(h_elem));
114   ih_elem *inodes;
115   ih_elem *next_inode;
116   ih_elem *cur_inode;
117   h_elem *cur_leaf;
118   int leaves_left;
119   int nleaves;
120   int pathlength;
121   unsigned short cur_code;
122   short codes_too_long = 0;
123   ih_elem *f1, *f2;
124   int i;
125 
126   for (i = 0; i < nelem; i++) {
127     leaves[i].freq = freq[i];
128     leaves[i].sym = i;
129     leaves[i].pathlength = 0;
130   }
131   qsort(leaves, nelem, sizeof(h_elem), cmp_leaves);
132   for (leaves_left = 0; leaves_left < nelem; leaves_left++) {
133 #ifdef DEBUG_HUFFMAN
134     fprintf(stderr, "%3d: %3d '%c'\n", leaves_left, leaves[leaves_left].freq,
135 	    leaves[leaves_left].sym);
136 #endif
137     if (!leaves[leaves_left].freq) break;
138   }
139   nleaves = leaves_left;
140 
141   if (nleaves >= 2) {
142     inodes = malloc((nelem-1) * sizeof(ih_elem));
143     do {
144       if (codes_too_long) {
145 	for (leaves_left = 0; leaves_left < nelem; leaves_left++) {
146 	  if (!leaves[leaves_left].freq) break;
147 	  if (leaves[leaves_left].freq != 1) {
148 	    leaves[leaves_left].freq >>= 1;
149 	    codes_too_long = 0;
150 	  }
151 	}
152 	assert (!codes_too_long);
153       }
154 
155       cur_leaf = leaves;
156       next_inode = cur_inode = inodes;
157 
158       do {
159 	f1 = f2 = NULL;
160 	if (leaves_left &&
161 	    ((cur_inode == next_inode) ||
162 	     (cur_leaf->freq <= cur_inode->freq))) {
163 	  f1 = (ih_elem *)cur_leaf++;
164 	  leaves_left--;
165 	}
166 	else if (cur_inode != next_inode) {
167 	  f1 = cur_inode++;
168 	}
169 
170 	if (leaves_left &&
171 	    ((cur_inode == next_inode) ||
172 	     (cur_leaf->freq <= cur_inode->freq))) {
173 	  f2 = (ih_elem *)cur_leaf++;
174 	  leaves_left--;
175 	}
176 	else if (cur_inode != next_inode) {
177 	  f2 = cur_inode++;
178 	}
179 
180 #ifdef DEBUG_HUFFMAN
181 	fprintf(stderr, "%d %d\n", f1, f2);
182 #endif
183 	if (f1 && f2) {
184 	  next_inode->freq = f1->freq + f2->freq;
185 	  next_inode->sym = -1;
186 	  next_inode->left = f1;
187 	  next_inode->right = f2;
188 	  next_inode->parent = NULL;
189 	  f1->parent = next_inode;
190 	  f2->parent = next_inode;
191 	  if (f1->pathlength > f2->pathlength)
192 	    next_inode->pathlength = f1->pathlength + 1;
193 	  else
194 	    next_inode->pathlength = f2->pathlength + 1;
195 	  if (next_inode->pathlength > max_code_length) {
196 	    codes_too_long = 1;
197 	    break;
198 	  }
199 	  next_inode++;
200 	}
201       }
202       while (f1 && f2);
203     }
204     while (codes_too_long);
205 
206 #ifdef DEBUG_HUFFMAN
207     cur_inode = inodes;
208     while (cur_inode < next_inode) {
209       fprintf(stderr, "%d l: %3d%c  r: %3d%c  freq: %8d\n",
210 	      cur_inode - inodes,
211 	      (cur_inode->left->sym!=-1)?(((struct h_elem *)cur_inode->left)-leaves):(cur_inode->left-inodes),
212 	      (cur_inode->left->sym!=-1)?'l':'i',
213 	      (cur_inode->right->sym!=-1)?(((struct h_elem *)cur_inode->right)-leaves):(cur_inode->right-inodes),
214 	      (cur_inode->right->sym!=-1)?'l':'i',
215 	      (cur_inode->freq)
216 	      );
217       cur_inode++;
218     }
219 #endif
220 
221     /* now traverse tree depth-first */
222     cur_inode = next_inode - 1;
223     pathlength = 0;
224     cur_inode->pathlength = -1;
225     do {
226       /* precondition: at unmarked node*/
227       if (cur_inode->sym == -1) /*&& (cur_inode->left)*/ {
228 	/* left node of unmarked node is unmarked */
229 	cur_inode = cur_inode->left;
230 	cur_inode->pathlength = -1;
231 	pathlength++;
232       }
233       else {
234 	/* mark node */
235 	cur_inode->pathlength = pathlength;
236 #if 0
237 	if (cur_inode->right) {
238 	  /* right node of previously unmarked node is unmarked */
239 	  cur_inode = cur_inode->right;
240 	  cur_inode->pathlength = -1;
241 	  pathlength++;
242 	}
243 	else
244 #endif
245 	  {
246 
247 	    /* time to come up.  Keep coming up until an unmarked node is reached */
248 	    /* or the tree is exhausted */
249 	    do {
250 	      cur_inode = cur_inode->parent;
251 	      pathlength--;
252 	    }
253 	    while (cur_inode && (cur_inode->pathlength != -1));
254 	    if (cur_inode) {
255 	      /* found unmarked node; mark it and go right */
256 	      cur_inode->pathlength = pathlength;
257 	      cur_inode = cur_inode->right;
258 	      cur_inode->pathlength = -1;
259 	      pathlength++;
260 	      /* would be complex if cur_inode could be null here.  It can't */
261 	    }
262 	  }
263       }
264     }
265     while (cur_inode);
266 
267 #ifdef DEBUG_HUFFMAN
268     cur_inode = inodes;
269     while (cur_inode < next_inode) {
270       fprintf(stderr, "%d l: %3d%c  r: %3d%c  freq: %8d  pathlength %4d\n",
271 	      cur_inode - inodes,
272 	      (cur_inode->left->sym!=-1)?(((struct h_elem *)cur_inode->left)-leaves):(cur_inode->left-inodes),
273 	      (cur_inode->left->sym!=-1)?'l':'i',
274 	      (cur_inode->right->sym!=-1)?(((struct h_elem *)cur_inode->right)-leaves):(cur_inode->right-inodes),
275 	      (cur_inode->right->sym!=-1)?'l':'i',
276 	      (cur_inode->freq),
277 	      (cur_inode->pathlength)
278 	      );
279       cur_inode++;
280     }
281 #endif
282     free(inodes);
283 
284     /* the pathlengths are already in order, so this sorts by symbol */
285     qsort(leaves, nelem, sizeof(h_elem), cmp_pathlengths);
286 
287     /**
288 	Microsoft's second condition on its canonical huffman codes is:
289 
290 	For each level, starting at the deepest level of the tree and then
291 	moving upwards, leaf nodes must start as far left as possible. An
292 	alternative way of stating this constraint is that if any tree node
293 	has children then all tree nodes to the left of it with the same path
294 	length must also have children.
295 
296 	These 'alternatives' are not equivalent.  The latter alternative gives
297 	the common canonical code where the longest code is all zeros.  The former
298 	gives an opposite code where the longest code is all ones.  Microsoft uses the
299 	former alternative.
300     **/
301 
302 #if 0
303     pathlength = leaves[0].pathlength;
304     cur_code = 0;
305     for (i = 0; i < nleaves; i++) {
306       while (leaves[i].pathlength < pathlength) {
307 	assert(!(cur_code & 1));
308 	cur_code >>= 1;
309 	pathlength--;
310       }
311       leaves[i].code = cur_code;
312       cur_code++;
313     }
314 #else
315     pathlength = leaves[nleaves-1].pathlength;
316     assert(leaves[0].pathlength <= 16); /* this method cannot deal with bigger codes, though
317 					   the other canonical method can in some cases
318 					   (because it starts with zeros ) */
319     cur_code = 0;
320     for (i = nleaves - 1; i >= 0; i--) {
321       while (leaves[i].pathlength > pathlength) {
322 	cur_code <<= 1;
323 	pathlength++;
324       }
325       leaves[i].code = cur_code;
326       cur_code++;
327     }
328 #endif
329 
330 #ifdef DEBUG_HUFFMAN
331     for (i = 0; i < nleaves; i++) {
332       char code[18];
333       int j;
334 
335       cur_code = leaves[i].code;
336       code[leaves[i].pathlength] = 0;
337       for (j = leaves[i].pathlength-1; j >= 0; j--) {
338 	if (cur_code & 1) code[j] = '1';
339 	else code[j] = '0';
340 	cur_code >>= 1;
341       }
342       fprintf(stderr, "%3d: %3d %3d %-16.16s '%c'\n", i, leaves[i].freq, leaves[i].pathlength, code,
343 	      leaves[i].sym);
344     }
345 #endif
346   }
347   else if (nleaves == 1) {
348     /* 0 symbols is OK (not according to doc, but according to Caie) */
349     /* but if only one symbol is present, two symbols are required */
350     nleaves = 2;
351     leaves[0].pathlength = leaves[1].pathlength = 1;
352     if (leaves[1].sym > leaves[0].sym) {
353       leaves[1].code = 1;
354       leaves[0].code = 0;
355     }
356     else {
357       leaves[0].code = 1;
358       leaves[1].code = 0;
359     }
360   }
361 
362   memset(tree, 0, nelem * sizeof(huff_entry));
363   for (i = 0; i < nleaves; i++) {
364     tree[leaves[i].sym].codelength = leaves[i].pathlength;
365     tree[leaves[i].sym].code = leaves[i].code;
366   }
367 
368   free(leaves);
369 }
370 
371 /* from Stuart Caie's code -- I'm hoping this code is too small to encumber
372    this file.  If not, you could rip it out and hard-code the tables */
373 
lzx_init_static(void)374 static void lzx_init_static(void)
375 {
376   int i, j;
377 
378   if (extra_bits[49]) return;
379 
380   rloge2 = 1.0/log(2);
381   for (i=0, j=0; i <= 50; i += 2) {
382     extra_bits[i] = extra_bits[i+1] = j; /* 0,0,0,0,1,1,2,2,3,3... */
383     if ((i != 0) && (j < 17)) j++; /* 0,0,1,2,3,4...15,16,17,17,17,17... */
384   }
385 
386   for (i=0, j=0; i <= 50; i++) {
387     position_base[i] = j; /* 0,1,2,3,4,6,8,12,16,24,32,... */
388     j += 1 << extra_bits[i]; /* 1,1,1,1,2,2,4,4,8,8,16,16,32,32,... */
389   }
390 }
391 
392 struct lzx_data
393 {
394   void *in_arg;
395   void *out_arg;
396   void *mark_frame_arg;
397   lzx_get_bytes_t get_bytes;
398   lzx_at_eof_t at_eof;
399   lzx_put_bytes_t put_bytes;
400   lzx_mark_frame_t mark_frame;
401   struct lz_info *lzi;
402   /* a 'frame' is an 0x8000 byte thing.  Called that because otherwise
403      I'd confuse myself overloading 'block' */
404   int left_in_frame;
405   int left_in_block;
406   int R0, R1, R2;
407   int num_position_slots;
408   /* this is the LZX block size */
409   int block_size;
410   int *main_freq_table;
411   int length_freq_table[NUM_SECONDARY_LENGTHS];
412   int aligned_freq_table[LZX_ALIGNED_SIZE];
413   uint32_t *block_codes;
414   uint32_t *block_codesp;
415   huff_entry *main_tree;
416   huff_entry length_tree[NUM_SECONDARY_LENGTHS];
417   huff_entry aligned_tree[LZX_ALIGNED_SIZE];
418   int main_tree_size;
419   uint16_t bit_buf;
420   int bits_in_buf;
421   double main_entropy;
422   double last_ratio;
423   uint8_t *prev_main_treelengths;
424   uint8_t prev_length_treelengths[NUM_SECONDARY_LENGTHS];
425   uint32_t len_uncompressed_input;
426   uint32_t len_compressed_output;
427   short need_1bit_header;
428   short subdivide; /* 0 = don't subdivide, 1 = allowed, -1 = requested */
429 };
430 
431 static int
lzx_get_chars(lz_info * lzi,int n,u_char * buf)432 lzx_get_chars(lz_info *lzi, int n, u_char *buf)
433 {
434   /* force lz compression to stop after every block */
435   int chars_read;
436   int chars_pad;
437 
438   lzx_data *lzud = (lzx_data *)lzi->user_data;
439 #ifdef OLDFRAMING
440   if (lzud->subdivide < 0) return 0;
441   if (n > lzud->left_in_frame)
442     n = lzud->left_in_frame;
443   if (n > lzud->left_in_block)
444     n = lzud->left_in_block;
445 #endif
446   chars_read = lzud->get_bytes(lzud->in_arg, n, buf);
447 #ifdef OLDFRAMING
448   lzud->left_in_frame -= chars_read;
449   lzud->left_in_block -= chars_read;
450 #else
451   lzud->left_in_frame -= chars_read % LZX_FRAME_SIZE;
452   if (lzud->left_in_frame < 0)
453     lzud->left_in_frame += LZX_FRAME_SIZE;
454 #endif
455   if ((chars_read < n) && (lzud->left_in_frame)) {
456     chars_pad = n - chars_read;
457     if (chars_pad > lzud->left_in_frame) chars_pad = lzud->left_in_frame;
458     /* never emit a full frame of padding.  This prevents silliness when
459        lzx_compress is called when at EOF but EOF not yet detected */
460     if (chars_pad == LZX_FRAME_SIZE) chars_pad = 0;
461 #ifdef OLDFRAMING
462     if (chars_pad > lzud->left_in_block) chars_pad = lzud->left_in_block;
463 #endif
464     memset(buf + chars_read, 0, chars_pad);
465     lzud->left_in_frame -= chars_pad;
466 #ifdef OLDFRAMING
467     lzud->left_in_block -= chars_pad;
468 #endif
469     chars_read += chars_pad;
470   }
471   return chars_read;
472 }
473 
474 #ifdef NONSLIDE
find_match_at(lz_info * lzi,int loc,int match_len,int * match_locp)475 static int find_match_at(lz_info *lzi, int loc, int match_len, int *match_locp)
476 {
477   u_char *matchb;
478   u_char *nmatchb;
479   u_char *c1, *c2;
480   int j;
481 
482   if (-*match_locp == loc) return -1;
483   if (loc < match_len) return -1;
484 
485   matchb = lzi->block_buf + lzi->block_loc + *match_locp;
486   nmatchb = lzi->block_buf + lzi->block_loc - loc;
487   c1 = matchb;
488   c2 = nmatchb;
489   for (j = 0; j < match_len; j++) {
490     if (*c1++ != *c2++) break;
491   }
492   if (j == match_len) {
493 #ifdef DEBUG_MATCHES
494     fprintf(stderr, "match found %d, old = %d new = %d len = %d\n", lzi->cur_loc, -*match_locp, loc, match_len);
495 #endif
496     *match_locp = -loc;
497     return 0;
498   }
499   return -1;
500 }
501 #else
find_match_at(lz_info * lzi,int loc,int match_len,int * match_locp)502 static int find_match_at(lz_info *lzi, int loc, int match_len, int *match_locp)
503 {
504   u_char *matchb;
505   u_char *nmatchb;
506   u_char *c1, *c2;
507   int j;
508 
509   if (-*match_locp == loc) return -1;
510   if (loc < match_len) return -1;
511 
512   matchb = lzi->slide_bufp + *match_locp;
513   if (matchb < lzi->slide_buf) matchb += lzi->slide_buf_size;
514   nmatchb = lzi->slide_bufp - loc;
515   if (nmatchb < lzi->slide_buf) nmatchb += lzi->slide_buf_size;
516   c1 = matchb;
517   c2 = nmatchb;
518   for (j = 0; j < match_len; j++) {
519     if (*c1++ != *c2++) break;
520     if (c1 == lzi->slide_bufe) c1 = lzi->slide_buf;
521     if (c2 == lzi->slide_bufe) c2 = lzi->slide_buf;
522   }
523   if (j == match_len) {
524 #ifdef DEBUG_MATCHES
525     fprintf(stderr, "match found %d, old = %d new = %d len = %d\n", lzi->cur_loc, -*match_locp, loc, match_len);
526 #endif
527     *match_locp = -loc;
528     return 0;
529   }
530   return -1;
531 }
532 #endif
check_entropy(lzx_data * lzud,int main_index)533 static void check_entropy(lzx_data *lzud, int main_index)
534 {
535   /* entropy = - sum_alphabet P(x) * log2 P(x) */
536   /* entropy = - sum_alphabet f(x)/N * log2 (f(x)/N) */
537   /* entropy = - 1/N sum_alphabet f(x) * (log2 f(x) - log2 N) */
538   /* entropy = - 1/N (sum_alphabet f(x) * log2 f(x)) - sum_alphabet f(x) log2 N */
539   /* entropy = - 1/N (sum_alphabet f(x) * log2 f(x)) - log2 N sum_alphabet f(x)  */
540   /* entropy = - 1/N (sum_alphabet f(x) * log2 f(x)) - N * log2 N   */
541 
542   /* entropy = - 1/N ((sum_alphabet f(x) * log2 f(x) ) - N * log2 N) */
543   /* entropy = - 1/N ((sum_alphabet f(x) * ln f(x) * 1/ln 2) - N * ln N * 1/ln 2) */
544   /* entropy = 1/(N ln 2) (N * ln N - (sum_alphabet f(x) * ln f(x))) */
545   /* entropy = 1/(N ln 2) (N * ln N + (sum_alphabet -f(x) * ln f(x))) */
546 
547   /* entropy = 1/(N ln 2) ( sum_alphabet ln N * f(x) + (sum_alphabet -f(x) * ln f(x))) */
548   /* entropy = 1/(N ln 2) ( sum_alphabet ln N * f(x) +  (-f(x) * ln f(x))) */
549   /* entropy = -1/(N ln 2) ( sum_alphabet -ln N * f(x) +  (f(x) * ln f(x))) */
550   /* entropy = -1/(N ln 2) ( sum_alphabet f(x)(- ln N  + ln f(x))) */
551   /* entropy = -1/(N ln 2) ( sum_alphabet f(x)(ln f(x)/N)) */
552   /* entropy = -1/N  ( sum_alphabet (1/(ln 2))f(x)(ln f(x)/N)) */
553   /* entropy = -1/N  ( sum_alphabet f(x)(log2 f(x)/N)) */
554   /* entropy = -  ( sum_alphabet f(x)/N(log2 f(x)/N)) */
555   /* entropy = -  ( sum_alphabet P(x)(log2 P(x))) */
556 
557 
558     double freq;
559     double n_ln_n;
560     double rn_ln2;
561     double cur_ratio;
562     int n;
563 
564     /* delete old entropy accumulation */
565     if (lzud->main_freq_table[main_index] != 1) {
566       freq = (double)lzud->main_freq_table[main_index]-1;
567       lzud->main_entropy += freq * log(freq);
568     }
569     /* add new entropy accumulation */
570     freq = (double)lzud->main_freq_table[main_index];
571     lzud->main_entropy -= freq * log(freq);
572     n = lzud->block_codesp - lzud->block_codes;
573 
574     if (((n & 0xFFF) == 0) && (lzud->left_in_block >= 0x1000)) {
575       n_ln_n = (double)n * log((double)n);
576       rn_ln2 = rloge2 / (double)n;
577       cur_ratio = (n * rn_ln2 *(n_ln_n + lzud->main_entropy) + 24 + 3 * 80 + NUM_CHARS + (lzud->main_tree_size-NUM_CHARS)*3 + NUM_SECONDARY_LENGTHS ) / (double)n;
578 #ifdef DEBUG_ENTROPY
579       fprintf(stderr, "n = %d\n", n);
580       fprintf(stderr, "main entropy = %f\n", rn_ln2 *(n_ln_n + lzud->main_entropy) );
581       fprintf(stderr, "compression ratio (raw) = %f\n", 100.0 * rn_ln2 *(n_ln_n + lzud->main_entropy) /9.0 );
582       fprintf(stderr, "compression ratio (ovh) = %f\n", 100.0 * cur_ratio/9.0);
583 #endif
584       if (cur_ratio > lzud->last_ratio) {
585 #ifdef DEBUG_ENTROPY
586 	fprintf(stderr, "resetting huffman tables at %d\n", n);
587 #endif
588 	lzud->subdivide = -1;
589 	lz_stop_compressing(lzud->lzi);
590       }
591       lzud->last_ratio = cur_ratio;
592     }
593 }
594 
595 static int
lzx_output_match(lz_info * lzi,int match_pos,int match_len)596 lzx_output_match(lz_info *lzi, int match_pos, int match_len)
597 {
598   lzx_data *lzud = (lzx_data *)lzi->user_data;
599   uint32_t formatted_offset;
600   uint32_t position_footer;
601   uint8_t length_footer;
602   uint8_t length_header;
603   uint16_t len_pos_header;
604   int position_slot;
605   short btdt;
606 
607 #ifdef DEBUG_LZ
608   {
609     int i;
610     int pos;
611     for (i = 0; i < match_len; i++) {
612 
613 #ifdef NONSLIDE
614       pos = match_pos + lzi->block_loc + i;
615       fprintf(stderr, "%c", lzi->block_buf[pos]);
616 #else
617       pos = match_pos + lzi->front_offset + i;
618       if (pos > lzi->slide_buf_size)
619 	pos -= lzi->slide_buf_size;
620       fprintf(stderr, "%c", lzi->slide_buf[pos]);
621 #endif
622     }
623   }
624 #endif
625   position_footer = 0;
626   btdt = 0;
627  testforr:
628   if (match_pos == -lzud->R0) {
629     match_pos = 0;
630     formatted_offset = 0;
631     position_slot = 0;
632   }
633   else if (match_pos == -lzud->R1) {
634     lzud->R1 = lzud->R0;
635     lzud->R0 = -match_pos;
636     match_pos = 1;
637     formatted_offset = 1;
638     position_slot = 1;
639   }
640   else if (match_pos == -lzud->R2) {
641     lzud->R2 = lzud->R0;
642     lzud->R0 = -match_pos;
643     match_pos = 2;
644     formatted_offset = 2;
645     position_slot = 2;
646   }
647   else {
648     if (!btdt) {
649       btdt = 1;
650       if (find_match_at(lzi, lzud->R0, match_len, &match_pos) == 0)
651 	goto testforr;
652       if (find_match_at(lzi, lzud->R1, match_len, &match_pos) == 0)
653 	goto testforr;
654       if (find_match_at(lzi, lzud->R2, match_len, &match_pos) == 0)
655 	goto testforr;
656     }
657 
658     formatted_offset = -match_pos + 2;
659 
660     if ((match_len < 3) ||
661 	((formatted_offset >= 64) && (match_len < 4)) ||
662 	((formatted_offset >= 2048) && (match_len < 5)) ||
663 	((formatted_offset >= 65536) && (match_len < 6))) {
664       /* reject matches where extra_bits will likely be bigger than just outputting
665 	 literals.  The numbers are basically derived through guessing
666          and trial and error */
667       return -1; /* reject the match */
668     }
669 
670     lzud->R2 = lzud->R1;
671     lzud->R1 = lzud->R0;
672     lzud->R0 = -match_pos;
673 
674   /* calculate position base using binary search of table; if log2 can be
675      done in hardware, approximation might work;
676      trunc(log2(formatted_offset*formatted_offset)) gets either the proper
677      position slot or the next one, except for slots 0, 1, and 39-49
678 
679      Slots 0-1 are handled by the R0-R1 procedures
680 
681      Slots 36-49 (formatted_offset >= 262144) can be found by
682      (formatted_offset/131072) + 34 ==
683      (formatted_offset >> 17) + 34;
684   */
685     if (formatted_offset >= 262144) {
686       position_slot = (formatted_offset >> 17) + 34;
687     }
688     else {
689       int left, right, mid;
690 
691       left = 3;
692       right = lzud->num_position_slots - 1;
693       position_slot = -1;
694       while (left <= right) {
695 	mid = (left + right)/2;
696 	if ((position_base[mid] <= formatted_offset) &&
697 	    position_base[mid+1] > formatted_offset) {
698 	  position_slot = mid;
699 	  break;
700 	}
701 #if 0
702 	fprintf(stderr, "BEFORE: %06x %06x %06x %06x\n",
703 		position_base[left], position_base[mid],
704 		formatted_offset, position_base[right]);
705 #endif
706 	if (formatted_offset > position_base[mid])
707 	  /* too low */
708 	  left = mid + 1;
709 	else /* too high */
710 	  right = mid;
711 #if 0
712 	fprintf(stderr, "AFTER : %06x %06x %06x %06x\n",
713 		position_base[left], position_base[mid],
714 		formatted_offset, position_base[right]);
715 #endif
716       }
717 #ifdef DEBUG_POSITION_SLOT_LOOKUP
718       if (position_slot < 0) {
719 	fprintf(stderr, "lmr npr: %d %d %d %d\n", left, mid, right, lzud->num_position_slots);
720 	fprintf(stderr, "AFTER : %07d %07d %07d %07d\n",
721 		position_base[left], position_base[mid],
722 		formatted_offset, position_base[right]);
723 	fprintf(stderr, "(%d, %d, %d, %d, %d)\n", match_pos, match_len, formatted_offset, position_slot, position_footer);
724       }
725 #endif
726       assert(position_slot >= 0);
727       /* FIXME precalc extra_mask table */
728     }
729     position_footer = ((1UL << extra_bits[position_slot]) - 1) & formatted_offset;
730   }
731 #ifdef DEBUG_MATCHES
732 #ifdef NONSLIDE
733   fprintf(stderr, "(%08x, %d, %d, %d, %d, %d)\n", lzud->lzi->cur_loc , match_pos, match_len, formatted_offset, position_slot, position_footer);
734 #else
735   fprintf(stderr, "(%08x, %d, %d, %d, %d, %d)\n", lzud->lzi->cur_loc - lzud->lzi->chars_in_match , match_pos, match_len, formatted_offset, position_slot, position_footer);
736 #endif
737 #endif
738   /* match length = 8 bits */
739   /* position_slot = 6 bits */
740   /* position_footer = 17 bits */
741   /* total = 31 bits */
742   /* plus one to say whether it's a literal or not */
743   *lzud->block_codesp++ = 0x80000000 | /* bit 31 in intelligent bit ordering */
744     (position_slot << 25) | /* bits 30-25 */
745     (position_footer << 8) | /* bits 8-24 */
746     (match_len - MIN_MATCH); /* bits 0-7 */
747 
748   if (match_len < (NUM_PRIMARY_LENGTHS + MIN_MATCH)) {
749     length_header = match_len - MIN_MATCH;
750     /*    length_footer = 255; */ /* not necessary */
751   }
752   else {
753     length_header = NUM_PRIMARY_LENGTHS;
754     length_footer = match_len - (NUM_PRIMARY_LENGTHS + MIN_MATCH);
755     lzud->length_freq_table[length_footer]++;
756   }
757   len_pos_header = (position_slot << 3) | length_header;
758   lzud->main_freq_table[len_pos_header + NUM_CHARS]++;
759   if (extra_bits[position_slot] >= 3) {
760     lzud->aligned_freq_table[position_footer & 7]++;
761   }
762 #ifndef OLDFRAMING
763   lzud->left_in_block -= match_len;
764 #endif
765   if (lzud->subdivide)
766     check_entropy(lzud, len_pos_header + NUM_CHARS);
767   return 0; /* accept the match */
768 }
769 
770 static void
lzx_output_literal(lz_info * lzi,u_char ch)771 lzx_output_literal(lz_info *lzi, u_char ch)
772 {
773   lzx_data *lzud = (lzx_data *)lzi->user_data;
774 
775 #ifndef OLDFRAMING
776   lzud->left_in_block--;
777 #endif
778   *lzud->block_codesp++ = ch;
779 #ifdef DEBUG_LZ
780   fprintf(stderr, "%c", ch);
781 #endif
782   lzud->main_freq_table[ch]++;
783   if (lzud->subdivide)
784     check_entropy(lzud, ch);
785 }
786 
lzx_write_bits(lzx_data * lzxd,int nbits,uint32_t bits)787 static void lzx_write_bits(lzx_data *lzxd, int nbits, uint32_t bits)
788 {
789   int cur_bits;
790   int shift_bits;
791   int rshift_bits;
792   uint16_t mask_bits;
793 
794 #ifdef DEBUG_BITBUF
795   fprintf(stderr, "WB: %2d %08x\n", nbits, bits);
796 #endif
797   cur_bits = lzxd->bits_in_buf;
798   while ((cur_bits + nbits) >= 16) {
799     shift_bits = 16 - cur_bits;
800     rshift_bits = nbits - shift_bits;
801     if (shift_bits == 16) {
802       lzxd->bit_buf = (bits>>rshift_bits) & 0xFFFF;
803     }
804     else {
805       mask_bits = (1U << shift_bits) - 1;
806       lzxd->bit_buf <<= shift_bits;
807       lzxd->bit_buf |= (bits>>rshift_bits) & mask_bits;
808     }
809 #ifdef DEBUG_BITBUF
810     fprintf(stderr, "WBB: %04x\n", lzxd->bit_buf);
811 #endif
812 #ifdef LZX_BIG_ENDIAN
813     lzxd->bit_buf = ((lzxd->bit_buf & 0xFF)<<8) | (lzxd->bit_buf >> 8);
814 #endif
815     lzxd->put_bytes(lzxd->out_arg, sizeof(lzxd->bit_buf), &lzxd->bit_buf);
816     lzxd->len_compressed_output += sizeof(lzxd->bit_buf);
817     lzxd->bit_buf = 0;
818     nbits -= shift_bits;
819     cur_bits = 0;
820   }
821   /* (cur_bits + nbits) < 16.  If nbits = 0, we're done.
822      otherwise move bits in */
823   shift_bits = nbits;
824   mask_bits = (1U << shift_bits) - 1;
825   lzxd->bit_buf <<= shift_bits;
826   lzxd->bit_buf |= bits & mask_bits;
827   cur_bits += nbits;
828 
829 #ifdef DEBUG_BITBUF
830   fprintf(stderr, "OBB: %2d %04x\n", cur_bits, lzxd->bit_buf);
831 #endif
832   lzxd->bits_in_buf = cur_bits;
833 }
834 
lzx_align_output(lzx_data * lzxd)835 static void lzx_align_output(lzx_data *lzxd)
836 {
837   if (lzxd->bits_in_buf) {
838     lzx_write_bits(lzxd, 16 - lzxd->bits_in_buf, 0);
839   }
840   if (lzxd->mark_frame)
841     lzxd->mark_frame(lzxd->mark_frame_arg, lzxd->len_uncompressed_input, lzxd->len_compressed_output);
842 }
843 
844 static void
lzx_write_compressed_literals(lzx_data * lzxd,int block_type)845 lzx_write_compressed_literals(lzx_data *lzxd, int block_type)
846 {
847   uint32_t *cursor = lzxd->block_codes;
848   uint32_t *endp = lzxd->block_codesp;
849   uint16_t position_slot;
850   uint32_t position_footer;
851   uint32_t match_len_m2; /* match length minus 2, which is MIN_MATCH */
852   uint32_t verbatim_bits;
853   uint32_t block_code;
854   uint16_t length_header;
855   uint16_t length_footer;
856   uint16_t len_pos_header;
857   huff_entry *huffe;
858   int frame_count = (lzxd->len_uncompressed_input % LZX_FRAME_SIZE);
859 
860   lzxd->len_uncompressed_input -= frame_count; /* will be added back in later */
861   while (cursor < endp) {
862     block_code = *cursor++;
863     if (block_code & 0x80000000) {
864       /*
865        *    0x80000000 |                bit 31 in intelligent bit ordering
866        * (position_slot << 25) |        bits 30-25
867        * (position_footer << 8) |       bits 8-24
868        * (match_len - MIN_MATCH);       bits 0-7
869        *
870        */
871 
872       match_len_m2 = block_code & 0xFF; /* 8 bits */
873       position_footer = (block_code >> 8)& 0x1FFFF; /* 17 bits */
874       position_slot = (block_code >> 25) & 0x3F; /* 6 bits */
875 
876 #ifdef DEBUG_MATCHES_2
877       fprintf(stderr, "%08x, %3d %2d %d\n", lzxd->len_uncompressed_input + frame_count, match_len_m2, position_slot, position_footer);
878 #endif
879       if (match_len_m2 < NUM_PRIMARY_LENGTHS) {
880 	length_header = match_len_m2;
881 	length_footer = 255; /* personal encoding for NULL */
882       }
883       else {
884 	length_header = NUM_PRIMARY_LENGTHS;
885 	length_footer = match_len_m2 - NUM_PRIMARY_LENGTHS;
886       }
887       len_pos_header = (position_slot << 3) | length_header;
888       huffe = &lzxd->main_tree[len_pos_header+NUM_CHARS];
889       lzx_write_bits(lzxd, huffe->codelength, huffe->code);
890       if (length_footer != 255) {
891 	huffe = &lzxd->length_tree[length_footer];
892 	lzx_write_bits(lzxd, huffe->codelength, huffe->code);
893       }
894       if ((block_type == LZX_ALIGNED_OFFSET_BLOCK) && (extra_bits[position_slot] >= 3)) {
895 	/* aligned offset block and code */
896 	verbatim_bits = position_footer >> 3;
897 	lzx_write_bits(lzxd, extra_bits[position_slot] - 3, verbatim_bits);
898 	huffe = &lzxd->aligned_tree[position_footer&7];
899 	lzx_write_bits(lzxd, huffe->codelength, huffe->code);
900       }
901       else {
902 	verbatim_bits = position_footer;
903 	lzx_write_bits(lzxd, extra_bits[position_slot], verbatim_bits);
904       }
905       frame_count += match_len_m2 + 2;
906     }
907     else {
908       /* literal */
909       assert(block_code < NUM_CHARS);
910       huffe = &lzxd->main_tree[block_code];
911       lzx_write_bits(lzxd, huffe->codelength, huffe->code);
912       frame_count++;
913     }
914     if (frame_count == LZX_FRAME_SIZE) {
915       lzxd->len_uncompressed_input += frame_count;
916       lzx_align_output(lzxd);
917       frame_count = 0;
918     }
919 #ifdef DEBUG_MATCHES_2
920     if (frame_count > LZX_FRAME_SIZE) {
921       fprintf(stderr, "uncomp_len = %x, frame_count = %x, block_code = %08x, match_len_m2 = %d", lzxd->len_uncompressed_input, frame_count, block_code, match_len_m2);
922     }
923 #endif
924     assert (frame_count < LZX_FRAME_SIZE);
925   }
926   lzxd->len_uncompressed_input += frame_count;
927 }
928 
929 static int
lzx_write_compressed_tree(struct lzx_data * lzxd,struct huff_entry * tree,uint8_t * prevlengths,int treesize)930 lzx_write_compressed_tree(struct lzx_data *lzxd,
931 			  struct huff_entry *tree, uint8_t *prevlengths,
932 			  int treesize)
933 {
934   u_char *codes;
935   u_char *runs;
936   int freqs[LZX_PRETREE_SIZE];
937   int cur_run;
938   int last_len;
939   huff_entry pretree[20];
940   u_char *codep;
941   u_char *codee;
942   u_char *runp;
943   int excess;
944   int i;
945   int cur_code;
946 
947   codep = codes = malloc(treesize*sizeof(char));
948   runp = runs = malloc(treesize*sizeof(char));
949   memset(freqs, 0, sizeof(freqs));
950   cur_run = 1;
951   last_len = tree[0].codelength;
952   for (i = 1; i <= treesize; i++) {
953     if ((i == treesize) || (tree[i].codelength != last_len)) {
954       if (last_len == 0) {
955 	while (cur_run >= 20) {
956 	  excess =  cur_run - 20;
957 	  if (excess > 31) excess = 31;
958 	  *codep++ = 18;
959 	  *runp++ = excess;
960 	  cur_run -= excess + 20;
961 	  freqs[18]++;
962 	}
963 	while (cur_run >= 4) {
964 	  excess =  cur_run - 4;
965 	  if (excess > 15) excess = 15;
966 	  *codep++ = 17;
967 	  *runp++ = excess;
968 	  cur_run -= excess + 4;
969 	  freqs[17]++;
970 	}
971 	while (cur_run > 0) {
972 	  *codep = prevlengths[i - cur_run];
973 	  freqs[*codep++]++;
974 	  *runp++ = 0; /* not necessary */
975 	  cur_run--;
976 	}
977       }
978       else {
979 	while (cur_run >= 4) {
980 	  if (cur_run == 4) excess = 0;
981 	  else excess = 1;
982 	  *codep++ = 19;
983 	  *runp++ = excess;
984 	  freqs[19]++;
985 	  /* right, MS lies again.  Code is NOT
986 	     prev_len + len (mod 17), it's prev_len - len (mod 17)*/
987 	  *codep = prevlengths[i-cur_run] - last_len;
988 	  if (*codep > 16) *codep += 17;
989 	  freqs[*codep++]++;
990 	  *runp++ = 0; /* not necessary */
991 	  cur_run -= excess+4;
992 	}
993 	while (cur_run > 0) {
994 	  *codep = prevlengths[i-cur_run] - last_len;
995 	  if (*codep > 16) *codep += 17;
996 	  *runp++ = 0; /* not necessary */
997 	  cur_run--;
998 	  freqs[*codep++]++;
999 	}
1000       }
1001       if (i != treesize)
1002 	last_len = tree[i].codelength;
1003       cur_run = 0;
1004     }
1005     cur_run++;
1006   }
1007   codee = codep;
1008 #ifdef DEBUG_TREE_COMPRESSION
1009   *codep++ = 255;
1010   *runp++ = 255;
1011   fprintf(stderr, "num:  len  code  run\n");
1012   for (i = 0; i < treesize; i++) {
1013     fprintf(stderr, "%3d:  %2d   %2d    %2d\n", i, tree[i].codelength, codes[i], runs[i]);
1014   }
1015 #endif
1016   /* now create the huffman table and write out the pretree */
1017   build_huffman_tree(LZX_PRETREE_SIZE, 16, freqs, pretree);
1018   for (i = 0; i < LZX_PRETREE_SIZE; i++) {
1019     lzx_write_bits(lzxd, 4, pretree[i].codelength);
1020   }
1021   codep = codes;
1022   runp = runs;
1023   cur_run = 0;
1024   while (codep < codee) {
1025     cur_code = *codep++;
1026     lzx_write_bits(lzxd, pretree[cur_code].codelength, pretree[cur_code].code);
1027     if (cur_code == 17) {
1028       cur_run += *runp + 4;
1029       lzx_write_bits(lzxd, 4, *runp);
1030     }
1031     else if (cur_code == 18) {
1032       cur_run += *runp + 20;
1033       lzx_write_bits(lzxd, 5, *runp);
1034     }
1035     else if (cur_code == 19) {
1036       cur_run += *runp + 4;
1037       lzx_write_bits(lzxd, 1, *runp);
1038       cur_code = *codep++;
1039       lzx_write_bits(lzxd, pretree[cur_code].codelength, pretree[cur_code].code);
1040       runp++;
1041     }
1042     else {
1043       cur_run++;
1044     }
1045     runp++;
1046   }
1047   free(codes);
1048   free(runs);
1049   return 0;
1050 }
1051 
1052 void
lzx_reset(lzx_data * lzxd)1053 lzx_reset(lzx_data *lzxd)
1054 {
1055   lzxd->need_1bit_header = 1;
1056   lzxd->R0 = lzxd->R1 = lzxd->R2 = 1;
1057   memset(lzxd->prev_main_treelengths, 0, lzxd->main_tree_size * sizeof(uint8_t));
1058   memset(lzxd->prev_length_treelengths, 0, NUM_SECONDARY_LENGTHS * sizeof(uint8_t));
1059   lz_reset(lzxd->lzi);
1060 }
1061 
lzx_compress_block(lzx_data * lzxd,int block_size,int subdivide)1062 int lzx_compress_block(lzx_data *lzxd, int block_size, int subdivide)
1063 {
1064   int i;
1065   uint32_t written_sofar = 0;
1066   int block_type;
1067   long uncomp_bits;
1068   long comp_bits;
1069   long comp_bits_ovh;
1070   long uncomp_length;
1071 
1072   if ((lzxd->block_size != block_size) || (lzxd->block_codes == NULL)) {
1073     if (lzxd->block_codes != NULL) free(lzxd->block_codes);
1074     lzxd->block_size = block_size;
1075     lzxd->block_codes =  malloc(block_size * sizeof(uint32_t));
1076   }
1077   lzxd->subdivide = subdivide?1:0;
1078 
1079   lzxd->left_in_block = block_size;
1080   lzxd->left_in_frame = LZX_FRAME_SIZE;
1081   lzxd->main_entropy = 0.0;
1082   lzxd->last_ratio = 9999999.0;
1083   lzxd->block_codesp = lzxd->block_codes;
1084 
1085   memset(lzxd->length_freq_table, 0, NUM_SECONDARY_LENGTHS * sizeof(int));
1086   memset(lzxd->main_freq_table, 0, lzxd->main_tree_size * sizeof(int));
1087   memset(lzxd->aligned_freq_table, 0, LZX_ALIGNED_SIZE * sizeof(int));
1088   do {
1089     lz_compress(lzxd->lzi, lzxd->left_in_block);
1090     if (lzxd->left_in_frame == 0)
1091       lzxd->left_in_frame = LZX_FRAME_SIZE;
1092 
1093     if ((lzxd->subdivide<0) || !lzxd->left_in_block ||
1094 	(!lz_left_to_process(lzxd->lzi) && lzxd->at_eof(lzxd->in_arg))) {
1095       /* now one block is LZ-analyzed. */
1096       /* time to write it out */
1097       uncomp_length = lzxd->block_size - lzxd->left_in_block - written_sofar;
1098       /* uncomp_length will sometimes be 0 when input length is
1099 	 an exact multiple of frame size */
1100       if (uncomp_length == 0)
1101 	  continue;
1102       if (lzxd->subdivide < 0) {
1103 #ifdef DEBUG_ENTROPY
1104 	fprintf(stderr, "subdivided\n");
1105 #endif
1106 	lzxd->subdivide = 1;
1107       }
1108 
1109       if (lzxd->need_1bit_header) {
1110 	/* one bit Intel preprocessing header */
1111 	/* always 0 because this implementation doesn't do Intel preprocessing */
1112 	lzx_write_bits(lzxd, 1, 0);
1113 	lzxd->need_1bit_header = 0;
1114       }
1115 
1116       /* handle extra bits */
1117       uncomp_bits = comp_bits = 0;
1118       build_huffman_tree(LZX_ALIGNED_SIZE, 7, lzxd->aligned_freq_table, lzxd->aligned_tree);
1119       for (i = 0; i < LZX_ALIGNED_SIZE; i++) {
1120 	uncomp_bits += lzxd->aligned_freq_table[i]* 3;
1121 	comp_bits += lzxd->aligned_freq_table[i]* lzxd->aligned_tree[i].codelength;
1122       }
1123       comp_bits_ovh = comp_bits + LZX_ALIGNED_SIZE * 3;
1124       if (comp_bits_ovh < uncomp_bits)
1125       	block_type = LZX_ALIGNED_OFFSET_BLOCK;
1126       else
1127 	block_type = LZX_VERBATIM_BLOCK;
1128 
1129 #ifdef DEBUG_EXTRA_BITS
1130       fprintf(stderr, "Extra bits uncompressed: %5d  compressed:  %5d  compressed w/overhead %5d gain/loss %5d\n", uncomp_bits, comp_bits, comp_bits_ovh, uncomp_bits - comp_bits_ovh);
1131 #endif
1132 
1133       /* block type */
1134       lzx_write_bits(lzxd, 3, block_type);
1135       /* uncompressed length */
1136       lzx_write_bits(lzxd, 24, uncomp_length);
1137 
1138       written_sofar = lzxd->block_size - lzxd->left_in_block;
1139 
1140       /* now write out the aligned offset trees if present */
1141       if (block_type == LZX_ALIGNED_OFFSET_BLOCK) {
1142 	for (i = 0; i < LZX_ALIGNED_SIZE; i++) {
1143 	  lzx_write_bits(lzxd, 3, lzxd->aligned_tree[i].codelength);
1144 	}
1145       }
1146       /* end extra bits */
1147       build_huffman_tree(lzxd->main_tree_size, LZX_MAX_CODE_LENGTH,
1148 			 lzxd->main_freq_table, lzxd->main_tree);
1149       build_huffman_tree(NUM_SECONDARY_LENGTHS, 16,
1150 			 lzxd->length_freq_table, lzxd->length_tree);
1151 
1152 
1153 
1154       /* now write the pre-tree and tree for main 1 */
1155       lzx_write_compressed_tree(lzxd, lzxd->main_tree, lzxd->prev_main_treelengths, NUM_CHARS);
1156 
1157       /* now write the pre-tree and tree for main 2*/
1158       lzx_write_compressed_tree(lzxd, lzxd->main_tree + NUM_CHARS,
1159 				lzxd->prev_main_treelengths + NUM_CHARS,
1160 				lzxd->main_tree_size - NUM_CHARS);
1161 
1162       /* now write the pre tree and tree for length */
1163       lzx_write_compressed_tree(lzxd, lzxd->length_tree, lzxd->prev_length_treelengths,
1164 				NUM_SECONDARY_LENGTHS);
1165 
1166       /* now write literals */
1167       lzx_write_compressed_literals(lzxd, block_type);
1168 
1169       /* copy treelengths somewhere safe to do delta compression */
1170       for (i = 0; i < lzxd->main_tree_size; i++) {
1171 	lzxd->prev_main_treelengths[i] = lzxd->main_tree[i].codelength;
1172       }
1173       for (i = 0; i < NUM_SECONDARY_LENGTHS; i++) {
1174 	lzxd->prev_length_treelengths[i] = lzxd->length_tree[i].codelength;
1175       }
1176       lzxd->main_entropy = 0.0;
1177       lzxd->last_ratio = 9999999.0;
1178       lzxd->block_codesp = lzxd->block_codes;
1179 
1180       memset(lzxd->length_freq_table, 0, NUM_SECONDARY_LENGTHS * sizeof(int));
1181       memset(lzxd->main_freq_table, 0, lzxd->main_tree_size * sizeof(int));
1182       memset(lzxd->aligned_freq_table, 0, LZX_ALIGNED_SIZE * sizeof(int));
1183     }
1184   }
1185   while (lzxd->left_in_block && (lz_left_to_process(lzxd->lzi) || !lzxd->at_eof(lzxd->in_arg)));
1186   return 0;
1187 }
1188 
lzx_init(struct lzx_data ** lzxdp,int wsize_code,lzx_get_bytes_t get_bytes,void * get_bytes_arg,lzx_at_eof_t at_eof,lzx_put_bytes_t put_bytes,void * put_bytes_arg,lzx_mark_frame_t mark_frame,void * mark_frame_arg)1189 int lzx_init(struct lzx_data **lzxdp, int wsize_code,
1190 	     lzx_get_bytes_t get_bytes, void *get_bytes_arg,
1191 	     lzx_at_eof_t at_eof,
1192 	     lzx_put_bytes_t put_bytes, void *put_bytes_arg,
1193 	     lzx_mark_frame_t mark_frame, void *mark_frame_arg)
1194 {
1195   int wsize;
1196   struct lzx_data *lzxd;
1197 
1198   if ((wsize_code < 15) || (wsize_code > 21)) {
1199     return -1;
1200   }
1201   lzx_init_static();
1202 
1203   *lzxdp = lzxd = malloc(sizeof(*lzxd));
1204   if (lzxd == 0)
1205     return -2;
1206 
1207   lzxd->in_arg = get_bytes_arg;
1208   lzxd->out_arg = put_bytes_arg;
1209   lzxd->mark_frame_arg = mark_frame_arg;
1210   lzxd->get_bytes = get_bytes;
1211   lzxd->put_bytes = put_bytes;
1212   lzxd->at_eof = at_eof;
1213   lzxd->mark_frame = mark_frame;
1214 
1215   wsize = 1 << (wsize_code);
1216 
1217   lzxd->bits_in_buf = 0;
1218   lzxd->block_codes = NULL;
1219   lzxd->num_position_slots = num_position_slots[wsize_code-15];
1220   lzxd->main_tree_size = (NUM_CHARS + 8 * lzxd->num_position_slots);
1221 
1222   lzxd->main_freq_table = malloc(sizeof(int) * lzxd->main_tree_size);
1223   lzxd->main_tree = malloc(sizeof(huff_entry)* lzxd->main_tree_size);
1224   lzxd->prev_main_treelengths = malloc(sizeof(uint8_t)*lzxd->main_tree_size);
1225 
1226   lzxd->lzi = malloc(sizeof (*lzxd->lzi));
1227   /* the -3 prevents matches at wsize, wsize-1, wsize-2, all of which are illegal */
1228   lz_init(lzxd->lzi, wsize, wsize - 3, MAX_MATCH, MIN_MATCH, LZX_FRAME_SIZE,
1229 	  lzx_get_chars, lzx_output_match, lzx_output_literal,lzxd);
1230   lzxd->len_uncompressed_input = 0;
1231   lzxd->len_compressed_output = 0;
1232   lzx_reset(lzxd);
1233   return 0;
1234 }
1235 
lzx_finish(struct lzx_data * lzxd,struct lzx_results * lzxr)1236 int lzx_finish(struct lzx_data *lzxd, struct lzx_results *lzxr)
1237 {
1238   /*  lzx_align_output(lzxd);  Not needed as long as frame padding is in place */
1239   if (lzxr) {
1240     lzxr->len_compressed_output = lzxd->len_compressed_output;
1241     lzxr->len_uncompressed_input = lzxd->len_uncompressed_input;
1242   }
1243   lz_release(lzxd->lzi);
1244   free(lzxd->lzi);
1245   free(lzxd->prev_main_treelengths);
1246   free(lzxd->main_tree);
1247   free(lzxd->main_freq_table);
1248   free(lzxd);
1249   return 0;
1250 }
1251 
1252