/*
  File lzx_layer.c, part of lzxcomp library
  Copyright (C) 2002 Matthew T. Russotto

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; version 2.1 only

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h> /* for memset on Linux */
#include <assert.h>
#include <math.h>
#include "lzx_config.h"
#ifdef NONSLIDE
#include "lz_nonslide.h"
#else
#include "hash_slide.h"
#include "lz_slide.h"
#endif
#include "lzx_compress.h"
#include "lzx_constants.h"

/* Debugging defines useful during development.  All add diagnostic output
   at various points in the system */

/*#define DEBUG_MATCHES        *//* When matches come in from the LZ engine */
/*#define DEBUG_MATCHES_2      *//* When matches are being output */
/*#define DEBUG_HUFFMAN        *//* When huffman trees are built */
/*#define DEBUG_ENTROPY        *//* In entropy calculation */
/*#define DEBUG_LZ             *//* Uncompressed input reconstructed from
                                    LZ engine */
/*#define DEBUG_BITBUF         *//* Raw output to upper layer */
/*#define DEBUG_EXTRA_BITS     *//* Savings due to extra bits huffman tree */
/*#define DEBUG_POSITION_SLOT_LOOKUP */
/*#define DEBUG_TREE_COMPRESSION *//* During RLE compression of trees */

/* number of position slots, indexed by window size code - 15
   (i.e. for window sizes 2^15 through 2^21) */
/* as corrected by Caie */
short num_position_slots[] = {30, 32, 34, 36, 38, 42, 50};
unsigned long position_base[51];
u_char extra_bits[52];
double rloge2;
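
/* position_base[] and extra_bits[] are filled in by lzx_init_static(): a
   formatted offset in slot s is coded as position_base[s] plus an
   extra_bits[s]-bit position footer, giving extra_bits = 0,0,0,0,1,1,2,2,3,3,...
   and position_base = 0,1,2,3,4,6,8,12,16,24,...  rloge2 = 1/ln(2) converts
   the natural logarithms used in the entropy estimate to base 2. */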

typedef struct ih_elem {
  int freq;
  short sym;
  short pathlength;
  struct ih_elem *parent;
  struct ih_elem *left;
  struct ih_elem *right;
} ih_elem;

typedef struct h_elem {
  int freq;
  short sym;
  short pathlength;
  struct ih_elem *parent;
  unsigned short code;
} h_elem;

typedef struct huff_entry {
  short codelength;
  unsigned short code;
} huff_entry;

static int cmp_leaves(const void *in_a, const void *in_b)
{
  const struct h_elem *a = in_a;
  const struct h_elem *b = in_b;

  if (!a->freq && b->freq)
    return 1;
  if (a->freq && !b->freq)
    return -1;

  if (a->freq == b->freq)
    return a->sym - b->sym;

  return a->freq - b->freq;
}

static int
cmp_pathlengths(const void *in_a, const void *in_b)
{
  const struct h_elem *a = in_a;
  const struct h_elem *b = in_b;

  if (a->pathlength == b->pathlength)
#if 0
    return a->sym - b->sym;
#else
    /* see note on canonical pathlengths */
    return b->sym - a->sym;
#endif
  return b->pathlength - a->pathlength;
}

/* standard huffman building algorithm */
static void
build_huffman_tree(int nelem, int max_code_length, int *freq, huff_entry *tree)
{
  h_elem *leaves = malloc(nelem * sizeof(h_elem));
  ih_elem *inodes;
  ih_elem *next_inode;
  ih_elem *cur_inode;
  h_elem *cur_leaf;
  int leaves_left;
  int nleaves;
  int pathlength;
  unsigned short cur_code;
  short codes_too_long = 0;
  ih_elem *f1, *f2;
  int i;

  for (i = 0; i < nelem; i++) {
    leaves[i].freq = freq[i];
    leaves[i].sym = i;
    leaves[i].pathlength = 0;
  }
  qsort(leaves, nelem, sizeof(h_elem), cmp_leaves);
  for (leaves_left = 0; leaves_left < nelem; leaves_left++) {
#ifdef DEBUG_HUFFMAN
    fprintf(stderr, "%3d: %3d '%c'\n", leaves_left, leaves[leaves_left].freq,
            leaves[leaves_left].sym);
#endif
    if (!leaves[leaves_left].freq) break;
  }
  nleaves = leaves_left;

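  /* If the tree built below ever yields a code longer than max_code_length,
     the leaf frequencies are halved (but kept >= 1) and the tree is rebuilt,
     flattening it until every code fits. */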
  if (nleaves >= 2) {
    inodes = malloc((nelem-1) * sizeof(ih_elem));
    do {
      if (codes_too_long) {
        for (leaves_left = 0; leaves_left < nelem; leaves_left++) {
          if (!leaves[leaves_left].freq) break;
          if (leaves[leaves_left].freq != 1) {
            leaves[leaves_left].freq >>= 1;
            codes_too_long = 0;
          }
        }
        assert (!codes_too_long);
      }

      cur_leaf = leaves;
      next_inode = cur_inode = inodes;

      do {
        f1 = f2 = NULL;
        if (leaves_left &&
            ((cur_inode == next_inode) ||
             (cur_leaf->freq <= cur_inode->freq))) {
          f1 = (ih_elem *)cur_leaf++;
          leaves_left--;
        }
        else if (cur_inode != next_inode) {
          f1 = cur_inode++;
        }

        if (leaves_left &&
            ((cur_inode == next_inode) ||
             (cur_leaf->freq <= cur_inode->freq))) {
          f2 = (ih_elem *)cur_leaf++;
          leaves_left--;
        }
        else if (cur_inode != next_inode) {
          f2 = cur_inode++;
        }

#ifdef DEBUG_HUFFMAN
        fprintf(stderr, "%d %d\n", f1, f2);
#endif
        if (f1 && f2) {
          next_inode->freq = f1->freq + f2->freq;
          next_inode->sym = -1;
          next_inode->left = f1;
          next_inode->right = f2;
          next_inode->parent = NULL;
          f1->parent = next_inode;
          f2->parent = next_inode;
          if (f1->pathlength > f2->pathlength)
            next_inode->pathlength = f1->pathlength + 1;
          else
            next_inode->pathlength = f2->pathlength + 1;
          if (next_inode->pathlength > max_code_length) {
            codes_too_long = 1;
            break;
          }
          next_inode++;
        }
      }
      while (f1 && f2);
    }
    while (codes_too_long);

#ifdef DEBUG_HUFFMAN
    cur_inode = inodes;
    while (cur_inode < next_inode) {
      fprintf(stderr, "%d l: %3d%c r: %3d%c freq: %8d\n",
              cur_inode - inodes,
              (cur_inode->left->sym!=-1)?(((struct h_elem *)cur_inode->left)-leaves):(cur_inode->left-inodes),
              (cur_inode->left->sym!=-1)?'l':'i',
              (cur_inode->right->sym!=-1)?(((struct h_elem *)cur_inode->right)-leaves):(cur_inode->right-inodes),
              (cur_inode->right->sym!=-1)?'l':'i',
              (cur_inode->freq)
              );
      cur_inode++;
    }
#endif

    /* now traverse tree depth-first */
    cur_inode = next_inode - 1;
    pathlength = 0;
    cur_inode->pathlength = -1;
    do {
      /* precondition: at unmarked node */
      if (cur_inode->sym == -1) /*&& (cur_inode->left)*/ {
        /* left node of unmarked node is unmarked */
        cur_inode = cur_inode->left;
        cur_inode->pathlength = -1;
        pathlength++;
      }
      else {
        /* mark node */
        cur_inode->pathlength = pathlength;
#if 0
        if (cur_inode->right) {
          /* right node of previously unmarked node is unmarked */
          cur_inode = cur_inode->right;
          cur_inode->pathlength = -1;
          pathlength++;
        }
        else
#endif
        {
          /* time to come up.  Keep coming up until an unmarked node is reached */
          /* or the tree is exhausted */
          do {
            cur_inode = cur_inode->parent;
            pathlength--;
          }
          while (cur_inode && (cur_inode->pathlength != -1));
          if (cur_inode) {
            /* found unmarked node; mark it and go right */
            cur_inode->pathlength = pathlength;
            cur_inode = cur_inode->right;
            cur_inode->pathlength = -1;
            pathlength++;
            /* would be complex if cur_inode could be null here.  It can't */
          }
        }
      }
    }
    while (cur_inode);

#ifdef DEBUG_HUFFMAN
    cur_inode = inodes;
    while (cur_inode < next_inode) {
      fprintf(stderr, "%d l: %3d%c r: %3d%c freq: %8d pathlength %4d\n",
              cur_inode - inodes,
              (cur_inode->left->sym!=-1)?(((struct h_elem *)cur_inode->left)-leaves):(cur_inode->left-inodes),
              (cur_inode->left->sym!=-1)?'l':'i',
              (cur_inode->right->sym!=-1)?(((struct h_elem *)cur_inode->right)-leaves):(cur_inode->right-inodes),
              (cur_inode->right->sym!=-1)?'l':'i',
              (cur_inode->freq),
              (cur_inode->pathlength)
              );
      cur_inode++;
    }
#endif
    free(inodes);

    /* the pathlengths are already in order, so this sorts by symbol */
    qsort(leaves, nelem, sizeof(h_elem), cmp_pathlengths);

    /**
       Microsoft's second condition on its canonical huffman codes is:

       For each level, starting at the deepest level of the tree and then
       moving upwards, leaf nodes must start as far left as possible. An
       alternative way of stating this constraint is that if any tree node
       has children then all tree nodes to the left of it with the same path
       length must also have children.

       These 'alternatives' are not equivalent.  The latter alternative gives
       the common canonical code where the longest code is all zeros.  The
       former gives an opposite code where the longest code is all ones.
       Microsoft uses the former alternative.
    **/
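
    /* Worked example of the convention used below: pathlengths {1,2,3,3}
       receive the codes 0, 10, 110, 111 -- the deepest leaf gets the
       all-ones code, matching Microsoft's variant described above. */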

#if 0
    pathlength = leaves[0].pathlength;
    cur_code = 0;
    for (i = 0; i < nleaves; i++) {
      while (leaves[i].pathlength < pathlength) {
        assert(!(cur_code & 1));
        cur_code >>= 1;
        pathlength--;
      }
      leaves[i].code = cur_code;
      cur_code++;
    }
#else
    pathlength = leaves[nleaves-1].pathlength;
    assert(leaves[0].pathlength <= 16); /* this method cannot deal with bigger codes, though
                                           the other canonical method can in some cases
                                           (because it starts with zeros) */
    cur_code = 0;
    for (i = nleaves - 1; i >= 0; i--) {
      while (leaves[i].pathlength > pathlength) {
        cur_code <<= 1;
        pathlength++;
      }
      leaves[i].code = cur_code;
      cur_code++;
    }
#endif

#ifdef DEBUG_HUFFMAN
    for (i = 0; i < nleaves; i++) {
      char code[18];
      int j;

      cur_code = leaves[i].code;
      code[leaves[i].pathlength] = 0;
      for (j = leaves[i].pathlength-1; j >= 0; j--) {
        if (cur_code & 1) code[j] = '1';
        else code[j] = '0';
        cur_code >>= 1;
      }
      fprintf(stderr, "%3d: %3d %3d %-16.16s '%c'\n", i, leaves[i].freq, leaves[i].pathlength, code,
              leaves[i].sym);
    }
#endif
  }
  else if (nleaves == 1) {
    /* 0 symbols is OK (not according to doc, but according to Caie) */
    /* but if only one symbol is present, two symbols are required */
    nleaves = 2;
    leaves[0].pathlength = leaves[1].pathlength = 1;
    if (leaves[1].sym > leaves[0].sym) {
      leaves[1].code = 1;
      leaves[0].code = 0;
    }
    else {
      leaves[0].code = 1;
      leaves[1].code = 0;
    }
  }

  memset(tree, 0, nelem * sizeof(huff_entry));
  for (i = 0; i < nleaves; i++) {
    tree[leaves[i].sym].codelength = leaves[i].pathlength;
    tree[leaves[i].sym].code = leaves[i].code;
  }

  free(leaves);
}

/* from Stuart Caie's code -- I'm hoping this code is too small to encumber
   this file.  If not, you could rip it out and hard-code the tables */

static void lzx_init_static(void)
{
  int i, j;

  if (extra_bits[49]) return;

  rloge2 = 1.0/log(2);
  for (i=0, j=0; i <= 50; i += 2) {
    extra_bits[i] = extra_bits[i+1] = j; /* 0,0,0,0,1,1,2,2,3,3... */
    if ((i != 0) && (j < 17)) j++;       /* 0,0,1,2,3,4...15,16,17,17,17,17... */
  }

  for (i=0, j=0; i <= 50; i++) {
    position_base[i] = j;    /* 0,1,2,3,4,6,8,12,16,24,32,... */
    j += 1 << extra_bits[i]; /* 1,1,1,1,2,2,4,4,8,8,16,16,32,32,... */
  }
}

struct lzx_data
{
  void *in_arg;
  void *out_arg;
  void *mark_frame_arg;
  lzx_get_bytes_t get_bytes;
  lzx_at_eof_t at_eof;
  lzx_put_bytes_t put_bytes;
  lzx_mark_frame_t mark_frame;
  struct lz_info *lzi;
  /* a 'frame' is an 0x8000 byte thing.  Called that because otherwise
     I'd confuse myself overloading 'block' */
  int left_in_frame;
  int left_in_block;
  int R0, R1, R2;
  int num_position_slots;
  /* this is the LZX block size */
  int block_size;
  int *main_freq_table;
  int length_freq_table[NUM_SECONDARY_LENGTHS];
  int aligned_freq_table[LZX_ALIGNED_SIZE];
  uint32_t *block_codes;
  uint32_t *block_codesp;
  huff_entry *main_tree;
  huff_entry length_tree[NUM_SECONDARY_LENGTHS];
  huff_entry aligned_tree[LZX_ALIGNED_SIZE];
  int main_tree_size;
  uint16_t bit_buf;
  int bits_in_buf;
  double main_entropy;
  double last_ratio;
  uint8_t *prev_main_treelengths;
  uint8_t prev_length_treelengths[NUM_SECONDARY_LENGTHS];
  uint32_t len_uncompressed_input;
  uint32_t len_compressed_output;
  short need_1bit_header;
  short subdivide; /* 0 = don't subdivide, 1 = allowed, -1 = requested */
};

static int
lzx_get_chars(lz_info *lzi, int n, u_char *buf)
{
  /* force lz compression to stop after every block */
  int chars_read;
  int chars_pad;

  lzx_data *lzud = (lzx_data *)lzi->user_data;
#ifdef OLDFRAMING
  if (lzud->subdivide < 0) return 0;
  if (n > lzud->left_in_frame)
    n = lzud->left_in_frame;
  if (n > lzud->left_in_block)
    n = lzud->left_in_block;
#endif
  chars_read = lzud->get_bytes(lzud->in_arg, n, buf);
#ifdef OLDFRAMING
  lzud->left_in_frame -= chars_read;
  lzud->left_in_block -= chars_read;
#else
  lzud->left_in_frame -= chars_read % LZX_FRAME_SIZE;
  if (lzud->left_in_frame < 0)
    lzud->left_in_frame += LZX_FRAME_SIZE;
#endif
  if ((chars_read < n) && (lzud->left_in_frame)) {
    chars_pad = n - chars_read;
    if (chars_pad > lzud->left_in_frame) chars_pad = lzud->left_in_frame;
    /* never emit a full frame of padding.  This prevents silliness when
       lzx_compress is called when at EOF but EOF not yet detected */
    if (chars_pad == LZX_FRAME_SIZE) chars_pad = 0;
#ifdef OLDFRAMING
    if (chars_pad > lzud->left_in_block) chars_pad = lzud->left_in_block;
#endif
    memset(buf + chars_read, 0, chars_pad);
    lzud->left_in_frame -= chars_pad;
#ifdef OLDFRAMING
    lzud->left_in_block -= chars_pad;
#endif
    chars_read += chars_pad;
  }
  return chars_read;
}

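/* find_match_at: see whether the candidate match of length match_len also
   exists at distance 'loc' (one of the repeated offsets R0/R1/R2).  If it
   does, rewrite *match_locp to -loc and return 0 so the caller can use the
   cheaper repeated-offset encoding; otherwise return -1. */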
#ifdef NONSLIDE
static int find_match_at(lz_info *lzi, int loc, int match_len, int *match_locp)
{
  u_char *matchb;
  u_char *nmatchb;
  u_char *c1, *c2;
  int j;

  if (-*match_locp == loc) return -1;
  if (loc < match_len) return -1;

  matchb = lzi->block_buf + lzi->block_loc + *match_locp;
  nmatchb = lzi->block_buf + lzi->block_loc - loc;
  c1 = matchb;
  c2 = nmatchb;
  for (j = 0; j < match_len; j++) {
    if (*c1++ != *c2++) break;
  }
  if (j == match_len) {
#ifdef DEBUG_MATCHES
    fprintf(stderr, "match found %d, old = %d new = %d len = %d\n", lzi->cur_loc, -*match_locp, loc, match_len);
#endif
    *match_locp = -loc;
    return 0;
  }
  return -1;
}
#else
static int find_match_at(lz_info *lzi, int loc, int match_len, int *match_locp)
{
  u_char *matchb;
  u_char *nmatchb;
  u_char *c1, *c2;
  int j;

  if (-*match_locp == loc) return -1;
  if (loc < match_len) return -1;

  matchb = lzi->slide_bufp + *match_locp;
  if (matchb < lzi->slide_buf) matchb += lzi->slide_buf_size;
  nmatchb = lzi->slide_bufp - loc;
  if (nmatchb < lzi->slide_buf) nmatchb += lzi->slide_buf_size;
  c1 = matchb;
  c2 = nmatchb;
  for (j = 0; j < match_len; j++) {
    if (*c1++ != *c2++) break;
    if (c1 == lzi->slide_bufe) c1 = lzi->slide_buf;
    if (c2 == lzi->slide_bufe) c2 = lzi->slide_buf;
  }
  if (j == match_len) {
#ifdef DEBUG_MATCHES
    fprintf(stderr, "match found %d, old = %d new = %d len = %d\n", lzi->cur_loc, -*match_locp, loc, match_len);
#endif
    *match_locp = -loc;
    return 0;
  }
  return -1;
}
#endif
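
/* Every 4096 output codes, estimate how well the current statistics would
   compress the block so far (Huffman-coded size plus a rough header
   overhead).  If the projected bits-per-symbol ratio has stopped improving,
   request a block split (subdivide = -1) and stop the LZ pass so fresh
   trees can be started. */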
static void check_entropy(lzx_data *lzud, int main_index)
{
  /* entropy = - sum_alphabet P(x) * log2 P(x) */
  /* entropy = - sum_alphabet f(x)/N * log2 (f(x)/N) */
  /* entropy = - 1/N sum_alphabet f(x) * (log2 f(x) - log2 N) */
  /* entropy = - 1/N ((sum_alphabet f(x) * log2 f(x)) - (sum_alphabet f(x) * log2 N)) */
  /* entropy = - 1/N ((sum_alphabet f(x) * log2 f(x)) - (log2 N * sum_alphabet f(x))) */
  /* entropy = - 1/N ((sum_alphabet f(x) * log2 f(x)) - N * log2 N) */

  /* entropy = - 1/N ((sum_alphabet f(x) * ln f(x) * 1/ln 2) - N * ln N * 1/ln 2) */
  /* entropy = 1/(N ln 2) (N * ln N - (sum_alphabet f(x) * ln f(x))) */
  /* entropy = 1/(N ln 2) (N * ln N + (sum_alphabet -f(x) * ln f(x))) */

  /* entropy = 1/(N ln 2) ( sum_alphabet ln N * f(x) + (sum_alphabet -f(x) * ln f(x))) */
  /* entropy = 1/(N ln 2) ( sum_alphabet (ln N * f(x) - f(x) * ln f(x))) */
  /* entropy = -1/(N ln 2) ( sum_alphabet (f(x) * ln f(x) - ln N * f(x))) */
  /* entropy = -1/(N ln 2) ( sum_alphabet f(x) * (ln f(x) - ln N)) */
  /* entropy = -1/(N ln 2) ( sum_alphabet f(x) * ln (f(x)/N)) */
  /* entropy = -1/N ( sum_alphabet (1/(ln 2)) * f(x) * ln (f(x)/N)) */
  /* entropy = -1/N ( sum_alphabet f(x) * log2 (f(x)/N)) */
  /* entropy = - ( sum_alphabet (f(x)/N) * log2 (f(x)/N)) */
  /* entropy = - ( sum_alphabet P(x) * log2 P(x)) */
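
  /* The code below keeps main_entropy = -sum_alphabet f(x) * ln f(x),
     updated incrementally: when a symbol's count goes from f-1 to f, the old
     (f-1)*ln(f-1) term is added back and the new f*ln(f) term subtracted, so
     the full sum never has to be recomputed. */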

  double freq;
  double n_ln_n;
  double rn_ln2;
  double cur_ratio;
  int n;

  /* delete old entropy accumulation */
  if (lzud->main_freq_table[main_index] != 1) {
    freq = (double)lzud->main_freq_table[main_index]-1;
    lzud->main_entropy += freq * log(freq);
  }
  /* add new entropy accumulation */
  freq = (double)lzud->main_freq_table[main_index];
  lzud->main_entropy -= freq * log(freq);
  n = lzud->block_codesp - lzud->block_codes;

  if (((n & 0xFFF) == 0) && (lzud->left_in_block >= 0x1000)) {
    n_ln_n = (double)n * log((double)n);
    rn_ln2 = rloge2 / (double)n;
    cur_ratio = (n * rn_ln2 *(n_ln_n + lzud->main_entropy) + 24 + 3 * 80 + NUM_CHARS + (lzud->main_tree_size-NUM_CHARS)*3 + NUM_SECONDARY_LENGTHS ) / (double)n;
#ifdef DEBUG_ENTROPY
    fprintf(stderr, "n = %d\n", n);
    fprintf(stderr, "main entropy = %f\n", rn_ln2 *(n_ln_n + lzud->main_entropy) );
    fprintf(stderr, "compression ratio (raw) = %f\n", 100.0 * rn_ln2 *(n_ln_n + lzud->main_entropy) /9.0 );
    fprintf(stderr, "compression ratio (ovh) = %f\n", 100.0 * cur_ratio/9.0);
#endif
    if (cur_ratio > lzud->last_ratio) {
#ifdef DEBUG_ENTROPY
      fprintf(stderr, "resetting huffman tables at %d\n", n);
#endif
      lzud->subdivide = -1;
      lz_stop_compressing(lzud->lzi);
    }
    lzud->last_ratio = cur_ratio;
  }
}

static int
lzx_output_match(lz_info *lzi, int match_pos, int match_len)
{
  lzx_data *lzud = (lzx_data *)lzi->user_data;
  uint32_t formatted_offset;
  uint32_t position_footer;
  uint8_t length_footer;
  uint8_t length_header;
  uint16_t len_pos_header;
  int position_slot;
  short btdt;

#ifdef DEBUG_LZ
  {
    int i;
    int pos;
    for (i = 0; i < match_len; i++) {
#ifdef NONSLIDE
      pos = match_pos + lzi->block_loc + i;
      fprintf(stderr, "%c", lzi->block_buf[pos]);
#else
      pos = match_pos + lzi->front_offset + i;
      if (pos > lzi->slide_buf_size)
        pos -= lzi->slide_buf_size;
      fprintf(stderr, "%c", lzi->slide_buf[pos]);
#endif
    }
  }
#endif
  position_footer = 0;
  btdt = 0;
 testforr:
  if (match_pos == -lzud->R0) {
    match_pos = 0;
    formatted_offset = 0;
    position_slot = 0;
  }
  else if (match_pos == -lzud->R1) {
    lzud->R1 = lzud->R0;
    lzud->R0 = -match_pos;
    match_pos = 1;
    formatted_offset = 1;
    position_slot = 1;
  }
  else if (match_pos == -lzud->R2) {
    lzud->R2 = lzud->R0;
    lzud->R0 = -match_pos;
    match_pos = 2;
    formatted_offset = 2;
    position_slot = 2;
  }
  else {
    if (!btdt) {
      btdt = 1;
      if (find_match_at(lzi, lzud->R0, match_len, &match_pos) == 0)
        goto testforr;
      if (find_match_at(lzi, lzud->R1, match_len, &match_pos) == 0)
        goto testforr;
      if (find_match_at(lzi, lzud->R2, match_len, &match_pos) == 0)
        goto testforr;
    }

    formatted_offset = -match_pos + 2;

    if ((match_len < 3) ||
        ((formatted_offset >= 64) && (match_len < 4)) ||
        ((formatted_offset >= 2048) && (match_len < 5)) ||
        ((formatted_offset >= 65536) && (match_len < 6))) {
      /* reject matches where extra_bits will likely be bigger than just
         outputting literals.  The numbers are basically derived through
         guessing and trial and error */
      return -1; /* reject the match */
    }

    lzud->R2 = lzud->R1;
    lzud->R1 = lzud->R0;
    lzud->R0 = -match_pos;

    /* calculate position base using binary search of table; if log2 can be
       done in hardware, approximation might work;
       trunc(log2(formatted_offset*formatted_offset)) gets either the proper
       position slot or the next one, except for slots 0, 1, and 39-49

       Slots 0-2 are handled by the repeated-offset (R0-R2) cases above

       Slots 36-49 (formatted_offset >= 262144) can be found by
       (formatted_offset/131072) + 34 ==
       (formatted_offset >> 17) + 34;
    */
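
    /* For example, with the tables from lzx_init_static(): formatted_offset
       1000 lies between position_base[19] = 768 and position_base[20] = 1024,
       so position_slot = 19, extra_bits[19] = 8, and the position footer is
       1000 - 768 = 232. */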
    if (formatted_offset >= 262144) {
      position_slot = (formatted_offset >> 17) + 34;
    }
    else {
      int left, right, mid;

      left = 3;
      right = lzud->num_position_slots - 1;
      position_slot = -1;
      while (left <= right) {
        mid = (left + right)/2;
        if ((position_base[mid] <= formatted_offset) &&
            position_base[mid+1] > formatted_offset) {
          position_slot = mid;
          break;
        }
#if 0
        fprintf(stderr, "BEFORE: %06x %06x %06x %06x\n",
                position_base[left], position_base[mid],
                formatted_offset, position_base[right]);
#endif
        if (formatted_offset > position_base[mid])
          /* too low */
          left = mid + 1;
        else /* too high */
          right = mid;
#if 0
        fprintf(stderr, "AFTER : %06x %06x %06x %06x\n",
                position_base[left], position_base[mid],
                formatted_offset, position_base[right]);
#endif
      }
#ifdef DEBUG_POSITION_SLOT_LOOKUP
      if (position_slot < 0) {
        fprintf(stderr, "lmr npr: %d %d %d %d\n", left, mid, right, lzud->num_position_slots);
        fprintf(stderr, "AFTER : %07d %07d %07d %07d\n",
                position_base[left], position_base[mid],
                formatted_offset, position_base[right]);
        fprintf(stderr, "(%d, %d, %d, %d, %d)\n", match_pos, match_len, formatted_offset, position_slot, position_footer);
      }
#endif
      assert(position_slot >= 0);
      /* FIXME precalc extra_mask table */
    }
    position_footer = ((1UL << extra_bits[position_slot]) - 1) & formatted_offset;
  }
#ifdef DEBUG_MATCHES
#ifdef NONSLIDE
  fprintf(stderr, "(%08x, %d, %d, %d, %d, %d)\n", lzud->lzi->cur_loc, match_pos, match_len, formatted_offset, position_slot, position_footer);
#else
  fprintf(stderr, "(%08x, %d, %d, %d, %d, %d)\n", lzud->lzi->cur_loc - lzud->lzi->chars_in_match, match_pos, match_len, formatted_offset, position_slot, position_footer);
#endif
#endif
  /* match length = 8 bits */
  /* position_slot = 6 bits */
  /* position_footer = 17 bits */
  /* total = 31 bits */
  /* plus one to say whether it's a literal or not */
  *lzud->block_codesp++ = 0x80000000 | /* bit 31 in intelligent bit ordering */
    (position_slot << 25) |            /* bits 30-25 */
    (position_footer << 8) |           /* bits 24-8 */
    (match_len - MIN_MATCH);           /* bits 7-0 */

  if (match_len < (NUM_PRIMARY_LENGTHS + MIN_MATCH)) {
    length_header = match_len - MIN_MATCH;
    /* length_footer = 255; */ /* not necessary */
  }
  else {
    length_header = NUM_PRIMARY_LENGTHS;
    length_footer = match_len - (NUM_PRIMARY_LENGTHS + MIN_MATCH);
    lzud->length_freq_table[length_footer]++;
  }
  len_pos_header = (position_slot << 3) | length_header;
  lzud->main_freq_table[len_pos_header + NUM_CHARS]++;
  if (extra_bits[position_slot] >= 3) {
    lzud->aligned_freq_table[position_footer & 7]++;
  }
#ifndef OLDFRAMING
  lzud->left_in_block -= match_len;
#endif
  if (lzud->subdivide)
    check_entropy(lzud, len_pos_header + NUM_CHARS);
  return 0; /* accept the match */
}

static void
lzx_output_literal(lz_info *lzi, u_char ch)
{
  lzx_data *lzud = (lzx_data *)lzi->user_data;

#ifndef OLDFRAMING
  lzud->left_in_block--;
#endif
  *lzud->block_codesp++ = ch;
#ifdef DEBUG_LZ
  fprintf(stderr, "%c", ch);
#endif
  lzud->main_freq_table[ch]++;
  if (lzud->subdivide)
    check_entropy(lzud, ch);
}

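/* Bit output: bits accumulate MSB-first in a 16-bit buffer and are flushed
   to the caller one 16-bit word at a time; on big-endian hosts the word is
   byte-swapped first, so the output stream is always little-endian 16-bit
   words as the LZX bitstream requires. */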
static void lzx_write_bits(lzx_data *lzxd, int nbits, uint32_t bits)
{
  int cur_bits;
  int shift_bits;
  int rshift_bits;
  uint16_t mask_bits;

#ifdef DEBUG_BITBUF
  fprintf(stderr, "WB: %2d %08x\n", nbits, bits);
#endif
  cur_bits = lzxd->bits_in_buf;
  while ((cur_bits + nbits) >= 16) {
    shift_bits = 16 - cur_bits;
    rshift_bits = nbits - shift_bits;
    if (shift_bits == 16) {
      lzxd->bit_buf = (bits>>rshift_bits) & 0xFFFF;
    }
    else {
      mask_bits = (1U << shift_bits) - 1;
      lzxd->bit_buf <<= shift_bits;
      lzxd->bit_buf |= (bits>>rshift_bits) & mask_bits;
    }
#ifdef DEBUG_BITBUF
    fprintf(stderr, "WBB: %04x\n", lzxd->bit_buf);
#endif
#ifdef LZX_BIG_ENDIAN
    lzxd->bit_buf = ((lzxd->bit_buf & 0xFF)<<8) | (lzxd->bit_buf >> 8);
#endif
    lzxd->put_bytes(lzxd->out_arg, sizeof(lzxd->bit_buf), &lzxd->bit_buf);
    lzxd->len_compressed_output += sizeof(lzxd->bit_buf);
    lzxd->bit_buf = 0;
    nbits -= shift_bits;
    cur_bits = 0;
  }
  /* (cur_bits + nbits) < 16.  If nbits = 0, we're done.
     otherwise move bits in */
  shift_bits = nbits;
  mask_bits = (1U << shift_bits) - 1;
  lzxd->bit_buf <<= shift_bits;
  lzxd->bit_buf |= bits & mask_bits;
  cur_bits += nbits;

#ifdef DEBUG_BITBUF
  fprintf(stderr, "OBB: %2d %04x\n", cur_bits, lzxd->bit_buf);
#endif
  lzxd->bits_in_buf = cur_bits;
}

static void lzx_align_output(lzx_data *lzxd)
{
  if (lzxd->bits_in_buf) {
    lzx_write_bits(lzxd, 16 - lzxd->bits_in_buf, 0);
  }
  if (lzxd->mark_frame)
    lzxd->mark_frame(lzxd->mark_frame_arg, lzxd->len_uncompressed_input, lzxd->len_compressed_output);
}

static void
lzx_write_compressed_literals(lzx_data *lzxd, int block_type)
{
  uint32_t *cursor = lzxd->block_codes;
  uint32_t *endp = lzxd->block_codesp;
  uint16_t position_slot;
  uint32_t position_footer;
  uint32_t match_len_m2; /* match length minus 2, which is MIN_MATCH */
  uint32_t verbatim_bits;
  uint32_t block_code;
  uint16_t length_header;
  uint16_t length_footer;
  uint16_t len_pos_header;
  huff_entry *huffe;
  int frame_count = (lzxd->len_uncompressed_input % LZX_FRAME_SIZE);

  lzxd->len_uncompressed_input -= frame_count; /* will be added back in later */
  while (cursor < endp) {
    block_code = *cursor++;
    if (block_code & 0x80000000) {
      /*
       * 0x80000000 |             bit 31 in intelligent bit ordering
       * (position_slot << 25) |  bits 30-25
       * (position_footer << 8) | bits 24-8
       * (match_len - MIN_MATCH); bits 7-0
       */
      match_len_m2 = block_code & 0xFF;              /* 8 bits */
      position_footer = (block_code >> 8) & 0x1FFFF; /* 17 bits */
      position_slot = (block_code >> 25) & 0x3F;     /* 6 bits */

#ifdef DEBUG_MATCHES_2
      fprintf(stderr, "%08x, %3d %2d %d\n", lzxd->len_uncompressed_input + frame_count, match_len_m2, position_slot, position_footer);
#endif
      if (match_len_m2 < NUM_PRIMARY_LENGTHS) {
        length_header = match_len_m2;
        length_footer = 255; /* personal encoding for NULL */
      }
      else {
        length_header = NUM_PRIMARY_LENGTHS;
        length_footer = match_len_m2 - NUM_PRIMARY_LENGTHS;
      }
      len_pos_header = (position_slot << 3) | length_header;
      huffe = &lzxd->main_tree[len_pos_header+NUM_CHARS];
      lzx_write_bits(lzxd, huffe->codelength, huffe->code);
      if (length_footer != 255) {
        huffe = &lzxd->length_tree[length_footer];
        lzx_write_bits(lzxd, huffe->codelength, huffe->code);
      }
      if ((block_type == LZX_ALIGNED_OFFSET_BLOCK) && (extra_bits[position_slot] >= 3)) {
        /* aligned offset block and code */
        verbatim_bits = position_footer >> 3;
        lzx_write_bits(lzxd, extra_bits[position_slot] - 3, verbatim_bits);
        huffe = &lzxd->aligned_tree[position_footer&7];
        lzx_write_bits(lzxd, huffe->codelength, huffe->code);
      }
      else {
        verbatim_bits = position_footer;
        lzx_write_bits(lzxd, extra_bits[position_slot], verbatim_bits);
      }
      frame_count += match_len_m2 + 2;
    }
    else {
      /* literal */
      assert(block_code < NUM_CHARS);
      huffe = &lzxd->main_tree[block_code];
      lzx_write_bits(lzxd, huffe->codelength, huffe->code);
      frame_count++;
    }
    if (frame_count == LZX_FRAME_SIZE) {
      lzxd->len_uncompressed_input += frame_count;
      lzx_align_output(lzxd);
      frame_count = 0;
    }
#ifdef DEBUG_MATCHES_2
    if (frame_count > LZX_FRAME_SIZE) {
      fprintf(stderr, "uncomp_len = %x, frame_count = %x, block_code = %08x, match_len_m2 = %d", lzxd->len_uncompressed_input, frame_count, block_code, match_len_m2);
    }
#endif
    assert (frame_count < LZX_FRAME_SIZE);
  }
  lzxd->len_uncompressed_input += frame_count;
}

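/* The code-length trees are themselves compressed.  Each length is sent as a
   delta from the previous tree's length for that symbol (prev_len - len,
   mod 17), coded with a small "pretree" of LZX_PRETREE_SIZE entries:
   codes 0-16 are the delta itself, code 17 is a run of 4-19 zero lengths
   (4 extra bits), code 18 is a run of 20-51 zero lengths (5 extra bits), and
   code 19 is a run of 4-5 identical deltas (1 extra bit, followed by the
   pretree code of the delta). */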
static int
lzx_write_compressed_tree(struct lzx_data *lzxd,
                          struct huff_entry *tree, uint8_t *prevlengths,
                          int treesize)
{
  u_char *codes;
  u_char *runs;
  int freqs[LZX_PRETREE_SIZE];
  int cur_run;
  int last_len;
  huff_entry pretree[20];
  u_char *codep;
  u_char *codee;
  u_char *runp;
  int excess;
  int i;
  int cur_code;

  codep = codes = malloc(treesize*sizeof(char));
  runp = runs = malloc(treesize*sizeof(char));
  memset(freqs, 0, sizeof(freqs));
  cur_run = 1;
  last_len = tree[0].codelength;
  for (i = 1; i <= treesize; i++) {
    if ((i == treesize) || (tree[i].codelength != last_len)) {
      if (last_len == 0) {
        while (cur_run >= 20) {
          excess = cur_run - 20;
          if (excess > 31) excess = 31;
          *codep++ = 18;
          *runp++ = excess;
          cur_run -= excess + 20;
          freqs[18]++;
        }
        while (cur_run >= 4) {
          excess = cur_run - 4;
          if (excess > 15) excess = 15;
          *codep++ = 17;
          *runp++ = excess;
          cur_run -= excess + 4;
          freqs[17]++;
        }
        while (cur_run > 0) {
          *codep = prevlengths[i - cur_run];
          freqs[*codep++]++;
          *runp++ = 0; /* not necessary */
          cur_run--;
        }
      }
      else {
        while (cur_run >= 4) {
          if (cur_run == 4) excess = 0;
          else excess = 1;
          *codep++ = 19;
          *runp++ = excess;
          freqs[19]++;
          /* right, MS lies again.  Code is NOT
             prev_len + len (mod 17), it's prev_len - len (mod 17) */
          *codep = prevlengths[i-cur_run] - last_len;
          if (*codep > 16) *codep += 17;
          freqs[*codep++]++;
          *runp++ = 0; /* not necessary */
          cur_run -= excess+4;
        }
        while (cur_run > 0) {
          *codep = prevlengths[i-cur_run] - last_len;
          if (*codep > 16) *codep += 17;
          *runp++ = 0; /* not necessary */
          cur_run--;
          freqs[*codep++]++;
        }
      }
      if (i != treesize)
        last_len = tree[i].codelength;
      cur_run = 0;
    }
    cur_run++;
  }
  codee = codep;
#ifdef DEBUG_TREE_COMPRESSION
  *codep++ = 255;
  *runp++ = 255;
  fprintf(stderr, "num: len code run\n");
  for (i = 0; i < treesize; i++) {
    fprintf(stderr, "%3d: %2d %2d %2d\n", i, tree[i].codelength, codes[i], runs[i]);
  }
#endif
  /* now create the huffman table and write out the pretree */
  build_huffman_tree(LZX_PRETREE_SIZE, 16, freqs, pretree);
  for (i = 0; i < LZX_PRETREE_SIZE; i++) {
    lzx_write_bits(lzxd, 4, pretree[i].codelength);
  }
  codep = codes;
  runp = runs;
  cur_run = 0;
  while (codep < codee) {
    cur_code = *codep++;
    lzx_write_bits(lzxd, pretree[cur_code].codelength, pretree[cur_code].code);
    if (cur_code == 17) {
      cur_run += *runp + 4;
      lzx_write_bits(lzxd, 4, *runp);
    }
    else if (cur_code == 18) {
      cur_run += *runp + 20;
      lzx_write_bits(lzxd, 5, *runp);
    }
    else if (cur_code == 19) {
      cur_run += *runp + 4;
      lzx_write_bits(lzxd, 1, *runp);
      cur_code = *codep++;
      lzx_write_bits(lzxd, pretree[cur_code].codelength, pretree[cur_code].code);
      runp++;
    }
    else {
      cur_run++;
    }
    runp++;
  }
  free(codes);
  free(runs);
  return 0;
}

void
lzx_reset(lzx_data *lzxd)
{
  lzxd->need_1bit_header = 1;
  lzxd->R0 = lzxd->R1 = lzxd->R2 = 1;
  memset(lzxd->prev_main_treelengths, 0, lzxd->main_tree_size * sizeof(uint8_t));
  memset(lzxd->prev_length_treelengths, 0, NUM_SECONDARY_LENGTHS * sizeof(uint8_t));
  lz_reset(lzxd->lzi);
}

int lzx_compress_block(lzx_data *lzxd, int block_size, int subdivide)
{
  int i;
  uint32_t written_sofar = 0;
  int block_type;
  long uncomp_bits;
  long comp_bits;
  long comp_bits_ovh;
  long uncomp_length;

  if ((lzxd->block_size != block_size) || (lzxd->block_codes == NULL)) {
    if (lzxd->block_codes != NULL) free(lzxd->block_codes);
    lzxd->block_size = block_size;
    lzxd->block_codes = malloc(block_size * sizeof(uint32_t));
  }
  lzxd->subdivide = subdivide?1:0;

  lzxd->left_in_block = block_size;
  lzxd->left_in_frame = LZX_FRAME_SIZE;
  lzxd->main_entropy = 0.0;
  lzxd->last_ratio = 9999999.0;
  lzxd->block_codesp = lzxd->block_codes;

  memset(lzxd->length_freq_table, 0, NUM_SECONDARY_LENGTHS * sizeof(int));
  memset(lzxd->main_freq_table, 0, lzxd->main_tree_size * sizeof(int));
  memset(lzxd->aligned_freq_table, 0, LZX_ALIGNED_SIZE * sizeof(int));
  do {
    lz_compress(lzxd->lzi, lzxd->left_in_block);
    if (lzxd->left_in_frame == 0)
      lzxd->left_in_frame = LZX_FRAME_SIZE;

    if ((lzxd->subdivide<0) || !lzxd->left_in_block ||
        (!lz_left_to_process(lzxd->lzi) && lzxd->at_eof(lzxd->in_arg))) {
      /* now one block is LZ-analyzed. */
      /* time to write it out */
      uncomp_length = lzxd->block_size - lzxd->left_in_block - written_sofar;
      /* uncomp_length will sometimes be 0 when input length is
         an exact multiple of frame size */
      if (uncomp_length == 0)
        continue;
      if (lzxd->subdivide < 0) {
#ifdef DEBUG_ENTROPY
        fprintf(stderr, "subdivided\n");
#endif
        lzxd->subdivide = 1;
      }

      if (lzxd->need_1bit_header) {
        /* one bit Intel preprocessing header */
        /* always 0 because this implementation doesn't do Intel preprocessing */
        lzx_write_bits(lzxd, 1, 0);
        lzxd->need_1bit_header = 0;
      }

      /* handle extra bits */
      uncomp_bits = comp_bits = 0;
      build_huffman_tree(LZX_ALIGNED_SIZE, 7, lzxd->aligned_freq_table, lzxd->aligned_tree);
      for (i = 0; i < LZX_ALIGNED_SIZE; i++) {
        uncomp_bits += lzxd->aligned_freq_table[i] * 3;
        comp_bits += lzxd->aligned_freq_table[i] * lzxd->aligned_tree[i].codelength;
      }
      comp_bits_ovh = comp_bits + LZX_ALIGNED_SIZE * 3;
      if (comp_bits_ovh < uncomp_bits)
        block_type = LZX_ALIGNED_OFFSET_BLOCK;
      else
        block_type = LZX_VERBATIM_BLOCK;

#ifdef DEBUG_EXTRA_BITS
      fprintf(stderr, "Extra bits uncompressed: %5d compressed: %5d compressed w/overhead %5d gain/loss %5d\n", uncomp_bits, comp_bits, comp_bits_ovh, uncomp_bits - comp_bits_ovh);
#endif

      /* block type */
      lzx_write_bits(lzxd, 3, block_type);
      /* uncompressed length */
      lzx_write_bits(lzxd, 24, uncomp_length);

      written_sofar = lzxd->block_size - lzxd->left_in_block;

      /* now write out the aligned offset trees if present */
      if (block_type == LZX_ALIGNED_OFFSET_BLOCK) {
        for (i = 0; i < LZX_ALIGNED_SIZE; i++) {
          lzx_write_bits(lzxd, 3, lzxd->aligned_tree[i].codelength);
        }
      }
      /* end extra bits */
      build_huffman_tree(lzxd->main_tree_size, LZX_MAX_CODE_LENGTH,
                         lzxd->main_freq_table, lzxd->main_tree);
      build_huffman_tree(NUM_SECONDARY_LENGTHS, 16,
                         lzxd->length_freq_table, lzxd->length_tree);

      /* now write the pre-tree and tree for main 1 */
      lzx_write_compressed_tree(lzxd, lzxd->main_tree, lzxd->prev_main_treelengths, NUM_CHARS);

      /* now write the pre-tree and tree for main 2 */
      lzx_write_compressed_tree(lzxd, lzxd->main_tree + NUM_CHARS,
                                lzxd->prev_main_treelengths + NUM_CHARS,
                                lzxd->main_tree_size - NUM_CHARS);

      /* now write the pre-tree and tree for length */
      lzx_write_compressed_tree(lzxd, lzxd->length_tree, lzxd->prev_length_treelengths,
                                NUM_SECONDARY_LENGTHS);

      /* now write literals */
      lzx_write_compressed_literals(lzxd, block_type);

      /* copy treelengths somewhere safe to do delta compression */
      for (i = 0; i < lzxd->main_tree_size; i++) {
        lzxd->prev_main_treelengths[i] = lzxd->main_tree[i].codelength;
      }
      for (i = 0; i < NUM_SECONDARY_LENGTHS; i++) {
        lzxd->prev_length_treelengths[i] = lzxd->length_tree[i].codelength;
      }
      lzxd->main_entropy = 0.0;
      lzxd->last_ratio = 9999999.0;
      lzxd->block_codesp = lzxd->block_codes;

      memset(lzxd->length_freq_table, 0, NUM_SECONDARY_LENGTHS * sizeof(int));
      memset(lzxd->main_freq_table, 0, lzxd->main_tree_size * sizeof(int));
      memset(lzxd->aligned_freq_table, 0, LZX_ALIGNED_SIZE * sizeof(int));
    }
  }
  while (lzxd->left_in_block && (lz_left_to_process(lzxd->lzi) || !lzxd->at_eof(lzxd->in_arg)));
  return 0;
}

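/* Typical use (a sketch, not part of the library): supply I/O callbacks
   matching the lzx_get_bytes_t / lzx_at_eof_t / lzx_put_bytes_t typedefs
   from lzx_compress.h, then feed blocks until EOF.  The callback and
   variable names below are hypothetical:

       struct lzx_data *lzxd;
       struct lzx_results res;
       lzx_init(&lzxd, 16, my_get_bytes, my_in, my_at_eof,
                my_put_bytes, my_out, NULL, NULL);
       while (!my_at_eof(my_in))
         lzx_compress_block(lzxd, 1 << 16, 1);
       lzx_finish(lzxd, &res);

   wsize_code 16 gives a 64K window; mark_frame may be NULL when no frame
   offsets need to be recorded. */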
int lzx_init(struct lzx_data **lzxdp, int wsize_code,
             lzx_get_bytes_t get_bytes, void *get_bytes_arg,
             lzx_at_eof_t at_eof,
             lzx_put_bytes_t put_bytes, void *put_bytes_arg,
             lzx_mark_frame_t mark_frame, void *mark_frame_arg)
{
  int wsize;
  struct lzx_data *lzxd;

  if ((wsize_code < 15) || (wsize_code > 21)) {
    return -1;
  }
  lzx_init_static();

  *lzxdp = lzxd = malloc(sizeof(*lzxd));
  if (lzxd == 0)
    return -2;

  lzxd->in_arg = get_bytes_arg;
  lzxd->out_arg = put_bytes_arg;
  lzxd->mark_frame_arg = mark_frame_arg;
  lzxd->get_bytes = get_bytes;
  lzxd->put_bytes = put_bytes;
  lzxd->at_eof = at_eof;
  lzxd->mark_frame = mark_frame;

  wsize = 1 << (wsize_code);

  lzxd->bits_in_buf = 0;
  lzxd->block_codes = NULL;
  lzxd->num_position_slots = num_position_slots[wsize_code-15];
  lzxd->main_tree_size = (NUM_CHARS + 8 * lzxd->num_position_slots);

  lzxd->main_freq_table = malloc(sizeof(int) * lzxd->main_tree_size);
  lzxd->main_tree = malloc(sizeof(huff_entry) * lzxd->main_tree_size);
  lzxd->prev_main_treelengths = malloc(sizeof(uint8_t) * lzxd->main_tree_size);

  lzxd->lzi = malloc(sizeof (*lzxd->lzi));
  /* the -3 prevents matches at wsize, wsize-1, wsize-2, all of which are illegal */
  lz_init(lzxd->lzi, wsize, wsize - 3, MAX_MATCH, MIN_MATCH, LZX_FRAME_SIZE,
          lzx_get_chars, lzx_output_match, lzx_output_literal, lzxd);
  lzxd->len_uncompressed_input = 0;
  lzxd->len_compressed_output = 0;
  lzx_reset(lzxd);
  return 0;
}

int lzx_finish(struct lzx_data *lzxd, struct lzx_results *lzxr)
{
  /* lzx_align_output(lzxd); Not needed as long as frame padding is in place */
  if (lzxr) {
    lzxr->len_compressed_output = lzxd->len_compressed_output;
    lzxr->len_uncompressed_input = lzxd->len_uncompressed_input;
  }
  lz_release(lzxd->lzi);
  free(lzxd->lzi);
  free(lzxd->prev_main_treelengths);
  free(lzxd->main_tree);
  free(lzxd->main_freq_table);
  free(lzxd);
  return 0;
}