1 /* $Id: lzx.c,v 1.5 2002/10/09 01:16:33 jedwin Exp $ */
2 /***************************************************************************
3 * lzx.c - LZX decompression routines *
4 * ------------------- *
5 * *
6 * maintainer: Jed Wing <jedwin@ugcs.caltech.edu> *
7 * source: modified lzx.c from cabextract v0.5 *
8 * notes: This file was taken from cabextract v0.5, which was, *
9 * itself, a modified version of the lzx decompression code *
10 * from unlzx. *
11 * *
12 * platforms: In its current incarnation, this file has been tested on *
13 * two different Linux platforms (one, redhat-based, with a *
14 * 2.1.2 glibc and gcc 2.95.x, and the other, Debian, with *
15 * 2.2.4 glibc and both gcc 2.95.4 and gcc 3.0.2). Both were *
16 * Intel x86 compatible machines. *
17 ***************************************************************************/
18
19 /***************************************************************************
20 * *
21 * This program is free software; you can redistribute it and/or modify *
22 * it under the terms of the GNU General Public License as published by *
23 * the Free Software Foundation; either version 2 of the License, or *
24 * (at your option) any later version. Note that an exemption to this *
25 * license has been granted by Stuart Caie for the purposes of *
26 * distribution with chmlib. This does not, to the best of my *
27 * knowledge, constitute a change in the license of this (the LZX) code *
28 * in general. *
29 * *
30 ***************************************************************************/
31
32 #include "lzx.h"
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36
37 #ifdef __GNUC__
38 #define memcpy __builtin_memcpy
39 #endif
40
41 /* sized types */
42 typedef unsigned char UBYTE; /* 8 bits exactly */
43 typedef unsigned short UWORD; /* 16 bits (or more) */
44 typedef unsigned int ULONG; /* 32 bits (or more) */
45 typedef signed int LONG; /* 32 bits (or more) */
46
47 /* some constants defined by the LZX specification */
48 #define LZX_MIN_MATCH (2)
49 #define LZX_MAX_MATCH (257)
50 #define LZX_NUM_CHARS (256)
51 #define LZX_BLOCKTYPE_INVALID (0) /* also blocktypes 4-7 invalid */
52 #define LZX_BLOCKTYPE_VERBATIM (1)
53 #define LZX_BLOCKTYPE_ALIGNED (2)
54 #define LZX_BLOCKTYPE_UNCOMPRESSED (3)
55 #define LZX_PRETREE_NUM_ELEMENTS (20)
56 #define LZX_ALIGNED_NUM_ELEMENTS (8) /* aligned offset tree #elements */
57 #define LZX_NUM_PRIMARY_LENGTHS (7) /* this one missing from spec! */
58 #define LZX_NUM_SECONDARY_LENGTHS (249) /* length tree #elements */
59
60 /* LZX huffman defines: tweak tablebits as desired */
61 #define LZX_PRETREE_MAXSYMBOLS (LZX_PRETREE_NUM_ELEMENTS)
62 #define LZX_PRETREE_TABLEBITS (6)
63 #define LZX_MAINTREE_MAXSYMBOLS (LZX_NUM_CHARS + 50*8)
64 #define LZX_MAINTREE_TABLEBITS (12)
65 #define LZX_LENGTH_MAXSYMBOLS (LZX_NUM_SECONDARY_LENGTHS+1)
66 #define LZX_LENGTH_TABLEBITS (12)
67 #define LZX_ALIGNED_MAXSYMBOLS (LZX_ALIGNED_NUM_ELEMENTS)
68 #define LZX_ALIGNED_TABLEBITS (7)
69
70 #define LZX_LENTABLE_SAFETY (64) /* we allow length table decoding overruns */
71
72 #define LZX_DECLARE_TABLE(tbl) \
73 UWORD tbl##_table[(1<<LZX_##tbl##_TABLEBITS) + (LZX_##tbl##_MAXSYMBOLS<<1)];\
74 UBYTE tbl##_len [LZX_##tbl##_MAXSYMBOLS + LZX_LENTABLE_SAFETY]
75
76 struct LZXstate
77 {
78 UBYTE *window; /* the actual decoding window */
79 ULONG window_size; /* window size (32Kb through 2Mb) */
80 ULONG actual_size; /* window size when it was first allocated */
81 ULONG window_posn; /* current offset within the window */
82 ULONG R0, R1, R2; /* for the LRU offset system */
83 UWORD main_elements; /* number of main tree elements */
84 int header_read; /* have we started decoding at all yet? */
85 UWORD block_type; /* type of this block */
86 ULONG block_length; /* uncompressed length of this block */
87 ULONG block_remaining; /* uncompressed bytes still left to decode */
88 ULONG frames_read; /* the number of CFDATA blocks processed */
89 LONG intel_filesize; /* magic header value used for transform */
90 LONG intel_curpos; /* current offset in transform space */
91 int intel_started; /* have we seen any translatable data yet? */
92
93 LZX_DECLARE_TABLE(PRETREE);
94 LZX_DECLARE_TABLE(MAINTREE);
95 LZX_DECLARE_TABLE(LENGTH);
96 LZX_DECLARE_TABLE(ALIGNED);
97 };
98
99 /* LZX decruncher */
100
101 /* Microsoft's LZX document and their implementation of the
102 * com.ms.util.cab Java package do not concur.
103 *
104 * In the LZX document, there is a table showing the correlation between
105 * window size and the number of position slots. It states that the 1MB
106 * window = 40 slots and the 2MB window = 42 slots. In the implementation,
107 * 1MB = 42 slots, 2MB = 50 slots. The actual calculation is 'find the
108 * first slot whose position base is equal to or more than the required
109 * window size'. This would explain why other tables in the document refer
110 * to 50 slots rather than 42.
111 *
112 * The constant NUM_PRIMARY_LENGTHS used in the decompression pseudocode
113 * is not defined in the specification.
114 *
115 * The LZX document does not state the uncompressed block has an
116 * uncompressed length field. Where does this length field come from, so
117 * we can know how large the block is? The implementation has it as the 24
118 * bits following after the 3 blocktype bits, before the alignment
119 * padding.
120 *
121 * The LZX document states that aligned offset blocks have their aligned
122 * offset huffman tree AFTER the main and length trees. The implementation
123 * suggests that the aligned offset tree is BEFORE the main and length
124 * trees.
125 *
126 * The LZX document decoding algorithm states that, in an aligned offset
127 * block, if an extra_bits value is 1, 2 or 3, then that number of bits
128 * should be read and the result added to the match offset. This is
129 * correct for 1 and 2, but not 3, where just a huffman symbol (using the
130 * aligned tree) should be read.
131 *
132 * Regarding the E8 preprocessing, the LZX document states 'No translation
133 * may be performed on the last 6 bytes of the input block'. This is
134 * correct. However, the pseudocode provided checks for the *E8 leader*
135 * up to the last 6 bytes. If the leader appears between -10 and -7 bytes
136 * from the end, this would cause the next four bytes to be modified, at
137 * least one of which would be in the last 6 bytes, which is not allowed
138 * according to the spec.
139 *
140 * The specification states that the huffman trees must always contain at
141 * least one element. However, many CAB files contain blocks where the
142 * length tree is completely empty (because there are no matches), and
143 * this is expected to succeed.
144 */
145
146
147 /* LZX uses what it calls 'position slots' to represent match offsets.
148 * What this means is that a small 'position slot' number and a small
149 * offset from that slot are encoded instead of one large offset for
150 * every match.
151 * - position_base is an index to the position slot bases
152 * - extra_bits states how many bits of offset-from-base data is needed.
153 */
154 static const UBYTE extra_bits[51] = {
155 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
156 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14,
157 15, 15, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
158 17, 17, 17
159 };
160
161 static const ULONG position_base[51] = {
162 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192,
163 256, 384, 512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152,
164 65536, 98304, 131072, 196608, 262144, 393216, 524288, 655360, 786432, 917504, 1048576, 1179648, 1310720, 1441792, 1572864, 1703936,
165 1835008, 1966080, 2097152
166 };
167
LZXinit(int window)168 struct LZXstate *LZXinit(int window)
169 {
170 struct LZXstate *pState=NULL;
171 ULONG wndsize = 1 << window;
172 int i, posn_slots;
173
174 /* LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) */
175 /* if a previously allocated window is big enough, keep it */
176 if (window < 15 || window > 21) return NULL;
177
178 /* allocate state and associated window */
179 pState = (struct LZXstate *)malloc(sizeof(struct LZXstate));
180 if (!(pState->window = (UBYTE *)malloc(wndsize)))
181 {
182 free(pState);
183 return NULL;
184 }
185 pState->actual_size = wndsize;
186 pState->window_size = wndsize;
187
188 /* calculate required position slots */
189 if (window == 20) posn_slots = 42;
190 else if (window == 21) posn_slots = 50;
191 else posn_slots = window << 1;
192
193 /** alternatively **/
194 /* posn_slots=i=0; while (i < wndsize) i += 1 << extra_bits[posn_slots++]; */
195
196 /* initialize other state */
197 pState->R0 = pState->R1 = pState->R2 = 1;
198 pState->main_elements = LZX_NUM_CHARS + (posn_slots << 3);
199 pState->header_read = 0;
200 pState->frames_read = 0;
201 pState->block_remaining = 0;
202 pState->block_type = LZX_BLOCKTYPE_INVALID;
203 pState->intel_curpos = 0;
204 pState->intel_started = 0;
205 pState->window_posn = 0;
206
207 /* initialise tables to 0 (because deltas will be applied to them) */
208 for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS; i++) pState->MAINTREE_len[i] = 0;
209 for (i = 0; i < LZX_LENGTH_MAXSYMBOLS; i++) pState->LENGTH_len[i] = 0;
210
211 return pState;
212 }
213
LZXteardown(struct LZXstate * pState)214 void LZXteardown(struct LZXstate *pState)
215 {
216 if (pState)
217 {
218 if (pState->window)
219 free(pState->window);
220 free(pState);
221 }
222 }
223
LZXreset(struct LZXstate * pState)224 int LZXreset(struct LZXstate *pState)
225 {
226 int i;
227
228 pState->R0 = pState->R1 = pState->R2 = 1;
229 pState->header_read = 0;
230 pState->frames_read = 0;
231 pState->block_remaining = 0;
232 pState->block_type = LZX_BLOCKTYPE_INVALID;
233 pState->intel_curpos = 0;
234 pState->intel_started = 0;
235 pState->window_posn = 0;
236
237 for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS + LZX_LENTABLE_SAFETY; i++) pState->MAINTREE_len[i] = 0;
238 for (i = 0; i < LZX_LENGTH_MAXSYMBOLS + LZX_LENTABLE_SAFETY; i++) pState->LENGTH_len[i] = 0;
239
240 return DECR_OK;
241 }
242
243
244 /* Bitstream reading macros:
245 *
246 * INIT_BITSTREAM should be used first to set up the system
247 * READ_BITS(var,n) takes N bits from the buffer and puts them in var
248 *
249 * ENSURE_BITS(n) ensures there are at least N bits in the bit buffer
250 * PEEK_BITS(n) extracts (without removing) N bits from the bit buffer
251 * REMOVE_BITS(n) removes N bits from the bit buffer
252 *
253 * These bit access routines work by using the area beyond the MSB and the
254 * LSB as a free source of zeroes. This avoids having to mask any bits.
255 * So we have to know the bit width of the bitbuffer variable. This is
256 * sizeof(ULONG) * 8, also defined as ULONG_BITS
257 */
258
259 /* number of bits in ULONG. Note: This must be at multiple of 16, and at
260 * least 32 for the bitbuffer code to work (ie, it must be able to ensure
261 * up to 17 bits - that's adding 16 bits when there's one bit left, or
262 * adding 32 bits when there are no bits left. The code should work fine
263 * for machines where ULONG >= 32 bits.
264 */
265 #define ULONG_BITS (sizeof(ULONG)<<3)
266
267 #define INIT_BITSTREAM do { bitsleft = 0; bitbuf = 0; } while (0)
268
269 #define ENSURE_BITS(n) \
270 while (bitsleft < (n)) { \
271 bitbuf |= ((inpos[1]<<8)|inpos[0]) << (ULONG_BITS-16 - bitsleft); \
272 bitsleft += 16; inpos+=2; \
273 }
274
275 #define PEEK_BITS(n) (bitbuf >> (ULONG_BITS - (n)))
276 #define REMOVE_BITS(n) ((bitbuf <<= (n)), (bitsleft -= (n)))
277
278 #define READ_BITS(v,n) do { \
279 ENSURE_BITS(n); \
280 (v) = PEEK_BITS(n); \
281 REMOVE_BITS(n); \
282 } while (0)
283
284
285 /* Huffman macros */
286
287 #define TABLEBITS(tbl) (LZX_##tbl##_TABLEBITS)
288 #define MAXSYMBOLS(tbl) (LZX_##tbl##_MAXSYMBOLS)
289 #define SYMTABLE(tbl) (pState->tbl##_table)
290 #define LENTABLE(tbl) (pState->tbl##_len)
291
292 /* BUILD_TABLE(tablename) builds a huffman lookup table from code lengths.
293 * In reality, it just calls make_decode_table() with the appropriate
294 * values - they're all fixed by some #defines anyway, so there's no point
295 * writing each call out in full by hand.
296 */
297 #define BUILD_TABLE(tbl) \
298 if (make_decode_table( \
299 MAXSYMBOLS(tbl), TABLEBITS(tbl), LENTABLE(tbl), SYMTABLE(tbl) \
300 )) { return DECR_ILLEGALDATA; }
301
302
303 /* READ_HUFFSYM(tablename, var) decodes one huffman symbol from the
304 * bitstream using the stated table and puts it in var.
305 */
306 #define READ_HUFFSYM(tbl,var) do { \
307 ENSURE_BITS(16); \
308 hufftbl = SYMTABLE(tbl); \
309 if ((i = hufftbl[PEEK_BITS(TABLEBITS(tbl))]) >= MAXSYMBOLS(tbl)) { \
310 j = 1 << (ULONG_BITS - TABLEBITS(tbl)); \
311 do { \
312 j >>= 1; i <<= 1; i |= (bitbuf & j) ? 1 : 0; \
313 if (!j) { return DECR_ILLEGALDATA; } \
314 } while ((i = hufftbl[i]) >= MAXSYMBOLS(tbl)); \
315 } \
316 j = LENTABLE(tbl)[(var) = i]; \
317 REMOVE_BITS(j); \
318 } while (0)
319
320
321 /* READ_LENGTHS(tablename, first, last) reads in code lengths for symbols
322 * first to last in the given table. The code lengths are stored in their
323 * own special LZX way.
324 */
325 #define READ_LENGTHS(tbl,first,last) do { \
326 lb.bb = bitbuf; lb.bl = bitsleft; lb.ip = inpos; \
327 if (lzx_read_lens(pState, LENTABLE(tbl),(first),(last),&lb)) { \
328 return DECR_ILLEGALDATA; \
329 } \
330 bitbuf = lb.bb; bitsleft = lb.bl; inpos = lb.ip; \
331 } while (0)
332
333
334 /* make_decode_table(nsyms, nbits, length[], table[])
335 *
336 * This function was coded by David Tritscher. It builds a fast huffman
337 * decoding table out of just a canonical huffman code lengths table.
338 *
339 * nsyms = total number of symbols in this huffman tree.
340 * nbits = any symbols with a code length of nbits or less can be decoded
341 * in one lookup of the table.
342 * length = A table to get code lengths from [0 to syms-1]
343 * table = The table to fill up with decoded symbols and pointers.
344 *
345 * Returns 0 for OK or 1 for error
346 */
347
make_decode_table(ULONG nsyms,ULONG nbits,UBYTE * length,UWORD * table)348 static int make_decode_table(ULONG nsyms, ULONG nbits, UBYTE *length, UWORD *table) {
349 register UWORD sym;
350 register ULONG leaf;
351 register UBYTE bit_num = 1;
352 ULONG fill;
353 ULONG pos = 0; /* the current position in the decode table */
354 ULONG table_mask = 1 << nbits;
355 ULONG bit_mask = table_mask >> 1; /* don't do 0 length codes */
356 ULONG next_symbol = bit_mask; /* base of allocation for long codes */
357
358 /* fill entries for codes short enough for a direct mapping */
359 while (bit_num <= nbits) {
360 for (sym = 0; sym < nsyms; sym++) {
361 if (length[sym] == bit_num) {
362 leaf = pos;
363
364 if((pos += bit_mask) > table_mask) return 1; /* table overrun */
365
366 /* fill all possible lookups of this symbol with the symbol itself */
367 fill = bit_mask;
368 while (fill-- > 0) table[leaf++] = sym;
369 }
370 }
371 bit_mask >>= 1;
372 bit_num++;
373 }
374
375 /* if there are any codes longer than nbits */
376 if (pos != table_mask) {
377 /* clear the remainder of the table */
378 for (sym = pos; sym < table_mask; sym++) table[sym] = 0;
379
380 /* give ourselves room for codes to grow by up to 16 more bits */
381 pos <<= 16;
382 table_mask <<= 16;
383 bit_mask = 1 << 15;
384
385 while (bit_num <= 16) {
386 for (sym = 0; sym < nsyms; sym++) {
387 if (length[sym] == bit_num) {
388 leaf = pos >> 16;
389 for (fill = 0; fill < bit_num - nbits; fill++) {
390 /* if this path hasn't been taken yet, 'allocate' two entries */
391 if (table[leaf] == 0) {
392 table[(next_symbol << 1)] = 0;
393 table[(next_symbol << 1) + 1] = 0;
394 table[leaf] = next_symbol++;
395 }
396 /* follow the path and select either left or right for next bit */
397 leaf = table[leaf] << 1;
398 if ((pos >> (15-fill)) & 1) leaf++;
399 }
400 table[leaf] = sym;
401
402 if ((pos += bit_mask) > table_mask) return 1; /* table overflow */
403 }
404 }
405 bit_mask >>= 1;
406 bit_num++;
407 }
408 }
409
410 /* full table? */
411 if (pos == table_mask) return 0;
412
413 /* either erroneous table, or all elements are 0 - let's find out. */
414 for (sym = 0; sym < nsyms; sym++) if (length[sym]) return 1;
415 return 0;
416 }
417
418 struct lzx_bits {
419 ULONG bb;
420 int bl;
421 UBYTE *ip;
422 };
423
lzx_read_lens(struct LZXstate * pState,UBYTE * lens,ULONG first,ULONG last,struct lzx_bits * lb)424 static int lzx_read_lens(struct LZXstate *pState, UBYTE *lens, ULONG first, ULONG last, struct lzx_bits *lb) {
425 ULONG i,j, x,y;
426 int z;
427
428 register ULONG bitbuf = lb->bb;
429 register int bitsleft = lb->bl;
430 UBYTE *inpos = lb->ip;
431 UWORD *hufftbl;
432
433 for (x = 0; x < 20; x++) {
434 READ_BITS(y, 4);
435 LENTABLE(PRETREE)[x] = y;
436 }
437 BUILD_TABLE(PRETREE);
438
439 for (x = first; x < last; ) {
440 READ_HUFFSYM(PRETREE, z);
441 if (z == 17) {
442 READ_BITS(y, 4); y += 4;
443 while (y--) lens[x++] = 0;
444 }
445 else if (z == 18) {
446 READ_BITS(y, 5); y += 20;
447 while (y--) lens[x++] = 0;
448 }
449 else if (z == 19) {
450 READ_BITS(y, 1); y += 4;
451 READ_HUFFSYM(PRETREE, z);
452 z = lens[x] - z; if (z < 0) z += 17;
453 while (y--) lens[x++] = z;
454 }
455 else {
456 z = lens[x] - z; if (z < 0) z += 17;
457 lens[x++] = z;
458 }
459 }
460
461 lb->bb = bitbuf;
462 lb->bl = bitsleft;
463 lb->ip = inpos;
464 return 0;
465 }
466
LZXdecompress(struct LZXstate * pState,unsigned char * inpos,unsigned char * outpos,int inlen,int outlen)467 int LZXdecompress(struct LZXstate *pState, unsigned char *inpos, unsigned char *outpos, int inlen, int outlen) {
468 UBYTE *endinp = inpos + inlen;
469 UBYTE *window = pState->window;
470 UBYTE *runsrc, *rundest;
471 UWORD *hufftbl; /* used in READ_HUFFSYM macro as chosen decoding table */
472
473 ULONG window_posn = pState->window_posn;
474 ULONG window_size = pState->window_size;
475 ULONG R0 = pState->R0;
476 ULONG R1 = pState->R1;
477 ULONG R2 = pState->R2;
478
479 register ULONG bitbuf;
480 register int bitsleft;
481 ULONG match_offset, i,j,k; /* ijk used in READ_HUFFSYM macro */
482 struct lzx_bits lb; /* used in READ_LENGTHS macro */
483
484 int togo = outlen, this_run, main_element, aligned_bits;
485 int match_length, length_footer, extra, verbatim_bits;
486
487 INIT_BITSTREAM;
488
489 /* read header if necessary */
490 if (!pState->header_read) {
491 i = j = 0;
492 READ_BITS(k, 1); if (k) { READ_BITS(i,16); READ_BITS(j,16); }
493 pState->intel_filesize = (i << 16) | j; /* or 0 if not encoded */
494 pState->header_read = 1;
495 }
496
497 /* main decoding loop */
498 while (togo > 0) {
499 /* last block finished, new block expected */
500 if (pState->block_remaining == 0) {
501 if (pState->block_type == LZX_BLOCKTYPE_UNCOMPRESSED) {
502 if (pState->block_length & 1) inpos++; /* realign bitstream to word */
503 INIT_BITSTREAM;
504 }
505
506 READ_BITS(pState->block_type, 3);
507 READ_BITS(i, 16);
508 READ_BITS(j, 8);
509 pState->block_remaining = pState->block_length = (i << 8) | j;
510
511 switch (pState->block_type) {
512 case LZX_BLOCKTYPE_ALIGNED:
513 for (i = 0; i < 8; i++) { READ_BITS(j, 3); LENTABLE(ALIGNED)[i] = j; }
514 BUILD_TABLE(ALIGNED);
515 /* rest of aligned header is same as verbatim */
516
517 case LZX_BLOCKTYPE_VERBATIM:
518 READ_LENGTHS(MAINTREE, 0, 256);
519 READ_LENGTHS(MAINTREE, 256, pState->main_elements);
520 BUILD_TABLE(MAINTREE);
521 if (LENTABLE(MAINTREE)[0xE8] != 0) pState->intel_started = 1;
522
523 READ_LENGTHS(LENGTH, 0, LZX_NUM_SECONDARY_LENGTHS);
524 BUILD_TABLE(LENGTH);
525 break;
526
527 case LZX_BLOCKTYPE_UNCOMPRESSED:
528 pState->intel_started = 1; /* because we can't assume otherwise */
529 ENSURE_BITS(16); /* get up to 16 pad bits into the buffer */
530 if (bitsleft > 16) inpos -= 2; /* and align the bitstream! */
531 R0 = inpos[0]|(inpos[1]<<8)|(inpos[2]<<16)|(inpos[3]<<24);inpos+=4;
532 R1 = inpos[0]|(inpos[1]<<8)|(inpos[2]<<16)|(inpos[3]<<24);inpos+=4;
533 R2 = inpos[0]|(inpos[1]<<8)|(inpos[2]<<16)|(inpos[3]<<24);inpos+=4;
534 break;
535
536 default:
537 return DECR_ILLEGALDATA;
538 }
539 }
540
541 /* buffer exhaustion check */
542 if (inpos > endinp) {
543 /* it's possible to have a file where the next run is less than
544 * 16 bits in size. In this case, the READ_HUFFSYM() macro used
545 * in building the tables will exhaust the buffer, so we should
546 * allow for this, but not allow those accidentally read bits to
547 * be used (so we check that there are at least 16 bits
548 * remaining - in this boundary case they aren't really part of
549 * the compressed data)
550 */
551 if (inpos > (endinp+2) || bitsleft < 16) return DECR_ILLEGALDATA;
552 }
553
554 while ((this_run = pState->block_remaining) > 0 && togo > 0) {
555 if (this_run > togo) this_run = togo;
556 togo -= this_run;
557 pState->block_remaining -= this_run;
558
559 /* apply 2^x-1 mask */
560 window_posn &= window_size - 1;
561 /* runs can't straddle the window wraparound */
562 if ((window_posn + this_run) > window_size)
563 return DECR_DATAFORMAT;
564
565 switch (pState->block_type) {
566
567 case LZX_BLOCKTYPE_VERBATIM:
568 while (this_run > 0) {
569 READ_HUFFSYM(MAINTREE, main_element);
570
571 if (main_element < LZX_NUM_CHARS) {
572 /* literal: 0 to LZX_NUM_CHARS-1 */
573 window[window_posn++] = main_element;
574 this_run--;
575 }
576 else {
577 /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */
578 main_element -= LZX_NUM_CHARS;
579
580 match_length = main_element & LZX_NUM_PRIMARY_LENGTHS;
581 if (match_length == LZX_NUM_PRIMARY_LENGTHS) {
582 READ_HUFFSYM(LENGTH, length_footer);
583 match_length += length_footer;
584 }
585 match_length += LZX_MIN_MATCH;
586
587 match_offset = main_element >> 3;
588
589 if (match_offset > 2) {
590 /* not repeated offset */
591 if (match_offset != 3) {
592 extra = extra_bits[match_offset];
593 READ_BITS(verbatim_bits, extra);
594 match_offset = position_base[match_offset] - 2 + verbatim_bits;
595 }
596 else {
597 match_offset = 1;
598 }
599
600 /* update repeated offset LRU queue */
601 R2 = R1; R1 = R0; R0 = match_offset;
602 }
603 else if (match_offset == 0) {
604 match_offset = R0;
605 }
606 else if (match_offset == 1) {
607 match_offset = R1;
608 R1 = R0; R0 = match_offset;
609 }
610 else /* match_offset == 2 */ {
611 match_offset = R2;
612 R2 = R0; R0 = match_offset;
613 }
614
615 rundest = window + window_posn;
616 runsrc = rundest - match_offset;
617 window_posn += match_length;
618 if (window_posn > window_size) return DECR_ILLEGALDATA;
619 this_run -= match_length;
620
621 /* copy any wrapped around source data */
622 while ((runsrc < window) && (match_length-- > 0)) {
623 *rundest++ = *(runsrc + window_size); runsrc++;
624 }
625 /* copy match data - no worries about destination wraps */
626 while (match_length-- > 0) *rundest++ = *runsrc++;
627
628 }
629 }
630 break;
631
632 case LZX_BLOCKTYPE_ALIGNED:
633 while (this_run > 0) {
634 READ_HUFFSYM(MAINTREE, main_element);
635
636 if (main_element < LZX_NUM_CHARS) {
637 /* literal: 0 to LZX_NUM_CHARS-1 */
638 window[window_posn++] = main_element;
639 this_run--;
640 }
641 else {
642 /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */
643 main_element -= LZX_NUM_CHARS;
644
645 match_length = main_element & LZX_NUM_PRIMARY_LENGTHS;
646 if (match_length == LZX_NUM_PRIMARY_LENGTHS) {
647 READ_HUFFSYM(LENGTH, length_footer);
648 match_length += length_footer;
649 }
650 match_length += LZX_MIN_MATCH;
651
652 match_offset = main_element >> 3;
653
654 if (match_offset > 2) {
655 /* not repeated offset */
656 extra = extra_bits[match_offset];
657 match_offset = position_base[match_offset] - 2;
658 if (extra > 3) {
659 /* verbatim and aligned bits */
660 extra -= 3;
661 READ_BITS(verbatim_bits, extra);
662 match_offset += (verbatim_bits << 3);
663 READ_HUFFSYM(ALIGNED, aligned_bits);
664 match_offset += aligned_bits;
665 }
666 else if (extra == 3) {
667 /* aligned bits only */
668 READ_HUFFSYM(ALIGNED, aligned_bits);
669 match_offset += aligned_bits;
670 }
671 else if (extra > 0) { /* extra==1, extra==2 */
672 /* verbatim bits only */
673 READ_BITS(verbatim_bits, extra);
674 match_offset += verbatim_bits;
675 }
676 else /* extra == 0 */ {
677 /* ??? */
678 match_offset = 1;
679 }
680
681 /* update repeated offset LRU queue */
682 R2 = R1; R1 = R0; R0 = match_offset;
683 }
684 else if (match_offset == 0) {
685 match_offset = R0;
686 }
687 else if (match_offset == 1) {
688 match_offset = R1;
689 R1 = R0; R0 = match_offset;
690 }
691 else /* match_offset == 2 */ {
692 match_offset = R2;
693 R2 = R0; R0 = match_offset;
694 }
695
696 rundest = window + window_posn;
697 runsrc = rundest - match_offset;
698 window_posn += match_length;
699 if (window_posn > window_size) return DECR_ILLEGALDATA;
700 this_run -= match_length;
701
702 /* copy any wrapped around source data */
703 while ((runsrc < window) && (match_length-- > 0)) {
704 *rundest++ = *(runsrc + window_size); runsrc++;
705 }
706 /* copy match data - no worries about destination wraps */
707 while (match_length-- > 0) *rundest++ = *runsrc++;
708
709 }
710 }
711 break;
712
713 case LZX_BLOCKTYPE_UNCOMPRESSED:
714 if ((inpos + this_run) > endinp) return DECR_ILLEGALDATA;
715 memcpy(window + window_posn, inpos, (size_t) this_run);
716 inpos += this_run; window_posn += this_run;
717 break;
718
719 default:
720 return DECR_ILLEGALDATA; /* might as well */
721 }
722
723 }
724 }
725
726 if (togo != 0) return DECR_ILLEGALDATA;
727 memcpy(outpos, window + ((!window_posn) ? window_size : window_posn) - outlen, (size_t) outlen);
728
729 pState->window_posn = window_posn;
730 pState->R0 = R0;
731 pState->R1 = R1;
732 pState->R2 = R2;
733
734 /* intel E8 decoding */
735 if ((pState->frames_read++ < 32768) && pState->intel_filesize != 0) {
736 if (outlen <= 6 || !pState->intel_started) {
737 pState->intel_curpos += outlen;
738 }
739 else {
740 UBYTE *data = outpos;
741 UBYTE *dataend = data + outlen - 10;
742 LONG curpos = pState->intel_curpos;
743 LONG filesize = pState->intel_filesize;
744 LONG abs_off, rel_off;
745
746 pState->intel_curpos = curpos + outlen;
747
748 while (data < dataend) {
749 if (*data++ != 0xE8) { curpos++; continue; }
750 abs_off = data[0] | (data[1]<<8) | (data[2]<<16) | (data[3]<<24);
751 if ((abs_off >= -curpos) && (abs_off < filesize)) {
752 rel_off = (abs_off >= 0) ? abs_off - curpos : abs_off + filesize;
753 data[0] = (UBYTE) rel_off;
754 data[1] = (UBYTE) (rel_off >> 8);
755 data[2] = (UBYTE) (rel_off >> 16);
756 data[3] = (UBYTE) (rel_off >> 24);
757 }
758 data += 4;
759 curpos += 5;
760 }
761 }
762 }
763 return DECR_OK;
764 }
765
766 #ifdef LZX_CHM_TESTDRIVER
main(int c,char ** v)767 int main(int c, char **v)
768 {
769 FILE *fin, *fout;
770 struct LZXstate state;
771 UBYTE ibuf[16384];
772 UBYTE obuf[32768];
773 int ilen, olen;
774 int status;
775 int i;
776 int count=0;
777 int w = atoi(v[1]);
778 LZXinit(&state, w);
779 fout = fopen(v[2], "wb");
780 for (i=3; i<c; i++)
781 {
782 fin = fopen(v[i], "rb");
783 ilen = fread(ibuf, 1, 16384, fin);
784 status = LZXdecompress(&state, ibuf, obuf, ilen, 32768);
785 switch (status)
786 {
787 case DECR_OK:
788 printf("ok\n");
789 fwrite(obuf, 1, 32768, fout);
790 break;
791 case DECR_DATAFORMAT:
792 printf("bad format\n");
793 break;
794 case DECR_ILLEGALDATA:
795 printf("illegal data\n");
796 break;
797 case DECR_NOMEMORY:
798 printf("no memory\n");
799 break;
800 default:
801 break;
802 }
803 fclose(fin);
804 if (++count == 2)
805 {
806 count = 0;
807 LZXreset(&state);
808 }
809 }
810 fclose(fout);
811 }
812 #endif
813