1 /* $Id: lzx.c,v 1.5 2002/10/09 01:16:33 jedwin Exp $ */
2 /***************************************************************************
3  *                        lzx.c - LZX decompression routines               *
4  *                           -------------------                           *
5  *                                                                         *
6  *  maintainer: Jed Wing <jedwin@ugcs.caltech.edu>                         *
7  *  source:     modified lzx.c from cabextract v0.5                        *
8  *  notes:      This file was taken from cabextract v0.5, which was,       *
9  *              itself, a modified version of the lzx decompression code   *
10  *              from unlzx.                                                *
11  *                                                                         *
12  *  platforms:  In its current incarnation, this file has been tested on   *
13  *              two different Linux platforms (one, redhat-based, with a   *
14  *              2.1.2 glibc and gcc 2.95.x, and the other, Debian, with    *
15  *              2.2.4 glibc and both gcc 2.95.4 and gcc 3.0.2).  Both were *
16  *              Intel x86 compatible machines.                             *
17  ***************************************************************************/
18 
19 /***************************************************************************
20  *                                                                         *
21  *   This program is free software; you can redistribute it and/or modify  *
22  *   it under the terms of the GNU General Public License as published by  *
23  *   the Free Software Foundation; either version 2 of the License, or     *
24  *   (at your option) any later version.  Note that an exemption to this   *
25  *   license has been granted by Stuart Caie for the purposes of           *
26  *   distribution with chmlib.  This does not, to the best of my           *
27  *   knowledge, constitute a change in the license of this (the LZX) code  *
28  *   in general.                                                           *
29  *                                                                         *
30  ***************************************************************************/
31 
32 #include "lzx.h"
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 
37 #ifdef __GNUC__
38 #define memcpy __builtin_memcpy
39 #endif
40 
41 /* sized types */
42 typedef unsigned char  UBYTE; /* 8 bits exactly    */
43 typedef unsigned short UWORD; /* 16 bits (or more) */
44 typedef unsigned int   ULONG; /* 32 bits (or more) */
45 typedef   signed int    LONG; /* 32 bits (or more) */
46 
47 /* some constants defined by the LZX specification */
48 #define LZX_MIN_MATCH                (2)
49 #define LZX_MAX_MATCH                (257)
50 #define LZX_NUM_CHARS                (256)
51 #define LZX_BLOCKTYPE_INVALID        (0)   /* also blocktypes 4-7 invalid */
52 #define LZX_BLOCKTYPE_VERBATIM       (1)
53 #define LZX_BLOCKTYPE_ALIGNED        (2)
54 #define LZX_BLOCKTYPE_UNCOMPRESSED   (3)
55 #define LZX_PRETREE_NUM_ELEMENTS     (20)
56 #define LZX_ALIGNED_NUM_ELEMENTS     (8)   /* aligned offset tree #elements */
57 #define LZX_NUM_PRIMARY_LENGTHS      (7)   /* this one missing from spec! */
58 #define LZX_NUM_SECONDARY_LENGTHS    (249) /* length tree #elements */
59 
60 /* LZX huffman defines: tweak tablebits as desired */
61 #define LZX_PRETREE_MAXSYMBOLS  (LZX_PRETREE_NUM_ELEMENTS)
62 #define LZX_PRETREE_TABLEBITS   (6)
63 #define LZX_MAINTREE_MAXSYMBOLS (LZX_NUM_CHARS + 50*8)
64 #define LZX_MAINTREE_TABLEBITS  (12)
65 #define LZX_LENGTH_MAXSYMBOLS   (LZX_NUM_SECONDARY_LENGTHS+1)
66 #define LZX_LENGTH_TABLEBITS    (12)
67 #define LZX_ALIGNED_MAXSYMBOLS  (LZX_ALIGNED_NUM_ELEMENTS)
68 #define LZX_ALIGNED_TABLEBITS   (7)
69 
70 #define LZX_LENTABLE_SAFETY (64) /* we allow length table decoding overruns */
71 
72 #define LZX_DECLARE_TABLE(tbl) \
73   UWORD tbl##_table[(1<<LZX_##tbl##_TABLEBITS) + (LZX_##tbl##_MAXSYMBOLS<<1)];\
74   UBYTE tbl##_len  [LZX_##tbl##_MAXSYMBOLS + LZX_LENTABLE_SAFETY]
75 
76 struct LZXstate
77 {
78     UBYTE *window;         /* the actual decoding window              */
79     ULONG window_size;     /* window size (32Kb through 2Mb)          */
80     ULONG actual_size;     /* window size when it was first allocated */
81     ULONG window_posn;     /* current offset within the window        */
82     ULONG R0, R1, R2;      /* for the LRU offset system               */
83     UWORD main_elements;   /* number of main tree elements            */
84     int   header_read;     /* have we started decoding at all yet?    */
85     UWORD block_type;      /* type of this block                      */
86     ULONG block_length;    /* uncompressed length of this block       */
87     ULONG block_remaining; /* uncompressed bytes still left to decode */
88     ULONG frames_read;     /* the number of CFDATA blocks processed   */
89     LONG  intel_filesize;  /* magic header value used for transform   */
90     LONG  intel_curpos;    /* current offset in transform space       */
91     int   intel_started;   /* have we seen any translatable data yet? */
92 
93     LZX_DECLARE_TABLE(PRETREE);
94     LZX_DECLARE_TABLE(MAINTREE);
95     LZX_DECLARE_TABLE(LENGTH);
96     LZX_DECLARE_TABLE(ALIGNED);
97 };
98 
99 /* LZX decruncher */
100 
101 /* Microsoft's LZX document and their implementation of the
102  * com.ms.util.cab Java package do not concur.
103  *
104  * In the LZX document, there is a table showing the correlation between
105  * window size and the number of position slots. It states that the 1MB
106  * window = 40 slots and the 2MB window = 42 slots. In the implementation,
107  * 1MB = 42 slots, 2MB = 50 slots. The actual calculation is 'find the
108  * first slot whose position base is equal to or more than the required
109  * window size'. This would explain why other tables in the document refer
110  * to 50 slots rather than 42.
111  *
112  * The constant NUM_PRIMARY_LENGTHS used in the decompression pseudocode
113  * is not defined in the specification.
114  *
115  * The LZX document does not state the uncompressed block has an
116  * uncompressed length field. Where does this length field come from, so
117  * we can know how large the block is? The implementation has it as the 24
118  * bits following after the 3 blocktype bits, before the alignment
119  * padding.
120  *
121  * The LZX document states that aligned offset blocks have their aligned
122  * offset huffman tree AFTER the main and length trees. The implementation
123  * suggests that the aligned offset tree is BEFORE the main and length
124  * trees.
125  *
126  * The LZX document decoding algorithm states that, in an aligned offset
127  * block, if an extra_bits value is 1, 2 or 3, then that number of bits
128  * should be read and the result added to the match offset. This is
129  * correct for 1 and 2, but not 3, where just a huffman symbol (using the
130  * aligned tree) should be read.
131  *
132  * Regarding the E8 preprocessing, the LZX document states 'No translation
133  * may be performed on the last 6 bytes of the input block'. This is
134  * correct.  However, the pseudocode provided checks for the *E8 leader*
135  * up to the last 6 bytes. If the leader appears between -10 and -7 bytes
136  * from the end, this would cause the next four bytes to be modified, at
137  * least one of which would be in the last 6 bytes, which is not allowed
138  * according to the spec.
139  *
140  * The specification states that the huffman trees must always contain at
141  * least one element. However, many CAB files contain blocks where the
142  * length tree is completely empty (because there are no matches), and
143  * this is expected to succeed.
144  */
145 
146 
147 /* LZX uses what it calls 'position slots' to represent match offsets.
148  * What this means is that a small 'position slot' number and a small
149  * offset from that slot are encoded instead of one large offset for
150  * every match.
151  * - position_base is an index to the position slot bases
152  * - extra_bits states how many bits of offset-from-base data is needed.
153  */
154 static const UBYTE extra_bits[51] = {
155      0,  0,  0,  0,  1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,
156      7,  7,  8,  8,  9,  9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14,
157     15, 15, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
158     17, 17, 17
159 };
160 
161 static const ULONG position_base[51] = {
162           0,       1,       2,      3,      4,      6,      8,     12,     16,     24,     32,       48,      64,      96,     128,     192,
163         256,     384,     512,    768,   1024,   1536,   2048,   3072,   4096,   6144,   8192,    12288,   16384,   24576,   32768,   49152,
164       65536,   98304,  131072, 196608, 262144, 393216, 524288, 655360, 786432, 917504, 1048576, 1179648, 1310720, 1441792, 1572864, 1703936,
165     1835008, 1966080, 2097152
166 };
167 
LZXinit(int window)168 struct LZXstate *LZXinit(int window)
169 {
170     struct LZXstate *pState=NULL;
171     ULONG wndsize = 1 << window;
172     int i, posn_slots;
173 
174     /* LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) */
175     /* if a previously allocated window is big enough, keep it     */
176     if (window < 15 || window > 21) return NULL;
177 
178     /* allocate state and associated window */
179     pState = (struct LZXstate *)malloc(sizeof(struct LZXstate));
180     if (!(pState->window = (UBYTE *)malloc(wndsize)))
181     {
182         free(pState);
183         return NULL;
184     }
185     pState->actual_size = wndsize;
186     pState->window_size = wndsize;
187 
188     /* calculate required position slots */
189     if (window == 20) posn_slots = 42;
190     else if (window == 21) posn_slots = 50;
191     else posn_slots = window << 1;
192 
193     /** alternatively **/
194     /* posn_slots=i=0; while (i < wndsize) i += 1 << extra_bits[posn_slots++]; */
195 
196     /* initialize other state */
197     pState->R0  =  pState->R1  = pState->R2 = 1;
198     pState->main_elements   = LZX_NUM_CHARS + (posn_slots << 3);
199     pState->header_read     = 0;
200     pState->frames_read     = 0;
201     pState->block_remaining = 0;
202     pState->block_type      = LZX_BLOCKTYPE_INVALID;
203     pState->intel_curpos    = 0;
204     pState->intel_started   = 0;
205     pState->window_posn     = 0;
206 
207     /* initialise tables to 0 (because deltas will be applied to them) */
208     for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS; i++) pState->MAINTREE_len[i] = 0;
209     for (i = 0; i < LZX_LENGTH_MAXSYMBOLS; i++)   pState->LENGTH_len[i]   = 0;
210 
211     return pState;
212 }
213 
LZXteardown(struct LZXstate * pState)214 void LZXteardown(struct LZXstate *pState)
215 {
216     if (pState)
217     {
218         if (pState->window)
219             free(pState->window);
220         free(pState);
221     }
222 }
223 
LZXreset(struct LZXstate * pState)224 int LZXreset(struct LZXstate *pState)
225 {
226     int i;
227 
228     pState->R0  =  pState->R1  = pState->R2 = 1;
229     pState->header_read     = 0;
230     pState->frames_read     = 0;
231     pState->block_remaining = 0;
232     pState->block_type      = LZX_BLOCKTYPE_INVALID;
233     pState->intel_curpos    = 0;
234     pState->intel_started   = 0;
235     pState->window_posn     = 0;
236 
237     for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS + LZX_LENTABLE_SAFETY; i++) pState->MAINTREE_len[i] = 0;
238     for (i = 0; i < LZX_LENGTH_MAXSYMBOLS + LZX_LENTABLE_SAFETY; i++)   pState->LENGTH_len[i]   = 0;
239 
240     return DECR_OK;
241 }
242 
243 
244 /* Bitstream reading macros:
245  *
246  * INIT_BITSTREAM    should be used first to set up the system
247  * READ_BITS(var,n)  takes N bits from the buffer and puts them in var
248  *
249  * ENSURE_BITS(n)    ensures there are at least N bits in the bit buffer
250  * PEEK_BITS(n)      extracts (without removing) N bits from the bit buffer
251  * REMOVE_BITS(n)    removes N bits from the bit buffer
252  *
253  * These bit access routines work by using the area beyond the MSB and the
254  * LSB as a free source of zeroes. This avoids having to mask any bits.
255  * So we have to know the bit width of the bitbuffer variable. This is
256  * sizeof(ULONG) * 8, also defined as ULONG_BITS
257  */
258 
259 /* number of bits in ULONG. Note: This must be at multiple of 16, and at
260  * least 32 for the bitbuffer code to work (ie, it must be able to ensure
261  * up to 17 bits - that's adding 16 bits when there's one bit left, or
262  * adding 32 bits when there are no bits left. The code should work fine
263  * for machines where ULONG >= 32 bits.
264  */
265 #define ULONG_BITS (sizeof(ULONG)<<3)
266 
267 #define INIT_BITSTREAM do { bitsleft = 0; bitbuf = 0; } while (0)
268 
269 #define ENSURE_BITS(n)							\
270   while (bitsleft < (n)) {						\
271     bitbuf |= ((inpos[1]<<8)|inpos[0]) << (ULONG_BITS-16 - bitsleft);	\
272     bitsleft += 16; inpos+=2;						\
273   }
274 
275 #define PEEK_BITS(n)   (bitbuf >> (ULONG_BITS - (n)))
276 #define REMOVE_BITS(n) ((bitbuf <<= (n)), (bitsleft -= (n)))
277 
278 #define READ_BITS(v,n) do {						\
279   ENSURE_BITS(n);							\
280   (v) = PEEK_BITS(n);							\
281   REMOVE_BITS(n);							\
282 } while (0)
283 
284 
285 /* Huffman macros */
286 
287 #define TABLEBITS(tbl)   (LZX_##tbl##_TABLEBITS)
288 #define MAXSYMBOLS(tbl)  (LZX_##tbl##_MAXSYMBOLS)
289 #define SYMTABLE(tbl)    (pState->tbl##_table)
290 #define LENTABLE(tbl)    (pState->tbl##_len)
291 
292 /* BUILD_TABLE(tablename) builds a huffman lookup table from code lengths.
293  * In reality, it just calls make_decode_table() with the appropriate
294  * values - they're all fixed by some #defines anyway, so there's no point
295  * writing each call out in full by hand.
296  */
297 #define BUILD_TABLE(tbl)						\
298   if (make_decode_table(						\
299     MAXSYMBOLS(tbl), TABLEBITS(tbl), LENTABLE(tbl), SYMTABLE(tbl)	\
300   )) { return DECR_ILLEGALDATA; }
301 
302 
303 /* READ_HUFFSYM(tablename, var) decodes one huffman symbol from the
304  * bitstream using the stated table and puts it in var.
305  */
306 #define READ_HUFFSYM(tbl,var) do {					\
307   ENSURE_BITS(16);							\
308   hufftbl = SYMTABLE(tbl);						\
309   if ((i = hufftbl[PEEK_BITS(TABLEBITS(tbl))]) >= MAXSYMBOLS(tbl)) {	\
310     j = 1 << (ULONG_BITS - TABLEBITS(tbl));				\
311     do {								\
312       j >>= 1; i <<= 1; i |= (bitbuf & j) ? 1 : 0;			\
313       if (!j) { return DECR_ILLEGALDATA; }	                        \
314     } while ((i = hufftbl[i]) >= MAXSYMBOLS(tbl));			\
315   }									\
316   j = LENTABLE(tbl)[(var) = i];						\
317   REMOVE_BITS(j);							\
318 } while (0)
319 
320 
321 /* READ_LENGTHS(tablename, first, last) reads in code lengths for symbols
322  * first to last in the given table. The code lengths are stored in their
323  * own special LZX way.
324  */
325 #define READ_LENGTHS(tbl,first,last) do { \
326   lb.bb = bitbuf; lb.bl = bitsleft; lb.ip = inpos; \
327   if (lzx_read_lens(pState, LENTABLE(tbl),(first),(last),&lb)) { \
328     return DECR_ILLEGALDATA; \
329   } \
330   bitbuf = lb.bb; bitsleft = lb.bl; inpos = lb.ip; \
331 } while (0)
332 
333 
334 /* make_decode_table(nsyms, nbits, length[], table[])
335  *
336  * This function was coded by David Tritscher. It builds a fast huffman
337  * decoding table out of just a canonical huffman code lengths table.
338  *
339  * nsyms  = total number of symbols in this huffman tree.
340  * nbits  = any symbols with a code length of nbits or less can be decoded
341  *          in one lookup of the table.
342  * length = A table to get code lengths from [0 to syms-1]
343  * table  = The table to fill up with decoded symbols and pointers.
344  *
345  * Returns 0 for OK or 1 for error
346  */
347 
make_decode_table(ULONG nsyms,ULONG nbits,UBYTE * length,UWORD * table)348 static int make_decode_table(ULONG nsyms, ULONG nbits, UBYTE *length, UWORD *table) {
349     register UWORD sym;
350     register ULONG leaf;
351     register UBYTE bit_num = 1;
352     ULONG fill;
353     ULONG pos         = 0; /* the current position in the decode table */
354     ULONG table_mask  = 1 << nbits;
355     ULONG bit_mask    = table_mask >> 1; /* don't do 0 length codes */
356     ULONG next_symbol = bit_mask; /* base of allocation for long codes */
357 
358     /* fill entries for codes short enough for a direct mapping */
359     while (bit_num <= nbits) {
360         for (sym = 0; sym < nsyms; sym++) {
361             if (length[sym] == bit_num) {
362                 leaf = pos;
363 
364                 if((pos += bit_mask) > table_mask) return 1; /* table overrun */
365 
366                 /* fill all possible lookups of this symbol with the symbol itself */
367                 fill = bit_mask;
368                 while (fill-- > 0) table[leaf++] = sym;
369             }
370         }
371         bit_mask >>= 1;
372         bit_num++;
373     }
374 
375     /* if there are any codes longer than nbits */
376     if (pos != table_mask) {
377         /* clear the remainder of the table */
378         for (sym = pos; sym < table_mask; sym++) table[sym] = 0;
379 
380         /* give ourselves room for codes to grow by up to 16 more bits */
381         pos <<= 16;
382         table_mask <<= 16;
383         bit_mask = 1 << 15;
384 
385         while (bit_num <= 16) {
386             for (sym = 0; sym < nsyms; sym++) {
387                 if (length[sym] == bit_num) {
388                     leaf = pos >> 16;
389                     for (fill = 0; fill < bit_num - nbits; fill++) {
390                         /* if this path hasn't been taken yet, 'allocate' two entries */
391                         if (table[leaf] == 0) {
392                             table[(next_symbol << 1)] = 0;
393                             table[(next_symbol << 1) + 1] = 0;
394                             table[leaf] = next_symbol++;
395                         }
396                         /* follow the path and select either left or right for next bit */
397                         leaf = table[leaf] << 1;
398                         if ((pos >> (15-fill)) & 1) leaf++;
399                     }
400                     table[leaf] = sym;
401 
402                     if ((pos += bit_mask) > table_mask) return 1; /* table overflow */
403                 }
404             }
405             bit_mask >>= 1;
406             bit_num++;
407         }
408     }
409 
410     /* full table? */
411     if (pos == table_mask) return 0;
412 
413     /* either erroneous table, or all elements are 0 - let's find out. */
414     for (sym = 0; sym < nsyms; sym++) if (length[sym]) return 1;
415     return 0;
416 }
417 
418 struct lzx_bits {
419   ULONG bb;
420   int bl;
421   UBYTE *ip;
422 };
423 
lzx_read_lens(struct LZXstate * pState,UBYTE * lens,ULONG first,ULONG last,struct lzx_bits * lb)424 static int lzx_read_lens(struct LZXstate *pState, UBYTE *lens, ULONG first, ULONG last, struct lzx_bits *lb) {
425     ULONG i,j, x,y;
426     int z;
427 
428     register ULONG bitbuf = lb->bb;
429     register int bitsleft = lb->bl;
430     UBYTE *inpos = lb->ip;
431     UWORD *hufftbl;
432 
433     for (x = 0; x < 20; x++) {
434         READ_BITS(y, 4);
435         LENTABLE(PRETREE)[x] = y;
436     }
437     BUILD_TABLE(PRETREE);
438 
439     for (x = first; x < last; ) {
440         READ_HUFFSYM(PRETREE, z);
441         if (z == 17) {
442             READ_BITS(y, 4); y += 4;
443             while (y--) lens[x++] = 0;
444         }
445         else if (z == 18) {
446             READ_BITS(y, 5); y += 20;
447             while (y--) lens[x++] = 0;
448         }
449         else if (z == 19) {
450             READ_BITS(y, 1); y += 4;
451             READ_HUFFSYM(PRETREE, z);
452             z = lens[x] - z; if (z < 0) z += 17;
453             while (y--) lens[x++] = z;
454         }
455         else {
456             z = lens[x] - z; if (z < 0) z += 17;
457             lens[x++] = z;
458         }
459     }
460 
461     lb->bb = bitbuf;
462     lb->bl = bitsleft;
463     lb->ip = inpos;
464     return 0;
465 }
466 
LZXdecompress(struct LZXstate * pState,unsigned char * inpos,unsigned char * outpos,int inlen,int outlen)467 int LZXdecompress(struct LZXstate *pState, unsigned char *inpos, unsigned char *outpos, int inlen, int outlen) {
468     UBYTE *endinp = inpos + inlen;
469     UBYTE *window = pState->window;
470     UBYTE *runsrc, *rundest;
471     UWORD *hufftbl; /* used in READ_HUFFSYM macro as chosen decoding table */
472 
473     ULONG window_posn = pState->window_posn;
474     ULONG window_size = pState->window_size;
475     ULONG R0 = pState->R0;
476     ULONG R1 = pState->R1;
477     ULONG R2 = pState->R2;
478 
479     register ULONG bitbuf;
480     register int bitsleft;
481     ULONG match_offset, i,j,k; /* ijk used in READ_HUFFSYM macro */
482     struct lzx_bits lb; /* used in READ_LENGTHS macro */
483 
484     int togo = outlen, this_run, main_element, aligned_bits;
485     int match_length, length_footer, extra, verbatim_bits;
486 
487     INIT_BITSTREAM;
488 
489     /* read header if necessary */
490     if (!pState->header_read) {
491         i = j = 0;
492         READ_BITS(k, 1); if (k) { READ_BITS(i,16); READ_BITS(j,16); }
493         pState->intel_filesize = (i << 16) | j; /* or 0 if not encoded */
494         pState->header_read = 1;
495     }
496 
497     /* main decoding loop */
498     while (togo > 0) {
499         /* last block finished, new block expected */
500         if (pState->block_remaining == 0) {
501             if (pState->block_type == LZX_BLOCKTYPE_UNCOMPRESSED) {
502                 if (pState->block_length & 1) inpos++; /* realign bitstream to word */
503                 INIT_BITSTREAM;
504             }
505 
506             READ_BITS(pState->block_type, 3);
507             READ_BITS(i, 16);
508             READ_BITS(j, 8);
509             pState->block_remaining = pState->block_length = (i << 8) | j;
510 
511             switch (pState->block_type) {
512                 case LZX_BLOCKTYPE_ALIGNED:
513                     for (i = 0; i < 8; i++) { READ_BITS(j, 3); LENTABLE(ALIGNED)[i] = j; }
514                     BUILD_TABLE(ALIGNED);
515                     /* rest of aligned header is same as verbatim */
516 
517                 case LZX_BLOCKTYPE_VERBATIM:
518                     READ_LENGTHS(MAINTREE, 0, 256);
519                     READ_LENGTHS(MAINTREE, 256, pState->main_elements);
520                     BUILD_TABLE(MAINTREE);
521                     if (LENTABLE(MAINTREE)[0xE8] != 0) pState->intel_started = 1;
522 
523                     READ_LENGTHS(LENGTH, 0, LZX_NUM_SECONDARY_LENGTHS);
524                     BUILD_TABLE(LENGTH);
525                     break;
526 
527                 case LZX_BLOCKTYPE_UNCOMPRESSED:
528                     pState->intel_started = 1; /* because we can't assume otherwise */
529                     ENSURE_BITS(16); /* get up to 16 pad bits into the buffer */
530                     if (bitsleft > 16) inpos -= 2; /* and align the bitstream! */
531                     R0 = inpos[0]|(inpos[1]<<8)|(inpos[2]<<16)|(inpos[3]<<24);inpos+=4;
532                     R1 = inpos[0]|(inpos[1]<<8)|(inpos[2]<<16)|(inpos[3]<<24);inpos+=4;
533                     R2 = inpos[0]|(inpos[1]<<8)|(inpos[2]<<16)|(inpos[3]<<24);inpos+=4;
534                     break;
535 
536                 default:
537                     return DECR_ILLEGALDATA;
538             }
539         }
540 
541         /* buffer exhaustion check */
542         if (inpos > endinp) {
543             /* it's possible to have a file where the next run is less than
544              * 16 bits in size. In this case, the READ_HUFFSYM() macro used
545              * in building the tables will exhaust the buffer, so we should
546              * allow for this, but not allow those accidentally read bits to
547              * be used (so we check that there are at least 16 bits
548              * remaining - in this boundary case they aren't really part of
549              * the compressed data)
550              */
551             if (inpos > (endinp+2) || bitsleft < 16) return DECR_ILLEGALDATA;
552         }
553 
554         while ((this_run = pState->block_remaining) > 0 && togo > 0) {
555             if (this_run > togo) this_run = togo;
556             togo -= this_run;
557             pState->block_remaining -= this_run;
558 
559             /* apply 2^x-1 mask */
560             window_posn &= window_size - 1;
561             /* runs can't straddle the window wraparound */
562             if ((window_posn + this_run) > window_size)
563                 return DECR_DATAFORMAT;
564 
565             switch (pState->block_type) {
566 
567                 case LZX_BLOCKTYPE_VERBATIM:
568                     while (this_run > 0) {
569                         READ_HUFFSYM(MAINTREE, main_element);
570 
571                         if (main_element < LZX_NUM_CHARS) {
572                             /* literal: 0 to LZX_NUM_CHARS-1 */
573                             window[window_posn++] = main_element;
574                             this_run--;
575                         }
576                         else {
577                             /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */
578                             main_element -= LZX_NUM_CHARS;
579 
580                             match_length = main_element & LZX_NUM_PRIMARY_LENGTHS;
581                             if (match_length == LZX_NUM_PRIMARY_LENGTHS) {
582                                 READ_HUFFSYM(LENGTH, length_footer);
583                                 match_length += length_footer;
584                             }
585                             match_length += LZX_MIN_MATCH;
586 
587                             match_offset = main_element >> 3;
588 
589                             if (match_offset > 2) {
590                                 /* not repeated offset */
591                                 if (match_offset != 3) {
592                                     extra = extra_bits[match_offset];
593                                     READ_BITS(verbatim_bits, extra);
594                                     match_offset = position_base[match_offset] - 2 + verbatim_bits;
595                                 }
596                                 else {
597                                     match_offset = 1;
598                                 }
599 
600                                 /* update repeated offset LRU queue */
601                                 R2 = R1; R1 = R0; R0 = match_offset;
602                             }
603                             else if (match_offset == 0) {
604                                 match_offset = R0;
605                             }
606                             else if (match_offset == 1) {
607                                 match_offset = R1;
608                                 R1 = R0; R0 = match_offset;
609                             }
610                             else /* match_offset == 2 */ {
611                                 match_offset = R2;
612                                 R2 = R0; R0 = match_offset;
613                             }
614 
615                             rundest = window + window_posn;
616                             runsrc  = rundest - match_offset;
617                             window_posn += match_length;
618                             if (window_posn > window_size) return DECR_ILLEGALDATA;
619                             this_run -= match_length;
620 
621                             /* copy any wrapped around source data */
622                             while ((runsrc < window) && (match_length-- > 0)) {
623                                 *rundest++ = *(runsrc + window_size); runsrc++;
624                             }
625                             /* copy match data - no worries about destination wraps */
626                             while (match_length-- > 0) *rundest++ = *runsrc++;
627 
628                         }
629                     }
630                     break;
631 
632                 case LZX_BLOCKTYPE_ALIGNED:
633                     while (this_run > 0) {
634                         READ_HUFFSYM(MAINTREE, main_element);
635 
636                         if (main_element < LZX_NUM_CHARS) {
637                             /* literal: 0 to LZX_NUM_CHARS-1 */
638                             window[window_posn++] = main_element;
639                             this_run--;
640                         }
641                         else {
642                             /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */
643                             main_element -= LZX_NUM_CHARS;
644 
645                             match_length = main_element & LZX_NUM_PRIMARY_LENGTHS;
646                             if (match_length == LZX_NUM_PRIMARY_LENGTHS) {
647                                 READ_HUFFSYM(LENGTH, length_footer);
648                                 match_length += length_footer;
649                             }
650                             match_length += LZX_MIN_MATCH;
651 
652                             match_offset = main_element >> 3;
653 
654                             if (match_offset > 2) {
655                                 /* not repeated offset */
656                                 extra = extra_bits[match_offset];
657                                 match_offset = position_base[match_offset] - 2;
658                                 if (extra > 3) {
659                                     /* verbatim and aligned bits */
660                                     extra -= 3;
661                                     READ_BITS(verbatim_bits, extra);
662                                     match_offset += (verbatim_bits << 3);
663                                     READ_HUFFSYM(ALIGNED, aligned_bits);
664                                     match_offset += aligned_bits;
665                                 }
666                                 else if (extra == 3) {
667                                     /* aligned bits only */
668                                     READ_HUFFSYM(ALIGNED, aligned_bits);
669                                     match_offset += aligned_bits;
670                                 }
671                                 else if (extra > 0) { /* extra==1, extra==2 */
672                                     /* verbatim bits only */
673                                     READ_BITS(verbatim_bits, extra);
674                                     match_offset += verbatim_bits;
675                                 }
676                                 else /* extra == 0 */ {
677                                     /* ??? */
678                                     match_offset = 1;
679                                 }
680 
681                                 /* update repeated offset LRU queue */
682                                 R2 = R1; R1 = R0; R0 = match_offset;
683                             }
684                             else if (match_offset == 0) {
685                                 match_offset = R0;
686                             }
687                             else if (match_offset == 1) {
688                                 match_offset = R1;
689                                 R1 = R0; R0 = match_offset;
690                             }
691                             else /* match_offset == 2 */ {
692                                 match_offset = R2;
693                                 R2 = R0; R0 = match_offset;
694                             }
695 
696                             rundest = window + window_posn;
697                             runsrc  = rundest - match_offset;
698                             window_posn += match_length;
699                             if (window_posn > window_size) return DECR_ILLEGALDATA;
700                             this_run -= match_length;
701 
702                             /* copy any wrapped around source data */
703                             while ((runsrc < window) && (match_length-- > 0)) {
704                                 *rundest++ = *(runsrc + window_size); runsrc++;
705                             }
706                             /* copy match data - no worries about destination wraps */
707                             while (match_length-- > 0) *rundest++ = *runsrc++;
708 
709                         }
710                     }
711                     break;
712 
713                 case LZX_BLOCKTYPE_UNCOMPRESSED:
714                     if ((inpos + this_run) > endinp) return DECR_ILLEGALDATA;
715                     memcpy(window + window_posn, inpos, (size_t) this_run);
716                     inpos += this_run; window_posn += this_run;
717                     break;
718 
719                 default:
720                     return DECR_ILLEGALDATA; /* might as well */
721             }
722 
723         }
724     }
725 
726     if (togo != 0) return DECR_ILLEGALDATA;
727     memcpy(outpos, window + ((!window_posn) ? window_size : window_posn) - outlen, (size_t) outlen);
728 
729     pState->window_posn = window_posn;
730     pState->R0 = R0;
731     pState->R1 = R1;
732     pState->R2 = R2;
733 
734     /* intel E8 decoding */
735     if ((pState->frames_read++ < 32768) && pState->intel_filesize != 0) {
736         if (outlen <= 6 || !pState->intel_started) {
737             pState->intel_curpos += outlen;
738         }
739         else {
740             UBYTE *data    = outpos;
741             UBYTE *dataend = data + outlen - 10;
742             LONG curpos    = pState->intel_curpos;
743             LONG filesize  = pState->intel_filesize;
744             LONG abs_off, rel_off;
745 
746             pState->intel_curpos = curpos + outlen;
747 
748             while (data < dataend) {
749                 if (*data++ != 0xE8) { curpos++; continue; }
750                 abs_off = data[0] | (data[1]<<8) | (data[2]<<16) | (data[3]<<24);
751                 if ((abs_off >= -curpos) && (abs_off < filesize)) {
752                     rel_off = (abs_off >= 0) ? abs_off - curpos : abs_off + filesize;
753                     data[0] = (UBYTE) rel_off;
754                     data[1] = (UBYTE) (rel_off >> 8);
755                     data[2] = (UBYTE) (rel_off >> 16);
756                     data[3] = (UBYTE) (rel_off >> 24);
757                 }
758                 data += 4;
759                 curpos += 5;
760             }
761         }
762     }
763     return DECR_OK;
764 }
765 
766 #ifdef LZX_CHM_TESTDRIVER
main(int c,char ** v)767 int main(int c, char **v)
768 {
769     FILE *fin, *fout;
770     struct LZXstate state;
771     UBYTE ibuf[16384];
772     UBYTE obuf[32768];
773     int ilen, olen;
774     int status;
775     int i;
776     int count=0;
777     int w = atoi(v[1]);
778     LZXinit(&state, w);
779     fout = fopen(v[2], "wb");
780     for (i=3; i<c; i++)
781     {
782         fin = fopen(v[i], "rb");
783         ilen = fread(ibuf, 1, 16384, fin);
784         status = LZXdecompress(&state, ibuf, obuf, ilen, 32768);
785         switch (status)
786         {
787             case DECR_OK:
788                 printf("ok\n");
789                 fwrite(obuf, 1, 32768, fout);
790                 break;
791             case DECR_DATAFORMAT:
792                 printf("bad format\n");
793                 break;
794             case DECR_ILLEGALDATA:
795                 printf("illegal data\n");
796                 break;
797             case DECR_NOMEMORY:
798                 printf("no memory\n");
799                 break;
800             default:
801                 break;
802         }
803         fclose(fin);
804         if (++count == 2)
805         {
806             count = 0;
807             LZXreset(&state);
808         }
809     }
810     fclose(fout);
811 }
812 #endif
813