1 /* This file is part of libmspack.
2  * (C) 2003-2013 Stuart Caie.
3  *
4  * The LZX method was created by Jonathan Forbes and Tomi Poutanen, adapted
5  * by Microsoft Corporation.
6  *
7  * libmspack is free software; you can redistribute it and/or modify it under
8  * the terms of the GNU Lesser General Public License (LGPL) version 2.1
9  *
10  * For further details, see the file COPYING.LIB distributed with libmspack
11  */
12 
13 /* LZX decompression implementation */
14 
15 #include <system.h>
16 #include <lzx.h>
17 
18 /* Microsoft's LZX document (in cab-sdk.exe) and their implementation
19  * of the com.ms.util.cab Java package do not concur.
20  *
21  * In the LZX document, there is a table showing the correlation between
22  * window size and the number of position slots. It states that the 1MB
23  * window = 40 slots and the 2MB window = 42 slots. In the implementation,
24  * 1MB = 42 slots, 2MB = 50 slots. The actual calculation is 'find the
25  * first slot whose position base is equal to or more than the required
26  * window size'. This would explain why other tables in the document refer
27  * to 50 slots rather than 42.
28  *
29  * The constant NUM_PRIMARY_LENGTHS used in the decompression pseudocode
30  * is not defined in the specification.
31  *
32  * The LZX document does not state the uncompressed block has an
33  * uncompressed length field. Where does this length field come from, so
34  * we can know how large the block is? The implementation has it as the 24
35  * bits following after the 3 blocktype bits, before the alignment
36  * padding.
37  *
38  * The LZX document states that aligned offset blocks have their aligned
39  * offset huffman tree AFTER the main and length trees. The implementation
40  * suggests that the aligned offset tree is BEFORE the main and length
41  * trees.
42  *
43  * The LZX document decoding algorithm states that, in an aligned offset
44  * block, if an extra_bits value is 1, 2 or 3, then that number of bits
45  * should be read and the result added to the match offset. This is
46  * correct for 1 and 2, but not 3, where just a huffman symbol (using the
47  * aligned tree) should be read.
48  *
49  * Regarding the E8 preprocessing, the LZX document states 'No translation
50  * may be performed on the last 6 bytes of the input block'. This is
51  * correct.  However, the pseudocode provided checks for the *E8 leader*
52  * up to the last 6 bytes. If the leader appears between -10 and -7 bytes
53  * from the end, this would cause the next four bytes to be modified, at
54  * least one of which would be in the last 6 bytes, which is not allowed
55  * according to the spec.
56  *
57  * The specification states that the huffman trees must always contain at
58  * least one element. However, many CAB files contain blocks where the
59  * length tree is completely empty (because there are no matches), and
60  * this is expected to succeed.
61  *
62  * The errors in LZX documentation appear have been corrected in the
63  * new documentation for the LZX DELTA format.
64  *
65  *     http://msdn.microsoft.com/en-us/library/cc483133.aspx
66  *
67  * However, this is a different format, an extension of regular LZX.
68  * I have noticed the following differences, there may be more:
69  *
70  * The maximum window size has increased from 2MB to 32MB. This also
71  * increases the maximum number of position slots, etc.
72  *
73  * If the match length is 257 (the maximum possible), this signals
74  * a further length decoding step, that allows for matches up to
75  * 33024 bytes long.
76  *
77  * The format now allows for "reference data", supplied by the caller.
78  * If match offsets go further back than the number of bytes
79  * decompressed so far, that is them accessing the reference data.
80  */
81 
82 /* import bit-reading macros and code */
83 #define BITS_TYPE struct lzxd_stream
84 #define BITS_VAR lzx
85 #define BITS_ORDER_MSB
86 #define READ_BYTES do {                 \
87     unsigned char b0, b1;               \
88     READ_IF_NEEDED; b0 = *i_ptr++;      \
89     READ_IF_NEEDED; b1 = *i_ptr++;      \
90     INJECT_BITS((b1 << 8) | b0, 16);    \
91 } while (0)
92 #include <readbits.h>
93 
94 /* import huffman-reading macros and code */
95 #define TABLEBITS(tbl)      LZX_##tbl##_TABLEBITS
96 #define MAXSYMBOLS(tbl)     LZX_##tbl##_MAXSYMBOLS
97 #define HUFF_TABLE(tbl,idx) lzx->tbl##_table[idx]
98 #define HUFF_LEN(tbl,idx)   lzx->tbl##_len[idx]
99 #define HUFF_ERROR          return lzx->error = MSPACK_ERR_DECRUNCH
100 #include <readhuff.h>
101 
102 /* BUILD_TABLE(tbl) builds a huffman lookup table from code lengths */
103 #define BUILD_TABLE(tbl)                                                \
104     if (make_decode_table(MAXSYMBOLS(tbl), TABLEBITS(tbl),              \
105                           &HUFF_LEN(tbl,0), &HUFF_TABLE(tbl,0)))        \
106     {                                                                   \
107         D(("failed to build %s table", #tbl))                           \
108         return lzx->error = MSPACK_ERR_DECRUNCH;                        \
109     }
110 
111 #define BUILD_TABLE_MAYBE_EMPTY(tbl) do {                               \
112     lzx->tbl##_empty = 0;                                               \
113     if (make_decode_table(MAXSYMBOLS(tbl), TABLEBITS(tbl),              \
114                           &HUFF_LEN(tbl,0), &HUFF_TABLE(tbl,0)))        \
115     {                                                                   \
116         for (i = 0; i < MAXSYMBOLS(tbl); i++) {                         \
117             if (HUFF_LEN(tbl, i) > 0) {                                 \
118                 D(("failed to build %s table", #tbl))                   \
119                 return lzx->error = MSPACK_ERR_DECRUNCH;                \
120             }                                                           \
121         }                                                               \
122         /* empty tree - allow it, but don't decode symbols with it */   \
123         lzx->tbl##_empty = 1;                                           \
124     }                                                                   \
125 } while (0)
126 
127 /* READ_LENGTHS(tablename, first, last) reads in code lengths for symbols
128  * first to last in the given table. The code lengths are stored in their
129  * own special LZX way.
130  */
131 #define READ_LENGTHS(tbl, first, last) do {             \
132   STORE_BITS;                                           \
133   if (lzxd_read_lens(lzx, &HUFF_LEN(tbl, 0), (first),   \
134     (unsigned int)(last))) return lzx->error;           \
135   RESTORE_BITS;                                         \
136 } while (0)
137 
lzxd_read_lens(struct lzxd_stream * lzx,unsigned char * lens,unsigned int first,unsigned int last)138 static int lzxd_read_lens(struct lzxd_stream *lzx, unsigned char *lens,
139                           unsigned int first, unsigned int last)
140 {
141   /* bit buffer and huffman symbol decode variables */
142   register unsigned int bit_buffer;
143   register int bits_left, i;
144   register unsigned short sym;
145   unsigned char *i_ptr, *i_end;
146 
147   unsigned int x, y;
148   int z;
149 
150   RESTORE_BITS;
151 
152   /* read lengths for pretree (20 symbols, lengths stored in fixed 4 bits) */
153   for (x = 0; x < 20; x++) {
154     READ_BITS(y, 4);
155     lzx->PRETREE_len[x] = y;
156   }
157   BUILD_TABLE(PRETREE);
158 
159   for (x = first; x < last; ) {
160     READ_HUFFSYM(PRETREE, z);
161     if (z == 17) {
162       /* code = 17, run of ([read 4 bits]+4) zeros */
163       READ_BITS(y, 4); y += 4;
164       while (y--) lens[x++] = 0;
165     }
166     else if (z == 18) {
167       /* code = 18, run of ([read 5 bits]+20) zeros */
168       READ_BITS(y, 5); y += 20;
169       while (y--) lens[x++] = 0;
170     }
171     else if (z == 19) {
172       /* code = 19, run of ([read 1 bit]+4) [read huffman symbol] */
173       READ_BITS(y, 1); y += 4;
174       READ_HUFFSYM(PRETREE, z);
175       z = lens[x] - z; if (z < 0) z += 17;
176       while (y--) lens[x++] = z;
177     }
178     else {
179       /* code = 0 to 16, delta current length entry */
180       z = lens[x] - z; if (z < 0) z += 17;
181       lens[x++] = z;
182     }
183   }
184 
185   STORE_BITS;
186 
187   return MSPACK_ERR_OK;
188 }
189 
190 /* LZX static data tables:
191  *
192  * LZX uses 'position slots' to represent match offsets.  For every match,
193  * a small 'position slot' number and a small offset from that slot are
194  * encoded instead of one large offset.
195  *
196  * The number of slots is decided by how many are needed to encode the
197  * largest offset for a given window size. This is easy when the gap between
198  * slots is less than 128Kb, it's a linear relationship. But when extra_bits
199  * reaches its limit of 17 (because LZX can only ensure reading 17 bits of
200  * data at a time), we can only jump 128Kb at a time and have to start
201  * using more and more position slots as each window size doubles.
202  *
203  * position_base[] is an index to the position slot bases
204  *
205  * extra_bits[] states how many bits of offset-from-base data is needed.
206  *
207  * They are calculated as follows:
208  * extra_bits[i] = 0 where i < 4
209  * extra_bits[i] = floor(i/2)-1 where i >= 4 && i < 36
210  * extra_bits[i] = 17 where i >= 36
211  * position_base[0] = 0
212  * position_base[i] = position_base[i-1] + (1 << extra_bits[i-1])
213  */
214 static const unsigned int position_slots[11] = {
215     30, 32, 34, 36, 38, 42, 50, 66, 98, 162, 290
216 };
217 static const unsigned char extra_bits[36] = {
218     0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
219     9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16
220 };
221 static const unsigned int position_base[290] = {
222     0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512,
223     768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768,
224     49152, 65536, 98304, 131072, 196608, 262144, 393216, 524288, 655360,
225     786432, 917504, 1048576, 1179648, 1310720, 1441792, 1572864, 1703936,
226     1835008, 1966080, 2097152, 2228224, 2359296, 2490368, 2621440, 2752512,
227     2883584, 3014656, 3145728, 3276800, 3407872, 3538944, 3670016, 3801088,
228     3932160, 4063232, 4194304, 4325376, 4456448, 4587520, 4718592, 4849664,
229     4980736, 5111808, 5242880, 5373952, 5505024, 5636096, 5767168, 5898240,
230     6029312, 6160384, 6291456, 6422528, 6553600, 6684672, 6815744, 6946816,
231     7077888, 7208960, 7340032, 7471104, 7602176, 7733248, 7864320, 7995392,
232     8126464, 8257536, 8388608, 8519680, 8650752, 8781824, 8912896, 9043968,
233     9175040, 9306112, 9437184, 9568256, 9699328, 9830400, 9961472, 10092544,
234     10223616, 10354688, 10485760, 10616832, 10747904, 10878976, 11010048,
235     11141120, 11272192, 11403264, 11534336, 11665408, 11796480, 11927552,
236     12058624, 12189696, 12320768, 12451840, 12582912, 12713984, 12845056,
237     12976128, 13107200, 13238272, 13369344, 13500416, 13631488, 13762560,
238     13893632, 14024704, 14155776, 14286848, 14417920, 14548992, 14680064,
239     14811136, 14942208, 15073280, 15204352, 15335424, 15466496, 15597568,
240     15728640, 15859712, 15990784, 16121856, 16252928, 16384000, 16515072,
241     16646144, 16777216, 16908288, 17039360, 17170432, 17301504, 17432576,
242     17563648, 17694720, 17825792, 17956864, 18087936, 18219008, 18350080,
243     18481152, 18612224, 18743296, 18874368, 19005440, 19136512, 19267584,
244     19398656, 19529728, 19660800, 19791872, 19922944, 20054016, 20185088,
245     20316160, 20447232, 20578304, 20709376, 20840448, 20971520, 21102592,
246     21233664, 21364736, 21495808, 21626880, 21757952, 21889024, 22020096,
247     22151168, 22282240, 22413312, 22544384, 22675456, 22806528, 22937600,
248     23068672, 23199744, 23330816, 23461888, 23592960, 23724032, 23855104,
249     23986176, 24117248, 24248320, 24379392, 24510464, 24641536, 24772608,
250     24903680, 25034752, 25165824, 25296896, 25427968, 25559040, 25690112,
251     25821184, 25952256, 26083328, 26214400, 26345472, 26476544, 26607616,
252     26738688, 26869760, 27000832, 27131904, 27262976, 27394048, 27525120,
253     27656192, 27787264, 27918336, 28049408, 28180480, 28311552, 28442624,
254     28573696, 28704768, 28835840, 28966912, 29097984, 29229056, 29360128,
255     29491200, 29622272, 29753344, 29884416, 30015488, 30146560, 30277632,
256     30408704, 30539776, 30670848, 30801920, 30932992, 31064064, 31195136,
257     31326208, 31457280, 31588352, 31719424, 31850496, 31981568, 32112640,
258     32243712, 32374784, 32505856, 32636928, 32768000, 32899072, 33030144,
259     33161216, 33292288, 33423360
260 };
261 
lzxd_reset_state(struct lzxd_stream * lzx)262 static void lzxd_reset_state(struct lzxd_stream *lzx) {
263   int i;
264 
265   lzx->R0              = 1;
266   lzx->R1              = 1;
267   lzx->R2              = 1;
268   lzx->header_read     = 0;
269   lzx->block_remaining = 0;
270   lzx->block_type      = LZX_BLOCKTYPE_INVALID;
271 
272   /* initialise tables to 0 (because deltas will be applied to them) */
273   for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS; i++) lzx->MAINTREE_len[i] = 0;
274   for (i = 0; i < LZX_LENGTH_MAXSYMBOLS; i++)   lzx->LENGTH_len[i]   = 0;
275 }
276 
277 /*-------- main LZX code --------*/
278 
lzxd_init(struct mspack_system * system,struct mspack_file * input,struct mspack_file * output,int window_bits,int reset_interval,int input_buffer_size,off_t output_length,char is_delta)279 struct lzxd_stream *lzxd_init(struct mspack_system *system,
280                               struct mspack_file *input,
281                               struct mspack_file *output,
282                               int window_bits,
283                               int reset_interval,
284                               int input_buffer_size,
285                               off_t output_length,
286                               char is_delta)
287 {
288   unsigned int window_size = 1 << window_bits;
289   struct lzxd_stream *lzx;
290 
291   if (!system) return NULL;
292 
293   /* LZX DELTA window sizes are between 2^17 (128KiB) and 2^25 (32MiB),
294    * regular LZX windows are between 2^15 (32KiB) and 2^21 (2MiB)
295    */
296   if (is_delta) {
297       if (window_bits < 17 || window_bits > 25) return NULL;
298   }
299   else {
300       if (window_bits < 15 || window_bits > 21) return NULL;
301   }
302 
303   if (reset_interval < 0 || output_length < 0) {
304       D(("reset interval or output length < 0"))
305       return NULL;
306   }
307 
308   /* round up input buffer size to multiple of two */
309   input_buffer_size = (input_buffer_size + 1) & -2;
310   if (input_buffer_size < 2) return NULL;
311 
312   /* allocate decompression state */
313   if (!(lzx = (struct lzxd_stream *) system->alloc(system, sizeof(struct lzxd_stream)))) {
314     return NULL;
315   }
316 
317   /* allocate decompression window and input buffer */
318   lzx->window = (unsigned char *) system->alloc(system, (size_t) window_size);
319   lzx->inbuf  = (unsigned char *) system->alloc(system, (size_t) input_buffer_size);
320   if (!lzx->window || !lzx->inbuf) {
321     system->free(lzx->window);
322     system->free(lzx->inbuf);
323     system->free(lzx);
324     return NULL;
325   }
326 
327   /* initialise decompression state */
328   lzx->sys             = system;
329   lzx->input           = input;
330   lzx->output          = output;
331   lzx->offset          = 0;
332   lzx->length          = output_length;
333 
334   lzx->inbuf_size      = input_buffer_size;
335   lzx->window_size     = 1 << window_bits;
336   lzx->ref_data_size   = 0;
337   lzx->window_posn     = 0;
338   lzx->frame_posn      = 0;
339   lzx->frame           = 0;
340   lzx->reset_interval  = reset_interval;
341   lzx->intel_filesize  = 0;
342   lzx->intel_curpos    = 0;
343   lzx->intel_started   = 0;
344   lzx->error           = MSPACK_ERR_OK;
345   lzx->num_offsets     = position_slots[window_bits - 15] << 3;
346   lzx->is_delta        = is_delta;
347 
348   lzx->o_ptr = lzx->o_end = &lzx->e8_buf[0];
349   lzxd_reset_state(lzx);
350   INIT_BITS;
351   return lzx;
352 }
353 
lzxd_set_reference_data(struct lzxd_stream * lzx,struct mspack_system * system,struct mspack_file * input,unsigned int length)354 int lzxd_set_reference_data(struct lzxd_stream *lzx,
355                             struct mspack_system *system,
356                             struct mspack_file *input,
357                             unsigned int length)
358 {
359     if (!lzx) return MSPACK_ERR_ARGS;
360 
361     if (!lzx->is_delta) {
362         D(("only LZX DELTA streams support reference data"))
363         return MSPACK_ERR_ARGS;
364     }
365     if (lzx->offset) {
366         D(("too late to set reference data after decoding starts"))
367         return MSPACK_ERR_ARGS;
368     }
369     if (length > lzx->window_size) {
370         D(("reference length (%u) is longer than the window", length))
371         return MSPACK_ERR_ARGS;
372     }
373     if (length > 0 && (!system || !input)) {
374         D(("length > 0 but no system or input"))
375         return MSPACK_ERR_ARGS;
376     }
377 
378     lzx->ref_data_size = length;
379     if (length > 0) {
380         /* copy reference data */
381         unsigned char *pos = &lzx->window[lzx->window_size - length];
382         int bytes = system->read(input, pos, length);
383         /* length can't be more than 2^25, so no signedness problem */
384         if (bytes < (int)length) return MSPACK_ERR_READ;
385     }
386     lzx->ref_data_size = length;
387     return MSPACK_ERR_OK;
388 }
389 
lzxd_set_output_length(struct lzxd_stream * lzx,off_t out_bytes)390 void lzxd_set_output_length(struct lzxd_stream *lzx, off_t out_bytes) {
391   if (lzx && out_bytes > 0) lzx->length = out_bytes;
392 }
393 
lzxd_decompress(struct lzxd_stream * lzx,off_t out_bytes)394 int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
395   /* bitstream and huffman reading variables */
396   register unsigned int bit_buffer;
397   register int bits_left, i=0;
398   unsigned char *i_ptr, *i_end;
399   register unsigned short sym;
400 
401   int match_length, length_footer, extra, verbatim_bits, bytes_todo;
402   int this_run, main_element, aligned_bits, j, warned = 0;
403   unsigned char *window, *runsrc, *rundest, buf[12];
404   unsigned int frame_size=0, end_frame, match_offset, window_posn;
405   unsigned int R0, R1, R2;
406 
407   /* easy answers */
408   if (!lzx || (out_bytes < 0)) return MSPACK_ERR_ARGS;
409   if (lzx->error) return lzx->error;
410 
411   /* flush out any stored-up bytes before we begin */
412   i = lzx->o_end - lzx->o_ptr;
413   if ((off_t) i > out_bytes) i = (int) out_bytes;
414   if (i) {
415     if (lzx->sys->write(lzx->output, lzx->o_ptr, i) != i) {
416       return lzx->error = MSPACK_ERR_WRITE;
417     }
418     lzx->o_ptr  += i;
419     lzx->offset += i;
420     out_bytes   -= i;
421   }
422   if (out_bytes == 0) return MSPACK_ERR_OK;
423 
424   /* restore local state */
425   RESTORE_BITS;
426   window = lzx->window;
427   window_posn = lzx->window_posn;
428   R0 = lzx->R0;
429   R1 = lzx->R1;
430   R2 = lzx->R2;
431 
432   end_frame = (unsigned int)((lzx->offset + out_bytes) / LZX_FRAME_SIZE) + 1;
433 
434   while (lzx->frame < end_frame) {
435     /* have we reached the reset interval? (if there is one?) */
436     if (lzx->reset_interval && ((lzx->frame % lzx->reset_interval) == 0)) {
437       if (lzx->block_remaining) {
438         /* this is a file format error, we can make a best effort to extract what we can */
439         D(("%d bytes remaining at reset interval", lzx->block_remaining))
440         if (!warned) {
441           lzx->sys->message(NULL, "WARNING; invalid reset interval detected during LZX decompression");
442           warned++;
443         }
444       }
445 
446       /* re-read the intel header and reset the huffman lengths */
447       lzxd_reset_state(lzx);
448       R0 = lzx->R0;
449       R1 = lzx->R1;
450       R2 = lzx->R2;
451     }
452 
453     /* LZX DELTA format has chunk_size, not present in LZX format */
454     if (lzx->is_delta) {
455       ENSURE_BITS(16);
456       REMOVE_BITS(16);
457     }
458 
459     /* read header if necessary */
460     if (!lzx->header_read) {
461       /* read 1 bit. if bit=0, intel filesize = 0.
462        * if bit=1, read intel filesize (32 bits) */
463       j = 0; READ_BITS(i, 1); if (i) { READ_BITS(i, 16); READ_BITS(j, 16); }
464       lzx->intel_filesize = (i << 16) | j;
465       lzx->header_read = 1;
466     }
467 
468     /* calculate size of frame: all frames are 32k except the final frame
469      * which is 32kb or less. this can only be calculated when lzx->length
470      * has been filled in. */
471     frame_size = LZX_FRAME_SIZE;
472     if (lzx->length && (lzx->length - lzx->offset) < (off_t)frame_size) {
473       frame_size = lzx->length - lzx->offset;
474     }
475 
476     /* decode until one more frame is available */
477     bytes_todo = lzx->frame_posn + frame_size - window_posn;
478     while (bytes_todo > 0) {
479       /* initialise new block, if one is needed */
480       if (lzx->block_remaining == 0) {
481         /* realign if previous block was an odd-sized UNCOMPRESSED block */
482         if ((lzx->block_type == LZX_BLOCKTYPE_UNCOMPRESSED) &&
483             (lzx->block_length & 1))
484         {
485           READ_IF_NEEDED;
486           i_ptr++;
487         }
488 
489         /* read block type (3 bits) and block length (24 bits) */
490         READ_BITS(lzx->block_type, 3);
491         READ_BITS(i, 16); READ_BITS(j, 8);
492         lzx->block_remaining = lzx->block_length = (i << 8) | j;
493         /*D(("new block t%d len %u", lzx->block_type, lzx->block_length))*/
494 
495         /* read individual block headers */
496         switch (lzx->block_type) {
497         case LZX_BLOCKTYPE_ALIGNED:
498           /* read lengths of and build aligned huffman decoding tree */
499           for (i = 0; i < 8; i++) { READ_BITS(j, 3); lzx->ALIGNED_len[i] = j; }
500           BUILD_TABLE(ALIGNED);
501           /* rest of aligned header is same as verbatim */ /*@fallthrough@*/
502         case LZX_BLOCKTYPE_VERBATIM:
503           /* read lengths of and build main huffman decoding tree */
504           READ_LENGTHS(MAINTREE, 0, 256);
505           READ_LENGTHS(MAINTREE, 256, LZX_NUM_CHARS + lzx->num_offsets);
506           BUILD_TABLE(MAINTREE);
507           /* if the literal 0xE8 is anywhere in the block... */
508           if (lzx->MAINTREE_len[0xE8] != 0) lzx->intel_started = 1;
509           /* read lengths of and build lengths huffman decoding tree */
510           READ_LENGTHS(LENGTH, 0, LZX_NUM_SECONDARY_LENGTHS);
511           BUILD_TABLE_MAYBE_EMPTY(LENGTH);
512           break;
513 
514         case LZX_BLOCKTYPE_UNCOMPRESSED:
515           /* because we can't assume otherwise */
516           lzx->intel_started = 1;
517 
518           /* read 1-16 (not 0-15) bits to align to bytes */
519           if (bits_left == 0) ENSURE_BITS(16);
520           bits_left = 0; bit_buffer = 0;
521 
522           /* read 12 bytes of stored R0 / R1 / R2 values */
523           for (rundest = &buf[0], i = 0; i < 12; i++) {
524             READ_IF_NEEDED;
525             *rundest++ = *i_ptr++;
526           }
527           R0 = buf[0] | (buf[1] << 8) | (buf[2]  << 16) | (buf[3]  << 24);
528           R1 = buf[4] | (buf[5] << 8) | (buf[6]  << 16) | (buf[7]  << 24);
529           R2 = buf[8] | (buf[9] << 8) | (buf[10] << 16) | (buf[11] << 24);
530           break;
531 
532         default:
533           D(("bad block type"))
534           return lzx->error = MSPACK_ERR_DECRUNCH;
535         }
536       }
537 
538       /* decode more of the block:
539        * run = min(what's available, what's needed) */
540       this_run = lzx->block_remaining;
541       if (this_run > bytes_todo) this_run = bytes_todo;
542 
543       /* assume we decode exactly this_run bytes, for now */
544       bytes_todo           -= this_run;
545       lzx->block_remaining -= this_run;
546 
547       /* decode at least this_run bytes */
548       switch (lzx->block_type) {
549       case LZX_BLOCKTYPE_VERBATIM:
550         while (this_run > 0) {
551           READ_HUFFSYM(MAINTREE, main_element);
552           if (main_element < LZX_NUM_CHARS) {
553             /* literal: 0 to LZX_NUM_CHARS-1 */
554             window[window_posn++] = main_element;
555             this_run--;
556           }
557           else {
558             /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */
559             main_element -= LZX_NUM_CHARS;
560 
561             /* get match length */
562             match_length = main_element & LZX_NUM_PRIMARY_LENGTHS;
563             if (match_length == LZX_NUM_PRIMARY_LENGTHS) {
564               if (lzx->LENGTH_empty) {
565                 D(("LENGTH symbol needed but tree is empty"))
566                 return lzx->error = MSPACK_ERR_DECRUNCH;
567               }
568               READ_HUFFSYM(LENGTH, length_footer);
569               match_length += length_footer;
570             }
571             match_length += LZX_MIN_MATCH;
572 
573             /* get match offset */
574             switch ((match_offset = (main_element >> 3))) {
575             case 0: match_offset = R0;                                  break;
576             case 1: match_offset = R1; R1=R0;        R0 = match_offset; break;
577             case 2: match_offset = R2; R2=R0;        R0 = match_offset; break;
578             case 3: match_offset = 1;  R2=R1; R1=R0; R0 = match_offset; break;
579             default:
580               extra = (match_offset >= 36) ? 17 : extra_bits[match_offset];
581               READ_BITS(verbatim_bits, extra);
582               match_offset = position_base[match_offset] - 2 + verbatim_bits;
583               R2 = R1; R1 = R0; R0 = match_offset;
584             }
585 
586             /* LZX DELTA uses max match length to signal even longer match */
587             if (match_length == LZX_MAX_MATCH && lzx->is_delta) {
588                 int extra_len = 0;
589                 ENSURE_BITS(3); /* 4 entry huffman tree */
590                 if (PEEK_BITS(1) == 0) {
591                     REMOVE_BITS(1); /* '0' -> 8 extra length bits */
592                     READ_BITS(extra_len, 8);
593                 }
594                 else if (PEEK_BITS(2) == 2) {
595                     REMOVE_BITS(2); /* '10' -> 10 extra length bits + 0x100 */
596                     READ_BITS(extra_len, 10);
597                     extra_len += 0x100;
598                 }
599                 else if (PEEK_BITS(3) == 6) {
600                     REMOVE_BITS(3); /* '110' -> 12 extra length bits + 0x500 */
601                     READ_BITS(extra_len, 12);
602                     extra_len += 0x500;
603                 }
604                 else {
605                     REMOVE_BITS(3); /* '111' -> 15 extra length bits */
606                     READ_BITS(extra_len, 15);
607                 }
608                 match_length += extra_len;
609             }
610 
611             if ((window_posn + match_length) > lzx->window_size) {
612               D(("match ran over window wrap"))
613               return lzx->error = MSPACK_ERR_DECRUNCH;
614             }
615 
616             /* copy match */
617             rundest = &window[window_posn];
618             i = match_length;
619             /* does match offset wrap the window? */
620             if (match_offset > window_posn) {
621               if (match_offset > lzx->offset &&
622                   (match_offset - window_posn) > lzx->ref_data_size)
623               {
624                 D(("match offset beyond LZX stream"))
625                 return lzx->error = MSPACK_ERR_DECRUNCH;
626               }
627               /* j = length from match offset to end of window */
628               j = match_offset - window_posn;
629               if (j > (int) lzx->window_size) {
630                 D(("match offset beyond window boundaries"))
631                 return lzx->error = MSPACK_ERR_DECRUNCH;
632               }
633               runsrc = &window[lzx->window_size - j];
634               if (j < i) {
635                 /* if match goes over the window edge, do two copy runs */
636                 i -= j; while (j-- > 0) *rundest++ = *runsrc++;
637                 runsrc = window;
638               }
639               while (i-- > 0) *rundest++ = *runsrc++;
640             }
641             else {
642               runsrc = rundest - match_offset;
643               while (i-- > 0) *rundest++ = *runsrc++;
644             }
645 
646             this_run    -= match_length;
647             window_posn += match_length;
648           }
649         } /* while (this_run > 0) */
650         break;
651 
652       case LZX_BLOCKTYPE_ALIGNED:
653         while (this_run > 0) {
654           READ_HUFFSYM(MAINTREE, main_element);
655           if (main_element < LZX_NUM_CHARS) {
656             /* literal: 0 to LZX_NUM_CHARS-1 */
657             window[window_posn++] = main_element;
658             this_run--;
659           }
660           else {
661             /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */
662             main_element -= LZX_NUM_CHARS;
663 
664             /* get match length */
665             match_length = main_element & LZX_NUM_PRIMARY_LENGTHS;
666             if (match_length == LZX_NUM_PRIMARY_LENGTHS) {
667               if (lzx->LENGTH_empty) {
668                 D(("LENGTH symbol needed but tree is empty"))
669                 return lzx->error = MSPACK_ERR_DECRUNCH;
670               }
671               READ_HUFFSYM(LENGTH, length_footer);
672               match_length += length_footer;
673             }
674             match_length += LZX_MIN_MATCH;
675 
676             /* get match offset */
677             switch ((match_offset = (main_element >> 3))) {
678             case 0: match_offset = R0;                             break;
679             case 1: match_offset = R1; R1 = R0; R0 = match_offset; break;
680             case 2: match_offset = R2; R2 = R0; R0 = match_offset; break;
681             default:
682               extra = (match_offset >= 36) ? 17 : extra_bits[match_offset];
683               match_offset = position_base[match_offset] - 2;
684               if (extra > 3) {
685                 /* verbatim and aligned bits */
686                 extra -= 3;
687                 READ_BITS(verbatim_bits, extra);
688                 match_offset += (verbatim_bits << 3);
689                 READ_HUFFSYM(ALIGNED, aligned_bits);
690                 match_offset += aligned_bits;
691               }
692               else if (extra == 3) {
693                 /* aligned bits only */
694                 READ_HUFFSYM(ALIGNED, aligned_bits);
695                 match_offset += aligned_bits;
696               }
697               else if (extra > 0) { /* extra==1, extra==2 */
698                 /* verbatim bits only */
699                 READ_BITS(verbatim_bits, extra);
700                 match_offset += verbatim_bits;
701               }
702               else /* extra == 0 */ {
703                 /* ??? not defined in LZX specification! */
704                 match_offset = 1;
705               }
706               /* update repeated offset LRU queue */
707               R2 = R1; R1 = R0; R0 = match_offset;
708             }
709 
710             /* LZX DELTA uses max match length to signal even longer match */
711             if (match_length == LZX_MAX_MATCH && lzx->is_delta) {
712                 int extra_len = 0;
713                 ENSURE_BITS(3); /* 4 entry huffman tree */
714                 if (PEEK_BITS(1) == 0) {
715                     REMOVE_BITS(1); /* '0' -> 8 extra length bits */
716                     READ_BITS(extra_len, 8);
717                 }
718                 else if (PEEK_BITS(2) == 2) {
719                     REMOVE_BITS(2); /* '10' -> 10 extra length bits + 0x100 */
720                     READ_BITS(extra_len, 10);
721                     extra_len += 0x100;
722                 }
723                 else if (PEEK_BITS(3) == 6) {
724                     REMOVE_BITS(3); /* '110' -> 12 extra length bits + 0x500 */
725                     READ_BITS(extra_len, 12);
726                     extra_len += 0x500;
727                 }
728                 else {
729                     REMOVE_BITS(3); /* '111' -> 15 extra length bits */
730                     READ_BITS(extra_len, 15);
731                 }
732                 match_length += extra_len;
733             }
734 
735             if ((window_posn + match_length) > lzx->window_size) {
736               D(("match ran over window wrap"))
737               return lzx->error = MSPACK_ERR_DECRUNCH;
738             }
739 
740             /* copy match */
741             rundest = &window[window_posn];
742             i = match_length;
743             /* does match offset wrap the window? */
744             if (match_offset > window_posn) {
745               if (match_offset > lzx->offset &&
746                   (match_offset - window_posn) > lzx->ref_data_size)
747               {
748                 D(("match offset beyond LZX stream"))
749                 return lzx->error = MSPACK_ERR_DECRUNCH;
750               }
751               /* j = length from match offset to end of window */
752               j = match_offset - window_posn;
753               if (j > (int) lzx->window_size) {
754                 D(("match offset beyond window boundaries"))
755                 return lzx->error = MSPACK_ERR_DECRUNCH;
756               }
757               runsrc = &window[lzx->window_size - j];
758               if (j < i) {
759                 /* if match goes over the window edge, do two copy runs */
760                 i -= j; while (j-- > 0) *rundest++ = *runsrc++;
761                 runsrc = window;
762               }
763               while (i-- > 0) *rundest++ = *runsrc++;
764             }
765             else {
766               runsrc = rundest - match_offset;
767               while (i-- > 0) *rundest++ = *runsrc++;
768             }
769 
770             this_run    -= match_length;
771             window_posn += match_length;
772           }
773         } /* while (this_run > 0) */
774         break;
775 
776       case LZX_BLOCKTYPE_UNCOMPRESSED:
777         /* as this_run is limited not to wrap a frame, this also means it
778          * won't wrap the window (as the window is a multiple of 32k) */
779         rundest = &window[window_posn];
780         window_posn += this_run;
781         while (this_run > 0) {
782           if ((i = i_end - i_ptr) == 0) {
783             READ_IF_NEEDED;
784           }
785           else {
786             if (i > this_run) i = this_run;
787             lzx->sys->copy(i_ptr, rundest, (size_t) i);
788             rundest  += i;
789             i_ptr    += i;
790             this_run -= i;
791           }
792         }
793         break;
794 
795       default:
796         return lzx->error = MSPACK_ERR_DECRUNCH; /* might as well */
797       }
798 
799       /* did the final match overrun our desired this_run length? */
800       if (this_run < 0) {
801         if ((unsigned int)(-this_run) > lzx->block_remaining) {
802           D(("overrun went past end of block by %d (%d remaining)",
803              -this_run, lzx->block_remaining ))
804           return lzx->error = MSPACK_ERR_DECRUNCH;
805         }
806         lzx->block_remaining -= -this_run;
807       }
808     } /* while (bytes_todo > 0) */
809 
810     /* streams don't extend over frame boundaries */
811     if ((window_posn - lzx->frame_posn) != frame_size) {
812       D(("decode beyond output frame limits! %d != %d",
813          window_posn - lzx->frame_posn, frame_size))
814       return lzx->error = MSPACK_ERR_DECRUNCH;
815     }
816 
817     /* re-align input bitstream */
818     if (bits_left > 0) ENSURE_BITS(16);
819     if (bits_left & 15) REMOVE_BITS(bits_left & 15);
820 
821     /* check that we've used all of the previous frame first */
822     if (lzx->o_ptr != lzx->o_end) {
823       D(("%ld avail bytes, new %d frame",
824           (long)(lzx->o_end - lzx->o_ptr), frame_size))
825       return lzx->error = MSPACK_ERR_DECRUNCH;
826     }
827 
828     /* does this intel block _really_ need decoding? */
829     if (lzx->intel_started && lzx->intel_filesize &&
830         (lzx->frame <= 32768) && (frame_size > 10))
831     {
832       unsigned char *data    = &lzx->e8_buf[0];
833       unsigned char *dataend = &lzx->e8_buf[frame_size - 10];
834       signed int curpos      = lzx->intel_curpos;
835       signed int filesize    = lzx->intel_filesize;
836       signed int abs_off, rel_off;
837 
838       /* copy e8 block to the e8 buffer and tweak if needed */
839       lzx->o_ptr = data;
840       lzx->sys->copy(&lzx->window[lzx->frame_posn], data, frame_size);
841 
842       while (data < dataend) {
843         if (*data++ != 0xE8) { curpos++; continue; }
844         abs_off = data[0] | (data[1]<<8) | (data[2]<<16) | (data[3]<<24);
845         if ((abs_off >= -curpos) && (abs_off < filesize)) {
846           rel_off = (abs_off >= 0) ? abs_off - curpos : abs_off + filesize;
847           data[0] = (unsigned char) rel_off;
848           data[1] = (unsigned char) (rel_off >> 8);
849           data[2] = (unsigned char) (rel_off >> 16);
850           data[3] = (unsigned char) (rel_off >> 24);
851         }
852         data += 4;
853         curpos += 5;
854       }
855       lzx->intel_curpos += frame_size;
856     }
857     else {
858       lzx->o_ptr = &lzx->window[lzx->frame_posn];
859       if (lzx->intel_filesize) lzx->intel_curpos += frame_size;
860     }
861     lzx->o_end = &lzx->o_ptr[frame_size];
862 
863     /* write a frame */
864     i = (out_bytes < (off_t)frame_size) ? (unsigned int)out_bytes : frame_size;
865     if (lzx->sys->write(lzx->output, lzx->o_ptr, i) != i) {
866       return lzx->error = MSPACK_ERR_WRITE;
867     }
868     lzx->o_ptr  += i;
869     lzx->offset += i;
870     out_bytes   -= i;
871 
872     /* advance frame start position */
873     lzx->frame_posn += frame_size;
874     lzx->frame++;
875 
876     /* wrap window / frame position pointers */
877     if (window_posn == lzx->window_size)     window_posn = 0;
878     if (lzx->frame_posn == lzx->window_size) lzx->frame_posn = 0;
879 
880   } /* while (lzx->frame < end_frame) */
881 
882   if (out_bytes) {
883     D(("bytes left to output"))
884     return lzx->error = MSPACK_ERR_DECRUNCH;
885   }
886 
887   /* store local state */
888   STORE_BITS;
889   lzx->window_posn = window_posn;
890   lzx->R0 = R0;
891   lzx->R1 = R1;
892   lzx->R2 = R2;
893 
894   return MSPACK_ERR_OK;
895 }
896 
lzxd_free(struct lzxd_stream * lzx)897 void lzxd_free(struct lzxd_stream *lzx) {
898   struct mspack_system *sys;
899   if (lzx) {
900     sys = lzx->sys;
901     sys->free(lzx->inbuf);
902     sys->free(lzx->window);
903     sys->free(lzx);
904   }
905 }
906