1 /* This file is part of libmspack.
2  * (C) 2003-2004 Stuart Caie.
3  *
4  * The Quantum method was created by David Stafford, adapted by Microsoft
5  * Corporation.
6  *
7  * This decompressor is based on an implementation by Matthew Russotto, used
8  * with permission.
9  *
10  * libmspack is free software; you can redistribute it and/or modify it under
11  * the terms of the GNU Lesser General Public License (LGPL) version 2.1
12  *
13  * For further details, see the file COPYING.LIB distributed with libmspack
14  */
15 
16 /* Quantum decompression implementation */
17 
18 /* This decompressor was researched and implemented by Matthew Russotto. It
19  * has since been tidied up by Stuart Caie. More information can be found at
20  * http://www.speakeasy.org/~russotto/quantumcomp.html
21  */
22 
23 #include <system.h>
24 #include <qtm.h>
25 
26 /* import bit-reading macros and code */
27 #define BITS_TYPE struct qtmd_stream
28 #define BITS_VAR qtm
29 #define BITS_ORDER_MSB
30 #define READ_BYTES do {                 \
31     unsigned char b0, b1;               \
32     READ_IF_NEEDED; b0 = *i_ptr++;      \
33     READ_IF_NEEDED; b1 = *i_ptr++;      \
34     INJECT_BITS((b0 << 8) | b1, 16);    \
35 } while (0)
36 #include <readbits.h>
37 
38 /* Quantum static data tables:
39  *
40  * Quantum uses 'position slots' to represent match offsets.  For every
41  * match, a small 'position slot' number and a small offset from that slot
42  * are encoded instead of one large offset.
43  *
44  * position_base[] is an index to the position slot bases
45  *
46  * extra_bits[] states how many bits of offset-from-base data is needed.
47  *
48  * length_base[] and length_extra[] are equivalent in function, but are
49  * used for encoding selector 6 (variable length match) match lengths,
50  * instead of match offsets.
51  *
52  * They are generated with the following code:
53  *   unsigned int i, offset;
54  *   for (i = 0, offset = 0; i < 42; i++) {
55  *     position_base[i] = offset;
56  *     extra_bits[i] = ((i < 2) ? 0 : (i - 2)) >> 1;
57  *     offset += 1 << extra_bits[i];
58  *   }
59  *   for (i = 0, offset = 0; i < 26; i++) {
60  *     length_base[i] = offset;
61  *     length_extra[i] = (i < 2 ? 0 : i - 2) >> 2;
62  *     offset += 1 << length_extra[i];
63  *   }
64  *   length_base[26] = 254; length_extra[26] = 0;
65  */
66 static const unsigned int position_base[42] = {
67   0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, 768,
68   1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152,
69   65536, 98304, 131072, 196608, 262144, 393216, 524288, 786432, 1048576, 1572864
70 };
71 static const unsigned char extra_bits[42] = {
72   0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10,
73   11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19
74 };
75 static const unsigned char length_base[27] = {
76   0, 1, 2, 3, 4, 5, 6, 8, 10, 12, 14, 18, 22, 26,
77   30, 38, 46, 54, 62, 78, 94, 110, 126, 158, 190, 222, 254
78 };
79 static const unsigned char length_extra[27] = {
80   0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
81   3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
82 };
83 
84 
85 /* Arithmetic decoder:
86  *
87  * GET_SYMBOL(model, var) fetches the next symbol from the stated model
88  * and puts it in var.
89  *
90  * If necessary, qtmd_update_model() is called.
91  */
92 #define GET_SYMBOL(model, var) do {                                     \
93   range = ((H - L) & 0xFFFF) + 1;                                       \
94   symf = ((((C - L + 1) * model.syms[0].cumfreq)-1) / range) & 0xFFFF;  \
95                                                                         \
96   for (i = 1; i < model.entries; i++) {                                 \
97     if (model.syms[i].cumfreq <= symf) break;                           \
98   }                                                                     \
99   (var) = model.syms[i-1].sym;                                          \
100                                                                         \
101   range = (H - L) + 1;                                                  \
102   symf = model.syms[0].cumfreq;                                         \
103   H = L + ((model.syms[i-1].cumfreq * range) / symf) - 1;               \
104   L = L + ((model.syms[i].cumfreq   * range) / symf);                   \
105                                                                         \
106   do { model.syms[--i].cumfreq += 8; } while (i > 0);                   \
107   if (model.syms[0].cumfreq > 3800) qtmd_update_model(&model);          \
108                                                                         \
109   while (1) {                                                           \
110     if ((L & 0x8000) != (H & 0x8000)) {                                 \
111       if ((L & 0x4000) && !(H & 0x4000)) {                              \
112         /* underflow case */                                            \
113         C ^= 0x4000; L &= 0x3FFF; H |= 0x4000;                          \
114       }                                                                 \
115       else break;                                                       \
116     }                                                                   \
117     L <<= 1; H = (H << 1) | 1;                                          \
118     ENSURE_BITS(1);                                                     \
119     C  = (C << 1) | PEEK_BITS(1);                                       \
120     REMOVE_BITS(1);                                                     \
121   }                                                                     \
122 } while (0)
123 
qtmd_update_model(struct qtmd_model * model)124 static void qtmd_update_model(struct qtmd_model *model) {
125   struct qtmd_modelsym tmp;
126   int i, j;
127 
128   if (--model->shiftsleft) {
129     for (i = model->entries - 1; i >= 0; i--) {
130       /* -1, not -2; the 0 entry saves this */
131       model->syms[i].cumfreq >>= 1;
132       if (model->syms[i].cumfreq <= model->syms[i+1].cumfreq) {
133         model->syms[i].cumfreq = model->syms[i+1].cumfreq + 1;
134       }
135     }
136   }
137   else {
138     model->shiftsleft = 50;
139     for (i = 0; i < model->entries; i++) {
140       /* no -1, want to include the 0 entry */
141       /* this converts cumfreqs into frequencies, then shifts right */
142       model->syms[i].cumfreq -= model->syms[i+1].cumfreq;
143       model->syms[i].cumfreq++; /* avoid losing things entirely */
144       model->syms[i].cumfreq >>= 1;
145     }
146 
147     /* now sort by frequencies, decreasing order -- this must be an
148      * inplace selection sort, or a sort with the same (in)stability
149      * characteristics */
150     for (i = 0; i < model->entries - 1; i++) {
151       for (j = i + 1; j < model->entries; j++) {
152         if (model->syms[i].cumfreq < model->syms[j].cumfreq) {
153           tmp = model->syms[i];
154           model->syms[i] = model->syms[j];
155           model->syms[j] = tmp;
156         }
157       }
158     }
159 
160     /* then convert frequencies back to cumfreq */
161     for (i = model->entries - 1; i >= 0; i--) {
162       model->syms[i].cumfreq += model->syms[i+1].cumfreq;
163     }
164   }
165 }
166 
167 /* Initialises a model to decode symbols from [start] to [start]+[len]-1 */
qtmd_init_model(struct qtmd_model * model,struct qtmd_modelsym * syms,int start,int len)168 static void qtmd_init_model(struct qtmd_model *model,
169                             struct qtmd_modelsym *syms, int start, int len)
170 {
171   int i;
172 
173   model->shiftsleft = 4;
174   model->entries    = len;
175   model->syms       = syms;
176 
177   for (i = 0; i <= len; i++) {
178     syms[i].sym     = start + i; /* actual symbol */
179     syms[i].cumfreq = len - i;   /* current frequency of that symbol */
180   }
181 }
182 
183 
184 /*-------- main Quantum code --------*/
185 
qtmd_init(struct mspack_system * system,struct mspack_file * input,struct mspack_file * output,int window_bits,int input_buffer_size)186 struct qtmd_stream *qtmd_init(struct mspack_system *system,
187                               struct mspack_file *input,
188                               struct mspack_file *output,
189                               int window_bits, int input_buffer_size)
190 {
191   unsigned int window_size = 1 << window_bits;
192   struct qtmd_stream *qtm;
193   int i;
194 
195   if (!system) return NULL;
196 
197   /* Quantum supports window sizes of 2^10 (1Kb) through 2^21 (2Mb) */
198   if (window_bits < 10 || window_bits > 21) return NULL;
199 
200   /* round up input buffer size to multiple of two */
201   input_buffer_size = (input_buffer_size + 1) & -2;
202   if (input_buffer_size < 2) return NULL;
203 
204   /* allocate decompression state */
205   if (!(qtm = (struct qtmd_stream *) system->alloc(system, sizeof(struct qtmd_stream)))) {
206     return NULL;
207   }
208 
209   /* allocate decompression window and input buffer */
210   qtm->window = (unsigned char *) system->alloc(system, (size_t) window_size);
211   qtm->inbuf  = (unsigned char *) system->alloc(system, (size_t) input_buffer_size);
212   if (!qtm->window || !qtm->inbuf) {
213     system->free(qtm->window);
214     system->free(qtm->inbuf);
215     system->free(qtm);
216     return NULL;
217   }
218 
219   /* initialise decompression state */
220   qtm->sys         = system;
221   qtm->input       = input;
222   qtm->output      = output;
223   qtm->inbuf_size  = input_buffer_size;
224   qtm->window_size = window_size;
225   qtm->window_posn = 0;
226   qtm->frame_todo  = QTM_FRAME_SIZE;
227   qtm->header_read = 0;
228   qtm->error       = MSPACK_ERR_OK;
229 
230   qtm->i_ptr = qtm->i_end = &qtm->inbuf[0];
231   qtm->o_ptr = qtm->o_end = &qtm->window[0];
232   qtm->input_end = 0;
233   qtm->bits_left = 0;
234   qtm->bit_buffer = 0;
235 
236   /* initialise arithmetic coding models
237    * - model 4    depends on window size, ranges from 20 to 24
238    * - model 5    depends on window size, ranges from 20 to 36
239    * - model 6pos depends on window size, ranges from 20 to 42
240    */
241   i = window_bits * 2;
242   qtmd_init_model(&qtm->model0,    &qtm->m0sym[0],   0, 64);
243   qtmd_init_model(&qtm->model1,    &qtm->m1sym[0],  64, 64);
244   qtmd_init_model(&qtm->model2,    &qtm->m2sym[0], 128, 64);
245   qtmd_init_model(&qtm->model3,    &qtm->m3sym[0], 192, 64);
246   qtmd_init_model(&qtm->model4,    &qtm->m4sym[0],   0, (i > 24) ? 24 : i);
247   qtmd_init_model(&qtm->model5,    &qtm->m5sym[0],   0, (i > 36) ? 36 : i);
248   qtmd_init_model(&qtm->model6,    &qtm->m6sym[0],   0, i);
249   qtmd_init_model(&qtm->model6len, &qtm->m6lsym[0],  0, 27);
250   qtmd_init_model(&qtm->model7,    &qtm->m7sym[0],   0, 7);
251 
252   /* all ok */
253   return qtm;
254 }
255 
qtmd_decompress(struct qtmd_stream * qtm,off_t out_bytes)256 int qtmd_decompress(struct qtmd_stream *qtm, off_t out_bytes) {
257   unsigned int frame_todo, frame_end, window_posn, match_offset, range;
258   unsigned char *window, *i_ptr, *i_end, *runsrc, *rundest;
259   int i, j, selector, extra, sym, match_length;
260   unsigned short H, L, C, symf;
261 
262   register unsigned int bit_buffer;
263   register unsigned char bits_left;
264 
265   /* easy answers */
266   if (!qtm || (out_bytes < 0)) return MSPACK_ERR_ARGS;
267   if (qtm->error) return qtm->error;
268 
269   /* flush out any stored-up bytes before we begin */
270   i = qtm->o_end - qtm->o_ptr;
271   if ((off_t) i > out_bytes) i = (int) out_bytes;
272   if (i) {
273     if (qtm->sys->write(qtm->output, qtm->o_ptr, i) != i) {
274       return qtm->error = MSPACK_ERR_WRITE;
275     }
276     qtm->o_ptr  += i;
277     out_bytes   -= i;
278   }
279   if (out_bytes == 0) return MSPACK_ERR_OK;
280 
281   /* restore local state */
282   RESTORE_BITS;
283   window = qtm->window;
284   window_posn = qtm->window_posn;
285   frame_todo = qtm->frame_todo;
286   H = qtm->H;
287   L = qtm->L;
288   C = qtm->C;
289 
290   /* while we do not have enough decoded bytes in reserve: */
291   while ((qtm->o_end - qtm->o_ptr) < out_bytes) {
292     /* read header if necessary. Initialises H, L and C */
293     if (!qtm->header_read) {
294       H = 0xFFFF; L = 0; READ_BITS(C, 16);
295       qtm->header_read = 1;
296     }
297 
298     /* decode more, up to the number of bytes needed, the frame boundary,
299      * or the window boundary, whichever comes first */
300     frame_end = window_posn + (out_bytes - (qtm->o_end - qtm->o_ptr));
301     if ((window_posn + frame_todo) < frame_end) {
302       frame_end = window_posn + frame_todo;
303     }
304     if (frame_end > qtm->window_size) {
305       frame_end = qtm->window_size;
306     }
307 
308     while (window_posn < frame_end) {
309       GET_SYMBOL(qtm->model7, selector);
310       if (selector < 4) {
311         /* literal byte */
312         struct qtmd_model *mdl = (selector == 0) ? &qtm->model0 :
313                                 ((selector == 1) ? &qtm->model1 :
314                                 ((selector == 2) ? &qtm->model2 :
315                                                    &qtm->model3));
316         GET_SYMBOL((*mdl), sym);
317         window[window_posn++] = sym;
318         frame_todo--;
319       }
320       else {
321         /* match repeated string */
322         switch (selector) {
323         case 4: /* selector 4 = fixed length match (3 bytes) */
324           GET_SYMBOL(qtm->model4, sym);
325           READ_MANY_BITS(extra, extra_bits[sym]);
326           match_offset = position_base[sym] + extra + 1;
327           match_length = 3;
328           break;
329 
330         case 5: /* selector 5 = fixed length match (4 bytes) */
331           GET_SYMBOL(qtm->model5, sym);
332           READ_MANY_BITS(extra, extra_bits[sym]);
333           match_offset = position_base[sym] + extra + 1;
334           match_length = 4;
335           break;
336 
337         case 6: /* selector 6 = variable length match */
338           GET_SYMBOL(qtm->model6len, sym);
339           READ_MANY_BITS(extra, length_extra[sym]);
340           match_length = length_base[sym] + extra + 5;
341 
342           GET_SYMBOL(qtm->model6, sym);
343           READ_MANY_BITS(extra, extra_bits[sym]);
344           match_offset = position_base[sym] + extra + 1;
345           break;
346 
347         default:
348           /* should be impossible, model7 can only return 0-6 */
349           D(("got %d from selector", selector))
350           return qtm->error = MSPACK_ERR_DECRUNCH;
351         }
352 
353         rundest = &window[window_posn];
354         frame_todo -= match_length;
355 
356         /* does match destination wrap the window? This situation is possible
357          * where the window size is less than the 32k frame size, but matches
358          * must not go beyond a frame boundary */
359         if ((window_posn + match_length) > qtm->window_size) {
360           /* copy first part of match, before window end */
361           i = qtm->window_size - window_posn;
362           j = window_posn - match_offset;
363           while (i--) *rundest++ = window[j++ & (qtm->window_size - 1)];
364 
365           /* flush currently stored data */
366           i = (&window[qtm->window_size] - qtm->o_ptr);
367 
368           /* this should not happen, but if it does then this code
369            * can't handle the situation (can't flush up to the end of
370            * the window, but can't break out either because we haven't
371            * finished writing the match). bail out in this case */
372           if (i > out_bytes) {
373             D(("during window-wrap match; %d bytes to flush but only need %d",
374                i, (int) out_bytes))
375             return qtm->error = MSPACK_ERR_DECRUNCH;
376           }
377           if (qtm->sys->write(qtm->output, qtm->o_ptr, i) != i) {
378             return qtm->error = MSPACK_ERR_WRITE;
379           }
380           out_bytes -= i;
381           qtm->o_ptr = &window[0];
382           qtm->o_end = &window[0];
383 
384           /* copy second part of match, after window wrap */
385           rundest = &window[0];
386           i = match_length - (qtm->window_size - window_posn);
387           while (i--) *rundest++ = window[j++ & (qtm->window_size - 1)];
388           window_posn = window_posn + match_length - qtm->window_size;
389 
390           break; /* because "window_posn < frame_end" has now failed */
391         }
392         else {
393           /* normal match - output won't wrap window or frame end */
394           i = match_length;
395 
396           /* does match _offset_ wrap the window? */
397           if (match_offset > window_posn) {
398             /* j = length from match offset to end of window */
399             j = match_offset - window_posn;
400             if (j > (int) qtm->window_size) {
401               D(("match offset beyond window boundaries"))
402               return qtm->error = MSPACK_ERR_DECRUNCH;
403             }
404             runsrc = &window[qtm->window_size - j];
405             if (j < i) {
406               /* if match goes over the window edge, do two copy runs */
407               i -= j; while (j-- > 0) *rundest++ = *runsrc++;
408               runsrc = window;
409             }
410             while (i-- > 0) *rundest++ = *runsrc++;
411           }
412           else {
413             runsrc = rundest - match_offset;
414             while (i-- > 0) *rundest++ = *runsrc++;
415           }
416           window_posn += match_length;
417         }
418       } /* if (window_posn+match_length > frame_end) */
419     } /* while (window_posn < frame_end) */
420 
421     qtm->o_end = &window[window_posn];
422 
423    /* if we subtracted too much from frame_todo, it will
424     * wrap around past zero and go above its max value */
425    if (frame_todo > QTM_FRAME_SIZE) {
426      D(("overshot frame alignment"))
427      return qtm->error = MSPACK_ERR_DECRUNCH;
428    }
429 
430     /* another frame completed? */
431     if (frame_todo == 0) {
432       /* re-align input */
433       if (bits_left & 7) REMOVE_BITS(bits_left & 7);
434 
435       /* special Quantum hack -- cabd.c injects a trailer byte to allow the
436        * decompressor to realign itself. CAB Quantum blocks, unlike LZX
437        * blocks, can have anything from 0 to 4 trailing null bytes. */
438       do { READ_BITS(i, 8); } while (i != 0xFF);
439 
440       qtm->header_read = 0;
441 
442       frame_todo = QTM_FRAME_SIZE;
443     }
444 
445     /* window wrap? */
446     if (window_posn == qtm->window_size) {
447       /* flush all currently stored data */
448       i = (qtm->o_end - qtm->o_ptr);
449       /* break out if we have more than enough to finish this request */
450       if (i >= out_bytes) break;
451       if (qtm->sys->write(qtm->output, qtm->o_ptr, i) != i) {
452         return qtm->error = MSPACK_ERR_WRITE;
453       }
454       out_bytes -= i;
455       qtm->o_ptr = &window[0];
456       qtm->o_end = &window[0];
457       window_posn = 0;
458    }
459 
460   } /* while (more bytes needed) */
461 
462   if (out_bytes) {
463     i = (int) out_bytes;
464     if (qtm->sys->write(qtm->output, qtm->o_ptr, i) != i) {
465       return qtm->error = MSPACK_ERR_WRITE;
466     }
467     qtm->o_ptr += i;
468   }
469 
470   /* store local state */
471 
472   STORE_BITS;
473   qtm->window_posn = window_posn;
474   qtm->frame_todo = frame_todo;
475   qtm->H = H;
476   qtm->L = L;
477   qtm->C = C;
478 
479   return MSPACK_ERR_OK;
480 }
481 
qtmd_free(struct qtmd_stream * qtm)482 void qtmd_free(struct qtmd_stream *qtm) {
483   struct mspack_system *sys;
484   if (qtm) {
485     sys = qtm->sys;
486     sys->free(qtm->window);
487     sys->free(qtm->inbuf);
488     sys->free(qtm);
489   }
490 }
491