1
2 /*-------------------------------------------------------------*/
3 /*--- Decompression machinery ---*/
4 /*--- decompress.c ---*/
5 /*-------------------------------------------------------------*/
6
7 /* ------------------------------------------------------------------
8 This file is part of bzip2/libbzip2, a program and library for
9 lossless, block-sorting data compression.
10
11 bzip2/libbzip2 version 1.0.5 of 10 December 2007
12 Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org>
13
14 Please read the WARNING, DISCLAIMER and PATENTS sections in the
15 README file.
16
17 This program is released under the terms of the license contained
18 in the file LICENSE.
19 ------------------------------------------------------------------ */
20
21
22 #include "bzlib_private.h"
23
24 ABC_NAMESPACE_IMPL_START
25
26
27
28 /*---------------------------------------------------*/
29 static
makeMaps_d(DState * s)30 void makeMaps_d ( DState* s )
31 {
32 Int32 i;
33 s->nInUse = 0;
34 for (i = 0; i < 256; i++)
35 if (s->inUse[i]) {
36 s->seqToUnseq[s->nInUse] = i;
37 s->nInUse++;
38 }
39 }
40
41
42 /*---------------------------------------------------*/
43 #define RETURN(rrr) \
44 { retVal = rrr; goto save_state_and_return; };
45
46 #define GET_BITS(lll,vvv,nnn) \
47 case lll: s->state = lll; \
48 while (True) { \
49 if (s->bsLive >= nnn) { \
50 UInt32 v; \
51 v = (s->bsBuff >> \
52 (s->bsLive-nnn)) & ((1 << nnn)-1); \
53 s->bsLive -= nnn; \
54 vvv = v; \
55 break; \
56 } \
57 if (s->strm->avail_in == 0) RETURN(BZ_OK); \
58 s->bsBuff \
59 = (s->bsBuff << 8) | \
60 ((UInt32) \
61 (*((UChar*)(s->strm->next_in)))); \
62 s->bsLive += 8; \
63 s->strm->next_in++; \
64 s->strm->avail_in--; \
65 s->strm->total_in_lo32++; \
66 if (s->strm->total_in_lo32 == 0) \
67 s->strm->total_in_hi32++; \
68 }
69
70 #define GET_UCHAR(lll,uuu) \
71 GET_BITS(lll,uuu,8)
72
73 #define GET_BIT(lll,uuu) \
74 GET_BITS(lll,uuu,1)
75
76 /*---------------------------------------------------*/
77 #define GET_MTF_VAL(label1,label2,lval) \
78 { \
79 if (groupPos == 0) { \
80 groupNo++; \
81 if (groupNo >= nSelectors) \
82 RETURN(BZ_DATA_ERROR); \
83 groupPos = BZ_G_SIZE; \
84 gSel = s->selector[groupNo]; \
85 gMinlen = s->minLens[gSel]; \
86 gLimit = &(s->limit[gSel][0]); \
87 gPerm = &(s->perm[gSel][0]); \
88 gBase = &(s->base[gSel][0]); \
89 } \
90 groupPos--; \
91 zn = gMinlen; \
92 GET_BITS(label1, zvec, zn); \
93 while (1) { \
94 if (zn > 20 /* the longest code */) \
95 RETURN(BZ_DATA_ERROR); \
96 if (zvec <= gLimit[zn]) break; \
97 zn++; \
98 GET_BIT(label2, zj); \
99 zvec = (zvec << 1) | zj; \
100 }; \
101 if (zvec - gBase[zn] < 0 \
102 || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) \
103 RETURN(BZ_DATA_ERROR); \
104 lval = gPerm[zvec - gBase[zn]]; \
105 }
106
107
108 /*---------------------------------------------------*/
BZ2_decompress(DState * s)109 Int32 BZ2_decompress ( DState* s )
110 {
111 UChar uc;
112 Int32 retVal;
113 Int32 minLen, maxLen;
114 bz_stream* strm = s->strm;
115
116 /* stuff that needs to be saved/restored */
117 Int32 i;
118 Int32 j;
119 Int32 t;
120 Int32 alphaSize;
121 Int32 nGroups;
122 Int32 nSelectors;
123 Int32 EOB;
124 Int32 groupNo;
125 Int32 groupPos;
126 Int32 nextSym;
127 Int32 nblockMAX;
128 Int32 nblock;
129 Int32 es;
130 Int32 N;
131 Int32 curr;
132 Int32 zt;
133 Int32 zn;
134 Int32 zvec;
135 Int32 zj;
136 Int32 gSel;
137 Int32 gMinlen;
138 Int32* gLimit;
139 Int32* gBase;
140 Int32* gPerm;
141
142 if (s->state == BZ_X_MAGIC_1) {
143 /*initialise the save area*/
144 s->save_i = 0;
145 s->save_j = 0;
146 s->save_t = 0;
147 s->save_alphaSize = 0;
148 s->save_nGroups = 0;
149 s->save_nSelectors = 0;
150 s->save_EOB = 0;
151 s->save_groupNo = 0;
152 s->save_groupPos = 0;
153 s->save_nextSym = 0;
154 s->save_nblockMAX = 0;
155 s->save_nblock = 0;
156 s->save_es = 0;
157 s->save_N = 0;
158 s->save_curr = 0;
159 s->save_zt = 0;
160 s->save_zn = 0;
161 s->save_zvec = 0;
162 s->save_zj = 0;
163 s->save_gSel = 0;
164 s->save_gMinlen = 0;
165 s->save_gLimit = NULL;
166 s->save_gBase = NULL;
167 s->save_gPerm = NULL;
168 }
169
170 /*restore from the save area*/
171 i = s->save_i;
172 j = s->save_j;
173 t = s->save_t;
174 alphaSize = s->save_alphaSize;
175 nGroups = s->save_nGroups;
176 nSelectors = s->save_nSelectors;
177 EOB = s->save_EOB;
178 groupNo = s->save_groupNo;
179 groupPos = s->save_groupPos;
180 nextSym = s->save_nextSym;
181 nblockMAX = s->save_nblockMAX;
182 nblock = s->save_nblock;
183 es = s->save_es;
184 N = s->save_N;
185 curr = s->save_curr;
186 zt = s->save_zt;
187 zn = s->save_zn;
188 zvec = s->save_zvec;
189 zj = s->save_zj;
190 gSel = s->save_gSel;
191 gMinlen = s->save_gMinlen;
192 gLimit = s->save_gLimit;
193 gBase = s->save_gBase;
194 gPerm = s->save_gPerm;
195
196 retVal = BZ_OK;
197
198 switch (s->state) {
199
200 GET_UCHAR(BZ_X_MAGIC_1, uc);
201 if (uc != BZ_HDR_B) RETURN(BZ_DATA_ERROR_MAGIC);
202
203 GET_UCHAR(BZ_X_MAGIC_2, uc);
204 if (uc != BZ_HDR_Z) RETURN(BZ_DATA_ERROR_MAGIC);
205
206 GET_UCHAR(BZ_X_MAGIC_3, uc)
207 if (uc != BZ_HDR_h) RETURN(BZ_DATA_ERROR_MAGIC);
208
209 GET_BITS(BZ_X_MAGIC_4, s->blockSize100k, 8)
210 if (s->blockSize100k < (BZ_HDR_0 + 1) ||
211 s->blockSize100k > (BZ_HDR_0 + 9)) RETURN(BZ_DATA_ERROR_MAGIC);
212 s->blockSize100k -= BZ_HDR_0;
213
214 if (s->smallDecompress) {
215 s->ll16 = (unsigned short *)BZALLOC( s->blockSize100k * 100000 * sizeof(UInt16) );
216 s->ll4 = (unsigned char *)BZALLOC(
217 ((1 + s->blockSize100k * 100000) >> 1) * sizeof(UChar)
218 );
219 if (s->ll16 == NULL || s->ll4 == NULL) RETURN(BZ_MEM_ERROR);
220 } else {
221 s->tt = (unsigned *)BZALLOC( s->blockSize100k * 100000 * sizeof(Int32) );
222 if (s->tt == NULL) RETURN(BZ_MEM_ERROR);
223 }
224
225 GET_UCHAR(BZ_X_BLKHDR_1, uc);
226
227 if (uc == 0x17) goto endhdr_2;
228 if (uc != 0x31) RETURN(BZ_DATA_ERROR);
229 GET_UCHAR(BZ_X_BLKHDR_2, uc);
230 if (uc != 0x41) RETURN(BZ_DATA_ERROR);
231 GET_UCHAR(BZ_X_BLKHDR_3, uc);
232 if (uc != 0x59) RETURN(BZ_DATA_ERROR);
233 GET_UCHAR(BZ_X_BLKHDR_4, uc);
234 if (uc != 0x26) RETURN(BZ_DATA_ERROR);
235 GET_UCHAR(BZ_X_BLKHDR_5, uc);
236 if (uc != 0x53) RETURN(BZ_DATA_ERROR);
237 GET_UCHAR(BZ_X_BLKHDR_6, uc);
238 if (uc != 0x59) RETURN(BZ_DATA_ERROR);
239
240 s->currBlockNo++;
241 if (s->verbosity >= 2)
242 VPrintf1 ( "\n [%d: huff+mtf ", s->currBlockNo );
243
244 s->storedBlockCRC = 0;
245 GET_UCHAR(BZ_X_BCRC_1, uc);
246 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
247 GET_UCHAR(BZ_X_BCRC_2, uc);
248 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
249 GET_UCHAR(BZ_X_BCRC_3, uc);
250 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
251 GET_UCHAR(BZ_X_BCRC_4, uc);
252 s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
253
254 GET_BITS(BZ_X_RANDBIT, s->blockRandomised, 1);
255
256 s->origPtr = 0;
257 GET_UCHAR(BZ_X_ORIGPTR_1, uc);
258 s->origPtr = (s->origPtr << 8) | ((Int32)uc);
259 GET_UCHAR(BZ_X_ORIGPTR_2, uc);
260 s->origPtr = (s->origPtr << 8) | ((Int32)uc);
261 GET_UCHAR(BZ_X_ORIGPTR_3, uc);
262 s->origPtr = (s->origPtr << 8) | ((Int32)uc);
263
264 if (s->origPtr < 0)
265 RETURN(BZ_DATA_ERROR);
266 if (s->origPtr > 10 + 100000*s->blockSize100k)
267 RETURN(BZ_DATA_ERROR);
268
269 /*--- Receive the mapping table ---*/
270 for (i = 0; i < 16; i++) {
271 GET_BIT(BZ_X_MAPPING_1, uc);
272 if (uc == 1)
273 s->inUse16[i] = True; else
274 s->inUse16[i] = False;
275 }
276
277 for (i = 0; i < 256; i++) s->inUse[i] = False;
278
279 for (i = 0; i < 16; i++)
280 if (s->inUse16[i])
281 for (j = 0; j < 16; j++) {
282 GET_BIT(BZ_X_MAPPING_2, uc);
283 if (uc == 1) s->inUse[i * 16 + j] = True;
284 }
285 makeMaps_d ( s );
286 if (s->nInUse == 0) RETURN(BZ_DATA_ERROR);
287 alphaSize = s->nInUse+2;
288
289 /*--- Now the selectors ---*/
290 GET_BITS(BZ_X_SELECTOR_1, nGroups, 3);
291 if (nGroups < 2 || nGroups > 6) RETURN(BZ_DATA_ERROR);
292 GET_BITS(BZ_X_SELECTOR_2, nSelectors, 15);
293 if (nSelectors < 1) RETURN(BZ_DATA_ERROR);
294 for (i = 0; i < nSelectors; i++) {
295 j = 0;
296 while (True) {
297 GET_BIT(BZ_X_SELECTOR_3, uc);
298 if (uc == 0) break;
299 j++;
300 if (j >= nGroups) RETURN(BZ_DATA_ERROR);
301 }
302 s->selectorMtf[i] = j;
303 }
304
305 /*--- Undo the MTF values for the selectors. ---*/
306 {
307 UChar pos[BZ_N_GROUPS], tmp, v;
308 for (v = 0; v < nGroups; v++) pos[v] = v;
309
310 for (i = 0; i < nSelectors; i++) {
311 v = s->selectorMtf[i];
312 tmp = pos[v];
313 while (v > 0) { pos[v] = pos[v-1]; v--; }
314 pos[0] = tmp;
315 s->selector[i] = tmp;
316 }
317 }
318
319 /*--- Now the coding tables ---*/
320 for (t = 0; t < nGroups; t++) {
321 GET_BITS(BZ_X_CODING_1, curr, 5);
322 for (i = 0; i < alphaSize; i++) {
323 while (True) {
324 if (curr < 1 || curr > 20) RETURN(BZ_DATA_ERROR);
325 GET_BIT(BZ_X_CODING_2, uc);
326 if (uc == 0) break;
327 GET_BIT(BZ_X_CODING_3, uc);
328 if (uc == 0) curr++; else curr--;
329 }
330 s->len[t][i] = curr;
331 }
332 }
333
334 /*--- Create the Huffman decoding tables ---*/
335 for (t = 0; t < nGroups; t++) {
336 minLen = 32;
337 maxLen = 0;
338 for (i = 0; i < alphaSize; i++) {
339 if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
340 if (s->len[t][i] < minLen) minLen = s->len[t][i];
341 }
342 BZ2_hbCreateDecodeTables (
343 &(s->limit[t][0]),
344 &(s->base[t][0]),
345 &(s->perm[t][0]),
346 &(s->len[t][0]),
347 minLen, maxLen, alphaSize
348 );
349 s->minLens[t] = minLen;
350 }
351
352 /*--- Now the MTF values ---*/
353
354 EOB = s->nInUse+1;
355 nblockMAX = 100000 * s->blockSize100k;
356 groupNo = -1;
357 groupPos = 0;
358
359 for (i = 0; i <= 255; i++) s->unzftab[i] = 0;
360
361 /*-- MTF init --*/
362 {
363 Int32 ii, jj, kk;
364 kk = MTFA_SIZE-1;
365 for (ii = 256 / MTFL_SIZE - 1; ii >= 0; ii--) {
366 for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
367 s->mtfa[kk] = (UChar)(ii * MTFL_SIZE + jj);
368 kk--;
369 }
370 s->mtfbase[ii] = kk + 1;
371 }
372 }
373 /*-- end MTF init --*/
374
375 nblock = 0;
376 GET_MTF_VAL(BZ_X_MTF_1, BZ_X_MTF_2, nextSym);
377
378 while (True) {
379
380 if (nextSym == EOB) break;
381
382 if (nextSym == BZ_RUNA || nextSym == BZ_RUNB) {
383
384 es = -1;
385 N = 1;
386 do {
387 if (nextSym == BZ_RUNA) es = es + (0+1) * N; else
388 if (nextSym == BZ_RUNB) es = es + (1+1) * N;
389 N = N * 2;
390 GET_MTF_VAL(BZ_X_MTF_3, BZ_X_MTF_4, nextSym);
391 }
392 while (nextSym == BZ_RUNA || nextSym == BZ_RUNB);
393
394 es++;
395 uc = s->seqToUnseq[ s->mtfa[s->mtfbase[0]] ];
396 s->unzftab[uc] += es;
397
398 if (s->smallDecompress)
399 while (es > 0) {
400 if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
401 s->ll16[nblock] = (UInt16)uc;
402 nblock++;
403 es--;
404 }
405 else
406 while (es > 0) {
407 if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
408 s->tt[nblock] = (UInt32)uc;
409 nblock++;
410 es--;
411 };
412
413 continue;
414
415 } else {
416
417 if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
418
419 /*-- uc = MTF ( nextSym-1 ) --*/
420 {
421 Int32 ii, jj, kk, pp, lno, off;
422 UInt32 nn;
423 nn = (UInt32)(nextSym - 1);
424
425 if (nn < MTFL_SIZE) {
426 /* avoid general-case expense */
427 pp = s->mtfbase[0];
428 uc = s->mtfa[pp+nn];
429 while (nn > 3) {
430 Int32 z = pp+nn;
431 s->mtfa[(z) ] = s->mtfa[(z)-1];
432 s->mtfa[(z)-1] = s->mtfa[(z)-2];
433 s->mtfa[(z)-2] = s->mtfa[(z)-3];
434 s->mtfa[(z)-3] = s->mtfa[(z)-4];
435 nn -= 4;
436 }
437 while (nn > 0) {
438 s->mtfa[(pp+nn)] = s->mtfa[(pp+nn)-1]; nn--;
439 };
440 s->mtfa[pp] = uc;
441 } else {
442 /* general case */
443 lno = nn / MTFL_SIZE;
444 off = nn % MTFL_SIZE;
445 pp = s->mtfbase[lno] + off;
446 uc = s->mtfa[pp];
447 while (pp > s->mtfbase[lno]) {
448 s->mtfa[pp] = s->mtfa[pp-1]; pp--;
449 };
450 s->mtfbase[lno]++;
451 while (lno > 0) {
452 s->mtfbase[lno]--;
453 s->mtfa[s->mtfbase[lno]]
454 = s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1];
455 lno--;
456 }
457 s->mtfbase[0]--;
458 s->mtfa[s->mtfbase[0]] = uc;
459 if (s->mtfbase[0] == 0) {
460 kk = MTFA_SIZE-1;
461 for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) {
462 for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
463 s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj];
464 kk--;
465 }
466 s->mtfbase[ii] = kk + 1;
467 }
468 }
469 }
470 }
471 /*-- end uc = MTF ( nextSym-1 ) --*/
472
473 s->unzftab[s->seqToUnseq[uc]]++;
474 if (s->smallDecompress)
475 s->ll16[nblock] = (UInt16)(s->seqToUnseq[uc]); else
476 s->tt[nblock] = (UInt32)(s->seqToUnseq[uc]);
477 nblock++;
478
479 GET_MTF_VAL(BZ_X_MTF_5, BZ_X_MTF_6, nextSym);
480 continue;
481 }
482 }
483
484 /* Now we know what nblock is, we can do a better sanity
485 check on s->origPtr.
486 */
487 if (s->origPtr < 0 || s->origPtr >= nblock)
488 RETURN(BZ_DATA_ERROR);
489
490 /*-- Set up cftab to facilitate generation of T^(-1) --*/
491 s->cftab[0] = 0;
492 for (i = 1; i <= 256; i++) s->cftab[i] = s->unzftab[i-1];
493 for (i = 1; i <= 256; i++) s->cftab[i] += s->cftab[i-1];
494 for (i = 0; i <= 256; i++) {
495 if (s->cftab[i] < 0 || s->cftab[i] > nblock) {
496 /* s->cftab[i] can legitimately be == nblock */
497 RETURN(BZ_DATA_ERROR);
498 }
499 }
500
501 s->state_out_len = 0;
502 s->state_out_ch = 0;
503 BZ_INITIALISE_CRC ( s->calculatedBlockCRC );
504 s->state = BZ_X_OUTPUT;
505 if (s->verbosity >= 2) VPrintf0 ( "rt+rld" );
506
507 if (s->smallDecompress) {
508
509 /*-- Make a copy of cftab, used in generation of T --*/
510 for (i = 0; i <= 256; i++) s->cftabCopy[i] = s->cftab[i];
511
512 /*-- compute the T vector --*/
513 for (i = 0; i < nblock; i++) {
514 uc = (UChar)(s->ll16[i]);
515 SET_LL(i, s->cftabCopy[uc]);
516 s->cftabCopy[uc]++;
517 }
518
519 /*-- Compute T^(-1) by pointer reversal on T --*/
520 i = s->origPtr;
521 j = GET_LL(i);
522 do {
523 Int32 tmp = GET_LL(j);
524 SET_LL(j, i);
525 i = j;
526 j = tmp;
527 }
528 while (i != s->origPtr);
529
530 s->tPos = s->origPtr;
531 s->nblock_used = 0;
532 if (s->blockRandomised) {
533 BZ_RAND_INIT_MASK;
534 BZ_GET_SMALL(s->k0); s->nblock_used++;
535 BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK;
536 } else {
537 BZ_GET_SMALL(s->k0); s->nblock_used++;
538 }
539
540 } else {
541
542 /*-- compute the T^(-1) vector --*/
543 for (i = 0; i < nblock; i++) {
544 uc = (UChar)(s->tt[i] & 0xff);
545 s->tt[s->cftab[uc]] |= (i << 8);
546 s->cftab[uc]++;
547 }
548
549 s->tPos = s->tt[s->origPtr] >> 8;
550 s->nblock_used = 0;
551 if (s->blockRandomised) {
552 BZ_RAND_INIT_MASK;
553 BZ_GET_FAST(s->k0); s->nblock_used++;
554 BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK;
555 } else {
556 BZ_GET_FAST(s->k0); s->nblock_used++;
557 }
558
559 }
560
561 RETURN(BZ_OK);
562
563
564
565 endhdr_2:
566
567 GET_UCHAR(BZ_X_ENDHDR_2, uc);
568 if (uc != 0x72) RETURN(BZ_DATA_ERROR);
569 GET_UCHAR(BZ_X_ENDHDR_3, uc);
570 if (uc != 0x45) RETURN(BZ_DATA_ERROR);
571 GET_UCHAR(BZ_X_ENDHDR_4, uc);
572 if (uc != 0x38) RETURN(BZ_DATA_ERROR);
573 GET_UCHAR(BZ_X_ENDHDR_5, uc);
574 if (uc != 0x50) RETURN(BZ_DATA_ERROR);
575 GET_UCHAR(BZ_X_ENDHDR_6, uc);
576 if (uc != 0x90) RETURN(BZ_DATA_ERROR);
577
578 s->storedCombinedCRC = 0;
579 GET_UCHAR(BZ_X_CCRC_1, uc);
580 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
581 GET_UCHAR(BZ_X_CCRC_2, uc);
582 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
583 GET_UCHAR(BZ_X_CCRC_3, uc);
584 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
585 GET_UCHAR(BZ_X_CCRC_4, uc);
586 s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
587
588 s->state = BZ_X_IDLE;
589 RETURN(BZ_STREAM_END);
590
591 default: AssertH ( False, 4001 );
592 }
593
594 AssertH ( False, 4002 );
595
596 save_state_and_return:
597
598 s->save_i = i;
599 s->save_j = j;
600 s->save_t = t;
601 s->save_alphaSize = alphaSize;
602 s->save_nGroups = nGroups;
603 s->save_nSelectors = nSelectors;
604 s->save_EOB = EOB;
605 s->save_groupNo = groupNo;
606 s->save_groupPos = groupPos;
607 s->save_nextSym = nextSym;
608 s->save_nblockMAX = nblockMAX;
609 s->save_nblock = nblock;
610 s->save_es = es;
611 s->save_N = N;
612 s->save_curr = curr;
613 s->save_zt = zt;
614 s->save_zn = zn;
615 s->save_zvec = zvec;
616 s->save_zj = zj;
617 s->save_gSel = gSel;
618 s->save_gMinlen = gMinlen;
619 s->save_gLimit = gLimit;
620 s->save_gBase = gBase;
621 s->save_gPerm = gPerm;
622
623 return retVal;
624 }
625
626
627 /*-------------------------------------------------------------*/
628 /*--- end decompress.c ---*/
629 /*-------------------------------------------------------------*/
630 ABC_NAMESPACE_IMPL_END
631
632