1 unit bzip2;
2 {****************************************************************************
3
4 BZIP2 decompression unit
5
6 Copyright (C) 2002 by Daniel Mantione
7
8 This unit provides a decompression stream to decode .bz2 files. It is
9 inpired by Julian R. Seward's libbzip2 library and therefore you should
10 send credits to him and bug reports to me :)
11
12 This code is licensed under the same terms as the original libbz2 library,
13 which is decsribed in the file LICENSE. If you don't have this file, look
14 at http://www.freepascal.org for this bzip2 unit, the LICENSE file will
15 be included. In case of problems, contact the author.
16
17 E-mail addresses:
18
19 Daniel Mantione <daniel.mantione@freepascal.org>
20 Julian R. Seward <jseward@acm.org>
21
22 Please do not contact Julian about this Pascal library, he didn't wrote it.
23
24 ****************************************************************************}
25 interface
26
27 {$goto on}
28
29 uses objects, bzip2comn;
30
31 Type
32 Tbzip2_decode_stream=object(Tstream)
33 short:cardinal;
34 readstream:Pstream;
35 block_randomized:boolean;
36 blocksize:byte;
37 tt:Pcardinal_array;
38 tt_count:cardinal;
39 rle_run_left,rle_run_data:byte;
40 nextrle:Pbyte;
41 decode_available:cardinal;
42 block_origin:cardinal;
43 current_block:cardinal;
44 read_data,bits_available:byte;
45 inuse16:set of 0..15;
46 inuse:set of 0..255;
47 inuse_count:cardinal;
48 seq_to_unseq:array[0..255] of byte;
49 alphasize:cardinal;
50 group_count,group_pos,gsel,gminlen:byte;
51 group_no:cardinal;
52 glimit,gperm,gbase:Phuffarray;
53 selector_count:cardinal;
54 selector,selector_mtf:array[0..max_selectors] of byte;
55 len:array[0..max_groups,0..max_alpha_size] of byte;
56 limit:array[0..max_groups,0..max_alpha_size] of cardinal;
57 base:array[0..max_groups,0..max_alpha_size] of cardinal;
58 perm:array[0..max_groups,0..max_alpha_size] of cardinal;
59 minlens:array[0..max_groups] of byte;
60 cftab:array[0..257] of cardinal;
61 mtfbase:array[0..256 div mtfl_size-1] of cardinal;
62 mtfa:array[0..mtfa_size-1] of byte;
63 constructor init(Areadstream:Pstream);
get_bitsnull64 function get_bits(n:byte):byte;
get_booleannull65 function get_boolean:boolean;
get_bytenull66 function get_byte:byte;
get_cardinal24null67 function get_cardinal24:cardinal;
get_cardinalnull68 function get_cardinal:cardinal;
69 procedure receive_mapping_table;
70 procedure receive_selectors;
71 procedure undo_mtf_values;
72 procedure receive_coding_tables;
73 procedure make_hufftab;
74 procedure init_mtf;
get_mtf_valuenull75 function get_mtf_value:cardinal;
76 procedure move_mtf_block;
77 procedure receive_mtf_values;
78 procedure detransform;
decode_blocknull79 function decode_block:boolean;
80 procedure read(var buf;count:Longint);virtual;
81 procedure new_block;
82 procedure consume_rle;inline;
83 procedure rle_read(bufptr:Pbyte;var count:Longint);
84 destructor done;virtual;
85 end;
86
87
88 implementation
89
90 {$ifdef cpui386}
91 {$i bzip2i386.inc}
92 {$endif}
93
94 {*****************************************************************************
95 Tbzip2_decode_stream
96 *****************************************************************************}
97
98 constructor Tbzip2_decode_stream.init(Areadstream:Pstream);
99
100 var magic:array[1..3] of char;
101 c:char;
102
103 begin
104 readstream:=Areadstream;
105 {Read the magic.}
106 readstream^.read(magic,sizeof(magic));
107 if magic<>bzip2_stream_magic then
108 begin
109 error(stiniterror,bzip2_bad_header_magic);
110 exit;
111 end;
112 {Read the block size and allocate the working array.}
113 readstream^.read(c,1);
114 blocksize:=byte(c)-byte('0');
115 getmem(tt,blocksize*100000*sizeof(cardinal));
116 decode_available:=high(decode_available);
117 end;
118
Tbzip2_decode_stream.get_bitsnull119 function Tbzip2_decode_stream.get_bits(n:byte):byte;
120
121 var data:byte;
122
123 begin
124 if n>bits_available then
125 begin
126 readstream^.read(data,1);
127 get_bits:=(read_data shr (8-n)) or data shr (8-(n-bits_available));
128 read_data:=data shl (n-bits_available);
129 inc(bits_available,8);
130 end
131 else
132 begin
133 get_bits:=read_data shr (8-n);
134 read_data:=read_data shl n;
135 end;
136 dec(bits_available,n);
137 end;
138
get_booleannull139 function Tbzip2_decode_stream.get_boolean:boolean;
140
141 begin
142 get_boolean:=boolean(get_bits(1));
143 end;
144
Tbzip2_decode_stream.get_bytenull145 function Tbzip2_decode_stream.get_byte:byte;
146
147 begin
148 get_byte:=get_bits(8);
149 end;
150
get_cardinal24null151 function Tbzip2_decode_stream.get_cardinal24:cardinal;
152
153 begin
154 get_cardinal24:=get_bits(8) shl 16 or get_bits(8) shl 8 or get_bits(8);
155 end;
156
157
Tbzip2_decode_stream.get_cardinalnull158 function Tbzip2_decode_stream.get_cardinal:cardinal;
159
160 begin
161 get_cardinal:=get_bits(8) shl 24 or get_bits(8) shl 16 or get_bits(8) shl 8 or
162 get_bits(8);
163 end;
164
165 procedure Tbzip2_decode_stream.receive_mapping_table;
166
167 {Receive the mapping table. To save space, the inuse set is stored in pieces
168 of 16 bits. First 16 bits are stored which pieces of 16 bits are used, then
169 the pieces follow.}
170
171 var i,j:byte;
172
173 begin
174 inuse16:=[];
175 {Receive the first 16 bits which tell which pieces are stored.}
176 for i:=0 to 15 do
177 if get_boolean then
178 include(inuse16,i);
179
180 {Receive the used pieces.}
181 inuse:=[];
182 inuse_count:=0;
183 for i:=0 to 15 do
184 if i in inuse16 then
185 for j:=0 to 15 do
186 if get_boolean then
187 begin
188 include(inuse,16*i+j);
189 seq_to_unseq[inuse_count]:=16*i+j;
190 inc(inuse_count);
191 end;
192 { system.write('Mapping table: ');
193 for i:=0 to 255 do
194 if i in inuse then
195 system.write(i,' ');
196 writeln;}
197 end;
198
199 procedure Tbzip2_decode_stream.receive_selectors;
200
201 {Receives the selectors.}
202
203 var i:cardinal;
204 j:byte;
205
206 begin
207 group_count:=get_bits(3);
208 selector_count:=get_bits(8) shl 7 or get_bits(7);
209 for i:=0 to selector_count-1 do
210 begin
211 j:=0;
212 while get_boolean do
213 begin
214 inc(j);
215 if j>5 then
216 error(streaderror,bzip2_data_error);
217 end;
218 selector_mtf[i]:=j;
219 end;
220 { system.write('Selector_mtf: ');
221 for i:=0 to selector_count-1 do
222 system.write(selector_mtf[i],' ');
223 writeln;}
224 end;
225
226 procedure Tbzip2_decode_stream.undo_mtf_values;
227
228 {Undo the MTF values for the selectors.}
229
230 var pos:array[0..max_groups] of byte;
231 i:cardinal;
232 v,tmp:byte;
233
234 begin
235 for v:=0 to group_count-1 do
236 pos[v]:=v;
237 for i:=0 to selector_count-1 do
238 begin
239 v:=selector_mtf[i];
240 tmp:=pos[v];
241 while v<>0 do
242 begin
243 pos[v]:=pos[v-1];
244 dec(v);
245 end;
246 pos[0]:=tmp;
247 selector[i]:=tmp;
248 end;
249 end;
250
251 procedure Tbzip2_decode_stream.receive_coding_tables;
252
253 var t,curr:byte;
254 i:cardinal;
255
256 begin
257 for t:=0 to group_count-1 do
258 begin
259 curr:=get_bits(5);
260 for i:=0 to alphasize-1 do
261 begin
262 repeat
263 if not(curr in [1..20]) then
264 begin
265 error(streaderror,bzip2_data_error);
266 exit;
267 end;
268 if not get_boolean then
269 break;
270 if get_boolean then
271 dec(curr)
272 else
273 inc(curr);
274 until false;
275 len[t,i]:=curr;
276 end;
277 end;
278 { writeln('Coding tables:');
279 for t:=0 to group_count-1 do
280 begin
281 for i:=0 to alphasize-1 do
282 system.write(len[t,i],' ');
283 writeln;
284 end;}
285 end;
286
287 procedure Tbzip2_decode_stream.make_hufftab;
288
289 {Builds the Huffman tables.}
290
291 var i:cardinal;
292 t,minlen,maxlen:byte;
293
294 begin
295 for t:=0 to group_count-1 do
296 begin
297 minlen:=32;
298 maxlen:=0;
299 for i:=0 to alphasize-1 do
300 begin
301 if len[t,i]>maxlen then
302 maxlen:=len[t,i];
303 if len[t,i]<minlen then
304 minlen:=len[t,i];
305 end;
306 hb_create_decode_tables(limit[t],base[t],perm[t],len[t],
307 minlen,maxlen,alphasize);
308 minlens[t]:=minlen;
309 end;
310 end;
311
312 procedure Tbzip2_decode_stream.init_mtf;
313
314 var i,j:byte;
315 k:cardinal;
316
317 begin
318 k:=mtfa_size-1;
319 for i:=256 div mtfl_size-1 downto 0 do
320 begin
321 for j:=mtfl_size-1 downto 0 do
322 begin
323 mtfa[k]:=i*mtfl_size+j;
324 dec(k);
325 end;
326 mtfbase[i]:=k+1;
327 end;
328 end;
329
Tbzip2_decode_stream.get_mtf_valuenull330 function Tbzip2_decode_stream.get_mtf_value:cardinal;
331
332 var zn:byte;
333 zvec:cardinal;
334
335 begin
336 if group_pos=0 then
337 begin
338 inc(group_no);
339 group_pos:=group_size;
340 gsel:=selector[group_no];
341 gminlen:=minlens[gsel];
342 glimit:=@limit[gsel];
343 gperm:=@perm[gsel];
344 gbase:=@base[gsel];
345 end;
346 dec(group_pos);
347 zn:=gminlen;
348 zvec:=get_bits(zn);
349 while zvec>glimit^[zn] do
350 begin
351 inc(zn);
352 zvec:=zvec shl 1 or byte(get_boolean);
353 end;
354 get_mtf_value:=gperm^[zvec-gbase^[zn]];
355 end;
356
357 procedure Tbzip2_decode_stream.move_mtf_block;
358
359 var i:byte;
360 j,k:cardinal;
361
362 begin
363 k:=MTFA_SIZE;
364 for i:=256 div MTFL_SIZE-1 downto 0 do
365 begin
366 j:=mtfbase[i];
367 Pcardinal(@mtfa[k- 4])^:=Pcardinal(@mtfa[j+12])^;
368 Pcardinal(@mtfa[k- 8])^:=Pcardinal(@mtfa[j+ 8])^;
369 Pcardinal(@mtfa[k-12])^:=Pcardinal(@mtfa[j+ 4])^;
370 dec(k,16);
371 Pcardinal(@mtfa[k ])^:=Pcardinal(@mtfa[j ])^;
372 mtfbase[i]:=k;
373 end;
374 end;
375
376 procedure Tbzip2_decode_stream.receive_mtf_values;
377
378 const run_a=0;
379 run_b=1;
380
381 var t,next_sym:cardinal;
382 es:cardinal;
383 n:byte;
384 nn,i:cardinal;
385 p,q:Pbyte;
386 u,v:Pcardinal;
387 lno,off:cardinal;
388
389 begin
390 group_no:=high(group_no);
391 group_pos:=0;
392 t:=0;
393 for i:=0 to 257 do
394 cftab[i]:=0;
395 init_mtf;
396 next_sym:=get_mtf_value;
397 while next_sym<>inuse_count+1 do
398 begin
399 { writeln(t,' ',next_sym);
400 if t=22296 then
401 t:=t; }
402 if next_sym<=run_b then
403 begin
404 es:=0;
405 n:=0;
406 repeat
407 inc(es,(next_sym+1) shl n);
408 inc(n);
409 next_sym:=get_mtf_value;
410 until next_sym>run_b;
411 n:=seq_to_unseq[mtfa[mtfbase[0]]];
412 inc(cftab[n],es);
413 if t+es>100000*blocksize then
414 begin
415 error(streaderror,bzip2_data_error);
416 exit;
417 end;
418 while es>0 do
419 begin
420 tt^[t]:=ntole(cardinal(n));
421 dec(es);
422 inc(t);
423 end;
424 end
425 else
426 begin
427 nn:=next_sym-1;
428 if nn<mtfl_size then
429 begin
430 {Avoid the costs of the general case.}
431 p:=@mtfa[mtfbase[0]];
432 q:=p+nn;
433 n:=q^;
434 repeat
435 q^:=(q-1)^;
436 dec(q);
437 until q=p;
438 q^:=n;
439 end
440 else
441 begin
442 {General case.}
443 lno:=nn div MTFL_SIZE;
444 off:=nn and (MTFL_SIZE-1);
445 p:=@mtfa[mtfbase[lno]];
446 q:=p+off;
447 n:=q^;
448 while(q<>p) do
449 begin
450 q^:=(q-1)^;
451 dec(q);
452 end;
453 u:=@mtfbase;
454 v:=u+lno;
455 repeat
456 mtfa[v^]:=mtfa[(v-1)^+MTFL_SIZE-1];
457 dec(v);
458 dec(v^);
459 until v=u;
460 mtfa[v^]:=n;
461 if v^=0 then
462 move_mtf_block;
463 end;
464 inc(cftab[seq_to_unseq[n]]);
465 tt^[t]:=ntole(cardinal(seq_to_unseq[n]));
466 inc(t);
467 if t>100000*blocksize then
468 begin
469 error(streaderror,bzip2_data_error);
470 exit;
471 end;
472 next_sym:=get_mtf_value;
473 end;
474 end;
475 tt_count:=t;
476 {Setup cftab to facilitate generation of T^(-1).}
477 t:=0;
478 for i:=0 to 256 do
479 begin
480 nn:=cftab[i];
481 cftab[i]:=t;
482 { writeln(i,' ',t);}
483 inc(t,nn);
484 end;
485 end;
486
487 {$ifndef HAVE_DETRANSFORM}
488
489 procedure Tbzip2_decode_stream.detransform;
490
491 var a:cardinal;
492 p,q,r:Pcardinal;
493
494 begin
495 a:=0;
496 p:=@tt^[0];
497 q:=p+tt_count;
498 while p<>q do
499 begin
500 r:=@tt^[cftab[ntole(p^) and $ff]];
501 inc(cftab[ntole(p^) and $ff]);
502 r^:=r^ or ntole(a);
503 inc(a,256);
504 inc(p);
505 end;
506 end;
507
508 {$endif}
509
decode_blocknull510 function Tbzip2_decode_stream.decode_block:boolean;
511
512 {Decode a new compressed block.}
513
514 var magic:array[1..6] of char;
515 stored_blockcrc:cardinal;
516 i:byte;
517
518 begin
519 for i:=1 to 6 do
520 magic[i]:=char(get_byte);
521 if magic='1AY&SY' then
522 begin
523 inc(current_block);
524 { writeln('Block ',current_block,': Header ok');}
525 stored_blockcrc:=get_cardinal;
526 block_randomized:=get_boolean;
527 block_origin:=get_cardinal24;
528
529 {Receive the mapping table.}
530 receive_mapping_table;
531 alphasize:=cardinal(inuse_count)+2;
532 { writeln('Mapping table ok.');}
533
534 {Receive the selectors.}
535 receive_selectors;
536 if status<>0 then
537 exit;
538 { writeln('Selectors ok.');}
539 {Undo the MTF values for the selectors.}
540 undo_mtf_values;
541 { writeln('Undo mtf ok.');}
542 {Receive the coding tables.}
543 receive_coding_tables;
544 if status<>0 then
545 exit;
546 { writeln('Coding tables ok');}
547 {Build the Huffman tables.}
548 make_hufftab;
549 { writeln('Huffman ok.');}
550 {Receive the MTF values.}
551 receive_mtf_values;
552 { writeln('MTF OK');}
553 {Undo the Burrows Wheeler transformation.}
554 detransform;
555 { writeln('Detransform OK');}
556 decode_available:=tt_count;
557 end
558 else
559 begin
560 if magic<>#$17'rE8P'#$90 then
561 error(streaderror,bzip2_bad_block_magic);
562 decode_block:=false;
563 end;
564 end;
565
566 procedure Tbzip2_decode_stream.new_block;
567
568 begin
569 if decode_block then
570 nextrle:=@tt^[ntole(tt^[block_origin]) shr 8]
571 else
572 begin
573 error(streaderror,bzip2_endoffile);
574 nextrle:=nil;
575 end;
576 end;
577
578 procedure Tbzip2_decode_stream.consume_rle;inline;
579
580 {Make nextrle point to the next decoded byte. If nextrle did point to the last
581 byte in the current block, decode the next block.}
582
583 begin
584 { Pcardinal(nextrle)^:=Pcardinal(nextrle)^ shr 8;}
585 nextrle:=@tt^[ntole(Pcardinal(nextrle)^) shr 8];
586 dec(decode_available);
587 if decode_available=0 then
588 new_block;
589 end;
590
591 procedure Tbzip2_decode_stream.rle_read(bufptr:Pbyte;var count:Longint);
592
593 var rle_len:cardinal;
594 data:byte;
595
596 label rle_write;
597
598 begin
599 rle_len:=rle_run_left;
600 data:=rle_run_data;
601 if block_randomized then
602 {Not yet implemented.}
603 runerror(212)
604 else
605 begin
606 if rle_len<>0 then
607 {Speed is important. Instead of an if statement within the
608 repeat loop use a goto outside the loop.}
609 goto rle_write;
610 repeat
611 if decode_available=0 then
612 break;
613 rle_len:=1;
614 data:=nextrle^;
615 consume_rle;
616 if (decode_available>0) and (data=nextrle^) then
617 begin
618 inc(rle_len);
619 consume_rle;
620 if (decode_available>0) and (data=nextrle^) then
621 begin
622 inc(rle_len);
623 consume_rle;
624 if (decode_available>0) and (data=nextrle^) then
625 begin
626 consume_rle;
627 inc(rle_len,nextrle^+1);
628 consume_rle;
629 end;
630 end;
631 end;
632 rle_write:
633 repeat
634 bufptr^:=data;
635 inc(bufptr);
636 dec(count);
637 dec(rle_len);
638 until (rle_len=0) or (count=0);
639 until count=0;
640 short:=count;
641 end;
642 rle_run_data:=data;
643 rle_run_left:=rle_len;
644 end;
645
646 procedure Tbzip2_decode_stream.read(var buf;count:Longint);
647
648 var bufptr:Pbyte;
649
650 begin
651 short:=0;
652 bufptr:=@buf;
653 if decode_available=high(decode_available) then
654 begin
655 {Initialize the rle process:
656 - Decode a block
657 - Initialize pointer.}
658 if not decode_block then
659 begin
660 error(streaderror,bzip2_endoffile);
661 nextrle:=nil;
662 end;
663 nextrle:=@tt^[ntole(tt^[block_origin]) shr 8];
664 end;
665 rle_read(bufptr,count);
666 end;
667
668 destructor Tbzip2_decode_stream.done;
669
670 begin
671 if tt<>nil then
672 freemem(tt,blocksize*100000*sizeof(cardinal));
673 inherited done;
674 end;
675
676 end.
677