1 unit bzip2;
2 {****************************************************************************
3 
4                              BZIP2 decompression unit
5 
6                         Copyright (C) 2002 by Daniel Mantione
7 
8 This unit provides a decompression stream to decode .bz2 files. It is
9 inpired by Julian R. Seward's libbzip2 library and therefore you should
10 send credits to him and bug reports to me :)
11 
12 This code is licensed under the same terms as the original libbz2 library,
13 which is decsribed in the file LICENSE. If you don't have this file, look
14 at http://www.freepascal.org for this bzip2 unit, the LICENSE file will
15 be included. In case of problems, contact the author.
16 
17 E-mail addresses:
18 
19 Daniel Mantione     <daniel.mantione@freepascal.org>
20 Julian R. Seward    <jseward@acm.org>
21 
22 Please do not contact Julian about this Pascal library, he didn't wrote it.
23 
24 ****************************************************************************}
25 interface
26 
27 {$goto on}
28 
29 uses objects, bzip2comn;
30 
31 Type
32       Tbzip2_decode_stream=object(Tstream)
33         short:cardinal;
34         readstream:Pstream;
35         block_randomized:boolean;
36         blocksize:byte;
37         tt:Pcardinal_array;
38         tt_count:cardinal;
39         rle_run_left,rle_run_data:byte;
40         nextrle:Pbyte;
41         decode_available:cardinal;
42         block_origin:cardinal;
43         current_block:cardinal;
44         read_data,bits_available:byte;
45         inuse16:set of 0..15;
46         inuse:set of 0..255;
47         inuse_count:cardinal;
48         seq_to_unseq:array[0..255] of byte;
49         alphasize:cardinal;
50         group_count,group_pos,gsel,gminlen:byte;
51         group_no:cardinal;
52         glimit,gperm,gbase:Phuffarray;
53         selector_count:cardinal;
54         selector,selector_mtf:array[0..max_selectors] of byte;
55         len:array[0..max_groups,0..max_alpha_size] of byte;
56         limit:array[0..max_groups,0..max_alpha_size] of cardinal;
57         base:array[0..max_groups,0..max_alpha_size] of cardinal;
58         perm:array[0..max_groups,0..max_alpha_size] of cardinal;
59         minlens:array[0..max_groups] of byte;
60         cftab:array[0..257] of cardinal;
61         mtfbase:array[0..256 div mtfl_size-1] of cardinal;
62         mtfa:array[0..mtfa_size-1] of byte;
63         constructor init(Areadstream:Pstream);
get_bitsnull64         function get_bits(n:byte):byte;
get_booleannull65         function get_boolean:boolean;
get_bytenull66         function get_byte:byte;
get_cardinal24null67         function get_cardinal24:cardinal;
get_cardinalnull68         function get_cardinal:cardinal;
69         procedure receive_mapping_table;
70         procedure receive_selectors;
71         procedure undo_mtf_values;
72         procedure receive_coding_tables;
73         procedure make_hufftab;
74         procedure init_mtf;
get_mtf_valuenull75         function get_mtf_value:cardinal;
76         procedure move_mtf_block;
77         procedure receive_mtf_values;
78         procedure detransform;
decode_blocknull79         function decode_block:boolean;
80         procedure read(var buf;count:Longint);virtual;
81         procedure new_block;
82         procedure consume_rle;inline;
83         procedure rle_read(bufptr:Pbyte;var count:Longint);
84         destructor done;virtual;
85       end;
86 
87 
88 implementation
89 
90 {$ifdef cpui386}
91   {$i bzip2i386.inc}
92 {$endif}
93 
94 {*****************************************************************************
95                              Tbzip2_decode_stream
96 *****************************************************************************}
97 
98 constructor Tbzip2_decode_stream.init(Areadstream:Pstream);
99 
100 var magic:array[1..3] of char;
101     c:char;
102 
103 begin
104   readstream:=Areadstream;
105   {Read the magic.}
106   readstream^.read(magic,sizeof(magic));
107   if magic<>bzip2_stream_magic then
108     begin
109       error(stiniterror,bzip2_bad_header_magic);
110       exit;
111     end;
112   {Read the block size and allocate the working array.}
113   readstream^.read(c,1);
114   blocksize:=byte(c)-byte('0');
115   getmem(tt,blocksize*100000*sizeof(cardinal));
116   decode_available:=high(decode_available);
117 end;
118 
Tbzip2_decode_stream.get_bitsnull119 function Tbzip2_decode_stream.get_bits(n:byte):byte;
120 
121 var data:byte;
122 
123 begin
124   if n>bits_available then
125     begin
126       readstream^.read(data,1);
127       get_bits:=(read_data shr (8-n)) or data shr (8-(n-bits_available));
128       read_data:=data shl (n-bits_available);
129       inc(bits_available,8);
130     end
131   else
132     begin
133       get_bits:=read_data shr (8-n);
134       read_data:=read_data shl n;
135     end;
136   dec(bits_available,n);
137 end;
138 
get_booleannull139 function Tbzip2_decode_stream.get_boolean:boolean;
140 
141 begin
142   get_boolean:=boolean(get_bits(1));
143 end;
144 
Tbzip2_decode_stream.get_bytenull145 function Tbzip2_decode_stream.get_byte:byte;
146 
147 begin
148   get_byte:=get_bits(8);
149 end;
150 
get_cardinal24null151 function Tbzip2_decode_stream.get_cardinal24:cardinal;
152 
153 begin
154   get_cardinal24:=get_bits(8) shl 16 or get_bits(8) shl 8 or get_bits(8);
155 end;
156 
157 
Tbzip2_decode_stream.get_cardinalnull158 function Tbzip2_decode_stream.get_cardinal:cardinal;
159 
160 begin
161   get_cardinal:=get_bits(8) shl 24 or get_bits(8) shl 16 or get_bits(8) shl 8 or
162                 get_bits(8);
163 end;
164 
165 procedure Tbzip2_decode_stream.receive_mapping_table;
166 
167 {Receive the mapping table. To save space, the inuse set is stored in pieces
168  of 16 bits. First 16 bits are stored which pieces of 16 bits are used, then
169  the pieces follow.}
170 
171 var i,j:byte;
172 
173 begin
174   inuse16:=[];
175   {Receive the first 16 bits which tell which pieces are stored.}
176   for i:=0 to 15 do
177     if get_boolean then
178       include(inuse16,i);
179 
180   {Receive the used pieces.}
181   inuse:=[];
182   inuse_count:=0;
183   for i:=0 to 15 do
184     if i in inuse16 then
185       for j:=0 to 15 do
186         if get_boolean then
187           begin
188             include(inuse,16*i+j);
189             seq_to_unseq[inuse_count]:=16*i+j;
190             inc(inuse_count);
191           end;
192 {  system.write('Mapping table: ');
193   for i:=0 to 255 do
194     if i in inuse then
195       system.write(i,' ');
196   writeln;}
197 end;
198 
199 procedure Tbzip2_decode_stream.receive_selectors;
200 
201 {Receives the selectors.}
202 
203 var i:cardinal;
204     j:byte;
205 
206 begin
207   group_count:=get_bits(3);
208   selector_count:=get_bits(8) shl 7 or get_bits(7);
209   for i:=0 to selector_count-1 do
210     begin
211       j:=0;
212       while get_boolean do
213         begin
214           inc(j);
215           if j>5 then
216             error(streaderror,bzip2_data_error);
217         end;
218       selector_mtf[i]:=j;
219     end;
220 {  system.write('Selector_mtf: ');
221   for i:=0 to selector_count-1 do
222     system.write(selector_mtf[i],' ');
223   writeln;}
224 end;
225 
226 procedure Tbzip2_decode_stream.undo_mtf_values;
227 
228 {Undo the MTF values for the selectors.}
229 
230 var pos:array[0..max_groups] of byte;
231     i:cardinal;
232     v,tmp:byte;
233 
234 begin
235   for v:=0 to group_count-1 do
236     pos[v]:=v;
237   for i:=0 to selector_count-1 do
238     begin
239       v:=selector_mtf[i];
240       tmp:=pos[v];
241       while v<>0 do
242         begin
243           pos[v]:=pos[v-1];
244           dec(v);
245         end;
246       pos[0]:=tmp;
247       selector[i]:=tmp;
248     end;
249 end;
250 
251 procedure Tbzip2_decode_stream.receive_coding_tables;
252 
253 var t,curr:byte;
254     i:cardinal;
255 
256 begin
257   for t:=0 to group_count-1 do
258     begin
259       curr:=get_bits(5);
260       for i:=0 to alphasize-1 do
261         begin
262           repeat
263             if not(curr in [1..20]) then
264               begin
265                 error(streaderror,bzip2_data_error);
266                 exit;
267               end;
268             if not get_boolean then
269               break;
270             if get_boolean then
271               dec(curr)
272             else
273               inc(curr);
274           until false;
275           len[t,i]:=curr;
276         end;
277     end;
278 {  writeln('Coding tables:');
279   for t:=0 to group_count-1 do
280     begin
281       for i:=0 to alphasize-1 do
282         system.write(len[t,i],' ');
283       writeln;
284     end;}
285 end;
286 
287 procedure Tbzip2_decode_stream.make_hufftab;
288 
289 {Builds the Huffman tables.}
290 
291 var i:cardinal;
292     t,minlen,maxlen:byte;
293 
294 begin
295   for t:=0 to group_count-1 do
296     begin
297       minlen:=32;
298       maxlen:=0;
299       for i:=0 to alphasize-1 do
300         begin
301           if len[t,i]>maxlen then
302             maxlen:=len[t,i];
303           if len[t,i]<minlen then
304             minlen:=len[t,i];
305         end;
306       hb_create_decode_tables(limit[t],base[t],perm[t],len[t],
307                               minlen,maxlen,alphasize);
308       minlens[t]:=minlen;
309     end;
310 end;
311 
312 procedure Tbzip2_decode_stream.init_mtf;
313 
314 var i,j:byte;
315     k:cardinal;
316 
317 begin
318   k:=mtfa_size-1;
319   for i:=256 div mtfl_size-1 downto 0 do
320     begin
321       for j:=mtfl_size-1 downto 0 do
322         begin
323           mtfa[k]:=i*mtfl_size+j;
324           dec(k);
325         end;
326       mtfbase[i]:=k+1;
327     end;
328 end;
329 
Tbzip2_decode_stream.get_mtf_valuenull330 function Tbzip2_decode_stream.get_mtf_value:cardinal;
331 
332 var zn:byte;
333     zvec:cardinal;
334 
335 begin
336   if group_pos=0 then
337     begin
338       inc(group_no);
339       group_pos:=group_size;
340       gsel:=selector[group_no];
341       gminlen:=minlens[gsel];
342       glimit:=@limit[gsel];
343       gperm:=@perm[gsel];
344       gbase:=@base[gsel];
345     end;
346   dec(group_pos);
347   zn:=gminlen;
348   zvec:=get_bits(zn);
349   while zvec>glimit^[zn] do
350     begin
351       inc(zn);
352       zvec:=zvec shl 1 or byte(get_boolean);
353     end;
354   get_mtf_value:=gperm^[zvec-gbase^[zn]];
355 end;
356 
357 procedure Tbzip2_decode_stream.move_mtf_block;
358 
359 var i:byte;
360     j,k:cardinal;
361 
362 begin
363   k:=MTFA_SIZE;
364   for i:=256 div MTFL_SIZE-1 downto 0 do
365     begin
366       j:=mtfbase[i];
367       Pcardinal(@mtfa[k- 4])^:=Pcardinal(@mtfa[j+12])^;
368       Pcardinal(@mtfa[k- 8])^:=Pcardinal(@mtfa[j+ 8])^;
369       Pcardinal(@mtfa[k-12])^:=Pcardinal(@mtfa[j+ 4])^;
370       dec(k,16);
371       Pcardinal(@mtfa[k   ])^:=Pcardinal(@mtfa[j   ])^;
372       mtfbase[i]:=k;
373     end;
374 end;
375 
376 procedure Tbzip2_decode_stream.receive_mtf_values;
377 
378 const run_a=0;
379       run_b=1;
380 
381 var t,next_sym:cardinal;
382     es:cardinal;
383     n:byte;
384     nn,i:cardinal;
385     p,q:Pbyte;
386     u,v:Pcardinal;
387     lno,off:cardinal;
388 
389 begin
390   group_no:=high(group_no);
391   group_pos:=0;
392   t:=0;
393   for i:=0 to 257 do
394     cftab[i]:=0;
395   init_mtf;
396   next_sym:=get_mtf_value;
397   while next_sym<>inuse_count+1 do
398     begin
399 {      writeln(t,'   ',next_sym);
400       if t=22296 then
401         t:=t;                    }
402       if next_sym<=run_b then
403         begin
404           es:=0;
405           n:=0;
406           repeat
407             inc(es,(next_sym+1) shl n);
408             inc(n);
409             next_sym:=get_mtf_value;
410           until next_sym>run_b;
411           n:=seq_to_unseq[mtfa[mtfbase[0]]];
412           inc(cftab[n],es);
413           if t+es>100000*blocksize then
414             begin
415               error(streaderror,bzip2_data_error);
416               exit;
417             end;
418           while es>0 do
419             begin
420               tt^[t]:=ntole(cardinal(n));
421               dec(es);
422               inc(t);
423             end;
424         end
425       else
426         begin
427           nn:=next_sym-1;
428           if nn<mtfl_size then
429             begin
430               {Avoid the costs of the general case.}
431               p:=@mtfa[mtfbase[0]];
432               q:=p+nn;
433               n:=q^;
434               repeat
435                 q^:=(q-1)^;
436                 dec(q);
437               until q=p;
438               q^:=n;
439             end
440           else
441             begin
442               {General case.}
443               lno:=nn div MTFL_SIZE;
444               off:=nn and (MTFL_SIZE-1);
445               p:=@mtfa[mtfbase[lno]];
446               q:=p+off;
447               n:=q^;
448               while(q<>p) do
449                 begin
450                   q^:=(q-1)^;
451                   dec(q);
452                 end;
453               u:=@mtfbase;
454               v:=u+lno;
455               repeat
456                 mtfa[v^]:=mtfa[(v-1)^+MTFL_SIZE-1];
457                 dec(v);
458                 dec(v^);
459               until v=u;
460               mtfa[v^]:=n;
461               if v^=0 then
462                 move_mtf_block;
463             end;
464           inc(cftab[seq_to_unseq[n]]);
465           tt^[t]:=ntole(cardinal(seq_to_unseq[n]));
466           inc(t);
467           if t>100000*blocksize then
468             begin
469               error(streaderror,bzip2_data_error);
470               exit;
471             end;
472           next_sym:=get_mtf_value;
473         end;
474     end;
475     tt_count:=t;
476   {Setup cftab to facilitate generation of T^(-1).}
477   t:=0;
478   for i:=0 to 256 do
479     begin
480       nn:=cftab[i];
481       cftab[i]:=t;
482 {      writeln(i,' ',t);}
483       inc(t,nn);
484     end;
485 end;
486 
487 {$ifndef HAVE_DETRANSFORM}
488 
489 procedure Tbzip2_decode_stream.detransform;
490 
491 var a:cardinal;
492     p,q,r:Pcardinal;
493 
494 begin
495   a:=0;
496   p:=@tt^[0];
497   q:=p+tt_count;
498   while p<>q do
499     begin
500       r:=@tt^[cftab[ntole(p^) and $ff]];
501       inc(cftab[ntole(p^) and $ff]);
502       r^:=r^ or ntole(a);
503       inc(a,256);
504       inc(p);
505     end;
506 end;
507 
508 {$endif}
509 
decode_blocknull510 function Tbzip2_decode_stream.decode_block:boolean;
511 
512 {Decode a new compressed block.}
513 
514 var magic:array[1..6] of char;
515     stored_blockcrc:cardinal;
516     i:byte;
517 
518 begin
519   for i:=1 to 6 do
520     magic[i]:=char(get_byte);
521   if magic='1AY&SY' then
522     begin
523       inc(current_block);
524 {      writeln('Block ',current_block,': Header ok');}
525       stored_blockcrc:=get_cardinal;
526       block_randomized:=get_boolean;
527       block_origin:=get_cardinal24;
528 
529       {Receive the mapping table.}
530       receive_mapping_table;
531       alphasize:=cardinal(inuse_count)+2;
532 {      writeln('Mapping table ok.');}
533 
534       {Receive the selectors.}
535       receive_selectors;
536       if status<>0 then
537         exit;
538 {      writeln('Selectors ok.');}
539       {Undo the MTF values for the selectors.}
540       undo_mtf_values;
541 {      writeln('Undo mtf ok.');}
542       {Receive the coding tables.}
543       receive_coding_tables;
544       if status<>0 then
545         exit;
546 {      writeln('Coding tables ok');}
547       {Build the Huffman tables.}
548       make_hufftab;
549 {      writeln('Huffman ok.');}
550       {Receive the MTF values.}
551       receive_mtf_values;
552 {      writeln('MTF OK');}
553       {Undo the Burrows Wheeler transformation.}
554       detransform;
555 {      writeln('Detransform OK');}
556       decode_available:=tt_count;
557     end
558   else
559     begin
560       if magic<>#$17'rE8P'#$90 then
561         error(streaderror,bzip2_bad_block_magic);
562       decode_block:=false;
563     end;
564 end;
565 
566 procedure Tbzip2_decode_stream.new_block;
567 
568 begin
569   if decode_block then
570     nextrle:=@tt^[ntole(tt^[block_origin]) shr 8]
571   else
572     begin
573       error(streaderror,bzip2_endoffile);
574       nextrle:=nil;
575     end;
576 end;
577 
578 procedure Tbzip2_decode_stream.consume_rle;inline;
579 
580 {Make nextrle point to the next decoded byte. If nextrle did point to the last
581  byte in the current block, decode the next block.}
582 
583 begin
584 {  Pcardinal(nextrle)^:=Pcardinal(nextrle)^ shr 8;}
585   nextrle:=@tt^[ntole(Pcardinal(nextrle)^) shr 8];
586   dec(decode_available);
587   if decode_available=0 then
588     new_block;
589 end;
590 
591 procedure Tbzip2_decode_stream.rle_read(bufptr:Pbyte;var count:Longint);
592 
593 var rle_len:cardinal;
594     data:byte;
595 
596 label rle_write;
597 
598 begin
599   rle_len:=rle_run_left;
600   data:=rle_run_data;
601   if block_randomized then
602     {Not yet implemented.}
603     runerror(212)
604   else
605     begin
606       if rle_len<>0 then
607         {Speed is important. Instead of an if statement within the
608          repeat loop use a goto outside the loop.}
609         goto rle_write;
610       repeat
611         if decode_available=0 then
612           break;
613         rle_len:=1;
614         data:=nextrle^;
615         consume_rle;
616         if (decode_available>0) and (data=nextrle^) then
617           begin
618             inc(rle_len);
619             consume_rle;
620             if (decode_available>0) and (data=nextrle^) then
621               begin
622                 inc(rle_len);
623                 consume_rle;
624                 if (decode_available>0) and (data=nextrle^) then
625                   begin
626                     consume_rle;
627                     inc(rle_len,nextrle^+1);
628                     consume_rle;
629                   end;
630               end;
631           end;
632 rle_write:
633         repeat
634             bufptr^:=data;
635             inc(bufptr);
636             dec(count);
637             dec(rle_len);
638         until (rle_len=0) or (count=0);
639       until count=0;
640       short:=count;
641     end;
642   rle_run_data:=data;
643   rle_run_left:=rle_len;
644 end;
645 
646 procedure Tbzip2_decode_stream.read(var buf;count:Longint);
647 
648 var bufptr:Pbyte;
649 
650 begin
651   short:=0;
652   bufptr:=@buf;
653   if decode_available=high(decode_available) then
654     begin
655       {Initialize the rle process:
656         - Decode a block
657         - Initialize pointer.}
658       if not decode_block then
659         begin
660           error(streaderror,bzip2_endoffile);
661           nextrle:=nil;
662         end;
663       nextrle:=@tt^[ntole(tt^[block_origin]) shr 8];
664     end;
665   rle_read(bufptr,count);
666 end;
667 
668 destructor Tbzip2_decode_stream.done;
669 
670 begin
671   if tt<>nil then
672     freemem(tt,blocksize*100000*sizeof(cardinal));
673   inherited done;
674 end;
675 
676 end.
677