1--  Legal licensing note:
2
3--  Copyright (c) 2009 .. 2018 Gautier de Montmollin (maintainer of the Ada version)
4--  SWITZERLAND
5
6--  Permission is hereby granted, free of charge, to any person obtaining a copy
7--  of this software and associated documentation files (the "Software"), to deal
8--  in the Software without restriction, including without limitation the rights
9--  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10--  copies of the Software, and to permit persons to whom the Software is
11--  furnished to do so, subject to the following conditions:
12
13--  The above copyright notice and this permission notice shall be included in
14--  all copies or substantial portions of the Software.
15
16--  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17--  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18--  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19--  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20--  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21--  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22--  THE SOFTWARE.
23
24--  NB: this is the MIT License, as found 21-Aug-2016 on the site
25--  http://www.opensource.org/licenses/mit-license.php
26
27--  Translated on 20-Oct-2009 by (New) P2Ada v. 15-Nov-2006
28--  Rework by G. de Montmollin (see spec. for details)
29
30with Ada.Unchecked_Deallocation;
31
32package body BZip2.Decoding is
33
34  procedure Decompress is
35
36    max_groups    : constant:= 6;
37    max_alpha_size: constant:= 258;
38    max_code_len  : constant:= 23;
39    group_size    : constant:= 50;
40    max_selectors : constant:= 2 + (900_000 / group_size);
41
42    sub_block_size: constant:= 100_000;
43
44    type Length_array is array (Integer range <>) of Natural;
45
46    block_randomized: Boolean:= False;
47    block_size: Natural;
48
49    use Interfaces;
50
51    type Tcardinal_array is array (Integer range <>) of Unsigned_32;
52    type Pcardinal_array is access Tcardinal_array;
53    procedure Dispose is new Ada.Unchecked_Deallocation(Tcardinal_array, Pcardinal_array);
54    tt: Pcardinal_array;
55    tt_count: Natural;
56
57    rle_run_left: Natural:= 0;
58    rle_run_data: Byte:= 0;
59    decode_available: Natural:= Natural'Last;
60    block_origin: Natural:= 0;
61    read_data: Byte:= 0;
62    bits_available: Natural:= 0;
63    inuse_count: Natural;
64    seq_to_unseq: array (0 .. 255 ) of Natural;
65    alpha_size: Natural;
66    group_count: Natural;
67    --
68    selector_count: Natural;
69    selector, selector_mtf: array (0 .. max_selectors) of Byte;
70    --
71    type Alpha_U32_array is array (0 .. max_alpha_size) of Unsigned_32;
72    type Alpha_Nat_array is array (0 .. max_alpha_size) of Natural;
73
74    len  : array (0 .. max_groups) of Alpha_Nat_array;
75    limit,
76    base ,
77    perm : array (0 .. max_groups) of Alpha_U32_array;
78    --
79    minlens: Length_array(0 .. max_groups);
80    cftab: array (0 .. 257) of Natural;
81    --
82    end_reached: Boolean:= False;
83
84    in_buf: Buffer(1 .. input_buffer_size);
85    in_idx: Natural:= in_buf'Last + 1;
86
87    function Read_byte return Byte is
88      res: Byte;
89    begin
90      if in_idx > in_buf'Last then
91        Read(in_buf);
92        in_idx:= in_buf'First;
93      end if;
94      res:= in_buf(in_idx);
95      in_idx:= in_idx + 1;
96      return res;
97    end Read_byte;
98
99    procedure Create_Huffman_Decoding_Tables(
100       limit, base, perm: in out Alpha_U32_array;
101       length           : in     Alpha_Nat_array;
102       min_len, max_len : Natural;
103       alpha_size       : Integer
104    )
105    is
106      pp, idx: Integer;
107      vec: Unsigned_32;
108    begin
109      pp:= 0;
110      for i in min_len .. max_len loop
111        for j in 0 .. alpha_size-1 loop
112          if length(j) = i then
113            perm(pp) := Unsigned_32(j);
114            pp := pp + 1;
115          end if;
116        end loop;
117      end loop;
118      for i in 0 .. max_code_len-1 loop
119        base(i) := 0;
120        limit(i) := 0;
121      end loop;
122      for i in 0 .. alpha_size-1 loop
123        idx := length(i)+1;
124        base(idx) := base(idx) + 1;
125      end loop;
126      for i in 1 .. max_code_len-1 loop
127        base(i) := base(i) + base(i-1);
128      end loop;
129      vec := 0;
130      for i in min_len .. max_len loop
131        vec:= vec + base(i+1) - base(i);
132        limit(i) := vec - 1;
133        vec := vec * 2;
134      end loop;
135      for i in min_len+1 .. max_len loop
136        base(i) := (limit(i-1)+1) * 2 - base(i);
137      end loop;
138    end Create_Huffman_Decoding_Tables;
139
140    procedure Init is
141      magic: String(1..3);
142      b: Byte;
143    begin
144      --  Read the magic.
145      for i in magic'Range loop
146        b:= Read_byte;
147        magic(i):= Character'Val(b);
148      end loop;
149      if magic /= "BZh" then
150        raise bad_header_magic;
151      end if;
152      --  Read the block size and allocate the working array.
153      b:= Read_byte;
154      block_size:= Natural(b) - Character'Pos('0');
155      tt:= new Tcardinal_array(0 .. block_size * sub_block_size);
156    end Init;
157
158    function Get_Bits(n: Natural) return Byte is
159      Result_get_bits : Byte;
160      data: Byte;
161    begin
162      if n > bits_available then
163        data:= Read_byte;
164        Result_get_bits:= Shift_Right(read_data, 8-n) or Shift_Right(data, 8-(n-bits_available));
165        read_data:= Shift_Left(data, n-bits_available);
166        bits_available:= bits_available + 8;
167      else
168        Result_get_bits:= Shift_Right(read_data, 8-n);
169        read_data:= Shift_Left(read_data, n);
170      end if;
171      bits_available:= bits_available - n;
172      return Result_get_bits;
173    end Get_Bits;
174
175    function Get_Bits_32(n: Natural) return Unsigned_32 is
176    begin
177      return Unsigned_32(Get_Bits(n));
178    end Get_Bits_32;
179
180    function Get_Boolean return Boolean is
181    begin
182      return Boolean'Val(Get_Bits(1));
183    end Get_Boolean;
184
185    function Get_Byte return Byte is
186    begin
187      return Get_Bits(8);
188    end Get_Byte;
189
190    function Get_Cardinal_24 return Unsigned_32 is
191    begin
192      return Shift_Left(Get_Bits_32(8),16) or Shift_Left(Get_Bits_32(8),8) or Get_Bits_32(8);
193    end Get_Cardinal_24;
194
195    function Get_Cardinal_32 return Unsigned_32 is
196    begin
197      return Shift_Left(Get_Bits_32(8),24)  or
198             Shift_Left(Get_Bits_32(8),16)  or
199             Shift_Left(Get_Bits_32(8), 8)  or
200             Get_Bits_32(8);
201    end Get_Cardinal_32;
202
203    --  Receive the mapping table. To save space, the inuse set is stored in pieces
204    --  of 16 bits. First 16 bits are stored which pieces of 16 bits are used, then
205    --  the pieces follow.
206    procedure Receive_Mapping_Table is
207      inuse16: array(0 .. 15) of Boolean;
208      --* inuse: array(0 .. 255) of Boolean; -- for dump purposes
209    begin
210      --  Receive the first 16 bits which tell which pieces are stored.
211      for i in inuse16'Range loop
212        inuse16(i) := Get_Boolean;
213      end loop;
214      --  Receive the used pieces.
215      --* inuse:= (others => False);
216      inuse_count := 0;
217      for i in inuse16'Range loop
218        if inuse16(i) then
219          for j in 0 .. 15 loop
220            if Get_Boolean then
221              --* inuse(16*i+j):= True;
222              seq_to_unseq(inuse_count) := 16*i + j;
223              inuse_count:= inuse_count + 1;
224            end if;
225          end loop;
226        end if;
227      end loop;
228    end Receive_Mapping_Table;
229
230    procedure Receive_Selectors is
231      j: Byte;
232    begin
233      group_count:= Natural(Get_Bits(3));
234      selector_count:= Natural(Shift_Left(Get_Bits_32(8), 7) or Get_Bits_32(7));
235      for i in 0 .. selector_count-1 loop
236        j:=0;
237        while Get_Boolean loop
238          j:= j + 1;
239          if j > 5 then
240            raise data_error;
241          end if;
242        end loop;
243        selector_mtf(i):=j;
244      end loop;
245    end Receive_Selectors;
246
247    procedure Undo_MTF_Values_For_Selectors is
248      pos: array (0 .. max_groups) of Natural;
249      v, tmp: Natural;
250    begin
251      for w in 0 .. group_count-1 loop
252        pos(w) := w;
253      end loop;
254      for i in 0 .. selector_count-1 loop
255        v := Natural(selector_mtf(i));
256        tmp:= pos(v);
257        while v/=0 loop
258          pos(v) := pos(v-1);
259          v:= v - 1;
260        end loop;
261        pos(0) := tmp;
262        selector(i) := Byte(tmp);
263      end loop;
264    end Undo_MTF_Values_For_Selectors;
265
266    procedure Receive_Huffman_Bit_Lengths is
267      current_bit_length: Natural;
268    begin
269      for t in 0 .. group_count-1 loop
270        current_bit_length:= Natural(Get_Bits(5));
271        for symbol in 0 .. alpha_size-1 loop
272          loop
273            if current_bit_length not in 1..20 then
274              raise data_error;
275            end if;
276            exit when not Get_Boolean;
277            if Get_Boolean then
278              current_bit_length:= current_bit_length - 1;
279            else
280              current_bit_length:= current_bit_length + 1;
281            end if;
282          end loop;
283          len(t)(symbol) := current_bit_length;
284        end loop;
285      end loop;
286    end Receive_Huffman_Bit_Lengths;
287
288    procedure Make_Huffman_Tables is
289      minlen, maxlen: Natural;
290    begin
291      for t in 0 .. group_count-1 loop
292        minlen := 32;
293        maxlen := 0;
294        for i in 0 .. alpha_size-1 loop
295          if len(t)(i) > maxlen then
296            maxlen := len(t)(i);
297          end if;
298          if len(t)(i) < minlen then
299            minlen := len(t)(i);
300          end if;
301        end loop;
302        Create_Huffman_Decoding_Tables(
303          limit(t), base(t), perm(t), len(t),
304          minlen, maxlen, alpha_size
305        );
306        minlens(t):= minlen;
307      end loop;
308    end Make_Huffman_Tables;
309
310    -------------------------
311    -- MTF - Move To Front --
312    -------------------------
313
314    procedure Receive_MTF_Values is
315      --
316      mtfa_size: constant:= 4096;
317      mtfl_size: constant:= 16;
318      mtfbase: array (0 .. 256 / mtfl_size - 1) of Natural;
319      mtfa: array (0 .. mtfa_size - 1) of Natural;
320      --
321      procedure Init_MTF is
322        k: Natural:= mtfa_size-1;
323      begin
324        for i in reverse 0 .. 256  /  mtfl_size-1 loop
325          for j in reverse 0 .. mtfl_size-1 loop
326            mtfa(k):= i*mtfl_size + j;
327            k:= k - 1;
328          end loop;
329          mtfbase(i):= k+1;
330        end loop;
331      end Init_MTF;
332      --
333      group_pos, group_no: Integer;
334      gminlen, gsel: Natural;
335      --
336      function Get_MTF_Value return Unsigned_32 is
337        zn: Natural;
338        zvec: Unsigned_32;
339      begin
340        if group_pos = 0 then
341          group_no:= group_no + 1;
342          group_pos:= group_size;
343          gsel:= Natural(selector(group_no));
344          gminlen:= minlens(gsel);
345        end if;
346        group_pos:= group_pos - 1;
347        zn:= gminlen;
348        zvec:= Get_Bits_32(zn);
349        while zvec > limit(gsel)(zn) loop
350          zn:= zn + 1;
351          zvec:= Shift_Left(zvec, 1) or Get_Bits_32(1);
352        end loop;
353        return perm(gsel)(Natural(zvec-base(gsel)(zn)));
354      end Get_MTF_Value;
355      --
356      procedure Move_MTF_Block is
357        j, k: Natural;
358      begin
359        k:= mtfa_size;
360        for i in reverse 0 .. 256  /  mtfl_size - 1 loop
361          j:= mtfbase(i);
362          mtfa(k-16..k-1):= mtfa(j..j+15);
363          k:= k - 16;
364          mtfbase(i):= k;
365        end loop;
366      end Move_MTF_Block;
367      --
368      run_b: constant:= 1;
369      t: Natural;
370      next_sym: Unsigned_32;
371      es: Natural;
372      n, nn: Natural;
373      p,q: Natural; -- indexes mtfa
374      u,v: Natural; -- indexes mtfbase
375      lno, off: Natural;
376    begin -- Receive_MTF_Values
377      group_no:= -1;
378      group_pos:= 0;
379      t:= 0;
380      cftab:= (others => 0);
381      Init_MTF;
382      next_sym:= Get_MTF_Value;
383      --
384      while Natural(next_sym) /= inuse_count+1 loop
385        if next_sym <= run_b then
386          es:= 0;
387          n:= 0;
388          loop
389            es:= es + Natural(Shift_Left(next_sym+1, n));
390            n:= n + 1;
391            next_sym:= Get_MTF_Value;
392            exit when next_sym > run_b;
393          end loop;
394          n:= seq_to_unseq( mtfa(mtfbase(0)) );
395          cftab(n):= cftab(n) + es;
396          if t+es > sub_block_size * block_size then
397            raise data_error;
398          end if;
399          while es > 0 loop
400            tt(t):= Unsigned_32(n);
401            es:= es - 1;
402            t:= t + 1;
403          end loop;
404        else
405          nn:= Natural(next_sym - 1);
406          if nn < mtfl_size then
407            -- Avoid the costs of the general case.
408            p:= mtfbase(0);
409            q:= p + nn;
410            n:= mtfa(q);
411            loop
412              mtfa(q):= mtfa(q-1);
413              q:= q - 1;
414              exit when q = p;
415            end loop;
416            mtfa(q):= n;
417          else
418            --  General case.
419            lno:= nn   /   mtfl_size;
420            off:= nn  mod  mtfl_size;
421            p:= mtfbase(lno);
422            q:= p + off;
423            n:= mtfa(q);
424            while q /= p loop
425              mtfa(q):= mtfa(q-1);
426              q:= q - 1;
427            end loop;
428            u:= mtfbase'First;
429            v:= u + lno;
430            loop
431              mtfa(mtfbase(v)):= mtfa(mtfbase(v-1)+mtfl_size-1);
432              v:= v - 1;
433              mtfbase(v):= mtfbase(v) - 1;
434              exit when v = u;
435            end loop;
436            mtfa( mtfbase(v) ):= n;
437            if mtfbase(v) = 0 then
438              Move_MTF_Block;
439            end if;
440          end if;
441          cftab(seq_to_unseq(n)):= cftab(seq_to_unseq(n)) + 1;
442          tt(t):= Unsigned_32(seq_to_unseq(n));
443          t:= t + 1;
444          if t > sub_block_size * block_size then
445            raise data_error;
446          end if;
447          next_sym:= Get_MTF_Value;
448        end if;
449      end loop;
450      tt_count:= t;
451      --  Setup cftab to facilitate generation of T^(-1).
452      t:= 0;
453      for i in 0 .. 256 loop
454        nn:= cftab(i);
455        cftab(i):= t;
456        t:= t + nn;
457      end loop;
458    end Receive_MTF_Values;
459
460    procedure BWT_Detransform is
461      a : Unsigned_32 := 0;
462      r, i255: Natural;
463    begin
464      for p in 0 .. tt_count - 1 loop
465        i255 := Natural(tt(p) and 16#ff#);
466        r := cftab(i255);
467        cftab(i255) := cftab(i255) + 1;
468        tt(r) := tt(r) or a;
469        a := a + 16#100#;
470      end loop;
471    end BWT_Detransform;
472
473    compare_final_CRC: Boolean:= False;
474    stored_blockcrc, mem_stored_blockcrc, computed_crc: Unsigned_32;
475
476    -- Decode a new compressed block.
477    function Decode_Block return Boolean is
478      magic: String(1 .. 6);
479    begin
480      for i in 1 .. 6 loop
481        magic(i):= Character'Val(Get_Byte);
482      end loop;
483      if magic = "1AY&SY" then
484        if check_CRC then
485          if compare_final_CRC then
486            null; -- initialisation is delayed until the rle buffer is empty
487          else
488            CRC.Init(computed_crc); -- Initialize for next block.
489          end if;
490        end if;
491        stored_blockcrc := Get_Cardinal_32;
492        block_randomized := Get_Boolean;
493        block_origin := Natural(Get_Cardinal_24);
494        Receive_Mapping_Table;
495        alpha_size := inuse_count + 2;
496        Receive_Selectors;
497        Undo_MTF_Values_For_Selectors;
498        Receive_Huffman_Bit_Lengths;
499        Make_Huffman_Tables;
500        Receive_MTF_Values;
501        --  Undo the Burrows Wheeler transformation.
502        BWT_Detransform;
503        decode_available := tt_count;
504        return True;
505      elsif magic = Character'Val(16#17#) & "rE8P" & Character'Val(16#90#) then
506        return False;
507      else
508        raise bad_block_magic;
509      end if;
510    end Decode_Block;
511
512    next_rle_idx: Integer:= -2;
513    buf: Buffer(1 .. output_buffer_size);
514    last: Natural;
515
516    procedure Read is
517      shorten: Natural:= 0;
518
519      procedure RLE_Read is
520        rle_len: Natural;
521        data: Byte;
522        idx: Integer:= buf'First;
523        count: Integer:= buf'Length;
524        --
525        procedure RLE_Write is
526          pragma Inline(RLE_Write);
527        begin
528          loop
529            buf(idx):= data;
530            idx:= idx + 1;
531            count:= count - 1;
532            rle_len:= rle_len - 1;
533            if check_CRC then
534              CRC.Update(computed_crc, data);
535              if rle_len = 0 and then compare_final_CRC then
536                if CRC.Final(computed_crc) /= mem_stored_blockcrc then
537                  raise block_crc_check_failed;
538                end if;
539                compare_final_CRC:= False;
540                CRC.Init(computed_crc); -- Initialize for next block.
541              end if;
542            end if;
543            exit when rle_len = 0 or count = 0;
544          end loop;
545        end RLE_Write;
546        --
547        --  Handle extreme cases of data of length 1, 2.
548        --  This exception is always handled (see end of RLE_Read).
549        input_dried : exception;
550        --
551        -- Make next_rle_idx index to the next decoded byte.
552        -- If next_rle_idx did index to the last
553        -- byte in the current block, decode the next block.
554        --
555        procedure Consume_RLE is
556          pragma Inline(Consume_RLE);
557        begin
558          next_rle_idx:= Integer(Shift_Right(tt(next_rle_idx),8));
559          decode_available:= decode_available - 1;
560          if decode_available = 0 then
561            compare_final_CRC:= True;
562            mem_stored_blockcrc:= stored_blockcrc;
563            -- ^ There might be a new block when last block's
564            --   rle is finally emptied.
565            --
566            -- ** New block
567            if Decode_Block then
568              next_rle_idx:= Natural(Shift_Right(tt(block_origin),8));
569            else
570              next_rle_idx:= -1;
571              end_reached:= True;
572            end if;
573            -- **
574            if end_reached then
575              raise input_dried;
576            end if;
577          end if;
578        end Consume_RLE;
579        --
580        function RLE_Byte return Byte is
581          pragma Inline(RLE_Byte);
582        begin
583          return Byte(tt(next_rle_idx) and 16#FF#);
584        end RLE_Byte;
585        --
586        function RLE_Possible return Boolean is
587          pragma Inline(RLE_Possible);
588        begin
589          return decode_available > 0 and then data = RLE_Byte;
590        end RLE_Possible;
591        --
592      begin -- RLE_Read
593        rle_len:= rle_run_left;
594        data:= rle_run_data;
595        if block_randomized then
596          raise randomized_not_yet_implemented;
597        end if;
598        if rle_len /= 0 then
599          RLE_Write;
600          if count = 0 then
601            shorten:= 0;
602            rle_run_data:= data;
603            rle_run_left:= rle_len;
604            return;
605          end if;
606        end if;
607        begin
608          -- The big loop
609          loop
610            if decode_available = 0 or end_reached then
611              exit;
612            end if;
613            rle_len:= 1;
614            data:= RLE_Byte;
615            Consume_RLE;
616            if RLE_Possible then
617              rle_len:= rle_len + 1;
618              Consume_RLE;
619              if RLE_Possible then
620                rle_len:= rle_len + 1;
621                Consume_RLE;
622                if RLE_Possible then
623                  Consume_RLE;
624                  rle_len:= rle_len + Natural(RLE_Byte)+1;
625                  Consume_RLE;
626                end if;
627              end if;
628            end if;
629            RLE_Write;
630            exit when count = 0;
631          end loop;
632        exception
633          when input_dried => RLE_Write;
634        end;
635        shorten := count;
636        rle_run_data := data;
637        rle_run_left := rle_len;
638      end RLE_Read;
639
640    begin -- Read
641      last:= buf'Last;
642      if decode_available = Natural'Last then
643        --  Initialize the rle process:
644        --       - Decode a block
645        --       - Initialize pointer.
646        if Decode_Block then
647          next_rle_idx:= Natural(Shift_Right(tt(block_origin), 8));
648        else
649          next_rle_idx:= -1;
650          end_reached:= True;
651        end if;
652      end if;
653      RLE_Read;
654      last:= last - shorten;
655    end Read;
656
657  begin
658    Init;
659    loop
660      Read;
661      Write( buf(1..last) );
662      exit when end_reached and rle_run_left = 0;
663    end loop;
664    Dispose(tt);
665  end Decompress;
666
667end BZip2.Decoding;
668