1-- Legal licensing note: 2 3-- Copyright (c) 2009 .. 2018 Gautier de Montmollin (maintainer of the Ada version) 4-- SWITZERLAND 5 6-- Permission is hereby granted, free of charge, to any person obtaining a copy 7-- of this software and associated documentation files (the "Software"), to deal 8-- in the Software without restriction, including without limitation the rights 9-- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10-- copies of the Software, and to permit persons to whom the Software is 11-- furnished to do so, subject to the following conditions: 12 13-- The above copyright notice and this permission notice shall be included in 14-- all copies or substantial portions of the Software. 15 16-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21-- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22-- THE SOFTWARE. 23 24-- NB: this is the MIT License, as found 21-Aug-2016 on the site 25-- http://www.opensource.org/licenses/mit-license.php 26 27-- Translated on 20-Oct-2009 by (New) P2Ada v. 15-Nov-2006 28-- Rework by G. de Montmollin (see spec. for details) 29 30with Ada.Unchecked_Deallocation; 31 32package body BZip2.Decoding is 33 34 procedure Decompress is 35 36 max_groups : constant:= 6; 37 max_alpha_size: constant:= 258; 38 max_code_len : constant:= 23; 39 group_size : constant:= 50; 40 max_selectors : constant:= 2 + (900_000 / group_size); 41 42 sub_block_size: constant:= 100_000; 43 44 type Length_array is array (Integer range <>) of Natural; 45 46 block_randomized: Boolean:= False; 47 block_size: Natural; 48 49 use Interfaces; 50 51 type Tcardinal_array is array (Integer range <>) of Unsigned_32; 52 type Pcardinal_array is access Tcardinal_array; 53 procedure Dispose is new Ada.Unchecked_Deallocation(Tcardinal_array, Pcardinal_array); 54 tt: Pcardinal_array; 55 tt_count: Natural; 56 57 rle_run_left: Natural:= 0; 58 rle_run_data: Byte:= 0; 59 decode_available: Natural:= Natural'Last; 60 block_origin: Natural:= 0; 61 read_data: Byte:= 0; 62 bits_available: Natural:= 0; 63 inuse_count: Natural; 64 seq_to_unseq: array (0 .. 255 ) of Natural; 65 alpha_size: Natural; 66 group_count: Natural; 67 -- 68 selector_count: Natural; 69 selector, selector_mtf: array (0 .. max_selectors) of Byte; 70 -- 71 type Alpha_U32_array is array (0 .. max_alpha_size) of Unsigned_32; 72 type Alpha_Nat_array is array (0 .. max_alpha_size) of Natural; 73 74 len : array (0 .. max_groups) of Alpha_Nat_array; 75 limit, 76 base , 77 perm : array (0 .. max_groups) of Alpha_U32_array; 78 -- 79 minlens: Length_array(0 .. max_groups); 80 cftab: array (0 .. 257) of Natural; 81 -- 82 end_reached: Boolean:= False; 83 84 in_buf: Buffer(1 .. input_buffer_size); 85 in_idx: Natural:= in_buf'Last + 1; 86 87 function Read_byte return Byte is 88 res: Byte; 89 begin 90 if in_idx > in_buf'Last then 91 Read(in_buf); 92 in_idx:= in_buf'First; 93 end if; 94 res:= in_buf(in_idx); 95 in_idx:= in_idx + 1; 96 return res; 97 end Read_byte; 98 99 procedure Create_Huffman_Decoding_Tables( 100 limit, base, perm: in out Alpha_U32_array; 101 length : in Alpha_Nat_array; 102 min_len, max_len : Natural; 103 alpha_size : Integer 104 ) 105 is 106 pp, idx: Integer; 107 vec: Unsigned_32; 108 begin 109 pp:= 0; 110 for i in min_len .. max_len loop 111 for j in 0 .. alpha_size-1 loop 112 if length(j) = i then 113 perm(pp) := Unsigned_32(j); 114 pp := pp + 1; 115 end if; 116 end loop; 117 end loop; 118 for i in 0 .. max_code_len-1 loop 119 base(i) := 0; 120 limit(i) := 0; 121 end loop; 122 for i in 0 .. alpha_size-1 loop 123 idx := length(i)+1; 124 base(idx) := base(idx) + 1; 125 end loop; 126 for i in 1 .. max_code_len-1 loop 127 base(i) := base(i) + base(i-1); 128 end loop; 129 vec := 0; 130 for i in min_len .. max_len loop 131 vec:= vec + base(i+1) - base(i); 132 limit(i) := vec - 1; 133 vec := vec * 2; 134 end loop; 135 for i in min_len+1 .. max_len loop 136 base(i) := (limit(i-1)+1) * 2 - base(i); 137 end loop; 138 end Create_Huffman_Decoding_Tables; 139 140 procedure Init is 141 magic: String(1..3); 142 b: Byte; 143 begin 144 -- Read the magic. 145 for i in magic'Range loop 146 b:= Read_byte; 147 magic(i):= Character'Val(b); 148 end loop; 149 if magic /= "BZh" then 150 raise bad_header_magic; 151 end if; 152 -- Read the block size and allocate the working array. 153 b:= Read_byte; 154 block_size:= Natural(b) - Character'Pos('0'); 155 tt:= new Tcardinal_array(0 .. block_size * sub_block_size); 156 end Init; 157 158 function Get_Bits(n: Natural) return Byte is 159 Result_get_bits : Byte; 160 data: Byte; 161 begin 162 if n > bits_available then 163 data:= Read_byte; 164 Result_get_bits:= Shift_Right(read_data, 8-n) or Shift_Right(data, 8-(n-bits_available)); 165 read_data:= Shift_Left(data, n-bits_available); 166 bits_available:= bits_available + 8; 167 else 168 Result_get_bits:= Shift_Right(read_data, 8-n); 169 read_data:= Shift_Left(read_data, n); 170 end if; 171 bits_available:= bits_available - n; 172 return Result_get_bits; 173 end Get_Bits; 174 175 function Get_Bits_32(n: Natural) return Unsigned_32 is 176 begin 177 return Unsigned_32(Get_Bits(n)); 178 end Get_Bits_32; 179 180 function Get_Boolean return Boolean is 181 begin 182 return Boolean'Val(Get_Bits(1)); 183 end Get_Boolean; 184 185 function Get_Byte return Byte is 186 begin 187 return Get_Bits(8); 188 end Get_Byte; 189 190 function Get_Cardinal_24 return Unsigned_32 is 191 begin 192 return Shift_Left(Get_Bits_32(8),16) or Shift_Left(Get_Bits_32(8),8) or Get_Bits_32(8); 193 end Get_Cardinal_24; 194 195 function Get_Cardinal_32 return Unsigned_32 is 196 begin 197 return Shift_Left(Get_Bits_32(8),24) or 198 Shift_Left(Get_Bits_32(8),16) or 199 Shift_Left(Get_Bits_32(8), 8) or 200 Get_Bits_32(8); 201 end Get_Cardinal_32; 202 203 -- Receive the mapping table. To save space, the inuse set is stored in pieces 204 -- of 16 bits. First 16 bits are stored which pieces of 16 bits are used, then 205 -- the pieces follow. 206 procedure Receive_Mapping_Table is 207 inuse16: array(0 .. 15) of Boolean; 208 --* inuse: array(0 .. 255) of Boolean; -- for dump purposes 209 begin 210 -- Receive the first 16 bits which tell which pieces are stored. 211 for i in inuse16'Range loop 212 inuse16(i) := Get_Boolean; 213 end loop; 214 -- Receive the used pieces. 215 --* inuse:= (others => False); 216 inuse_count := 0; 217 for i in inuse16'Range loop 218 if inuse16(i) then 219 for j in 0 .. 15 loop 220 if Get_Boolean then 221 --* inuse(16*i+j):= True; 222 seq_to_unseq(inuse_count) := 16*i + j; 223 inuse_count:= inuse_count + 1; 224 end if; 225 end loop; 226 end if; 227 end loop; 228 end Receive_Mapping_Table; 229 230 procedure Receive_Selectors is 231 j: Byte; 232 begin 233 group_count:= Natural(Get_Bits(3)); 234 selector_count:= Natural(Shift_Left(Get_Bits_32(8), 7) or Get_Bits_32(7)); 235 for i in 0 .. selector_count-1 loop 236 j:=0; 237 while Get_Boolean loop 238 j:= j + 1; 239 if j > 5 then 240 raise data_error; 241 end if; 242 end loop; 243 selector_mtf(i):=j; 244 end loop; 245 end Receive_Selectors; 246 247 procedure Undo_MTF_Values_For_Selectors is 248 pos: array (0 .. max_groups) of Natural; 249 v, tmp: Natural; 250 begin 251 for w in 0 .. group_count-1 loop 252 pos(w) := w; 253 end loop; 254 for i in 0 .. selector_count-1 loop 255 v := Natural(selector_mtf(i)); 256 tmp:= pos(v); 257 while v/=0 loop 258 pos(v) := pos(v-1); 259 v:= v - 1; 260 end loop; 261 pos(0) := tmp; 262 selector(i) := Byte(tmp); 263 end loop; 264 end Undo_MTF_Values_For_Selectors; 265 266 procedure Receive_Huffman_Bit_Lengths is 267 current_bit_length: Natural; 268 begin 269 for t in 0 .. group_count-1 loop 270 current_bit_length:= Natural(Get_Bits(5)); 271 for symbol in 0 .. alpha_size-1 loop 272 loop 273 if current_bit_length not in 1..20 then 274 raise data_error; 275 end if; 276 exit when not Get_Boolean; 277 if Get_Boolean then 278 current_bit_length:= current_bit_length - 1; 279 else 280 current_bit_length:= current_bit_length + 1; 281 end if; 282 end loop; 283 len(t)(symbol) := current_bit_length; 284 end loop; 285 end loop; 286 end Receive_Huffman_Bit_Lengths; 287 288 procedure Make_Huffman_Tables is 289 minlen, maxlen: Natural; 290 begin 291 for t in 0 .. group_count-1 loop 292 minlen := 32; 293 maxlen := 0; 294 for i in 0 .. alpha_size-1 loop 295 if len(t)(i) > maxlen then 296 maxlen := len(t)(i); 297 end if; 298 if len(t)(i) < minlen then 299 minlen := len(t)(i); 300 end if; 301 end loop; 302 Create_Huffman_Decoding_Tables( 303 limit(t), base(t), perm(t), len(t), 304 minlen, maxlen, alpha_size 305 ); 306 minlens(t):= minlen; 307 end loop; 308 end Make_Huffman_Tables; 309 310 ------------------------- 311 -- MTF - Move To Front -- 312 ------------------------- 313 314 procedure Receive_MTF_Values is 315 -- 316 mtfa_size: constant:= 4096; 317 mtfl_size: constant:= 16; 318 mtfbase: array (0 .. 256 / mtfl_size - 1) of Natural; 319 mtfa: array (0 .. mtfa_size - 1) of Natural; 320 -- 321 procedure Init_MTF is 322 k: Natural:= mtfa_size-1; 323 begin 324 for i in reverse 0 .. 256 / mtfl_size-1 loop 325 for j in reverse 0 .. mtfl_size-1 loop 326 mtfa(k):= i*mtfl_size + j; 327 k:= k - 1; 328 end loop; 329 mtfbase(i):= k+1; 330 end loop; 331 end Init_MTF; 332 -- 333 group_pos, group_no: Integer; 334 gminlen, gsel: Natural; 335 -- 336 function Get_MTF_Value return Unsigned_32 is 337 zn: Natural; 338 zvec: Unsigned_32; 339 begin 340 if group_pos = 0 then 341 group_no:= group_no + 1; 342 group_pos:= group_size; 343 gsel:= Natural(selector(group_no)); 344 gminlen:= minlens(gsel); 345 end if; 346 group_pos:= group_pos - 1; 347 zn:= gminlen; 348 zvec:= Get_Bits_32(zn); 349 while zvec > limit(gsel)(zn) loop 350 zn:= zn + 1; 351 zvec:= Shift_Left(zvec, 1) or Get_Bits_32(1); 352 end loop; 353 return perm(gsel)(Natural(zvec-base(gsel)(zn))); 354 end Get_MTF_Value; 355 -- 356 procedure Move_MTF_Block is 357 j, k: Natural; 358 begin 359 k:= mtfa_size; 360 for i in reverse 0 .. 256 / mtfl_size - 1 loop 361 j:= mtfbase(i); 362 mtfa(k-16..k-1):= mtfa(j..j+15); 363 k:= k - 16; 364 mtfbase(i):= k; 365 end loop; 366 end Move_MTF_Block; 367 -- 368 run_b: constant:= 1; 369 t: Natural; 370 next_sym: Unsigned_32; 371 es: Natural; 372 n, nn: Natural; 373 p,q: Natural; -- indexes mtfa 374 u,v: Natural; -- indexes mtfbase 375 lno, off: Natural; 376 begin -- Receive_MTF_Values 377 group_no:= -1; 378 group_pos:= 0; 379 t:= 0; 380 cftab:= (others => 0); 381 Init_MTF; 382 next_sym:= Get_MTF_Value; 383 -- 384 while Natural(next_sym) /= inuse_count+1 loop 385 if next_sym <= run_b then 386 es:= 0; 387 n:= 0; 388 loop 389 es:= es + Natural(Shift_Left(next_sym+1, n)); 390 n:= n + 1; 391 next_sym:= Get_MTF_Value; 392 exit when next_sym > run_b; 393 end loop; 394 n:= seq_to_unseq( mtfa(mtfbase(0)) ); 395 cftab(n):= cftab(n) + es; 396 if t+es > sub_block_size * block_size then 397 raise data_error; 398 end if; 399 while es > 0 loop 400 tt(t):= Unsigned_32(n); 401 es:= es - 1; 402 t:= t + 1; 403 end loop; 404 else 405 nn:= Natural(next_sym - 1); 406 if nn < mtfl_size then 407 -- Avoid the costs of the general case. 408 p:= mtfbase(0); 409 q:= p + nn; 410 n:= mtfa(q); 411 loop 412 mtfa(q):= mtfa(q-1); 413 q:= q - 1; 414 exit when q = p; 415 end loop; 416 mtfa(q):= n; 417 else 418 -- General case. 419 lno:= nn / mtfl_size; 420 off:= nn mod mtfl_size; 421 p:= mtfbase(lno); 422 q:= p + off; 423 n:= mtfa(q); 424 while q /= p loop 425 mtfa(q):= mtfa(q-1); 426 q:= q - 1; 427 end loop; 428 u:= mtfbase'First; 429 v:= u + lno; 430 loop 431 mtfa(mtfbase(v)):= mtfa(mtfbase(v-1)+mtfl_size-1); 432 v:= v - 1; 433 mtfbase(v):= mtfbase(v) - 1; 434 exit when v = u; 435 end loop; 436 mtfa( mtfbase(v) ):= n; 437 if mtfbase(v) = 0 then 438 Move_MTF_Block; 439 end if; 440 end if; 441 cftab(seq_to_unseq(n)):= cftab(seq_to_unseq(n)) + 1; 442 tt(t):= Unsigned_32(seq_to_unseq(n)); 443 t:= t + 1; 444 if t > sub_block_size * block_size then 445 raise data_error; 446 end if; 447 next_sym:= Get_MTF_Value; 448 end if; 449 end loop; 450 tt_count:= t; 451 -- Setup cftab to facilitate generation of T^(-1). 452 t:= 0; 453 for i in 0 .. 256 loop 454 nn:= cftab(i); 455 cftab(i):= t; 456 t:= t + nn; 457 end loop; 458 end Receive_MTF_Values; 459 460 procedure BWT_Detransform is 461 a : Unsigned_32 := 0; 462 r, i255: Natural; 463 begin 464 for p in 0 .. tt_count - 1 loop 465 i255 := Natural(tt(p) and 16#ff#); 466 r := cftab(i255); 467 cftab(i255) := cftab(i255) + 1; 468 tt(r) := tt(r) or a; 469 a := a + 16#100#; 470 end loop; 471 end BWT_Detransform; 472 473 compare_final_CRC: Boolean:= False; 474 stored_blockcrc, mem_stored_blockcrc, computed_crc: Unsigned_32; 475 476 -- Decode a new compressed block. 477 function Decode_Block return Boolean is 478 magic: String(1 .. 6); 479 begin 480 for i in 1 .. 6 loop 481 magic(i):= Character'Val(Get_Byte); 482 end loop; 483 if magic = "1AY&SY" then 484 if check_CRC then 485 if compare_final_CRC then 486 null; -- initialisation is delayed until the rle buffer is empty 487 else 488 CRC.Init(computed_crc); -- Initialize for next block. 489 end if; 490 end if; 491 stored_blockcrc := Get_Cardinal_32; 492 block_randomized := Get_Boolean; 493 block_origin := Natural(Get_Cardinal_24); 494 Receive_Mapping_Table; 495 alpha_size := inuse_count + 2; 496 Receive_Selectors; 497 Undo_MTF_Values_For_Selectors; 498 Receive_Huffman_Bit_Lengths; 499 Make_Huffman_Tables; 500 Receive_MTF_Values; 501 -- Undo the Burrows Wheeler transformation. 502 BWT_Detransform; 503 decode_available := tt_count; 504 return True; 505 elsif magic = Character'Val(16#17#) & "rE8P" & Character'Val(16#90#) then 506 return False; 507 else 508 raise bad_block_magic; 509 end if; 510 end Decode_Block; 511 512 next_rle_idx: Integer:= -2; 513 buf: Buffer(1 .. output_buffer_size); 514 last: Natural; 515 516 procedure Read is 517 shorten: Natural:= 0; 518 519 procedure RLE_Read is 520 rle_len: Natural; 521 data: Byte; 522 idx: Integer:= buf'First; 523 count: Integer:= buf'Length; 524 -- 525 procedure RLE_Write is 526 pragma Inline(RLE_Write); 527 begin 528 loop 529 buf(idx):= data; 530 idx:= idx + 1; 531 count:= count - 1; 532 rle_len:= rle_len - 1; 533 if check_CRC then 534 CRC.Update(computed_crc, data); 535 if rle_len = 0 and then compare_final_CRC then 536 if CRC.Final(computed_crc) /= mem_stored_blockcrc then 537 raise block_crc_check_failed; 538 end if; 539 compare_final_CRC:= False; 540 CRC.Init(computed_crc); -- Initialize for next block. 541 end if; 542 end if; 543 exit when rle_len = 0 or count = 0; 544 end loop; 545 end RLE_Write; 546 -- 547 -- Handle extreme cases of data of length 1, 2. 548 -- This exception is always handled (see end of RLE_Read). 549 input_dried : exception; 550 -- 551 -- Make next_rle_idx index to the next decoded byte. 552 -- If next_rle_idx did index to the last 553 -- byte in the current block, decode the next block. 554 -- 555 procedure Consume_RLE is 556 pragma Inline(Consume_RLE); 557 begin 558 next_rle_idx:= Integer(Shift_Right(tt(next_rle_idx),8)); 559 decode_available:= decode_available - 1; 560 if decode_available = 0 then 561 compare_final_CRC:= True; 562 mem_stored_blockcrc:= stored_blockcrc; 563 -- ^ There might be a new block when last block's 564 -- rle is finally emptied. 565 -- 566 -- ** New block 567 if Decode_Block then 568 next_rle_idx:= Natural(Shift_Right(tt(block_origin),8)); 569 else 570 next_rle_idx:= -1; 571 end_reached:= True; 572 end if; 573 -- ** 574 if end_reached then 575 raise input_dried; 576 end if; 577 end if; 578 end Consume_RLE; 579 -- 580 function RLE_Byte return Byte is 581 pragma Inline(RLE_Byte); 582 begin 583 return Byte(tt(next_rle_idx) and 16#FF#); 584 end RLE_Byte; 585 -- 586 function RLE_Possible return Boolean is 587 pragma Inline(RLE_Possible); 588 begin 589 return decode_available > 0 and then data = RLE_Byte; 590 end RLE_Possible; 591 -- 592 begin -- RLE_Read 593 rle_len:= rle_run_left; 594 data:= rle_run_data; 595 if block_randomized then 596 raise randomized_not_yet_implemented; 597 end if; 598 if rle_len /= 0 then 599 RLE_Write; 600 if count = 0 then 601 shorten:= 0; 602 rle_run_data:= data; 603 rle_run_left:= rle_len; 604 return; 605 end if; 606 end if; 607 begin 608 -- The big loop 609 loop 610 if decode_available = 0 or end_reached then 611 exit; 612 end if; 613 rle_len:= 1; 614 data:= RLE_Byte; 615 Consume_RLE; 616 if RLE_Possible then 617 rle_len:= rle_len + 1; 618 Consume_RLE; 619 if RLE_Possible then 620 rle_len:= rle_len + 1; 621 Consume_RLE; 622 if RLE_Possible then 623 Consume_RLE; 624 rle_len:= rle_len + Natural(RLE_Byte)+1; 625 Consume_RLE; 626 end if; 627 end if; 628 end if; 629 RLE_Write; 630 exit when count = 0; 631 end loop; 632 exception 633 when input_dried => RLE_Write; 634 end; 635 shorten := count; 636 rle_run_data := data; 637 rle_run_left := rle_len; 638 end RLE_Read; 639 640 begin -- Read 641 last:= buf'Last; 642 if decode_available = Natural'Last then 643 -- Initialize the rle process: 644 -- - Decode a block 645 -- - Initialize pointer. 646 if Decode_Block then 647 next_rle_idx:= Natural(Shift_Right(tt(block_origin), 8)); 648 else 649 next_rle_idx:= -1; 650 end_reached:= True; 651 end if; 652 end if; 653 RLE_Read; 654 last:= last - shorten; 655 end Read; 656 657 begin 658 Init; 659 loop 660 Read; 661 Write( buf(1..last) ); 662 exit when end_reached and rle_run_left = 0; 663 end loop; 664 Dispose(tt); 665 end Decompress; 666 667end BZip2.Decoding; 668