1------------------------------------------------------------------------------ 2-- File: rezip_lib.adb 3-- Description: Recompression tool to make archives smaller. 4-- Core moved from Rezip (main) 5-- Author: Gautier de Montmollin 6------------------------------------------------------------------------------ 7-- 8-- To do: 9-- * In order to facilitate customization, ReZip could have a config file ( 10-- http://sf.net/projects/ini-files/ ) to store external packer program 11-- names. See ZipMax as an example... 12-- 13-- External programs used (feel free to customize/add/remove): 14-- 7-Zip, KZip, Zip (info-zip), AdvZip, DeflOpt 15-- Web URL's: see Zipper_specification below or run ReZip without arguments. 16 17with Zip.Headers, Zip.Compress, UnZip; 18with Zip.Create; use Zip.Create; 19with Zip_Streams; use Zip_Streams; 20 21with My_feedback, Flexible_temp_files; 22 23with Ada.Calendar; use Ada.Calendar; 24with Ada.Directories; use Ada.Directories; 25with Ada.Text_IO; use Ada.Text_IO; 26with Dual_IO; 27with Ada.Integer_Text_IO; use Ada.Integer_Text_IO; 28with Ada.Float_Text_IO; use Ada.Float_Text_IO; 29with Ada.Streams.Stream_IO; use Ada.Streams.Stream_IO; 30with Ada.Strings.Fixed; use Ada.Strings.Fixed, Ada.Strings; 31with Ada.Strings.Unbounded; use Ada.Strings.Unbounded; 32with Ada.Characters.Handling; use Ada.Characters.Handling; 33with Ada.Unchecked_Deallocation; 34with Ada.Numerics.Discrete_Random; 35 36with Interfaces; use Interfaces; 37 38with GNAT.OS_Lib; 39 40package body Rezip_lib is 41 42 function S (Source : Ada.Strings.Unbounded.Unbounded_String) return String 43 renames Ada.Strings.Unbounded.To_String; 44 function U (Source : String) return Ada.Strings.Unbounded.Unbounded_String 45 renames Ada.Strings.Unbounded.To_Unbounded_String; 46 47 -- This might be better read from a config file... 48 -- 49 type Zipper_specification is record 50 name, title, URL, options: Unbounded_String; 51 expanded_options : Unbounded_String; 52 -- options with dynamically expanded tokens 53 made_by_version : Unsigned_16; 54 pkzm : Zip.PKZip_method; 55 limit : Zip.File_size_type; 56 -- Compression is considered too slow or unefficient beyond limit 57 -- E.g., kzip's algorithm might be O(N^2) or worse; on large files, 58 -- deflate_e or other methods are better anyway 59 randomized : Boolean; 60 end record; 61 62 NN: constant Unbounded_String:= Null_Unbounded_String; 63 64 -- Give up recompression above a certain data size for some external packers like KZip 65 -- or Zopfli. 66 -- 67 kzip_zopfli_limit: constant:= 2_000_000; 68 69 type Approach is ( 70 original, 71 shrink, 72 reduce_4, 73 deflate_3, 74 lzma_2, lzma_3, 75 presel_1, presel_2, 76 external_1, external_2, external_3, external_4, 77 external_5, external_6, external_7, external_8, 78 external_9, external_10, external_11, external_12, 79 external_13 80 ); 81 82 subtype Internal is Approach 83 range Approach'Succ(Approach'First) .. Approach'Pred(external_1); 84 subtype External is Approach 85 range external_1 .. Approach'Last; 86 87 ext: array(External) of Zipper_specification:= 88 ( -- Zip 2.32 or later: 89 (U("zip"), U("Zip"), U("http://info-zip.org/"), 90 U("-9"), NN, 20, Zip.deflate, 0, False), 91 -- 7-Zip 4.64 or later; Deflate: 92 (U("7z"), 93 U("7-Zip"), U("http://7-zip.org/"), 94 U("a -tzip -mm=deflate -mfb=258 -mpass=#RAND#(7,15) -mmc=10000"), 95 NN, 20, Zip.deflate, 0, True), 96 (U("7z"), 97 U("7-Zip"), NN, 98 U("a -tzip -mm=deflate64 -mfb=257 -mpass=15 -mmc=10000"), 99 NN, 21, Zip.deflate_e, 0, False), 100 -- KZip: 101 (U("kzip"),U("KZIP"),U("http://www.advsys.net/ken/utils.htm"), 102 U("/rn /b0"), NN, 20, Zip.deflate, kzip_zopfli_limit, True), 103 (U("kzip"),U("KZIP"),NN, 104 U("/rn /b#RAND#(0,128)"), NN, 20, Zip.deflate, kzip_zopfli_limit, True), 105 (U("kzip"),U("KZIP"),NN, 106 U("/rn /b#RAND#(128,2048)"), NN, 20, Zip.deflate, kzip_zopfli_limit, True), 107 -- Zip 3.0 or later; BZip2: 108 (U("zip"), U("Zip"), U("http://info-zip.org/"), 109 U("-#RAND#(1,9) -Z bzip2"), NN, 46, Zip.bzip2, 0, True), 110 -- 7-Zip 9.20 or later; LZMA: 111 (U("7z"), U("7-Zip"), NN, 112 U("a -tzip -mm=LZMA -mx=9"), NN, 63, Zip.lzma_meth, 0, False), 113 (U("7z"), U("7-Zip"), NN, -- dictionary size 2**19 = 512 KB 114 U("a -tzip -mm=LZMA:a=2:d=19:mf=bt3:fb=128:lc=0:lp=2"), NN, 63, Zip.lzma_meth, 0, False), 115 (U("7z"), U("7-Zip"), NN, 116 U("a -tzip -mm=LZMA:a=2:d=#RAND#(3200,3700)k:mf=bt4:fb=#RAND#(255,273):lc=2:lp0:pb0"), 117 NN, 63, Zip.lzma_meth, 0, True), 118 (U("7z"), U("7-Zip"), NN, -- dictionary size 2**25 = 32 MB 119 U("a -tzip -mm=LZMA:a=2:d=25:mf=bt3:fb=255:lc=7"), NN, 63, Zip.lzma_meth, 0, False), 120 (U("7z"), U("7-Zip"), NN, -- dictionary size 2**26 = 64 MB 121 U("a -tzip -mm=LZMA:a=2:d=26:mf=bt3:fb=222:lc=8:lp0:pb1"), NN, 63, Zip.lzma_meth, 0, False), 122 -- AdvZip: advancecomp v1.19+ interesting for the Zopfli algorithm 123 (U("advzip"), U("AdvZip"), U("http://advancemame.sf.net/comp-readme.html"), 124 U("-a -4"), NN, 20, Zip.deflate, kzip_zopfli_limit, False) 125 ); 126 127 defl_opt: constant Zipper_specification:= 128 (U("deflopt"), U("DeflOpt"), U("http://www.walbeehm.com/download/"), 129 NN, NN, 0, Zip.deflate, 0, False); 130 131 procedure Rezip ( 132 from_zip_file : String; 133 to_zip_file : String; 134 format_choice : Zip_format_set := all_formats; -- force output into selected format set 135 touch : Boolean := False; -- set time stamps to now 136 lower : Boolean := False; -- set full file names to lower case 137 delete_comment : Boolean := False; -- delete zip comment 138 randomized_stable : Positive := 1; 139 log_file : String := ""; 140 html_report : String := ""; 141 internal_only : Boolean := False -- Zip-Ada algorithms only, no ext. call 142 ) 143 is 144 145 package DFIO is new Dual_IO.Float_IO(Float); 146 147 procedure Rip_data( 148 archive : Zip.Zip_info; -- from this archive... 149 input : in out Root_Zipstream_Type'Class; 150 data_name : String; -- extract this data 151 rip_rename : String; -- to this file (compressed) 152 unzip_rename : String; -- and this one (uncompressed) 153 header : out Zip.Headers.Local_File_Header 154 ) 155 is 156 file_index : Zip_Streams.ZS_Index_Type; 157 comp_size : Zip.File_size_type; 158 uncomp_size : Zip.File_size_type; 159 file_out : Ada.Streams.Stream_IO.File_Type; 160 dummy_encoding : Zip.Zip_name_encoding; 161 dummy_crc : Unsigned_32; 162 use UnZip; 163 begin 164 Zip.Find_offset( 165 info => archive, 166 name => data_name, 167 name_encoding => dummy_encoding, 168 file_index => file_index, 169 comp_size => comp_size, 170 uncomp_size => uncomp_size, 171 crc_32 => dummy_crc 172 ); 173 Set_Index(input, file_index); 174 Zip.Headers.Read_and_check(input, header); 175 -- Skip name and extra field 176 Set_Index(input, 177 Index(input) + 178 Zip_Streams.ZS_Size_Type 179 (header.extra_field_length + 180 header.filename_length) 181 ); 182 -- * Get the data, compressed 183 Ada.Streams.Stream_IO.Create(file_out, Out_File, rip_rename); 184 Zip.Copy_chunk(input, Stream(file_out).all, Integer(comp_size)); 185 Close(file_out); 186 if unzip_rename /= "" then 187 -- * Get the data, uncompressed 188 Extract( 189 from => archive, 190 what => data_name, 191 rename => unzip_rename, 192 options => 193 ( test_only => False, 194 junk_directories => False, 195 case_sensitive_match => True, 196 extract_as_text => False 197 ) 198 ); 199 end if; 200 end Rip_data; 201 202 Approach_to_Method: constant array(Internal) of Zip.Compress.Compression_Method:= 203 (shrink => Zip.Compress.Shrink, 204 reduce_4 => Zip.Compress.Reduce_4, 205 deflate_3 => Zip.Compress.Deflate_3, 206 lzma_2 => Zip.Compress.LZMA_2, 207 lzma_3 => Zip.Compress.LZMA_3, 208 presel_1 => Zip.Compress.Preselection_1, 209 presel_2 => Zip.Compress.Preselection_2 210 ); 211 212 type Packer_info is record 213 size : Zip.File_size_type; 214 zfm : Unsigned_16; 215 count : Natural; 216 saved : Integer_64; 217 -- can be negative if -defl chosen: suboptimal recompression, 218 -- but compatible method 219 uncomp_size : Unsigned_64; 220 -- summed uncompressed sizes might be more than 2**32 221 expanded_options: Unbounded_String; 222 iter : Positive; -- iterations needed 223 LZMA_EOS : Boolean; 224 end record; 225 226 type Packer_info_array is array(Approach) of Packer_info; 227 228 type Dir_entry; 229 type p_Dir_entry is access Dir_entry; 230 -- 231 type Dir_entry is record 232 head: Zip.Headers.Central_File_Header; 233 name: Unbounded_String; 234 next: p_Dir_entry:= null; 235 chosen_approach: Approach:= original; 236 info: Packer_info_array; 237 end record; 238 239 function Temp_name( 240 compressed: Boolean; 241 appr : Approach 242 ) 243 return String 244 is 245 initial: constant array(Boolean) of Character:= ('u','c'); 246 begin 247 return 248 Flexible_temp_files.Radix & 249 "_!" & initial(compressed) & 250 '!' & Trim(Integer'Image(Approach'Pos(appr)), Left) & 251 "!_.tmp"; 252 end Temp_name; 253 254 function Img(a: Approach; html: Boolean) return String is 255 function Repl(s: String) return String is 256 t: String:= s; 257 begin 258 for i in t'Range loop 259 if html and t(i) = ':' then t(i):= ' '; end if; -- break too long lines 260 end loop; 261 return t; 262 end Repl; 263 begin 264 if a in External then 265 return "External: " & S(ext(a).title) & ", " & Repl(S(ext(a).expanded_options)); 266 else 267 declare 268 s: constant String:= Approach'Image(a); 269 begin 270 return s(s'First) & To_Lower(s(s'First+1..s'Last) & (Approach'Width-s'Length+1) * ' '); 271 end; 272 end if; 273 end Img; 274 275 -- From AZip_Common... 276 function Image_1000(r: Zip.File_size_type; separator: Character:= ''') return String is 277 s: constant String:= Zip.File_size_type'Image(r); 278 t: String(s'First..s'First+(s'Length*4)/3); 279 j, c: Natural; 280 begin 281 -- For signed integers 282 -- if r < 0 then 283 -- return '-' & Image_1000(abs r, separator); 284 -- end if; 285 -- 286 -- We build result string t from right to left 287 j:= t'Last + 1; 288 c:= 0; 289 for i in reverse s'First..s'Last loop 290 exit when s(i) = ' ' or s(i) = '-'; 291 if c > 0 and then c mod 3 = 0 then 292 j:= j - 1; 293 t(j):= separator; 294 end if; 295 j:= j - 1; 296 t(j):= s(i); 297 c:= c + 1; 298 end loop; 299 return t(j..t'Last); 300 end Image_1000; 301 302 function Image_1000(r: Integer_64; separator: Character:= ''') return String is 303 s: constant String:= Integer_64'Image(r); 304 t: String(s'First..s'First+(s'Length*4)/3); 305 j, c: Natural; 306 begin 307 -- For signed integers 308 if r < 0 then 309 return '-' & Image_1000(abs r, separator); 310 end if; 311 -- We build result string t from right to left 312 j:= t'Last + 1; 313 c:= 0; 314 for i in reverse s'First..s'Last loop 315 exit when s(i) = ' ' or s(i) = '-'; 316 if c > 0 and then c mod 3 = 0 then 317 j:= j - 1; 318 t(j):= separator; 319 end if; 320 j:= j - 1; 321 t(j):= s(i); 322 c:= c + 1; 323 end loop; 324 return t(j..t'Last); 325 end Image_1000; 326 327 procedure Call_external( 328 packer: String; 329 args : String 330 ) 331 is 332 use GNAT.OS_Lib; 333 procedure Dispose is 334 new Ada.Unchecked_Deallocation(Argument_List, Argument_List_Access); 335 list: Argument_List_Access; 336 ok: Boolean; 337 begin 338 Dual_IO.Put_Line(packer & " [" & args & ']'); 339 list:= Argument_String_To_List(args); 340 GNAT.OS_Lib.Spawn(packer, list.all, ok); 341 Dispose(list); 342 if not ok then 343 Dual_IO.Put_Line( 344 "Warning: cannot call " & packer & 345 ". Is it callable through the ""path"" ?" 346 ); 347 end if; 348 end Call_external; 349 350 seed_iterator: Natural; 351 352 procedure Call_external_expanded( 353 packer : String; 354 options : String; 355 other_args: String; 356 expand : in out Unbounded_String -- expanded arguments 357 ) 358 is 359 type Token is (rand); 360 begin 361 expand:= U(options); 362 for t in Token loop 363 loop 364 declare 365 tok: constant String:= '#' & Token'Image(t) & '#'; 366 idx: constant Natural:= Index(expand, tok); 367 par: constant Natural:= Index(expand, ")"); 368 replace: Unbounded_String; 369 begin 370 exit when idx = 0; -- No more of token t to replace 371 declare 372 opt: constant String:= S(expand); -- partially processed option string 373 curr: constant String:= opt(idx+1..opt'Last); -- current option 374 par_a: constant Natural:= Index(curr, "("); 375 par_z: constant Natural:= Index(curr, ")"); 376 comma: constant Natural:= Index(curr, ","); 377 n1, n2, n: Integer; 378 begin 379 case t is 380 when rand => 381 n1:= Integer'Value(curr(par_a+1..comma-1)); 382 n2:= Integer'Value(curr(comma+1..par_z-1)); 383 declare 384 subtype rng is Integer range n1..n2; 385 package Rnd is new Ada.Numerics.Discrete_Random(rng); 386 gen: Rnd.Generator; 387 begin 388 Rnd.Reset(gen, seed_iterator); 389 -- On GNAT the clock-based Reset is too coarse 390 -- (gives many times the same seed when called with small 391 -- time intervals). 392 seed_iterator:= seed_iterator + 1; 393 n:= Rnd.Random(gen); 394 end; 395 replace:= U(Trim(Integer'Image(n),Left)); 396 end case; 397 Replace_Slice(expand, idx, par, S(replace)); 398 end; 399 end; 400 end loop; 401 end loop; 402 Call_external(packer, S(expand) & ' ' & other_args); 403 end Call_external_expanded; 404 405 procedure Process_External( 406 packer : String; 407 options : String; 408 out_name : String; 409 is_rand : Boolean; 410 is_deflate : Boolean; 411 info : out Packer_info 412 ) 413 is 414 temp_zip : constant String := Simple_Name (Flexible_temp_files.Radix) & "_$temp$.zip"; 415 rand_winner : constant String := Simple_Name (Flexible_temp_files.Radix) & "_$rand$.tmp"; 416 options_winner: Unbounded_String; 417 data_name: constant String:= Simple_Name(Temp_name(False,original)); 418 zi_ext: Zip.Zip_info; 419 header: Zip.Headers.Local_File_Header; 420 MyStream : aliased File_Zipstream; 421 cur_dir: constant String:= Current_Directory; 422 size_memory: array(1..randomized_stable) of Zip.File_size_type:= (others => 0); 423 size: Zip.File_size_type:= 0; 424 zfm: Unsigned_16; 425 attempt: Positive:= 1; 426 dummy_exp_opt: Unbounded_String; 427 begin 428 -- We jump into the TEMP directory, to avoid putting pathes into the 429 -- temporary zip file. 430 Set_Directory(Containing_Directory(Flexible_temp_files.Radix)); 431 loop 432 if Exists(temp_zip) then -- remove (eventually broken) zip 433 Delete_File(temp_zip); 434 end if; 435 Call_external_expanded( 436 packer, 437 options, 438 temp_zip & ' ' & data_name, 439 info.expanded_options 440 ); 441 if (not Exists(temp_zip)) and then Ada.Directories.Size(data_name) = 0 then 442 -- ADVZip 1.19 doesn't create a zip file for a 0-size entry; we call Zip instead... 443 Call_external_expanded("zip", "", temp_zip & ' ' & data_name, dummy_exp_opt); 444 end if; 445 if is_deflate then 446 -- Post processing of "deflated" entry with DeflOpt: 447 Call_external(S(defl_opt.name), temp_zip); 448 end if; 449 -- Now, rip 450 Set_Name (MyStream, temp_zip); 451 Open (MyStream, In_File); 452 Zip.Load( zi_ext, MyStream, True ); 453 Rip_data( 454 archive => zi_ext, 455 input => MyStream, 456 data_name => data_name, 457 rip_rename => out_name, 458 unzip_rename => "", 459 header => header 460 ); 461 Close(MyStream); 462 Delete_File(temp_zip); 463 Zip.Delete(zi_ext); 464 -- 465 if randomized_stable = 1 or not is_rand then -- normal behaviour (1 attempts) 466 size:= header.dd.compressed_size; 467 zfm := header.zip_type; 468 info.iter:= 1; 469 exit; 470 end if; 471 -- 472 -- Here, we process the cases where compressed sizes need 473 -- to be reduced and we expect a stable size over n=randomized_stable 474 -- attempts. 475 -- 476 if attempt = 1 or else 477 header.dd.compressed_size < size -- better size 478 then 479 size:= header.dd.compressed_size; 480 zfm := header.zip_type; 481 if Exists(rand_winner) then 482 Delete_File(rand_winner); 483 end if; 484 Rename(out_name, rand_winner); 485 options_winner:= info.expanded_options; 486 end if; 487 -- 488 -- Manage the array of last n=randomized_stable sizes 489 -- 490 if attempt > size_memory'Last then 491 for i in size_memory'First+1..size_memory'Last loop 492 size_memory(i-1):= size_memory(i); 493 end loop; 494 size_memory(size_memory'Last):= size; 495 else 496 size_memory(attempt):= size; 497 end if; 498 -- 499 -- Check stability over n=randomized_stable attempts 500 -- 501 if attempt >= randomized_stable then 502 if size_memory(randomized_stable) = size_memory(1) then 503 if Exists(out_name) then 504 Delete_File(out_name); 505 end if; 506 Rename(rand_winner, out_name); 507 info.expanded_options:= options_winner; 508 info.iter:= attempt; 509 exit; 510 end if; 511 end if; 512 attempt:= attempt + 1; 513 end loop; 514 info.size := size; 515 info.uncomp_size:= Unsigned_64(header.dd.uncompressed_size); 516 -- uncomp_size should not matter (always the same). 517 info.zfm := zfm; 518 info.LZMA_EOS := (zfm = 14) and (header.bit_flag and Zip.Headers.LZMA_EOS_Flag_Bit) /= 0; 519 -- We jump back to the startup directory. 520 Set_Directory(cur_dir); 521 end Process_External; 522 523 -- Compress data as raw compressed data 524 procedure Process_Internal_Raw(a: Approach; e: in out Dir_entry) is 525 File_in : aliased File_Zipstream; 526 File_out : aliased File_Zipstream; 527 begin 528 Set_Name (File_in, Temp_name(False,original)); 529 Open (File_in, In_File); 530 Set_Name (File_out, Temp_name(True,a)); 531 Create (File_out, Out_File); 532 Zip.Compress.Compress_data 533 ( 534 input => File_in, 535 output => File_out, 536 input_size_known => True, 537 input_size => e.head.short_info.dd.uncompressed_size, 538 method => Approach_to_Method(a), 539 feedback => My_feedback'Access, 540 password => "", 541 content_hint => Zip.Compress.Guess_type_from_name(S(e.name)), 542 CRC => e.head.short_info.dd.crc_32, 543 -- we take the occasion to compute the CRC if not 544 -- yet available (e.g. JAR) 545 output_size => e.info(a).size, 546 zip_type => e.info(a).zfm 547 ); 548 e.info(a).LZMA_EOS := e.info(a).zfm = 14; 549 Close(File_in); 550 Close(File_out); 551 end Process_Internal_Raw; 552 553 -- Compress data as a Zip archive (like external methods), then call post-processing 554 procedure Process_Internal_as_Zip(a: Approach; e: in out Dir_entry) is 555 zip_file : aliased File_Zipstream; 556 archive : Zip_Create_info; 557 temp_zip: constant String:= Simple_Name(Flexible_temp_files.Radix) & "_$temp$.zip"; 558 data_name: constant String:= Simple_Name(Temp_name(False,original)); 559 zi_ext: Zip.Zip_info; 560 header: Zip.Headers.Local_File_Header; 561 MyStream : aliased File_Zipstream; 562 cur_dir: constant String:= Current_Directory; 563 begin 564 Set_Directory(Containing_Directory(Flexible_temp_files.Radix)); 565 Create (archive, zip_file'Unchecked_Access, temp_zip); 566 Set(archive, Approach_to_Method(a)); 567 Add_File(archive, data_name); 568 Finish (archive); 569 -- Post processing of "deflated" entry with DeflOpt: 570 Call_external(S(defl_opt.name), temp_zip); 571 -- Now, rip 572 Set_Name (MyStream, temp_zip); 573 Open (MyStream, In_File); 574 Zip.Load( zi_ext, MyStream, True ); 575 Rip_data( 576 archive => zi_ext, 577 input => MyStream, 578 data_name => data_name, 579 rip_rename => Temp_name(True,a), 580 unzip_rename => "", 581 header => header 582 ); 583 e.info(a).size:= header.dd.compressed_size; 584 e.info(a).zfm := header.zip_type; 585 e.info(a).LZMA_EOS := 586 (header.zip_type = 14) and (header.bit_flag and Zip.Headers.LZMA_EOS_Flag_Bit) /= 0; 587 Close(MyStream); 588 Delete_File(temp_zip); 589 Zip.Delete(zi_ext); 590 Set_Directory(cur_dir); 591 end Process_Internal_as_Zip; 592 593 time_0 : constant Ada.Calendar.Time:= Clock; 594 595 procedure Repack_contents(orig_name, repacked_name, html_report_name: String) 596 is 597 zi: Zip.Zip_info; 598 MyStream : aliased File_Zipstream; 599 600 list, e, curr: p_Dir_entry:= null; 601 repacked_zip_file : aliased File_Zipstream; 602 null_packer_info: constant Packer_info := (0,0,0,0,0,NN,1,False); 603 total: Packer_info_array:= (others => null_packer_info); 604 -- total(a).count counts the files where approach 'a' was optimal 605 -- total(a).saved counts the saved bytes when approach 'a' was optimal 606 total_choice: Packer_info:= null_packer_info; 607 summary: Ada.Text_IO.File_Type; 608 T0, T1 : Ada.Calendar.Time; 609 seconds: Duration; 610 -- 611 type Approach_Filtering is array(Approach) of Boolean; 612 consider_a_priori: Approach_Filtering; 613 -- 614 lightred: constant String:= "#f43048"; 615 616 procedure Process_one(unique_name: String) is 617 comp_size : Zip.File_size_type; 618 uncomp_size: Zip.File_size_type; 619 choice: Approach:= original; 620 deco: constant String:= "-->-->-->" & (20+unique_name'Length) * '-'; 621 mth: Zip.PKZip_method; 622 consider: Approach_Filtering; 623 gain: Integer_64; 624 -- 625 procedure Winner_color is 626 begin 627 if e.info(choice).size < e.info(original).size then 628 Put(summary,"<td bgcolor=lightgreen><b>"); 629 -- We were able to reduce the size. :-) 630 elsif e.info(choice).size = e.info(original).size then 631 Put(summary,"<td><b>"); 632 -- Original was already the best. 633 else 634 Put(summary,"<td bgcolor=" & lightred & "><b>"); 635 -- Forced to a format with a less efficient compression. :-( 636 end if; 637 end Winner_color; 638 -- 639 use Zip; 640 begin 641 -- Start with the set of approaches that has been decided for all entries. 642 consider:= consider_a_priori; 643 if unique_name = "" or else 644 ( unique_name(unique_name'Last)='\' 645 or unique_name(unique_name'Last)='/' 646 ) 647 then 648 return; -- directories are useless entries! 649 end if; 650 total_choice.count:= total_choice.count + 1; 651 Dual_IO.Close_and_Append_Log; -- have an up to date copy on file system 652 Dual_IO.Put_Line(deco); 653 Dual_IO.Put_Line( 654 ' ' & 655 Integer'Image((100 * total_choice.count) / Zip.Entries(zi)) & 656 "% - Processing " & 657 unique_name & ',' & 658 Integer'Image(total_choice.count) & 659 " of" & 660 Integer'Image(Zip.Entries(zi)) 661 ); 662 Dual_IO.Put_Line(deco); 663 Dual_IO.New_Line; 664 -- 665 e:= new Dir_entry; 666 if curr = null then 667 curr:= e; 668 list:= e; 669 else 670 curr.next:= e; 671 curr:= e; 672 end if; 673 e.name:= U(unique_name); 674 e.head.made_by_version := 20; -- version 2.0 675 e.head.comment_length := 0; 676 e.head.disk_number_start := 0; 677 e.head.internal_attributes := 0; -- 0: seems binary; 1, text 678 e.head.external_attributes := 0; 679 -- 680 Dual_IO.Put(" Phase 1: dump & unzip -"); 681 Rip_data( 682 archive => zi, 683 input => MyStream, 684 data_name => unique_name, 685 rip_rename => Temp_name(True,original), 686 unzip_rename => Temp_name(False,original), 687 header => e.head.short_info 688 ); 689 -- 690 if touch then 691 e.head.short_info.file_timedate:= Zip.Convert(time_0); 692 end if; 693 if lower then 694 e.name:= U(To_Lower(S(e.name))); 695 end if; 696 -- Get reliable data from zi 697 Zip.Get_sizes( 698 info => zi, 699 name => unique_name, 700 comp_size => comp_size, 701 uncomp_size => uncomp_size 702 ); 703 Dual_IO.Put_Line(" done"); 704 -- 705 -- Apply limitations: skip some methods if certain conditions are met. 706 -- For instance: 707 -- Shrink may in rare cases be better, but only for tiny files. 708 -- KZip and Zopfli are excellent but really too slow on large files. 709 -- 710 for a in Approach loop 711 case a is 712 when original => 713 null; 714 when shrink => 715 consider(a):= consider(a) and uncomp_size <= 6000; 716 when reduce_4 => 717 consider(a):= consider(a) and uncomp_size <= 9000; 718 when External => 719 consider(a):= consider(a) and (ext(a).limit = 0 or uncomp_size <= ext(a).limit); 720 when others => 721 null; 722 end case; 723 end loop; 724 Dual_IO.Put_Line(" Phase 2: try different tactics..."); 725 -- 726 Try_all_approaches: 727 -- 728 for a in Approach loop 729 if consider(a) then 730 Dual_IO.Put(" -o-> " & Img(a, html => False)); 731 e.info(a).iter:= 1; 732 case a is 733 -- 734 when original => 735 -- This is from the original .zip - just record size and method 736 e.info(a).size:= comp_size; 737 e.info(a).zfm := e.head.short_info.zip_type; 738 e.info(a).LZMA_EOS := 739 (e.info(a).zfm = 14) and 740 (e.head.short_info.bit_flag and Zip.Headers.LZMA_EOS_Flag_Bit) /= 0; 741 mth:= Zip.Method_from_code(e.info(a).zfm); 742 -- 743 when Internal => 744 if Approach_to_Method(a) in Zip.Compress.Deflation_Method 745 and not internal_only 746 then 747 -- We will post-process our internal Deflate with DeflOpt. 748 Process_Internal_as_Zip(a, e.all); 749 else 750 Process_Internal_Raw(a, e.all); 751 end if; 752 when External => 753 Dual_IO.New_Line; 754 Process_External( 755 S(ext(a).name), 756 S(ext(a).options), 757 Temp_name(True,a), 758 ext(a).randomized, 759 ext(a).pkzm = Zip.deflate, 760 e.info(a) 761 ); 762 e.head.made_by_version:= ext(a).made_by_version; 763 ext(a).expanded_options:= e.info(a).expanded_options; 764 -- 765 end case; 766 total(a).size:= total(a).size + e.info(a).size; 767 if e.info(a).size < e.info(choice).size then 768 -- Hurra, we found a smaller size than previous choice! 769 choice:= a; 770 end if; 771 if choice = original and not format_choice(mth) then 772 -- This occurs if we want to make an archive with only a certain set of formats, 773 -- for instance deflate_or_store, which is the most compatible. 774 -- Since approach _a_ uses a format in the desired set, the choice will be 775 -- forced out of original, even with a worse size. 776 choice:= a; 777 end if; 778 Dual_IO.New_Line; 779 end if; 780 end loop Try_all_approaches; 781 -- 782 total_choice.size:= total_choice.size + e.info(choice).size; 783 total(choice).count:= total(choice).count + 1; 784 total_choice.uncomp_size:= 785 total_choice.uncomp_size + Unsigned_64(uncomp_size); 786 gain:= Integer_64(e.info(original).size) - Integer_64(e.info(choice).size); 787 total(choice).saved:= total(choice).saved + gain; 788 total_choice.saved:= total_choice.saved + gain; 789 -- 790 Dual_IO.New_Line; 791 Dual_IO.Put( 792 " Phase 3: Winner is " & Img(choice, html => False) & 793 "; gain in bytes:" & Integer_64'Image(gain) & 794 "; writing data -" 795 ); 796 -- * Summary outputs 797 Put(summary, 798 "<tr><td>" & 799 Trim(Integer'Image(total_choice.count),Left) & 800 -- '/' & 801 -- Trim(Integer'Image(Zip.Entries(zi)),Left) & 802 "</td>" & 803 "<td bgcolor=lightgrey><tt>" & unique_name & "</tt>, " & 804 Image_1000(uncomp_size) & "</td>"); 805 for a in Approach loop 806 if consider_a_priori(a) then 807 if not consider(a) then 808 Put(summary,"<td bgcolor=lightgray>skipped"); 809 elsif a = choice then 810 Winner_color; 811 elsif e.info(a).size = e.info(choice).size then -- ex aequo 812 Put(summary,"<td bgcolor=lightblue><b>"); 813 else 814 Put(summary,"<td>"); 815 end if; 816 if consider(a) then 817 Put(summary, Image_1000(e.info(a).size)); 818 end if; 819 if choice = a then 820 Put(summary,"</b>"); 821 end if; 822 Put(summary,"</td>"); 823 end if; 824 end loop; 825 -- Recall winner approach, method and size: 826 Put(summary,"<td>" & Img(choice, html => True) & "</td>"); 827 Put(summary, 828 "<td bgcolor=#fafa64>" & 829 Zip.Image(Zip.Method_from_code(e.info(choice).zfm)) & 830 "</td>" 831 ); 832 Put(summary, 833 "<td>" & 834 Zip.Image(Zip.Method_from_code(e.info(original).zfm)) & 835 "</td>" 836 ); 837 Winner_color; 838 Put(summary, Image_1000(e.info(choice).size)); 839 Put(summary,"</b></td><td>"); 840 if e.info(original).size > 0 then 841 Put( 842 summary, 843 100.0 * Float(e.info(choice).size) / Float(e.info(original).size), 844 3,2,0 845 ); 846 Put(summary,"%"); 847 end if; 848 Put(summary,"</td><td>"); 849 if uncomp_size > 0 then 850 Put( 851 summary, 852 100.0 * Float(e.info(choice).size) / Float(uncomp_size), 853 3,2,0 854 ); 855 Put(summary,"%"); 856 end if; 857 Put(summary,"</td><td>"); 858 Put(summary,Integer'Image(e.info(choice).iter)); 859 Put_Line(summary,"</td></tr>"); 860 -- 861 -- Write winning data: 862 -- 863 e.head.short_info.extra_field_length:= 0; -- We choose to ignore it... 864 -- No data descriptor after data (bit 3); no EOS for LZMA (bit 1): 865 e.head.short_info.bit_flag:= 866 e.head.short_info.bit_flag and (2#1111_1111_1111_0101#); 867 -- Set the LZMA EOS flag if present in winner entry (checked by 7-Zip v.17.01): 868 if e.info(choice).LZMA_EOS then 869 e.head.short_info.bit_flag:= e.head.short_info.bit_flag or Zip.Headers.LZMA_EOS_Flag_Bit; 870 end if; 871 -- Set or adjust the pre-data data descriptor: 872 -- NB: even if missing pre-data, CRC will have been computed 873 -- at least with one internal method 874 e.head.short_info.dd.uncompressed_size:= uncomp_size; 875 -- Put the winning size and method 876 e.head.short_info.dd.compressed_size:= e.info(choice).size; 877 e.head.short_info.zip_type:= e.info(choice).zfm; 878 e.head.local_header_offset:= Unsigned_32(Index(repacked_zip_file))-1; 879 Zip.Headers.Write(repacked_zip_file, e.head.short_info); 880 String'Write(repacked_zip_file'Access, S(e.name)); 881 -- Copy the compressed data 882 Zip.Copy_file( Temp_name(True,choice), repacked_zip_file ); 883 Dual_IO.Put_Line(" done"); 884 Dual_IO.New_Line; 885 end Process_one; 886 887 procedure Process_all is new Zip.Traverse(Process_one); 888 889 ed: Zip.Headers.End_of_Central_Dir; 890 891 function Webcolor(a: Approach) return String is 892 v: Float; 893 sr,sg,sb: String(1..10); 894 begin 895 if total_choice.saved > 0 and 896 -- with options like -defl ot -fast_dec, we may have 897 -- negative values or other strange things: 898 total(a).saved >= 0 899 then 900 v:= Float(total(a).saved) / Float(total_choice.saved); 901 -- ^ contribution of approach 'a' 902 else 903 v:= 0.0; 904 end if; 905 Put(sr, 512 + Integer(144.0 + 111.0 * (1.0 - v)), 16); 906 sb:= sr; 907 Put(sg, 512 + Integer(238.0 + 17.0 * (1.0 - v)), 16); 908 return 909 sr(sr'Last-2..sr'Last-1) & 910 sg(sg'Last-2..sg'Last-1) & 911 sb(sb'Last-2..sb'Last-1); 912 end Webcolor; 913 914 meth: Zip.Compress.Compression_Method; 915 916 begin -- Repack_contents 917 T0:= Clock; 918 for a in Approach loop 919 case a is 920 when original => 921 consider_a_priori(a):= True; 922 when Internal => 923 meth:= Approach_to_Method(a); 924 case meth is 925 when Zip.Compress.Single_Method => 926 consider_a_priori(a):= format_choice(Zip.Compress.Method_to_Format(meth)); 927 when Zip.Compress.Multi_Method => 928 -- For the sake of simplicity, we consider the Multi_Method's 929 -- only when all formats are admitted. 930 consider_a_priori(a):= format_choice = all_formats; 931 end case; 932 when External => 933 consider_a_priori(a):= format_choice(ext(a).pkzm) and not internal_only; 934 end case; 935 end loop; 936 Set_Name (MyStream, orig_name); 937 Open (MyStream, In_File); 938 Zip.Load( zi, MyStream, True ); 939 940 Set_Name (repacked_zip_file, repacked_name); 941 Create(repacked_zip_file, Out_File); 942 Create(summary, Out_File, html_report_name); 943 -- 944 -- HTML Report begins here. 945 -- 946 Put_Line(summary, 947 "<html><head><title>ReZip summary for file " 948 & orig_name & "</title></head>" 949 ); 950 Put_Line(summary, "<style>.container { overflow-y: auto; height: 87%; }"); 951 Put_Line(summary, "td_approach { width:115px; }"); 952 Put_Line(summary, "</style><body>"); 953 Put_Line(summary, "<font face=""Calibri, Arial, Tahoma""> <!-- Set font for the whole page !-->"); 954 Put_Line(summary, 955 "<h2><a target=_blank href=" & Zip.web & 956 ">ReZip</a> summary for file " & orig_name & "</h2>" 957 ); 958 Put_Line(summary, 959 "ReZip - Zip-Ada Library version " & Zip.version & " dated " & Zip.reference 960 ); 961 if format_choice /= all_formats then 962 Put_Line(summary, 963 "<br><table border=0 cellpadding=0 cellspacing=0>" & 964 "<tr bgcolor=" & lightred & 965 "><td><b>An option that filters methods is on, " & 966 "result(s) may be sub-optimal - see details at bottom.</b></td></tr></table><br>" 967 ); 968 end if; 969 Put_Line(summary, "<div class=""container""><table border=1 cellpadding=1 cellspacing=1>"); 970 Put(summary, 971 "<tr bgcolor=lightyellow><td></td>"& 972 "<td align=right valign=top><b>Approach:</b></td>" 973 ); 974 for a in Approach loop 975 if consider_a_priori(a) then 976 if a in External then 977 ext(a).expanded_options:= ext(a).options; 978 end if; 979 Put(summary, "<td valign=top class=""td_approach"">" & Img(a, html => True) & "</td>"); 980 end if; 981 end loop; 982 Put_Line(summary, "</tr>"); 983 Put(summary, 984 "<tr bgcolor=lightyellow><td></td>"& 985 "<td bgcolor=lightgrey valign=bottom><b>File name, uncompressed size:</b></td>" 986 ); 987 -- Additionally, we show a row with the Approach's Compression_Method's output format (the 988 -- Zip.PKZip_method). If it is not unique, we mention it. 989 for a in Approach loop 990 if consider_a_priori(a) then 991 case a is 992 when original => 993 Put(summary, "<td align=right bgcolor=#dddd00 class=""td_approach"">Approach's<br>format →</td>"); 994 when Internal => 995 Put(summary, "<td bgcolor=#fafa64>"); 996 meth:= Approach_to_Method(a); 997 case meth is 998 when Zip.Compress.Single_Method => 999 Put(summary, Zip.Image(Zip.Compress.Method_to_Format(meth))); 1000 when Zip.Compress.Multi_Method => 1001 Put(summary, "(Various formats)"); 1002 end case; 1003 Put(summary, "</td>"); 1004 when External => 1005 Put(summary, "<td bgcolor=#fafa64>" & Zip.Image(ext(a).pkzm) & "</td>"); 1006 end case; 1007 end if; 1008 end loop; 1009 Put_Line(summary, 1010 "<td><b>Choice</b></td>"& 1011 "<td bgcolor=#dddd00>Choice's<br>method/<br>format</td>"& 1012 "<td>Original<br>method/<br>format</td>"& 1013 "<td>Smallest<br>size</td>" & 1014 "<td>% of<br>original</td><td>% of<br>uncompressed</td><td>Iterations</td></tr>" 1015 ); 1016 -- 1017 -- 1/ Recompress each file into the new archive: 1018 -- 1019 Process_all(zi); 1020 -- 1021 -- 2/ Almost done - write Central Directory: 1022 -- 1023 ed.central_dir_offset:= Unsigned_32(Index(repacked_zip_file))-1; 1024 ed.total_entries:= 0; 1025 ed.central_dir_size:= 0; 1026 ed.main_comment_length:= 0; 1027 declare 1028 comment: constant String:= Zip.Zip_comment(zi); 1029 begin 1030 if not delete_comment then 1031 ed.main_comment_length:= comment'Length; 1032 end if; 1033 -- Restart at the beginning of the list 1034 e:= list; 1035 while e /= null loop 1036 ed.total_entries:= ed.total_entries + 1; 1037 Zip.Headers.Write(repacked_zip_file, e.head); 1038 String'Write(repacked_zip_file'Access, S(e.name)); 1039 ed.central_dir_size:= 1040 ed.central_dir_size + 1041 Zip.Headers.central_header_length + 1042 Unsigned_32(e.head.short_info.filename_length); 1043 e:= e.next; 1044 end loop; 1045 ed.disknum:= 0; 1046 ed.disknum_with_start:= 0; 1047 ed.disk_total_entries:= ed.total_entries; 1048 Zip.Headers.Write(repacked_zip_file, ed); 1049 if not delete_comment then 1050 String'Write(repacked_zip_file'Access, comment); 1051 end if; 1052 end; 1053 Close(repacked_zip_file); 1054 Close(MyStream); 1055 -- 1056 -- Cleanup 1057 -- 1058 for a in Approach loop 1059 if consider_a_priori(a) then 1060 if Exists(Temp_name(True,a)) then 1061 Delete_File( Temp_name(True,a) ); 1062 end if; 1063 if a = original then -- also an uncompressed data file to delete 1064 Delete_File( Temp_name(False,a) ); 1065 end if; 1066 end if; 1067 end loop; 1068 -- Report total bytes 1069 Put(summary,"<tr><td></td><td><b>T<small>OTAL BYTES</small></b></td>"); 1070 for a in Approach loop 1071 if consider_a_priori(a) then 1072 Put(summary, 1073 "<td bgcolor=#" & Webcolor(a) & ">" & 1074 Image_1000(total(a).size) & "</td>" 1075 ); 1076 end if; 1077 end loop; 1078 Put(summary, 1079 "<td></td><td></td><td></td><td bgcolor=lightgreen><b>" & Image_1000(total_choice.size) & 1080 "</b></td><td>" 1081 ); 1082 if total(original).size > 0 then 1083 Put(summary, 1084 100.0 * Float(total_choice.size) / Float(total(original).size), 1085 3,2,0 1086 ); 1087 Put(summary,"%"); 1088 end if; 1089 Put(summary, "</td><td>"); 1090 if total_choice.uncomp_size > 0 then 1091 Put(summary, 1092 100.0 * Float(total_choice.size) / Float(total_choice.uncomp_size), 1093 3,2,0 1094 ); 1095 Put(summary,"%"); 1096 end if; 1097 Put_Line(summary, "</td></tr>"); 1098 -- Report total files per approach 1099 Put(summary,"<tr><td></td><td><b>T<small>OTAL FILES (when optimal)</small></b></td>"); 1100 for a in Approach loop 1101 if consider_a_priori(a) then 1102 Put(summary, 1103 "<td bgcolor=#" & Webcolor(a) & ">" & 1104 Integer'Image(total(a).count) & "</td>" 1105 ); 1106 end if; 1107 end loop; 1108 Put(summary, 1109 "<td></td><td></td><td></td><td bgcolor=lightgreen><b>" & Integer'Image(total_choice.count) & 1110 "</b></td>" & 1111 "<td>" 1112 ); 1113 Put_Line(summary, "</td></tr>"); 1114 -- Report total saved bytes per approach 1115 Put(summary,"<tr><td></td><td><b>T<small>OTAL SAVED BYTES (when optimal)</small></b></td>"); 1116 for a in Approach loop 1117 if consider_a_priori(a) then 1118 Put(summary, "<td bgcolor=#" & Webcolor(a) & ">" & Image_1000(total(a).saved) & "</td>"); 1119 end if; 1120 end loop; 1121 Put(summary, 1122 "<td></td><td></td><td></td><td bgcolor=lightgreen><b>" & 1123 Image_1000(total_choice.saved) & "</b></td>" & 1124 "<td>" 1125 ); 1126 if total(original).size > 0 then 1127 Put(summary, 1128 100.0 * Float(total_choice.saved) / Float(total(original).size), 1129 3,2,0 1130 ); 1131 Put(summary,"%"); 1132 end if; 1133 Put(summary, "</td><td>"); 1134 if total_choice.uncomp_size > 0 then 1135 Put(summary, 1136 100.0 * Float(total_choice.saved) / Float(total_choice.uncomp_size), 1137 3,2,0 1138 ); 1139 Put(summary,"%"); 1140 end if; 1141 Put_Line(summary, "</td></tr></table></div><br><br>"); 1142 Put_Line(summary, "<dt>Options used for ReZip</dt>"); 1143 Put_Line(summary, "<dd>Randomized_stable =" & Integer'Image(randomized_stable) & "<br>"); 1144 Put_Line(summary, " Formats allowed:<br><table border=1 cellpadding=1 cellspacing=1>"); 1145 for f in format_choice'Range loop 1146 Put_Line(summary, 1147 " <tr><td>" & Zip.Image(f) & "</td><td>" & 1148 Boolean'Image(format_choice(f)) & "</td></tr>"); 1149 end loop; 1150 Put_Line(summary, " </table>"); 1151 Put_Line(summary, "</dd>"); 1152 T1:= Clock; 1153 seconds:= T1-T0; 1154 Put(summary, "Time elapsed : "); 1155 Put(summary, Float( seconds ), 4, 2, 0 ); 1156 Put(summary, " seconds, or"); 1157 Put(summary, Float( seconds ) / 60.0, 4, 2, 0 ); 1158 Put(summary, " minutes, or"); 1159 Put(summary, Float( seconds ) / 3600.0, 4, 2, 0 ); 1160 Put_Line(summary, " hours.</font></body></html>"); 1161 Close(summary); 1162 Dual_IO.Put("Time elapsed : "); 1163 DFIO.Put( Float( seconds ), 4, 2, 0 ); 1164 Dual_IO.Put_Line( " sec"); 1165 Dual_IO.Put_Line("All details for " & orig_name & " in " & html_report_name); 1166 end Repack_contents; 1167 1168 -- This is for randomizing the above seed_iterator. 1169 subtype Seed_Range is Integer range 1..1_000_000; 1170 package Rnd_seed is new Ada.Numerics.Discrete_Random(Seed_Range); 1171 gen_seed: Rnd_seed.Generator; 1172 1173 begin 1174 Rnd_seed.Reset(gen_seed); -- 1x clock-based randomization 1175 seed_iterator:= Rnd_seed.Random(gen_seed); 1176 Flexible_temp_files.Initialize; 1177 Dual_IO.Create_Log(log_file); 1178 Repack_contents(from_zip_file, to_zip_file, html_report); 1179 Dual_IO.Close_Log; 1180 Flexible_temp_files.Finalize; 1181 end Rezip; 1182 1183 procedure Show_external_packer_list is 1184 procedure Display(p: Zipper_specification) is 1185 fix: String(1..8):= (others => ' '); 1186 begin 1187 Insert(fix,fix'First, S(p.title)); 1188 Ada.Text_IO.Put(" " & fix); 1189 fix:= (others => ' '); 1190 Insert(fix,fix'First, S(p.name)); 1191 Ada.Text_IO.Put_Line(" Executable: " & fix & " URL: " & S(p.URL)); 1192 end Display; 1193 name_is_new: Boolean; 1194 begin 1195 for e in External loop 1196 name_is_new:= True; 1197 for ee in External'First .. External'Pred(e) loop 1198 name_is_new:= name_is_new and ext(e).name /= ext(ee).name; 1199 end loop; 1200 if name_is_new then 1201 Display(ext(e)); 1202 end if; 1203 end loop; 1204 Display(defl_opt); 1205 end Show_external_packer_list; 1206 1207end Rezip_lib; 1208