1------------------------------------------------------------------------------
2--  File:            rezip_lib.adb
3--  Description:     Recompression tool to make archives smaller.
4--                   Core moved from Rezip (main)
5--  Author:          Gautier de Montmollin
6------------------------------------------------------------------------------
7--
8-- To do:
9--  * In order to facilitate customization, ReZip could have a config file (
10--    http://sf.net/projects/ini-files/ ) to store external packer program
11--    names. See ZipMax as an example...
12--
13-- External programs used (feel free to customize/add/remove):
14--   7-Zip, KZip, Zip (info-zip), AdvZip, DeflOpt
15--   Web URL's: see Zipper_specification below or run ReZip without arguments.
16
17with Zip.Headers, Zip.Compress, UnZip;
18with Zip.Create;                        use Zip.Create;
19with Zip_Streams;                       use Zip_Streams;
20
21with My_feedback, Flexible_temp_files;
22
23with Ada.Calendar;                      use Ada.Calendar;
24with Ada.Directories;                   use Ada.Directories;
25with Ada.Text_IO;                       use Ada.Text_IO;
26with Dual_IO;
27with Ada.Integer_Text_IO;               use Ada.Integer_Text_IO;
28with Ada.Float_Text_IO;                 use Ada.Float_Text_IO;
29with Ada.Streams.Stream_IO;             use Ada.Streams.Stream_IO;
30with Ada.Strings.Fixed;                 use Ada.Strings.Fixed, Ada.Strings;
31with Ada.Strings.Unbounded;             use Ada.Strings.Unbounded;
32with Ada.Characters.Handling;           use Ada.Characters.Handling;
33with Ada.Unchecked_Deallocation;
34with Ada.Numerics.Discrete_Random;
35
36with Interfaces;                        use Interfaces;
37
38with GNAT.OS_Lib;
39
40package body Rezip_lib is
41
42  function S (Source : Ada.Strings.Unbounded.Unbounded_String) return String
43    renames Ada.Strings.Unbounded.To_String;
44  function U (Source : String) return Ada.Strings.Unbounded.Unbounded_String
45    renames Ada.Strings.Unbounded.To_Unbounded_String;
46
47  -- This might be better read from a config file...
48  --
49  type Zipper_specification is record
50    name, title, URL, options: Unbounded_String;
51    expanded_options    : Unbounded_String;
52    -- options with dynamically expanded tokens
53    made_by_version     : Unsigned_16;
54    pkzm                : Zip.PKZip_method;
55    limit               : Zip.File_size_type;
56    -- Compression is considered too slow or unefficient beyond limit
57    -- E.g., kzip's algorithm might be O(N^2) or worse; on large files,
58    --   deflate_e or other methods are better anyway
59    randomized          : Boolean;
60  end record;
61
62  NN: constant Unbounded_String:= Null_Unbounded_String;
63
64  --  Give up recompression above a certain data size for some external packers like KZip
65  --  or Zopfli.
66  --
67  kzip_zopfli_limit: constant:= 2_000_000;
68
69  type Approach is (
70    original,
71    shrink,
72    reduce_4,
73    deflate_3,
74    lzma_2, lzma_3,
75    presel_1, presel_2,
76    external_1, external_2, external_3, external_4,
77    external_5, external_6, external_7, external_8,
78    external_9, external_10, external_11, external_12,
79    external_13
80  );
81
82  subtype Internal is Approach
83    range Approach'Succ(Approach'First) .. Approach'Pred(external_1);
84  subtype External is Approach
85    range external_1 .. Approach'Last;
86
87  ext: array(External) of Zipper_specification:=
88    ( -- Zip 2.32 or later:
89      (U("zip"), U("Zip"), U("http://info-zip.org/"),
90         U("-9"), NN, 20, Zip.deflate, 0, False),
91      -- 7-Zip 4.64 or later; Deflate:
92      (U("7z"),
93         U("7-Zip"), U("http://7-zip.org/"),
94         U("a -tzip -mm=deflate -mfb=258 -mpass=#RAND#(7,15) -mmc=10000"),
95         NN, 20, Zip.deflate, 0, True),
96      (U("7z"),
97         U("7-Zip"), NN,
98         U("a -tzip -mm=deflate64 -mfb=257 -mpass=15 -mmc=10000"),
99         NN, 21, Zip.deflate_e, 0, False),
100      -- KZip:
101      (U("kzip"),U("KZIP"),U("http://www.advsys.net/ken/utils.htm"),
102         U("/rn /b0"), NN, 20, Zip.deflate, kzip_zopfli_limit, True),
103      (U("kzip"),U("KZIP"),NN,
104         U("/rn /b#RAND#(0,128)"), NN, 20, Zip.deflate, kzip_zopfli_limit, True),
105      (U("kzip"),U("KZIP"),NN,
106         U("/rn /b#RAND#(128,2048)"), NN, 20, Zip.deflate, kzip_zopfli_limit, True),
107      -- Zip 3.0 or later; BZip2:
108      (U("zip"), U("Zip"), U("http://info-zip.org/"),
109         U("-#RAND#(1,9) -Z bzip2"), NN, 46, Zip.bzip2, 0, True),
110      -- 7-Zip 9.20 or later; LZMA:
111      (U("7z"), U("7-Zip"), NN,
112         U("a -tzip -mm=LZMA -mx=9"), NN, 63, Zip.lzma_meth, 0, False),
113      (U("7z"), U("7-Zip"), NN, -- dictionary size 2**19 = 512 KB
114         U("a -tzip -mm=LZMA:a=2:d=19:mf=bt3:fb=128:lc=0:lp=2"), NN, 63, Zip.lzma_meth, 0, False),
115      (U("7z"), U("7-Zip"), NN,
116         U("a -tzip -mm=LZMA:a=2:d=#RAND#(3200,3700)k:mf=bt4:fb=#RAND#(255,273):lc=2:lp0:pb0"),
117         NN, 63, Zip.lzma_meth, 0, True),
118      (U("7z"), U("7-Zip"), NN, -- dictionary size 2**25 = 32 MB
119         U("a -tzip -mm=LZMA:a=2:d=25:mf=bt3:fb=255:lc=7"), NN, 63, Zip.lzma_meth, 0, False),
120      (U("7z"), U("7-Zip"), NN, -- dictionary size 2**26 = 64 MB
121         U("a -tzip -mm=LZMA:a=2:d=26:mf=bt3:fb=222:lc=8:lp0:pb1"), NN, 63, Zip.lzma_meth, 0, False),
122      -- AdvZip: advancecomp v1.19+ interesting for the Zopfli algorithm
123      (U("advzip"), U("AdvZip"), U("http://advancemame.sf.net/comp-readme.html"),
124         U("-a -4"), NN, 20, Zip.deflate, kzip_zopfli_limit, False)
125    );
126
127  defl_opt: constant Zipper_specification:=
128    (U("deflopt"), U("DeflOpt"), U("http://www.walbeehm.com/download/"),
129     NN, NN, 0, Zip.deflate, 0, False);
130
131  procedure Rezip (
132    from_zip_file      : String;
133    to_zip_file        : String;
134    format_choice      : Zip_format_set := all_formats;  --  force output into selected format set
135    touch              : Boolean        := False;        --  set time stamps to now
136    lower              : Boolean        := False;        --  set full file names to lower case
137    delete_comment     : Boolean        := False;        --  delete zip comment
138    randomized_stable  : Positive       := 1;
139    log_file           : String         := "";
140    html_report        : String         := "";
141    internal_only      : Boolean        := False         --  Zip-Ada algorithms only, no ext. call
142  )
143  is
144
145    package DFIO is new Dual_IO.Float_IO(Float);
146
147    procedure Rip_data(
148      archive      : Zip.Zip_info; -- from this archive...
149      input        : in out Root_Zipstream_Type'Class;
150      data_name    : String;       -- extract this data
151      rip_rename   : String;       -- to this file (compressed)
152      unzip_rename : String;       -- and this one (uncompressed)
153      header       : out Zip.Headers.Local_File_Header
154    )
155    is
156      file_index     : Zip_Streams.ZS_Index_Type;
157      comp_size      : Zip.File_size_type;
158      uncomp_size    : Zip.File_size_type;
159      file_out       : Ada.Streams.Stream_IO.File_Type;
160      dummy_encoding : Zip.Zip_name_encoding;
161      dummy_crc      : Unsigned_32;
162      use UnZip;
163    begin
164      Zip.Find_offset(
165        info           => archive,
166        name           => data_name,
167        name_encoding  => dummy_encoding,
168        file_index     => file_index,
169        comp_size      => comp_size,
170        uncomp_size    => uncomp_size,
171        crc_32         => dummy_crc
172      );
173      Set_Index(input, file_index);
174      Zip.Headers.Read_and_check(input, header);
175      -- Skip name and extra field
176      Set_Index(input,
177        Index(input) +
178          Zip_Streams.ZS_Size_Type
179           (header.extra_field_length +
180            header.filename_length)
181      );
182      -- * Get the data, compressed
183      Ada.Streams.Stream_IO.Create(file_out, Out_File, rip_rename);
184      Zip.Copy_chunk(input, Stream(file_out).all, Integer(comp_size));
185      Close(file_out);
186      if unzip_rename /= "" then
187        -- * Get the data, uncompressed
188        Extract(
189          from    => archive,
190          what    => data_name,
191          rename  => unzip_rename,
192          options =>
193          (   test_only => False,
194              junk_directories => False,
195              case_sensitive_match => True,
196              extract_as_text => False
197          )
198        );
199      end if;
200    end Rip_data;
201
202    Approach_to_Method: constant array(Internal) of Zip.Compress.Compression_Method:=
203      (shrink    => Zip.Compress.Shrink,
204       reduce_4  => Zip.Compress.Reduce_4,
205       deflate_3 => Zip.Compress.Deflate_3,
206       lzma_2    => Zip.Compress.LZMA_2,
207       lzma_3    => Zip.Compress.LZMA_3,
208       presel_1  => Zip.Compress.Preselection_1,
209       presel_2  => Zip.Compress.Preselection_2
210      );
211
212    type Packer_info is record
213      size            : Zip.File_size_type;
214      zfm             : Unsigned_16;
215      count           : Natural;
216      saved           : Integer_64;
217      -- can be negative if -defl chosen: suboptimal recompression,
218      -- but compatible method
219      uncomp_size     : Unsigned_64;
220      -- summed uncompressed sizes might be more than 2**32
221      expanded_options: Unbounded_String;
222      iter            : Positive; -- iterations needed
223      LZMA_EOS        : Boolean;
224    end record;
225
226    type Packer_info_array is array(Approach) of Packer_info;
227
228    type Dir_entry;
229    type p_Dir_entry is access Dir_entry;
230    --
231    type Dir_entry is record
232      head: Zip.Headers.Central_File_Header;
233      name: Unbounded_String;
234      next: p_Dir_entry:= null;
235      chosen_approach: Approach:= original;
236      info: Packer_info_array;
237    end record;
238
239    function Temp_name(
240      compressed: Boolean;
241      appr      : Approach
242    )
243      return String
244    is
245      initial: constant array(Boolean) of Character:= ('u','c');
246    begin
247      return
248        Flexible_temp_files.Radix &
249        "_!" & initial(compressed) &
250        '!' & Trim(Integer'Image(Approach'Pos(appr)), Left) &
251        "!_.tmp";
252    end Temp_name;
253
254    function Img(a: Approach; html: Boolean) return String is
255      function Repl(s: String) return String is
256        t: String:= s;
257      begin
258        for i in t'Range loop
259          if html and t(i) = ':' then t(i):= ' '; end if; -- break too long lines
260        end loop;
261        return t;
262      end Repl;
263    begin
264      if a in External then
265        return "External: " & S(ext(a).title) & ", " & Repl(S(ext(a).expanded_options));
266      else
267        declare
268          s: constant String:= Approach'Image(a);
269        begin
270          return s(s'First) & To_Lower(s(s'First+1..s'Last) & (Approach'Width-s'Length+1) * ' ');
271        end;
272      end if;
273    end Img;
274
275    --  From AZip_Common...
276    function Image_1000(r: Zip.File_size_type; separator: Character:= ''') return String is
277      s: constant String:= Zip.File_size_type'Image(r);
278      t: String(s'First..s'First+(s'Length*4)/3);
279      j, c: Natural;
280    begin
281      --  For signed integers
282      --  if r < 0 then
283      --    return '-' & Image_1000(abs r, separator);
284      --  end if;
285      --
286      --  We build result string t from right to left
287      j:= t'Last + 1;
288      c:= 0;
289      for i in reverse s'First..s'Last loop
290        exit when s(i) = ' ' or s(i) = '-';
291        if c > 0 and then c mod 3 = 0 then
292          j:= j - 1;
293          t(j):= separator;
294        end if;
295        j:= j - 1;
296        t(j):= s(i);
297        c:= c + 1;
298      end loop;
299      return t(j..t'Last);
300    end Image_1000;
301
302    function Image_1000(r: Integer_64; separator: Character:= ''') return String is
303      s: constant String:= Integer_64'Image(r);
304      t: String(s'First..s'First+(s'Length*4)/3);
305      j, c: Natural;
306    begin
307      --  For signed integers
308      if r < 0 then
309        return '-' & Image_1000(abs r, separator);
310      end if;
311      --  We build result string t from right to left
312      j:= t'Last + 1;
313      c:= 0;
314      for i in reverse s'First..s'Last loop
315        exit when s(i) = ' ' or s(i) = '-';
316        if c > 0 and then c mod 3 = 0 then
317          j:= j - 1;
318          t(j):= separator;
319        end if;
320        j:= j - 1;
321        t(j):= s(i);
322        c:= c + 1;
323      end loop;
324      return t(j..t'Last);
325    end Image_1000;
326
327    procedure Call_external(
328      packer:        String;
329      args  :        String
330    )
331    is
332      use GNAT.OS_Lib;
333      procedure Dispose is
334        new Ada.Unchecked_Deallocation(Argument_List, Argument_List_Access);
335      list: Argument_List_Access;
336      ok: Boolean;
337    begin
338      Dual_IO.Put_Line(packer & " [" & args & ']');
339      list:= Argument_String_To_List(args);
340      GNAT.OS_Lib.Spawn(packer, list.all, ok);
341      Dispose(list);
342      if not ok then
343        Dual_IO.Put_Line(
344          "Warning: cannot call " & packer &
345          ". Is it callable through the ""path"" ?"
346        );
347      end if;
348    end Call_external;
349
350    seed_iterator: Natural;
351
352    procedure Call_external_expanded(
353      packer    :        String;
354      options   :        String;
355      other_args:        String;
356      expand    : in out Unbounded_String -- expanded arguments
357    )
358    is
359      type Token is (rand);
360    begin
361      expand:= U(options);
362      for t in Token loop
363        loop
364          declare
365            tok: constant String:= '#' & Token'Image(t) & '#';
366            idx: constant Natural:= Index(expand, tok);
367            par: constant Natural:= Index(expand, ")");
368            replace: Unbounded_String;
369          begin
370            exit when idx = 0;  --  No more of token t to replace
371            declare
372              opt: constant String:= S(expand); -- partially processed option string
373              curr: constant String:= opt(idx+1..opt'Last); -- current option
374              par_a: constant Natural:= Index(curr, "(");
375              par_z: constant Natural:= Index(curr, ")");
376              comma: constant Natural:= Index(curr, ",");
377              n1, n2, n: Integer;
378            begin
379              case t is
380                when rand =>
381                  n1:= Integer'Value(curr(par_a+1..comma-1));
382                  n2:= Integer'Value(curr(comma+1..par_z-1));
383                  declare
384                    subtype rng is Integer range n1..n2;
385                    package Rnd is new Ada.Numerics.Discrete_Random(rng);
386                    gen: Rnd.Generator;
387                  begin
388                    Rnd.Reset(gen, seed_iterator);
389                    -- On GNAT the clock-based Reset is too coarse
390                    -- (gives many times the same seed when called with small
391                    -- time intervals).
392                    seed_iterator:= seed_iterator + 1;
393                    n:= Rnd.Random(gen);
394                  end;
395                  replace:= U(Trim(Integer'Image(n),Left));
396              end case;
397              Replace_Slice(expand, idx, par, S(replace));
398            end;
399          end;
400        end loop;
401      end loop;
402      Call_external(packer, S(expand) & ' ' & other_args);
403    end Call_external_expanded;
404
405    procedure Process_External(
406      packer     : String;
407      options    : String;
408      out_name   : String;
409      is_rand    : Boolean;
410      is_deflate : Boolean;
411      info       : out Packer_info
412    )
413    is
414      temp_zip    : constant String := Simple_Name (Flexible_temp_files.Radix) & "_$temp$.zip";
415      rand_winner : constant String := Simple_Name (Flexible_temp_files.Radix) & "_$rand$.tmp";
416      options_winner: Unbounded_String;
417      data_name: constant String:= Simple_Name(Temp_name(False,original));
418      zi_ext: Zip.Zip_info;
419      header: Zip.Headers.Local_File_Header;
420      MyStream   : aliased File_Zipstream;
421      cur_dir: constant String:= Current_Directory;
422      size_memory: array(1..randomized_stable) of Zip.File_size_type:= (others => 0);
423      size: Zip.File_size_type:= 0;
424      zfm: Unsigned_16;
425      attempt: Positive:= 1;
426      dummy_exp_opt: Unbounded_String;
427    begin
428      -- We jump into the TEMP directory, to avoid putting pathes into the
429      -- temporary zip file.
430      Set_Directory(Containing_Directory(Flexible_temp_files.Radix));
431      loop
432        if Exists(temp_zip) then -- remove (eventually broken) zip
433          Delete_File(temp_zip);
434        end if;
435        Call_external_expanded(
436          packer,
437          options,
438          temp_zip & ' ' & data_name,
439          info.expanded_options
440        );
441        if (not Exists(temp_zip)) and then Ada.Directories.Size(data_name) = 0 then
442          --  ADVZip 1.19 doesn't create a zip file for a 0-size entry; we call Zip instead...
443          Call_external_expanded("zip", "", temp_zip & ' ' & data_name, dummy_exp_opt);
444        end if;
445        if is_deflate then
446          --  Post processing of "deflated" entry with DeflOpt:
447          Call_external(S(defl_opt.name), temp_zip);
448        end if;
449        --  Now, rip
450        Set_Name (MyStream, temp_zip);
451        Open (MyStream, In_File);
452        Zip.Load( zi_ext, MyStream, True );
453        Rip_data(
454          archive      => zi_ext,
455          input        => MyStream,
456          data_name    => data_name,
457          rip_rename   => out_name,
458          unzip_rename => "",
459          header       => header
460        );
461        Close(MyStream);
462        Delete_File(temp_zip);
463        Zip.Delete(zi_ext);
464        --
465        if randomized_stable = 1 or not is_rand then -- normal behaviour (1 attempts)
466          size:= header.dd.compressed_size;
467          zfm := header.zip_type;
468          info.iter:= 1;
469          exit;
470        end if;
471        --
472        -- Here, we process the cases where compressed sizes need
473        -- to be reduced and we expect a stable size over n=randomized_stable
474        -- attempts.
475        --
476        if attempt = 1 or else
477          header.dd.compressed_size < size -- better size
478        then
479          size:= header.dd.compressed_size;
480          zfm := header.zip_type;
481          if Exists(rand_winner) then
482            Delete_File(rand_winner);
483          end if;
484          Rename(out_name, rand_winner);
485          options_winner:= info.expanded_options;
486        end if;
487        --
488        -- Manage the array of last n=randomized_stable sizes
489        --
490        if attempt > size_memory'Last then
491          for i in size_memory'First+1..size_memory'Last loop
492            size_memory(i-1):= size_memory(i);
493          end loop;
494          size_memory(size_memory'Last):= size;
495        else
496          size_memory(attempt):= size;
497        end if;
498        --
499        -- Check stability over n=randomized_stable attempts
500        --
501        if attempt >= randomized_stable then
502          if size_memory(randomized_stable) = size_memory(1) then
503            if Exists(out_name) then
504              Delete_File(out_name);
505            end if;
506            Rename(rand_winner, out_name);
507            info.expanded_options:= options_winner;
508            info.iter:= attempt;
509            exit;
510          end if;
511        end if;
512        attempt:= attempt + 1;
513      end loop;
514      info.size       := size;
515      info.uncomp_size:= Unsigned_64(header.dd.uncompressed_size);
516      -- uncomp_size should not matter (always the same).
517      info.zfm        := zfm;
518      info.LZMA_EOS   := (zfm = 14) and (header.bit_flag and Zip.Headers.LZMA_EOS_Flag_Bit) /= 0;
519      -- We jump back to the startup directory.
520      Set_Directory(cur_dir);
521    end Process_External;
522
523    --  Compress data as raw compressed data
524    procedure Process_Internal_Raw(a: Approach; e: in out Dir_entry) is
525      File_in       : aliased File_Zipstream;
526      File_out      : aliased File_Zipstream;
527    begin
528      Set_Name (File_in, Temp_name(False,original));
529      Open (File_in, In_File);
530      Set_Name (File_out, Temp_name(True,a));
531      Create (File_out, Out_File);
532      Zip.Compress.Compress_data
533      (
534        input            => File_in,
535        output           => File_out,
536        input_size_known => True,
537        input_size       => e.head.short_info.dd.uncompressed_size,
538        method           => Approach_to_Method(a),
539        feedback         => My_feedback'Access,
540        password         => "",
541        content_hint     => Zip.Compress.Guess_type_from_name(S(e.name)),
542        CRC              => e.head.short_info.dd.crc_32,
543        -- we take the occasion to compute the CRC if not
544        -- yet available (e.g. JAR)
545        output_size      => e.info(a).size,
546        zip_type         => e.info(a).zfm
547      );
548      e.info(a).LZMA_EOS := e.info(a).zfm = 14;
549      Close(File_in);
550      Close(File_out);
551    end Process_Internal_Raw;
552
553    --  Compress data as a Zip archive (like external methods), then call post-processing
554    procedure Process_Internal_as_Zip(a: Approach; e: in out Dir_entry) is
555      zip_file : aliased File_Zipstream;
556      archive : Zip_Create_info;
557      temp_zip: constant String:= Simple_Name(Flexible_temp_files.Radix) & "_$temp$.zip";
558      data_name: constant String:= Simple_Name(Temp_name(False,original));
559      zi_ext: Zip.Zip_info;
560      header: Zip.Headers.Local_File_Header;
561      MyStream   : aliased File_Zipstream;
562      cur_dir: constant String:= Current_Directory;
563    begin
564      Set_Directory(Containing_Directory(Flexible_temp_files.Radix));
565      Create (archive, zip_file'Unchecked_Access, temp_zip);
566      Set(archive, Approach_to_Method(a));
567      Add_File(archive, data_name);
568      Finish (archive);
569      --  Post processing of "deflated" entry with DeflOpt:
570      Call_external(S(defl_opt.name), temp_zip);
571      --  Now, rip
572      Set_Name (MyStream, temp_zip);
573      Open (MyStream, In_File);
574      Zip.Load( zi_ext, MyStream, True );
575      Rip_data(
576        archive      => zi_ext,
577        input        => MyStream,
578        data_name    => data_name,
579        rip_rename   => Temp_name(True,a),
580        unzip_rename => "",
581        header       => header
582      );
583      e.info(a).size:= header.dd.compressed_size;
584      e.info(a).zfm := header.zip_type;
585      e.info(a).LZMA_EOS :=
586        (header.zip_type = 14) and (header.bit_flag and Zip.Headers.LZMA_EOS_Flag_Bit) /= 0;
587      Close(MyStream);
588      Delete_File(temp_zip);
589      Zip.Delete(zi_ext);
590      Set_Directory(cur_dir);
591    end Process_Internal_as_Zip;
592
593    time_0 : constant Ada.Calendar.Time:= Clock;
594
595    procedure Repack_contents(orig_name, repacked_name, html_report_name: String)
596    is
597      zi: Zip.Zip_info;
598      MyStream   : aliased File_Zipstream;
599
600      list, e, curr: p_Dir_entry:= null;
601      repacked_zip_file   : aliased File_Zipstream;
602      null_packer_info: constant Packer_info := (0,0,0,0,0,NN,1,False);
603      total: Packer_info_array:= (others => null_packer_info);
604      -- total(a).count counts the files where approach 'a' was optimal
605      -- total(a).saved counts the saved bytes when approach 'a' was optimal
606      total_choice: Packer_info:= null_packer_info;
607      summary: Ada.Text_IO.File_Type;
608      T0, T1 : Ada.Calendar.Time;
609      seconds: Duration;
610      --
611      type Approach_Filtering is array(Approach) of Boolean;
612      consider_a_priori: Approach_Filtering;
613      --
614      lightred: constant String:= "#f43048";
615
616      procedure Process_one(unique_name: String) is
617        comp_size  :  Zip.File_size_type;
618        uncomp_size:  Zip.File_size_type;
619        choice: Approach:= original;
620        deco: constant String:= "-->-->-->" & (20+unique_name'Length) * '-';
621        mth: Zip.PKZip_method;
622        consider: Approach_Filtering;
623        gain: Integer_64;
624        --
625        procedure Winner_color is
626        begin
627          if e.info(choice).size < e.info(original).size then
628            Put(summary,"<td bgcolor=lightgreen><b>");
629            --  We were able to reduce the size. :-)
630          elsif e.info(choice).size = e.info(original).size then
631            Put(summary,"<td><b>");
632            --  Original was already the best.
633          else
634            Put(summary,"<td bgcolor=" & lightred & "><b>");
635            --  Forced to a format with a less efficient compression. :-(
636          end if;
637        end Winner_color;
638        --
639        use Zip;
640      begin
641        --  Start with the set of approaches that has been decided for all entries.
642        consider:= consider_a_priori;
643        if unique_name = "" or else
644          (   unique_name(unique_name'Last)='\'
645           or unique_name(unique_name'Last)='/'
646          )
647        then
648          return; -- directories are useless entries!
649        end if;
650        total_choice.count:= total_choice.count + 1;
651        Dual_IO.Close_and_Append_Log; -- have an up to date copy on file system
652        Dual_IO.Put_Line(deco);
653        Dual_IO.Put_Line(
654          ' ' &
655          Integer'Image((100 * total_choice.count) / Zip.Entries(zi)) &
656          "% - Processing " &
657          unique_name & ',' &
658          Integer'Image(total_choice.count) &
659          " of" &
660          Integer'Image(Zip.Entries(zi))
661        );
662        Dual_IO.Put_Line(deco);
663        Dual_IO.New_Line;
664        --
665        e:= new Dir_entry;
666        if curr = null then
667          curr:= e;
668          list:= e;
669        else
670          curr.next:= e;
671          curr:= e;
672        end if;
673        e.name:= U(unique_name);
674        e.head.made_by_version     := 20; -- version 2.0
675        e.head.comment_length      := 0;
676        e.head.disk_number_start   := 0;
677        e.head.internal_attributes := 0; -- 0: seems binary; 1, text
678        e.head.external_attributes := 0;
679        --
680        Dual_IO.Put("    Phase 1:  dump & unzip -");
681        Rip_data(
682          archive      => zi,
683          input        => MyStream,
684          data_name    => unique_name,
685          rip_rename   => Temp_name(True,original),
686          unzip_rename => Temp_name(False,original),
687          header       => e.head.short_info
688        );
689        --
690        if touch then
691          e.head.short_info.file_timedate:= Zip.Convert(time_0);
692        end if;
693        if lower then
694          e.name:= U(To_Lower(S(e.name)));
695        end if;
696        -- Get reliable data from zi
697        Zip.Get_sizes(
698          info           => zi,
699          name           => unique_name,
700          comp_size      => comp_size,
701          uncomp_size    => uncomp_size
702        );
703        Dual_IO.Put_Line(" done");
704        --
705        --  Apply limitations: skip some methods if certain conditions are met.
706        --  For instance:
707        --    Shrink may in rare cases be better, but only for tiny files.
708        --    KZip and Zopfli are excellent but really too slow on large files.
709        --
710        for a in Approach loop
711          case a is
712            when original =>
713              null;
714            when shrink =>
715              consider(a):= consider(a) and uncomp_size <= 6000;
716            when reduce_4 =>
717              consider(a):= consider(a) and uncomp_size <= 9000;
718            when External =>
719              consider(a):= consider(a) and (ext(a).limit = 0 or uncomp_size <= ext(a).limit);
720            when others =>
721              null;
722          end case;
723        end loop;
724        Dual_IO.Put_Line("    Phase 2:  try different tactics...");
725        --
726        Try_all_approaches:
727        --
728        for a in Approach loop
729          if consider(a) then
730            Dual_IO.Put("              -o-> " & Img(a, html => False));
731            e.info(a).iter:= 1;
732            case a is
733              --
734              when original =>
735                -- This is from the original .zip - just record size and method
736                e.info(a).size:= comp_size;
737                e.info(a).zfm := e.head.short_info.zip_type;
738                e.info(a).LZMA_EOS :=
739                  (e.info(a).zfm = 14) and
740                  (e.head.short_info.bit_flag and Zip.Headers.LZMA_EOS_Flag_Bit) /= 0;
741                mth:= Zip.Method_from_code(e.info(a).zfm);
742                --
743              when Internal =>
744                if Approach_to_Method(a) in Zip.Compress.Deflation_Method
745                  and not internal_only
746                then
747                  --  We will post-process our internal Deflate with DeflOpt.
748                  Process_Internal_as_Zip(a, e.all);
749                else
750                  Process_Internal_Raw(a, e.all);
751                end if;
752              when External =>
753                Dual_IO.New_Line;
754                Process_External(
755                  S(ext(a).name),
756                  S(ext(a).options),
757                  Temp_name(True,a),
758                  ext(a).randomized,
759                  ext(a).pkzm = Zip.deflate,
760                  e.info(a)
761                );
762                e.head.made_by_version:= ext(a).made_by_version;
763                ext(a).expanded_options:= e.info(a).expanded_options;
764                --
765            end case;
766            total(a).size:= total(a).size + e.info(a).size;
767            if e.info(a).size < e.info(choice).size then
768              --  Hurra, we found a smaller size than previous choice!
769              choice:= a;
770            end if;
771            if choice = original and not format_choice(mth) then
772              --  This occurs if we want to make an archive with only a certain set of formats,
773              --  for instance deflate_or_store, which is the most compatible.
774              --  Since approach _a_ uses a format in the desired set, the choice will be
775              --  forced out of original, even with a worse size.
776              choice:= a;
777            end if;
778            Dual_IO.New_Line;
779          end if;
780        end loop Try_all_approaches;
781        --
782        total_choice.size:= total_choice.size + e.info(choice).size;
783        total(choice).count:= total(choice).count + 1;
784        total_choice.uncomp_size:=
785          total_choice.uncomp_size + Unsigned_64(uncomp_size);
786        gain:= Integer_64(e.info(original).size) - Integer_64(e.info(choice).size);
787        total(choice).saved:= total(choice).saved + gain;
788        total_choice.saved:= total_choice.saved + gain;
789        --
790        Dual_IO.New_Line;
791        Dual_IO.Put(
792          "    Phase 3:  Winner is " & Img(choice, html => False) &
793          "; gain in bytes:" & Integer_64'Image(gain) &
794          "; writing data -"
795        );
796        -- * Summary outputs
797        Put(summary,
798          "<tr><td>" &
799          Trim(Integer'Image(total_choice.count),Left) &
800          -- '/' &
801          -- Trim(Integer'Image(Zip.Entries(zi)),Left) &
802          "</td>" &
803          "<td bgcolor=lightgrey><tt>" & unique_name & "</tt>, " &
804          Image_1000(uncomp_size) & "</td>");
805        for a in Approach loop
806          if consider_a_priori(a) then
807            if not consider(a) then
808              Put(summary,"<td bgcolor=lightgray>skipped");
809            elsif a = choice then
810              Winner_color;
811            elsif e.info(a).size = e.info(choice).size then -- ex aequo
812              Put(summary,"<td bgcolor=lightblue><b>");
813            else
814              Put(summary,"<td>");
815            end if;
816            if consider(a) then
817              Put(summary, Image_1000(e.info(a).size));
818            end if;
819            if choice = a then
820              Put(summary,"</b>");
821            end if;
822            Put(summary,"</td>");
823          end if;
824        end loop;
825        -- Recall winner approach, method and size:
826        Put(summary,"<td>" & Img(choice, html => True) & "</td>");
827        Put(summary,
828          "<td bgcolor=#fafa64>" &
829          Zip.Image(Zip.Method_from_code(e.info(choice).zfm)) &
830          "</td>"
831        );
832        Put(summary,
833          "<td>" &
834          Zip.Image(Zip.Method_from_code(e.info(original).zfm)) &
835          "</td>"
836        );
837        Winner_color;
838        Put(summary, Image_1000(e.info(choice).size));
839        Put(summary,"</b></td><td>");
840        if e.info(original).size > 0 then
841          Put(
842            summary,
843            100.0 * Float(e.info(choice).size) / Float(e.info(original).size),
844            3,2,0
845          );
846          Put(summary,"%");
847        end if;
848        Put(summary,"</td><td>");
849        if uncomp_size > 0 then
850          Put(
851            summary,
852            100.0 * Float(e.info(choice).size) / Float(uncomp_size),
853            3,2,0
854          );
855          Put(summary,"%");
856        end if;
857        Put(summary,"</td><td>");
858        Put(summary,Integer'Image(e.info(choice).iter));
859        Put_Line(summary,"</td></tr>");
860        --
861        -- Write winning data:
862        --
863        e.head.short_info.extra_field_length:= 0; -- We choose to ignore it...
864        -- No data descriptor after data (bit 3); no EOS for LZMA (bit 1):
865        e.head.short_info.bit_flag:=
866          e.head.short_info.bit_flag and (2#1111_1111_1111_0101#);
867        --  Set the LZMA EOS flag if present in winner entry (checked by 7-Zip v.17.01):
868        if e.info(choice).LZMA_EOS then
869          e.head.short_info.bit_flag:= e.head.short_info.bit_flag or Zip.Headers.LZMA_EOS_Flag_Bit;
870        end if;
871        -- Set or adjust the pre-data data descriptor:
872        -- NB: even if missing pre-data, CRC will have been computed
873        --     at least with one internal method
874        e.head.short_info.dd.uncompressed_size:= uncomp_size;
875        -- Put the winning size and method
876        e.head.short_info.dd.compressed_size:= e.info(choice).size;
877        e.head.short_info.zip_type:= e.info(choice).zfm;
878        e.head.local_header_offset:= Unsigned_32(Index(repacked_zip_file))-1;
879        Zip.Headers.Write(repacked_zip_file, e.head.short_info);
880        String'Write(repacked_zip_file'Access, S(e.name));
881        -- Copy the compressed data
882        Zip.Copy_file( Temp_name(True,choice), repacked_zip_file );
883        Dual_IO.Put_Line(" done");
884        Dual_IO.New_Line;
885      end Process_one;
886
887      procedure Process_all is new Zip.Traverse(Process_one);
888
889      ed: Zip.Headers.End_of_Central_Dir;
890
891      function Webcolor(a: Approach) return String is
892        v: Float;
893        sr,sg,sb: String(1..10);
894      begin
895        if total_choice.saved > 0 and
896          -- with options like -defl ot -fast_dec, we may have
897          -- negative values or other strange things:
898           total(a).saved >= 0
899        then
900          v:= Float(total(a).saved) / Float(total_choice.saved);
901          -- ^ contribution of approach 'a'
902        else
903          v:= 0.0;
904        end if;
905        Put(sr, 512 + Integer(144.0 + 111.0 * (1.0 - v)), 16);
906        sb:= sr;
907        Put(sg, 512 + Integer(238.0 + 17.0 * (1.0 - v)), 16);
908        return
909          sr(sr'Last-2..sr'Last-1) &
910          sg(sg'Last-2..sg'Last-1) &
911          sb(sb'Last-2..sb'Last-1);
912      end Webcolor;
913
914      meth: Zip.Compress.Compression_Method;
915
916    begin -- Repack_contents
917      T0:= Clock;
918      for a in Approach loop
919        case a is
920          when original =>
921            consider_a_priori(a):= True;
922          when Internal =>
923            meth:= Approach_to_Method(a);
924            case meth is
925              when Zip.Compress.Single_Method =>
926                consider_a_priori(a):= format_choice(Zip.Compress.Method_to_Format(meth));
927              when Zip.Compress.Multi_Method =>
928                --  For the sake of simplicity, we consider the Multi_Method's
929                --  only when all formats are admitted.
930                consider_a_priori(a):= format_choice = all_formats;
931            end case;
932          when External =>
933            consider_a_priori(a):= format_choice(ext(a).pkzm) and not internal_only;
934        end case;
935      end loop;
936      Set_Name (MyStream, orig_name);
937      Open (MyStream, In_File);
938      Zip.Load( zi, MyStream, True );
939
940      Set_Name (repacked_zip_file, repacked_name);
941      Create(repacked_zip_file, Out_File);
942      Create(summary, Out_File, html_report_name);
943      --
944      --  HTML Report begins here.
945      --
946      Put_Line(summary,
947        "<html><head><title>ReZip summary for file "
948         & orig_name & "</title></head>"
949      );
950      Put_Line(summary, "<style>.container { overflow-y: auto; height: 87%; }");
951      Put_Line(summary, "td_approach { width:115px; }");
952      Put_Line(summary, "</style><body>");
953      Put_Line(summary, "<font face=""Calibri, Arial, Tahoma""> <!-- Set font for the whole page !-->");
954      Put_Line(summary,
955        "<h2><a target=_blank href=" & Zip.web &
956        ">ReZip</a> summary for file " & orig_name & "</h2>"
957      );
958      Put_Line(summary,
959        "ReZip - Zip-Ada Library version " & Zip.version & " dated " & Zip.reference
960      );
961      if format_choice /= all_formats then
962        Put_Line(summary,
963          "<br><table border=0 cellpadding=0 cellspacing=0>" &
964          "<tr bgcolor=" & lightred &
965          "><td><b>An option that filters methods is on, " &
966          "result(s) may be sub-optimal - see details at bottom.</b></td></tr></table><br>"
967        );
968      end if;
969      Put_Line(summary, "<div class=""container""><table border=1 cellpadding=1 cellspacing=1>");
970      Put(summary,
971        "<tr bgcolor=lightyellow><td></td>"&
972        "<td align=right valign=top><b>Approach:</b></td>"
973      );
974      for a in Approach loop
975        if consider_a_priori(a) then
976          if a in External then
977            ext(a).expanded_options:= ext(a).options;
978          end if;
979          Put(summary, "<td valign=top class=""td_approach"">" & Img(a, html => True) & "</td>");
980        end if;
981      end loop;
982      Put_Line(summary, "</tr>");
983      Put(summary,
984        "<tr bgcolor=lightyellow><td></td>"&
985        "<td bgcolor=lightgrey valign=bottom><b>File name, uncompressed size:</b></td>"
986      );
987      --  Additionally, we show a row with the Approach's Compression_Method's output format (the
988      --  Zip.PKZip_method). If it is not unique, we mention it.
989      for a in Approach loop
990        if consider_a_priori(a) then
991          case a is
992            when original =>
993              Put(summary, "<td align=right bgcolor=#dddd00 class=""td_approach"">Approach's<br>format &rarr;</td>");
994            when Internal =>
995              Put(summary, "<td bgcolor=#fafa64>");
996              meth:= Approach_to_Method(a);
997              case meth is
998                when Zip.Compress.Single_Method =>
999                  Put(summary, Zip.Image(Zip.Compress.Method_to_Format(meth)));
1000                when Zip.Compress.Multi_Method =>
1001                  Put(summary, "(Various formats)");
1002              end case;
1003              Put(summary, "</td>");
1004            when External =>
1005              Put(summary, "<td bgcolor=#fafa64>" & Zip.Image(ext(a).pkzm) & "</td>");
1006          end case;
1007        end if;
1008      end loop;
1009      Put_Line(summary,
1010        "<td><b>Choice</b></td>"&
1011        "<td bgcolor=#dddd00>Choice's<br>method/<br>format</td>"&
1012        "<td>Original<br>method/<br>format</td>"&
1013        "<td>Smallest<br>size</td>" &
1014        "<td>% of<br>original</td><td>% of<br>uncompressed</td><td>Iterations</td></tr>"
1015      );
1016      --
1017      -- 1/ Recompress each file into the new archive:
1018      --
1019      Process_all(zi);
1020      --
1021      -- 2/ Almost done - write Central Directory:
1022      --
1023      ed.central_dir_offset:= Unsigned_32(Index(repacked_zip_file))-1;
1024      ed.total_entries:= 0;
1025      ed.central_dir_size:= 0;
1026      ed.main_comment_length:= 0;
1027      declare
1028        comment: constant String:= Zip.Zip_comment(zi);
1029      begin
1030        if not delete_comment then
1031          ed.main_comment_length:= comment'Length;
1032        end if;
1033        -- Restart at the beginning of the list
1034        e:= list;
1035        while e /= null loop
1036          ed.total_entries:= ed.total_entries + 1;
1037          Zip.Headers.Write(repacked_zip_file, e.head);
1038          String'Write(repacked_zip_file'Access, S(e.name));
1039          ed.central_dir_size:=
1040            ed.central_dir_size +
1041            Zip.Headers.central_header_length +
1042            Unsigned_32(e.head.short_info.filename_length);
1043          e:= e.next;
1044        end loop;
1045        ed.disknum:= 0;
1046        ed.disknum_with_start:= 0;
1047        ed.disk_total_entries:= ed.total_entries;
1048        Zip.Headers.Write(repacked_zip_file, ed);
1049        if not delete_comment then
1050          String'Write(repacked_zip_file'Access, comment);
1051        end if;
1052      end;
1053      Close(repacked_zip_file);
1054      Close(MyStream);
1055      --
1056      -- Cleanup
1057      --
1058      for a in Approach loop
1059        if consider_a_priori(a) then
1060          if Exists(Temp_name(True,a)) then
1061            Delete_File( Temp_name(True,a) );
1062          end if;
1063          if a = original then -- also an uncompressed data file to delete
1064            Delete_File( Temp_name(False,a) );
1065          end if;
1066        end if;
1067      end loop;
1068      -- Report total bytes
1069      Put(summary,"<tr><td></td><td><b>T<small>OTAL BYTES</small></b></td>");
1070      for a in Approach loop
1071        if consider_a_priori(a) then
1072          Put(summary,
1073            "<td bgcolor=#" & Webcolor(a) & ">" &
1074            Image_1000(total(a).size) & "</td>"
1075          );
1076        end if;
1077      end loop;
1078      Put(summary,
1079        "<td></td><td></td><td></td><td bgcolor=lightgreen><b>" & Image_1000(total_choice.size) &
1080        "</b></td><td>"
1081      );
1082      if total(original).size > 0 then
1083        Put(summary,
1084          100.0 * Float(total_choice.size) / Float(total(original).size),
1085          3,2,0
1086        );
1087        Put(summary,"%");
1088      end if;
1089      Put(summary, "</td><td>");
1090      if total_choice.uncomp_size > 0 then
1091        Put(summary,
1092          100.0 * Float(total_choice.size) / Float(total_choice.uncomp_size),
1093          3,2,0
1094        );
1095        Put(summary,"%");
1096      end if;
1097      Put_Line(summary, "</td></tr>");
1098      -- Report total files per approach
1099      Put(summary,"<tr><td></td><td><b>T<small>OTAL FILES (when optimal)</small></b></td>");
1100      for a in Approach loop
1101        if consider_a_priori(a) then
1102          Put(summary,
1103            "<td bgcolor=#" & Webcolor(a) & ">" &
1104            Integer'Image(total(a).count) & "</td>"
1105          );
1106        end if;
1107      end loop;
1108      Put(summary,
1109        "<td></td><td></td><td></td><td bgcolor=lightgreen><b>" & Integer'Image(total_choice.count) &
1110        "</b></td>" &
1111        "<td>"
1112      );
1113      Put_Line(summary, "</td></tr>");
1114      -- Report total saved bytes per approach
1115      Put(summary,"<tr><td></td><td><b>T<small>OTAL SAVED BYTES (when optimal)</small></b></td>");
1116      for a in Approach loop
1117        if consider_a_priori(a) then
1118          Put(summary, "<td bgcolor=#" & Webcolor(a) & ">" & Image_1000(total(a).saved) & "</td>");
1119        end if;
1120      end loop;
1121      Put(summary,
1122        "<td></td><td></td><td></td><td bgcolor=lightgreen><b>" &
1123        Image_1000(total_choice.saved) & "</b></td>" &
1124        "<td>"
1125      );
1126      if total(original).size > 0 then
1127        Put(summary,
1128          100.0 * Float(total_choice.saved) / Float(total(original).size),
1129          3,2,0
1130        );
1131        Put(summary,"%");
1132      end if;
1133      Put(summary, "</td><td>");
1134      if total_choice.uncomp_size > 0 then
1135        Put(summary,
1136          100.0 * Float(total_choice.saved) / Float(total_choice.uncomp_size),
1137          3,2,0
1138        );
1139        Put(summary,"%");
1140      end if;
1141      Put_Line(summary, "</td></tr></table></div><br><br>");
1142      Put_Line(summary, "<dt>Options used for ReZip</dt>");
1143      Put_Line(summary, "<dd>Randomized_stable =" & Integer'Image(randomized_stable) & "<br>");
1144      Put_Line(summary, "    Formats allowed:<br><table border=1 cellpadding=1 cellspacing=1>");
1145      for f in format_choice'Range loop
1146        Put_Line(summary,
1147          "      <tr><td>" & Zip.Image(f) & "</td><td>" &
1148          Boolean'Image(format_choice(f)) & "</td></tr>");
1149      end loop;
1150      Put_Line(summary, "    </table>");
1151      Put_Line(summary, "</dd>");
1152      T1:= Clock;
1153      seconds:= T1-T0;
1154      Put(summary, "Time elapsed : ");
1155      Put(summary,  Float( seconds ), 4, 2, 0 );
1156      Put(summary,  " seconds, or");
1157      Put(summary,  Float( seconds ) / 60.0, 4, 2, 0 );
1158      Put(summary,  " minutes, or");
1159      Put(summary,  Float( seconds ) / 3600.0, 4, 2, 0 );
1160      Put_Line(summary,  " hours.</font></body></html>");
1161      Close(summary);
1162      Dual_IO.Put("Time elapsed : ");
1163      DFIO.Put( Float( seconds ), 4, 2, 0 );
1164      Dual_IO.Put_Line( " sec");
1165      Dual_IO.Put_Line("All details for " & orig_name & " in " & html_report_name);
1166    end Repack_contents;
1167
1168    -- This is for randomizing the above seed_iterator.
1169    subtype Seed_Range is Integer range 1..1_000_000;
1170    package Rnd_seed is new Ada.Numerics.Discrete_Random(Seed_Range);
1171    gen_seed: Rnd_seed.Generator;
1172
1173  begin
1174    Rnd_seed.Reset(gen_seed); -- 1x clock-based randomization
1175    seed_iterator:= Rnd_seed.Random(gen_seed);
1176    Flexible_temp_files.Initialize;
1177    Dual_IO.Create_Log(log_file);
1178    Repack_contents(from_zip_file, to_zip_file, html_report);
1179    Dual_IO.Close_Log;
1180    Flexible_temp_files.Finalize;
1181  end Rezip;
1182
1183  procedure Show_external_packer_list is
1184    procedure Display(p: Zipper_specification) is
1185      fix: String(1..8):= (others => ' ');
1186    begin
1187      Insert(fix,fix'First, S(p.title));
1188      Ada.Text_IO.Put("  " & fix);
1189      fix:= (others => ' ');
1190      Insert(fix,fix'First, S(p.name));
1191      Ada.Text_IO.Put_Line(" Executable: " & fix & " URL: " & S(p.URL));
1192    end Display;
1193    name_is_new: Boolean;
1194  begin
1195    for e in External loop
1196      name_is_new:= True;
1197      for ee in External'First .. External'Pred(e) loop
1198        name_is_new:= name_is_new and ext(e).name /= ext(ee).name;
1199      end loop;
1200      if name_is_new then
1201        Display(ext(e));
1202      end if;
1203    end loop;
1204    Display(defl_opt);
1205  end Show_external_packer_list;
1206
1207end Rezip_lib;
1208