1-- Legal licensing note:
2
3--  Copyright (c) 2007 .. 2018 Gautier de Montmollin (Maintainer of the Ada version)
4--  SWITZERLAND
5
6--  Permission is hereby granted, free of charge, to any person obtaining a copy
7--  of this software and associated documentation files (the "Software"), to deal
8--  in the Software without restriction, including without limitation the rights
9--  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10--  copies of the Software, and to permit persons to whom the Software is
11--  furnished to do so, subject to the following conditions:
12
13--  The above copyright notice and this permission notice shall be included in
14--  all copies or substantial portions of the Software.
15
16--  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17--  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18--  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19--  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20--  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21--  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22--  THE SOFTWARE.
23
24-- NB: this is the MIT License, as found on the site
25-- http://www.opensource.org/licenses/mit-license.php
26
27with Zip.CRC_Crypto,
28     Zip.Compress.Shrink,
29     Zip.Compress.Reduce,
30     Zip.Compress.Deflate,
31     Zip.Compress.LZMA_E;
32
33with Ada.Characters.Handling;           use Ada.Characters.Handling;
34with Ada.Numerics.Discrete_Random;
35with Ada.Strings.Fixed;                 use Ada.Strings.Fixed;
36
37package body Zip.Compress is
38
39  use Zip_Streams, Zip.CRC_Crypto;
40
41  -------------------
42  -- Compress_data --
43  -------------------
44
45  procedure Compress_data
46   (input,
47    output          : in out Zip_Streams.Root_Zipstream_Type'Class;
48    input_size_known: Boolean;
49    input_size      : File_size_type;
50    method          : Compression_Method;
51    feedback        : Feedback_proc;
52    password        : String;
53    content_hint    : Data_content_type;
54    CRC             : out Interfaces.Unsigned_32;
55    output_size     : out File_size_type;
56    zip_type        : out Interfaces.Unsigned_16
57   )
58  is
59    use Interfaces;
60    counted: File_size_type;
61    user_aborting: Boolean;
62    idx_in:  constant ZS_Index_Type:= Index(input);
63    idx_out: constant ZS_Index_Type:= Index(output);
64    compression_ok: Boolean;
65    first_feedback: Boolean:= True;
66    --
67    is_encrypted: constant Boolean:= password /= "";
68    encrypt_pack, mem_encrypt_pack: Crypto_pack;
69    encrypt_header: Byte_Buffer(1..12);
70    package Byte_soup is new Ada.Numerics.Discrete_Random(Byte);
71    use Byte_soup;
72    cg: Byte_soup.Generator;
73    --
74    --  Store data as is, or, if do_write = False, just compute CRC (this is for encryption).
75    --
76    procedure Store_data(do_write: Boolean) is
77      Buffer      : Byte_Buffer (1 .. buffer_size);
78      Last_Read   : Natural;
79    begin
80      zip_type:= compression_format_code.store;
81      counted:= 0;
82      while not End_Of_Stream(input) loop
83        if input_size_known and counted >= input_size then
84          exit;
85        end if;
86        -- Copy data
87        BlockRead (input, Buffer, Last_Read);
88        counted:= counted + File_size_type (Last_Read);
89        Update(CRC, Buffer (1 .. Last_Read));
90        if do_write then
91          Encode(encrypt_pack, Buffer (1 .. Last_Read));
92          BlockWrite(output, Buffer (1 .. Last_Read));
93        end if;
94        -- Feedback
95        if feedback /= null and then
96          (first_feedback or (counted mod (2**16)=0) or
97          (input_size_known and counted = input_size))
98        then
99          if input_size_known then
100            feedback(
101              percents_done =>
102                Natural( (100.0 * Float(counted)) / Float(input_size) ),
103              entry_skipped => False,
104              user_abort    => user_aborting );
105          else
106            feedback(
107              percents_done => 0,
108              entry_skipped => False,
109              user_abort    => user_aborting );
110          end if;
111          first_feedback:= False;
112          if user_aborting then
113            raise User_abort;
114          end if;
115        end if;
116      end loop;
117      output_size:= counted;
118      compression_ok:= True;
119    end Store_data;
120    --
121    procedure Compress_data_single_method(actual_method: Single_Method) is
122    begin
123      Init(CRC);
124      if is_encrypted then
125        Init_keys(encrypt_pack, password);
126        Set_mode(encrypt_pack, encrypted);
127        --  A bit dumb from Zip spec: we need to know the final CRC in order to set up
128        --  the last byte of the encryption header, that allows for detecting if a password
129        --  is OK - this, with 255/256 probability of correct detection of a wrong password!
130        --  Result: 1st scan of the whole input stream with CRC calculation:
131        Store_data(do_write => False);
132        Reset(cg);
133        for i in 1..11 loop
134          encrypt_header(i):= Random(cg);
135        end loop;
136        encrypt_header(12):= Byte(Shift_Right( Final(CRC), 24 ));
137        Set_Index(input, idx_in);
138        Init(CRC);
139        Encode(encrypt_pack, encrypt_header);
140        BlockWrite(output, encrypt_header);
141        --
142        --  We need to remember at this point the encryption keys in case we need
143        --  to rewrite from here (compression failed, store data).
144        --
145        mem_encrypt_pack:= encrypt_pack;
146      else
147        Set_mode(encrypt_pack, clear);
148      end if;
149      --
150      --  Dispatch the work to child procedures doing the stream compression
151      --  in different formats, depending on the actual compression method.
152      --  For example, for methods LZMA_for_JPEG, LZMA_for_WAV, or LZMA_3, we
153      --  logically call Zip.Compress.LZMA_E for the job.
154      --
155      case actual_method is
156        --
157        when Store =>
158          Store_data(do_write => True);
159        --
160        when Shrink =>
161          Zip.Compress.Shrink(
162            input, output, input_size_known, input_size, feedback,
163            CRC, encrypt_pack, output_size, compression_ok
164          );
165          zip_type:= compression_format_code.shrink;
166        --
167        when Reduction_Method =>
168          Zip.Compress.Reduce(
169            input, output, input_size_known, input_size, feedback,
170            actual_method,
171            CRC, encrypt_pack, output_size, compression_ok
172          );
173          zip_type:= compression_format_code.reduce +
174            Unsigned_16(
175              Compression_Method'Pos(actual_method) -
176              Compression_Method'Pos(Reduce_1)
177            );
178        when Deflation_Method =>
179          Zip.Compress.Deflate(
180            input, output, input_size_known, input_size, feedback,
181            actual_method,
182            CRC, encrypt_pack, output_size, compression_ok
183          );
184          zip_type:= compression_format_code.deflate;
185        when LZMA_Method =>
186          Zip.Compress.LZMA_E(
187            input, output, input_size_known, input_size, feedback,
188            actual_method,
189            CRC, encrypt_pack, output_size, compression_ok
190          );
191          zip_type:= compression_format_code.lzma;
192      end case;
193      CRC:= Final(CRC);
194      --
195      -- Handle case where compression has been unefficient:
196      -- data to be compressed is too "random"; then compressed data
197      -- happen to be larger than uncompressed data
198      --
199      if not compression_ok then
200        -- Go back to the beginning and just store the data
201        Set_Index(input, idx_in);
202        if is_encrypted then
203          Set_Index(output, idx_out + 12);
204          --  Restore the encryption keys to their state just after the encryption header:
205          encrypt_pack:= mem_encrypt_pack;
206        else
207          Set_Index(output, idx_out);
208        end if;
209        Init(CRC);
210        Store_data(do_write => True);
211        CRC:= Final(CRC);
212      end if;
213      if is_encrypted then
214        output_size:= output_size + 12;
215      end if;
216    end Compress_data_single_method;
217
218    fast_presel: constant Boolean:=
219      method = Preselection_1 or (input_size_known and input_size < 22_805);
220
221    data_type_to_LZMA_method: constant array(Data_content_type) of LZMA_Method:=
222      (JPEG    => LZMA_for_JPEG,
223       ARW_RW2 => LZMA_for_ARW,
224       ORF_CR2 => LZMA_for_ORF,
225       MP3     => LZMA_for_MP3,
226       MP4     => LZMA_for_MP4,
227       PGM     => LZMA_for_PGM,
228       PPM     => LZMA_for_PPM,
229       PNG     => LZMA_for_PNG,
230       WAV     => LZMA_for_WAV,
231       others  => LZMA_1  --  Fake, should be unused as such.
232      );
233
234  begin
235    case method is
236      --
237      when Single_Method =>
238        Compress_data_single_method(method);
239      --
240      when Preselection_Method =>
241        case content_hint is
242          when Neutral =>  --  No clue about what kind of data
243            if input_size_known and input_size < 9_000 then
244              Compress_data_single_method(Deflation_Method'Last);  --  Deflate
245            elsif fast_presel then
246              --  See: Optimum, LZ77 sheet in za_work.xls
247              --       or l2_vs_l3.xls with a larger data set.
248              Compress_data_single_method(LZMA_2);                 --  LZMA with IZ_10 match finder
249            else
250              Compress_data_single_method(LZMA_3);                 --  LZMA with BT4 match finder
251            end if;
252          when ARW_RW2 | ORF_CR2 | MP3 | MP4 | JPEG | PGM | PPM | PNG | WAV =>
253            if input_size_known and input_size < 2_250 then
254              Compress_data_single_method(Deflation_Method'Last);  --  Deflate
255            else
256              Compress_data_single_method(data_type_to_LZMA_method(content_hint));
257            end if;
258          when GIF =>
259            if input_size_known and input_size < 350 then
260              Compress_data_single_method(Deflate_1);
261            else
262              Compress_data_single_method(LZMA_for_GIF);
263            end if;
264          when Zip_in_Zip =>
265            if input_size_known and input_size < 1_000 then
266              Compress_data_single_method(Deflation_Method'Last);  --  Deflate
267            elsif fast_presel then
268              Compress_data_single_method(LZMA_2_for_Zip_in_Zip);
269            else
270              Compress_data_single_method(LZMA_3_for_Zip_in_Zip);
271            end if;
272          when Source_code =>
273            if input_size_known and input_size < 8_000 then
274              Compress_data_single_method(Deflation_Method'Last);  --  Deflate
275            elsif fast_presel then
276              Compress_data_single_method(LZMA_2_for_Source);
277            else
278              Compress_data_single_method(LZMA_3_for_Source);
279            end if;
280        end case;
281    end case;
282  end Compress_data;
283
284  function Guess_type_from_name(name: String) return Data_content_type is
285    up: constant String:= To_Upper(name);
286    ext_1: constant String:= Tail(up, 2);
287    ext_2: constant String:= Tail(up, 3);
288    ext_3: constant String:= Tail(up, 4);
289    ext_4: constant String:= Tail(up, 5);
290  begin
291    if ext_3 = ".JPG" or else ext_4 = ".JPEG" then
292      return JPEG;
293    end if;
294    if ext_3 = ".ADA" or else ext_3 = ".ADS" or else ext_3 = ".ADB"
295      or else ext_1 = ".C" or else ext_1 = ".H"
296      or else ext_3 = ".CPP" or else ext_3 = ".HPP"
297      or else ext_3 = ".DEF" or else ext_3 = ".ASM"
298      or else ext_4 = ".JAVA" or else ext_2 = ".CS"
299      or else ext_3 = ".PAS" or else ext_3 = ".INC" or else ext_2 = ".PP" or else ext_3 = ".LPR"
300      or else ext_3 = ".MAK" or else ext_2 = ".IN"
301      or else ext_2 = ".SH" or else ext_3 = ".BAT" or else ext_3 = ".CMD"
302      or else ext_3 = ".XML" or else ext_3 = ".XSL" or else ext_4 = ".SGML"
303      or else ext_3 = ".HTM" or else ext_4 = ".HTML"
304      or else ext_2 = ".JS" or else ext_3 = ".LSP"
305      or else ext_3 = ".CSV" or else ext_3 = ".SQL"
306    then
307      return Source_code;
308    end if;
309    --  Zip archives happen to be zipped...
310    if ext_4 = ".EPUB"  --  EPUB: e-book reader format
311      or else ext_3 = ".JAR" or else ext_3 = ".ZIP"
312      or else ext_3 = ".ODB" or else ext_3 = ".ODS" or else ext_3 = ".ODT"
313      or else ext_3 = ".OTR" or else ext_3 = ".OTS" or else ext_3 = ".OTT"
314      or else ext_3 = ".CRX" or else ext_3 = ".NTH"
315      or else ext_4 = ".DOCX" or else ext_4 = ".PPTX" or else ext_4 = ".XLSX"
316    then
317      return Zip_in_Zip;
318    end if;
319    --  Some raw camera picture data
320    if ext_3 = ".ORF"          --  Raw Olympus
321      or else ext_3 = ".CR2"   --  Raw Canon
322      or else ext_3 = ".RAF"   --  Raw Fujifilm
323      or else ext_3 = ".SRW"   --  Raw Samsung
324    then
325      return ORF_CR2;
326    end if;
327    if ext_3 = ".ARW"          --  Raw Sony
328      or else ext_3 = ".RW2"   --  Raw Panasonic
329      or else ext_3 = ".NEF"   --  Raw Nikon
330      or else ext_3 = ".DNG"   --  Raw Leica, Pentax
331      or else ext_3 = ".X3F"   --  Raw Sigma
332    then
333      return ARW_RW2;
334    end if;
335    if ext_3 = ".PGM" then
336      return PGM;
337    end if;
338    if ext_3 = ".PPM" then
339      return PPM;
340    end if;
341    if ext_3 = ".MP3" then
342      return MP3;
343    end if;
344    if ext_3 = ".MTS" or else ext_3 = ".MP4" or else ext_3 = ".M4A" or else ext_3 = ".M4P" then
345      return MP4;
346    end if;
347    if ext_3 = ".PNG" then
348      return PNG;
349    end if;
350    if ext_3 = ".GIF" then
351      return GIF;
352    end if;
353    if ext_3 = ".WAV" or else ext_3 = ".UAX" then
354      return WAV;
355    end if;
356    return Neutral;
357  end Guess_type_from_name;
358
359end Zip.Compress;
360