1-- Legal licensing note: 2 3-- Copyright (c) 2007 .. 2018 Gautier de Montmollin (Maintainer of the Ada version) 4-- SWITZERLAND 5 6-- Permission is hereby granted, free of charge, to any person obtaining a copy 7-- of this software and associated documentation files (the "Software"), to deal 8-- in the Software without restriction, including without limitation the rights 9-- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10-- copies of the Software, and to permit persons to whom the Software is 11-- furnished to do so, subject to the following conditions: 12 13-- The above copyright notice and this permission notice shall be included in 14-- all copies or substantial portions of the Software. 15 16-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21-- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22-- THE SOFTWARE. 23 24-- NB: this is the MIT License, as found on the site 25-- http://www.opensource.org/licenses/mit-license.php 26 27with Zip.CRC_Crypto, 28 Zip.Compress.Shrink, 29 Zip.Compress.Reduce, 30 Zip.Compress.Deflate, 31 Zip.Compress.LZMA_E; 32 33with Ada.Characters.Handling; use Ada.Characters.Handling; 34with Ada.Numerics.Discrete_Random; 35with Ada.Strings.Fixed; use Ada.Strings.Fixed; 36 37package body Zip.Compress is 38 39 use Zip_Streams, Zip.CRC_Crypto; 40 41 ------------------- 42 -- Compress_data -- 43 ------------------- 44 45 procedure Compress_data 46 (input, 47 output : in out Zip_Streams.Root_Zipstream_Type'Class; 48 input_size_known: Boolean; 49 input_size : File_size_type; 50 method : Compression_Method; 51 feedback : Feedback_proc; 52 password : String; 53 content_hint : Data_content_type; 54 CRC : out Interfaces.Unsigned_32; 55 output_size : out File_size_type; 56 zip_type : out Interfaces.Unsigned_16 57 ) 58 is 59 use Interfaces; 60 counted: File_size_type; 61 user_aborting: Boolean; 62 idx_in: constant ZS_Index_Type:= Index(input); 63 idx_out: constant ZS_Index_Type:= Index(output); 64 compression_ok: Boolean; 65 first_feedback: Boolean:= True; 66 -- 67 is_encrypted: constant Boolean:= password /= ""; 68 encrypt_pack, mem_encrypt_pack: Crypto_pack; 69 encrypt_header: Byte_Buffer(1..12); 70 package Byte_soup is new Ada.Numerics.Discrete_Random(Byte); 71 use Byte_soup; 72 cg: Byte_soup.Generator; 73 -- 74 -- Store data as is, or, if do_write = False, just compute CRC (this is for encryption). 75 -- 76 procedure Store_data(do_write: Boolean) is 77 Buffer : Byte_Buffer (1 .. buffer_size); 78 Last_Read : Natural; 79 begin 80 zip_type:= compression_format_code.store; 81 counted:= 0; 82 while not End_Of_Stream(input) loop 83 if input_size_known and counted >= input_size then 84 exit; 85 end if; 86 -- Copy data 87 BlockRead (input, Buffer, Last_Read); 88 counted:= counted + File_size_type (Last_Read); 89 Update(CRC, Buffer (1 .. Last_Read)); 90 if do_write then 91 Encode(encrypt_pack, Buffer (1 .. Last_Read)); 92 BlockWrite(output, Buffer (1 .. Last_Read)); 93 end if; 94 -- Feedback 95 if feedback /= null and then 96 (first_feedback or (counted mod (2**16)=0) or 97 (input_size_known and counted = input_size)) 98 then 99 if input_size_known then 100 feedback( 101 percents_done => 102 Natural( (100.0 * Float(counted)) / Float(input_size) ), 103 entry_skipped => False, 104 user_abort => user_aborting ); 105 else 106 feedback( 107 percents_done => 0, 108 entry_skipped => False, 109 user_abort => user_aborting ); 110 end if; 111 first_feedback:= False; 112 if user_aborting then 113 raise User_abort; 114 end if; 115 end if; 116 end loop; 117 output_size:= counted; 118 compression_ok:= True; 119 end Store_data; 120 -- 121 procedure Compress_data_single_method(actual_method: Single_Method) is 122 begin 123 Init(CRC); 124 if is_encrypted then 125 Init_keys(encrypt_pack, password); 126 Set_mode(encrypt_pack, encrypted); 127 -- A bit dumb from Zip spec: we need to know the final CRC in order to set up 128 -- the last byte of the encryption header, that allows for detecting if a password 129 -- is OK - this, with 255/256 probability of correct detection of a wrong password! 130 -- Result: 1st scan of the whole input stream with CRC calculation: 131 Store_data(do_write => False); 132 Reset(cg); 133 for i in 1..11 loop 134 encrypt_header(i):= Random(cg); 135 end loop; 136 encrypt_header(12):= Byte(Shift_Right( Final(CRC), 24 )); 137 Set_Index(input, idx_in); 138 Init(CRC); 139 Encode(encrypt_pack, encrypt_header); 140 BlockWrite(output, encrypt_header); 141 -- 142 -- We need to remember at this point the encryption keys in case we need 143 -- to rewrite from here (compression failed, store data). 144 -- 145 mem_encrypt_pack:= encrypt_pack; 146 else 147 Set_mode(encrypt_pack, clear); 148 end if; 149 -- 150 -- Dispatch the work to child procedures doing the stream compression 151 -- in different formats, depending on the actual compression method. 152 -- For example, for methods LZMA_for_JPEG, LZMA_for_WAV, or LZMA_3, we 153 -- logically call Zip.Compress.LZMA_E for the job. 154 -- 155 case actual_method is 156 -- 157 when Store => 158 Store_data(do_write => True); 159 -- 160 when Shrink => 161 Zip.Compress.Shrink( 162 input, output, input_size_known, input_size, feedback, 163 CRC, encrypt_pack, output_size, compression_ok 164 ); 165 zip_type:= compression_format_code.shrink; 166 -- 167 when Reduction_Method => 168 Zip.Compress.Reduce( 169 input, output, input_size_known, input_size, feedback, 170 actual_method, 171 CRC, encrypt_pack, output_size, compression_ok 172 ); 173 zip_type:= compression_format_code.reduce + 174 Unsigned_16( 175 Compression_Method'Pos(actual_method) - 176 Compression_Method'Pos(Reduce_1) 177 ); 178 when Deflation_Method => 179 Zip.Compress.Deflate( 180 input, output, input_size_known, input_size, feedback, 181 actual_method, 182 CRC, encrypt_pack, output_size, compression_ok 183 ); 184 zip_type:= compression_format_code.deflate; 185 when LZMA_Method => 186 Zip.Compress.LZMA_E( 187 input, output, input_size_known, input_size, feedback, 188 actual_method, 189 CRC, encrypt_pack, output_size, compression_ok 190 ); 191 zip_type:= compression_format_code.lzma; 192 end case; 193 CRC:= Final(CRC); 194 -- 195 -- Handle case where compression has been unefficient: 196 -- data to be compressed is too "random"; then compressed data 197 -- happen to be larger than uncompressed data 198 -- 199 if not compression_ok then 200 -- Go back to the beginning and just store the data 201 Set_Index(input, idx_in); 202 if is_encrypted then 203 Set_Index(output, idx_out + 12); 204 -- Restore the encryption keys to their state just after the encryption header: 205 encrypt_pack:= mem_encrypt_pack; 206 else 207 Set_Index(output, idx_out); 208 end if; 209 Init(CRC); 210 Store_data(do_write => True); 211 CRC:= Final(CRC); 212 end if; 213 if is_encrypted then 214 output_size:= output_size + 12; 215 end if; 216 end Compress_data_single_method; 217 218 fast_presel: constant Boolean:= 219 method = Preselection_1 or (input_size_known and input_size < 22_805); 220 221 data_type_to_LZMA_method: constant array(Data_content_type) of LZMA_Method:= 222 (JPEG => LZMA_for_JPEG, 223 ARW_RW2 => LZMA_for_ARW, 224 ORF_CR2 => LZMA_for_ORF, 225 MP3 => LZMA_for_MP3, 226 MP4 => LZMA_for_MP4, 227 PGM => LZMA_for_PGM, 228 PPM => LZMA_for_PPM, 229 PNG => LZMA_for_PNG, 230 WAV => LZMA_for_WAV, 231 others => LZMA_1 -- Fake, should be unused as such. 232 ); 233 234 begin 235 case method is 236 -- 237 when Single_Method => 238 Compress_data_single_method(method); 239 -- 240 when Preselection_Method => 241 case content_hint is 242 when Neutral => -- No clue about what kind of data 243 if input_size_known and input_size < 9_000 then 244 Compress_data_single_method(Deflation_Method'Last); -- Deflate 245 elsif fast_presel then 246 -- See: Optimum, LZ77 sheet in za_work.xls 247 -- or l2_vs_l3.xls with a larger data set. 248 Compress_data_single_method(LZMA_2); -- LZMA with IZ_10 match finder 249 else 250 Compress_data_single_method(LZMA_3); -- LZMA with BT4 match finder 251 end if; 252 when ARW_RW2 | ORF_CR2 | MP3 | MP4 | JPEG | PGM | PPM | PNG | WAV => 253 if input_size_known and input_size < 2_250 then 254 Compress_data_single_method(Deflation_Method'Last); -- Deflate 255 else 256 Compress_data_single_method(data_type_to_LZMA_method(content_hint)); 257 end if; 258 when GIF => 259 if input_size_known and input_size < 350 then 260 Compress_data_single_method(Deflate_1); 261 else 262 Compress_data_single_method(LZMA_for_GIF); 263 end if; 264 when Zip_in_Zip => 265 if input_size_known and input_size < 1_000 then 266 Compress_data_single_method(Deflation_Method'Last); -- Deflate 267 elsif fast_presel then 268 Compress_data_single_method(LZMA_2_for_Zip_in_Zip); 269 else 270 Compress_data_single_method(LZMA_3_for_Zip_in_Zip); 271 end if; 272 when Source_code => 273 if input_size_known and input_size < 8_000 then 274 Compress_data_single_method(Deflation_Method'Last); -- Deflate 275 elsif fast_presel then 276 Compress_data_single_method(LZMA_2_for_Source); 277 else 278 Compress_data_single_method(LZMA_3_for_Source); 279 end if; 280 end case; 281 end case; 282 end Compress_data; 283 284 function Guess_type_from_name(name: String) return Data_content_type is 285 up: constant String:= To_Upper(name); 286 ext_1: constant String:= Tail(up, 2); 287 ext_2: constant String:= Tail(up, 3); 288 ext_3: constant String:= Tail(up, 4); 289 ext_4: constant String:= Tail(up, 5); 290 begin 291 if ext_3 = ".JPG" or else ext_4 = ".JPEG" then 292 return JPEG; 293 end if; 294 if ext_3 = ".ADA" or else ext_3 = ".ADS" or else ext_3 = ".ADB" 295 or else ext_1 = ".C" or else ext_1 = ".H" 296 or else ext_3 = ".CPP" or else ext_3 = ".HPP" 297 or else ext_3 = ".DEF" or else ext_3 = ".ASM" 298 or else ext_4 = ".JAVA" or else ext_2 = ".CS" 299 or else ext_3 = ".PAS" or else ext_3 = ".INC" or else ext_2 = ".PP" or else ext_3 = ".LPR" 300 or else ext_3 = ".MAK" or else ext_2 = ".IN" 301 or else ext_2 = ".SH" or else ext_3 = ".BAT" or else ext_3 = ".CMD" 302 or else ext_3 = ".XML" or else ext_3 = ".XSL" or else ext_4 = ".SGML" 303 or else ext_3 = ".HTM" or else ext_4 = ".HTML" 304 or else ext_2 = ".JS" or else ext_3 = ".LSP" 305 or else ext_3 = ".CSV" or else ext_3 = ".SQL" 306 then 307 return Source_code; 308 end if; 309 -- Zip archives happen to be zipped... 310 if ext_4 = ".EPUB" -- EPUB: e-book reader format 311 or else ext_3 = ".JAR" or else ext_3 = ".ZIP" 312 or else ext_3 = ".ODB" or else ext_3 = ".ODS" or else ext_3 = ".ODT" 313 or else ext_3 = ".OTR" or else ext_3 = ".OTS" or else ext_3 = ".OTT" 314 or else ext_3 = ".CRX" or else ext_3 = ".NTH" 315 or else ext_4 = ".DOCX" or else ext_4 = ".PPTX" or else ext_4 = ".XLSX" 316 then 317 return Zip_in_Zip; 318 end if; 319 -- Some raw camera picture data 320 if ext_3 = ".ORF" -- Raw Olympus 321 or else ext_3 = ".CR2" -- Raw Canon 322 or else ext_3 = ".RAF" -- Raw Fujifilm 323 or else ext_3 = ".SRW" -- Raw Samsung 324 then 325 return ORF_CR2; 326 end if; 327 if ext_3 = ".ARW" -- Raw Sony 328 or else ext_3 = ".RW2" -- Raw Panasonic 329 or else ext_3 = ".NEF" -- Raw Nikon 330 or else ext_3 = ".DNG" -- Raw Leica, Pentax 331 or else ext_3 = ".X3F" -- Raw Sigma 332 then 333 return ARW_RW2; 334 end if; 335 if ext_3 = ".PGM" then 336 return PGM; 337 end if; 338 if ext_3 = ".PPM" then 339 return PPM; 340 end if; 341 if ext_3 = ".MP3" then 342 return MP3; 343 end if; 344 if ext_3 = ".MTS" or else ext_3 = ".MP4" or else ext_3 = ".M4A" or else ext_3 = ".M4P" then 345 return MP4; 346 end if; 347 if ext_3 = ".PNG" then 348 return PNG; 349 end if; 350 if ext_3 = ".GIF" then 351 return GIF; 352 end if; 353 if ext_3 = ".WAV" or else ext_3 = ".UAX" then 354 return WAV; 355 end if; 356 return Neutral; 357 end Guess_type_from_name; 358 359end Zip.Compress; 360