1#!/usr/local/bin/perl
2
3use strict;
4use warnings;
5
6use Compress::Raw::Lzma qw (LZMA_STREAM_END LZMA_DICT_SIZE_MIN);
7use File::Basename;
8
9# author:
10# philsmd
11# magnum (adapt to JtR use)
12
13# version:
14# 1.3
15
16# date released:
17# April 2015
18
19# date last updated:
20# 28th Nov 2018
21
22# dependencies:
23# Compress::Raw::Lzma
24
25# supported file types:
26# - is able to identify and parse .7z files
27# - is able to identify and parse splitted .7z files (.7z.001, .7z.002, ...)
28# - is able to identify and parse regular (non-packed) .sfx files
29
30# install dependencies like this:
31#    sudo cpan Compress::Raw::Lzma
32# or sudo apt-get install libcompress-raw-lzma-perl
33# or sudo perl -MCPAN -e 'install Compress::Raw::Lzma'
34
35
36#
37# Explanation of the "hash" format:
38#
39
40# the fields of this format are separate by the dollar ("$") sign:
41# ("xyz" means that the string xyz is used literally, brackets indicate variables)
42#
43# "$"
44# "7z"
45# "$"
46# [data type indicator]           # see "Explanation of the data type indicator" below
47# "$"
48# [cost factor]                   # means: 2 ^ [cost factor] iterations
49# "$"
50# [length of salt]
51# "$"
52# [salt]
53# "$"
54# [length of iv]                  # the initialization vector length
55# "$"
56# [iv]                            # the initialization vector itself
57# "$"
58# [CRC32]                         # the actual "hash"
59# "$"
60# [length of encrypted data]      # the encrypted data length in bytes
61# "$"
62# [length of decrypted data]      # the decrypted data length in bytes
63# "$"
64# [encrypted data]                # the encrypted (and possibly also compressed) data
65
66# in case the data was not truncated and a decompression step is needed to verify the CRC32, these fields are appended:
67# "$"
68# [length of data for CRC32]      # the length of the first "file" needed to verify the CRC32 checksum
69# "$"
70# [coder attributes]              # most of the coders/decompressors need some attributes (e.g. encoded lc, pb, lp, dictSize values);
71
72#
73# Explanation of the data type indicator
74#
75
76# This field is the first field after the hash signature (i.e. after "$7z$).
77# Whenever the data was longer than the value of PASSWORD_RECOVERY_TOOL_DATA_LIMIT and the data could be truncated due to the padding attack,
78# the value of this field will be set to 128.
79#
80# If no truncation is used:
81# - the value will be 0 if the data doesn't need to be decompressed to check the CRC32 checksum
82# - all values different from 128, but greater than 0, indicate that the data must be decompressed as follows:
83#   - 1 means that the data must be decompressed using the LZMA1 decompressor
84#   - 2 means that the data must be decompressed using the LZMA2 decompressor
85#   - 3 means that the data must be decompressed using the PPMD decompressor
86#   - 4 means that the data must be decompressed using the BCJ decompressor
87#   - 5 means that the data must be decompressed using the BCJ2 decompressor
88#   - 6 means that the data must be decompressed using the BZIP2 decompressor
89#   - 7 means that the data must be decompressed using the DEFLATE decompressor
90
91# Truncated data can only be verified using the padding attack and therefore combinations between truncation + a compressor are not allowed.
92# Therefore, whenever the value is 128 or 0, neither coder attributes nor the length of the data for the CRC32 check is within the output.
93# On the other hand, for all values above or equal 1 and smaller than 128, both coder attributes and the length for CRC32 check is in the output.
94
95#
96# Constants
97#
98
99# cracker specific stuff
100
101my $ANALYZE_ALL_STREAMS_TO_FIND_SHORTEST_DATA_BUF = 1;
102
103my $SHOW_LIST_OF_ALL_STREAMS = 0; # $ANALYZE_ALL_STREAMS_TO_FIND_SHORTEST_DATA_BUF must be set to 1 to list/debug all streams
104my $SHOW_LZMA_DECOMPRESS_AFTER_DECRYPT_WARNING = 1;
105
106my $SHORTEN_HASH_LENGTH_TO_CRC_LENGTH = 1; # only output the bytes needed for the checksum of the first file (plus a fixed length
107                                           # header at the very beginning of the stream; plus additional +5% to cover the exception
108                                           # that the compressed file is slightly longer than the raw file)
109
110my $SHORTEN_HASH_FIXED_HEADER  = 32.5;  # at the beginning of the compressed stream we have some header info
111                                        # (shortened hash can't really be shorter than the metadata needed for decompression)
112                                        # the extra +0.5 is used to round up (we use integer numbers)
113my $SHORTEN_HASH_EXTRA_PERCENT = 5;     # the compressed stream could be slightly longer than the underlying data (special cases)
114                                        # in percent: i.e. x % == (x / 100)
115
116my $PASSWORD_RECOVERY_TOOL_NAME = "john";
117my $PASSWORD_RECOVERY_TOOL_DATA_LIMIT = 0x80000000;          # hexadecimal output value. This value should always be >= 64
118my $PASSWORD_RECOVERY_TOOL_SUPPORT_PADDING_ATTACK  = 1;      # does the cracker support the AES-CBC padding attack (0 means no, 1 means yes)
119my @PASSWORD_RECOVERY_TOOL_SUPPORTED_DECOMPRESSORS = (1, 2); # within this list we only need values ranging from 1 to 7
120                                                             # i.e. SEVEN_ZIP_LZMA1_COMPRESSED to SEVEN_ZIP_DEFLATE_COMPRESSED
121
122# 7-zip specific stuff
123
124my $LZMA2_MIN_COMPRESSED_LEN = 16; # the raw data (decrypted) needs to be at least: 3 + 1 + 1, header (start + size) + at least one byte of data + end
125                                   # therefore we need to have at least one AES BLOCK (128 bits = 16 bytes)
126
127# header
128
129my $SEVEN_ZIP_MAGIC = "7z\xbc\xaf\x27\x1c";
130my $SEVEN_ZIP_MAGIC_LEN = 6;                # fixed length of $SEVEN_ZIP_MAGIC
131
132my $SEVEN_ZIP_END                = "\x00";
133my $SEVEN_ZIP_HEADER             = "\x01";
134my $SEVEN_ZIP_ARCHIVE_PROPERTIES = "\x02";
135my $SEVEN_ZIP_ADD_STREAMS_INFO   = "\x03";
136my $SEVEN_ZIP_MAIN_STREAMS_INFO  = "\x04";
137my $SEVEN_ZIP_FILES_INFO         = "\x05";
138my $SEVEN_ZIP_PACK_INFO          = "\x06";
139my $SEVEN_ZIP_UNPACK_INFO        = "\x07";
140my $SEVEN_ZIP_SUBSTREAMS_INFO    = "\x08";
141my $SEVEN_ZIP_SIZE               = "\x09";
142my $SEVEN_ZIP_CRC                = "\x0a";
143my $SEVEN_ZIP_FOLDER             = "\x0b";
144my $SEVEN_ZIP_UNPACK_SIZE        = "\x0c";
145my $SEVEN_ZIP_NUM_UNPACK_STREAM  = "\x0d";
146my $SEVEN_ZIP_EMPTY_STREAM       = "\x0e";
147my $SEVEN_ZIP_EMPTY_FILE         = "\x0f";
148my $SEVEN_ZIP_ANTI_FILE          = "\x10";
149my $SEVEN_ZIP_NAME               = "\x11";
150my $SEVEN_ZIP_CREATION_TIME      = "\x12";
151my $SEVEN_ZIP_ACCESS_TIME        = "\x13";
152my $SEVEN_ZIP_MODIFICATION_TIME  = "\x14";
153my $SEVEN_ZIP_WIN_ATTRIBUTE      = "\x15";
154my $SEVEN_ZIP_ENCODED_HEADER     = "\x17";
155my $SEVEN_ZIP_START_POS          = "\x18";
156my $SEVEN_ZIP_DUMMY              = "\x19";
157
158my $SEVEN_ZIP_MAX_PROPERTY_TYPE  = 2 ** 30; # 1073741824
159my $SEVEN_ZIP_NOT_EXTERNAL       = "\x00";
160my $SEVEN_ZIP_EXTERNAL           = "\x01";
161my $SEVEN_ZIP_ALL_DEFINED        = "\x01";
162my $SEVEN_ZIP_FILE_NAME_END      = "\x00\x00";
163
164# codec
165
166my $SEVEN_ZIP_AES               = "\x06\xf1\x07\x01"; # all the following codec values are from CPP/7zip/Archive/7z/7zHeader.h
167
168my $SEVEN_ZIP_LZMA1             = "\x03\x01\x01";
169my $SEVEN_ZIP_LZMA2             = "\x21";
170my $SEVEN_ZIP_PPMD              = "\x03\x04\x01";
171my $SEVEN_ZIP_BCJ               = "\x03\x03\x01\x03";
172my $SEVEN_ZIP_BCJ2              = "\x03\x03\x01\x1b";
173my $SEVEN_ZIP_BZIP2             = "\x04\x02\x02";
174my $SEVEN_ZIP_DEFLATE           = "\x04\x01\x08";
175
176# hash format
177
178my $SEVEN_ZIP_HASH_SIGNATURE    = "\$7z\$";
179my $SEVEN_ZIP_DEFAULT_POWER     = 19;
180my $SEVEN_ZIP_DEFAULT_IV        = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00";
181
182my $SEVEN_ZIP_UNCOMPRESSED       =   0;
183my $SEVEN_ZIP_LZMA1_COMPRESSED   =   1;
184my $SEVEN_ZIP_LZMA2_COMPRESSED   =   2;
185my $SEVEN_ZIP_PPMD_COMPRESSED    =   3;
186my $SEVEN_ZIP_BCJ_COMPRESSED     =   4;
187my $SEVEN_ZIP_BCJ2_COMPRESSED    =   5;
188my $SEVEN_ZIP_BZIP2_COMPRESSED   =   6;
189my $SEVEN_ZIP_DEFLATE_COMPRESSED =   7;
190my $SEVEN_ZIP_TRUNCATED          = 128; # (0x80 or 0b10000000)
191
192my %SEVEN_ZIP_COMPRESSOR_NAMES   = (1 => "LZMA1", 2 => "LZMA2", 3 => "PPMD", 4 => "BCJ", 5 => "BCJ2", 6 => "BZIP2",
193                                    7 => "DEFLATE");
194
195#
196# Helper functions
197#
198
199sub usage
200{
201  my $prog_name = shift;
202
203  print STDERR "Usage: $prog_name <7-Zip file>...\n";
204}
205
206my $memory_buffer_read_offset = 0;
207
208sub my_read
209{
210  my $input  = shift;
211  my $length = shift;
212
213  my $type_of_input = ref ($input);
214
215  my $output_buffer = "";
216
217  if ($type_of_input eq "GLOB")
218  {
219    read $input, $output_buffer, $length;
220  }
221  elsif ($type_of_input eq "HASH")
222  {
223    my $cur_file_handle = $$input{0}{'fh'};
224    my $cur_file_number = $$input{0}{'num'};
225
226    my $bytes_read = 0;
227
228    while ($bytes_read != $length)
229    {
230      my $name  = $$input{$cur_file_number}{'name'};
231      my $start = $$input{$cur_file_number}{'start'};
232      my $size  = $$input{$cur_file_number}{'size'};
233
234      my $cur_file_bytes_avail = ($start + $size) - $memory_buffer_read_offset;
235
236      if ($cur_file_bytes_avail < 1)
237      {
238        print STDERR "ERROR: failed to get the correct file offsets of splitted archive file '$name'\n";
239
240        exit (1);
241      }
242
243      my $total_bytes_to_read  = $length - $bytes_read;
244      my $bytes_to_read = $total_bytes_to_read;
245
246      if ($bytes_to_read > $cur_file_bytes_avail)
247      {
248        $bytes_to_read = $cur_file_bytes_avail;
249      }
250
251      # append the current bytes read from the file to the overall output buffer
252
253      my $temp_output_buffer = "";
254
255      my $bytes = read ($cur_file_handle, $temp_output_buffer, $bytes_to_read);
256
257      $output_buffer .= $temp_output_buffer;
258
259      if ($bytes != $bytes_to_read)
260      {
261        print STDERR "ERROR: could not read from splitted 7z file '$name'\n";
262
263        exit (1);
264      }
265
266      $bytes_read += $bytes_to_read;
267      $memory_buffer_read_offset += $bytes_to_read;
268
269      # the following case only happens if we need to read across 2 or more files
270
271      if ($bytes_read != $length)
272      {
273        # we exhausted the current file, move to the next one!
274
275        close ($cur_file_handle);
276
277        $cur_file_number++;
278
279        if (! exists ($$input{$cur_file_number}))
280        {
281          my $name_prefix = get_splitted_archive_raw_name ($name);
282
283          print STDERR "ERROR: could not open part #$cur_file_number of the splitted archive file '$name_prefix'\n";
284
285          exit (1);
286        }
287
288        my $name = $$input{$cur_file_number}{'name'};
289
290        if (! open ($cur_file_handle, "<$name"))
291        {
292          print STDERR "ERROR: could not open the splitted archive file '$name' for reading\n";
293
294          exit (1);
295        }
296
297        $$input{0}{'fh'}  = $cur_file_handle;
298        $$input{0}{'num'} = $cur_file_number;
299      }
300    }
301  }
302  else
303  {
304    $output_buffer = substr ($$input, $memory_buffer_read_offset, $length);
305
306    $memory_buffer_read_offset += $length;
307  }
308
309  return $output_buffer;
310}
311
312sub my_tell
313{
314  my $input = shift;
315
316  my $res = 0;
317
318  my $type_of_input = ref ($input);
319
320  if ($type_of_input eq "HASH")
321  {
322    $res = $memory_buffer_read_offset;
323  }
324  else
325  {
326    $res = tell ($input);
327  }
328
329  return $res;
330}
331
332sub my_seek
333{
334  my $input  = shift;
335  my $offset = shift;
336  my $whence = shift;
337
338  my $res = 0;
339
340  my $type_of_input = ref ($input);
341
342  if ($type_of_input eq "HASH")
343  {
344    # get total number of files and total/accumulated file size
345
346    my $number_of_files= 1;
347
348    # we assume that $$input{1} exists (we did already check that beforehand)
349
350    my $end = 0;
351
352    while (exists ($$input{$number_of_files}))
353    {
354      $end = $$input{$number_of_files}{'start'} + $$input{$number_of_files}{'size'};
355
356      $number_of_files++;
357    }
358
359    my $new_offset = 0;
360
361    # absolute (from start)
362    if ($whence == 0)
363    {
364      $new_offset = $offset;
365    }
366    # relative (depending on current position)
367    elsif ($whence == 1)
368    {
369      $new_offset = $memory_buffer_read_offset + $offset;
370    }
371    # offset from the end of the file
372    else
373    {
374      $new_offset = $end + $offset;
375    }
376
377    # sanity check
378
379    if (($new_offset < 0) || ($new_offset > $end))
380    {
381      my $name = get_splitted_archive_raw_name ($$input{1}{'name'});
382
383      print STDERR "ERROR: could not seek within the splitted archive '$name'\n";
384
385      exit (1);
386    }
387
388    $memory_buffer_read_offset = $new_offset;
389
390    # check if the correct file is open
391    # 1. determine the correct file
392    # 2. if the "incorrect" file is open, close it and open the correct one
393
394    my $cur_file_number = 1;
395    my $file_was_found  = 0;
396
397    my $start = 0;
398    my $size  = 0;
399
400    while (exists ($$input{$cur_file_number}))
401    {
402      $start = $$input{$cur_file_number}{'start'};
403      $size  = $$input{$cur_file_number}{'size'};
404
405      my $end = $start + $size;
406
407      if ($memory_buffer_read_offset >= $start)
408      {
409        if ($memory_buffer_read_offset < $end)
410        {
411          $file_was_found = 1;
412
413          last;
414        }
415      }
416
417      $cur_file_number++;
418    }
419
420    if ($file_was_found == 0)
421    {
422      my $name = get_splitted_archive_raw_name ($$input{1}{'name'});
423
424      print STDERR "ERROR: could not read the splitted archive '$name' (maybe some parts are missing?)\n";
425
426      exit (1);
427    }
428
429    if ($$input{0}{'num'} != $cur_file_number)
430    {
431      # if we enter this block, we definitely need to "change" to another file
432
433      close ($$input{0}{'fh'});
434
435      my $name = $$input{$cur_file_number}{'name'};
436
437      my $seven_zip_file;
438
439      if (! open ($seven_zip_file, "<$name"))
440      {
441        print STDERR "ERROR: could not open the file '$name' for reading\n";
442
443        exit (1);
444      }
445
446      $$input{0}{'fh'}  = $seven_zip_file;
447      $$input{0}{'num'} = $cur_file_number;
448    }
449
450    # always seek w/ absolute positions within the splitted part!
451    $res = seek ($$input{0}{'fh'}, $memory_buffer_read_offset - $start, 0);
452  }
453  else
454  {
455    $res = seek ($input, $offset, $whence);
456  }
457
458  return $res;
459}
460
461sub get_uint32
462{
463  my $fp = shift;
464
465  my $bytes = my_read ($fp, 4);
466
467  return (0, 0) if (length ($bytes) != 4);
468
469  my $num = unpack ("L", $bytes);
470
471  return $num;
472}
473
474sub get_uint64
475{
476  my $fp = shift;
477
478  my $bytes = my_read ($fp, 8);
479
480  return (0, 0) if (length ($bytes) != 8);
481
482  my ($uint1, $uint2) = unpack ("LL<", $bytes);
483
484  my $num = $uint2 << 32 | $uint1;
485
486  return $bytes, $num;
487}
488
489sub read_number
490{
491  my $fp = shift;
492
493  my $b = ord (my_read ($fp, 1));
494
495  if (($b & 0x80) == 0)
496  {
497    return $b;
498  }
499
500  my $value = ord (my_read ($fp, 1));
501
502  for (my $i = 1; $i < 8; $i++)
503  {
504    my $mask = 0x80 >> $i;
505
506    if (($b & $mask) == 0)
507    {
508      my $high = $b & ($mask - 1);
509
510      $value |= ($high << ($i * 8));
511
512      return $value;
513    }
514
515    my $next = ord (my_read ($fp, 1));
516
517    $value |= ($next << ($i * 8));
518  }
519
520  return $value;
521}
522
523sub num_to_id
524{
525  my $num = shift;
526
527  # special case:
528
529  return "\x00" if ($num == 0);
530
531  # normal case:
532
533  my $id = "";
534
535  while ($num > 0)
536  {
537    my $value = $num & 0xff;
538
539    $id = chr ($value) . $id;
540
541    $num >>= 8;
542  }
543
544  return $id;
545}
546
547sub read_id
548{
549  my $fp = shift;
550
551  my $id;
552
553  my $num = read_number ($fp);
554
555  # convert number to their ASCII code correspondent byte
556
557  return num_to_id ($num);
558}
559
560sub get_boolean_vector
561{
562  my $fp = shift;
563
564  my $number_items = shift;
565
566  my @booleans;
567
568  # get the values
569
570  my $v = 0;
571  my $mask = 0;
572
573  for (my $i = 0; $i < $number_items; $i++)
574  {
575    if ($mask == 0)
576    {
577      my $byte = my_read ($fp, 1);
578
579      $v = ord ($byte);
580      $mask = 0x80;
581    }
582
583    my $val = ($v & $mask) != 0;
584
585    push (@booleans, $val);
586
587    $mask >>= 1;
588  }
589
590  return @booleans;
591}
592
593sub get_boolean_vector_check_all
594{
595  my $fp = shift;
596
597  my $number_items = shift;
598
599  my @booleans;
600
601  # check first byte to see if all are defined
602
603  my $all_defined = my_read ($fp, 1);
604
605  if ($all_defined eq $SEVEN_ZIP_ALL_DEFINED)
606  {
607    @booleans = (1) x $number_items;
608  }
609  else
610  {
611    @booleans = get_boolean_vector ($fp, $number_items);
612  }
613
614  return @booleans;
615}
616
617sub is_supported_seven_zip_file
618{
619  my $fp = shift;
620
621  my $magic_len = length ($SEVEN_ZIP_MAGIC);
622
623  my $signature = my_read ($fp, $magic_len);
624
625  return $signature eq $SEVEN_ZIP_MAGIC;
626}
627
628sub get_decoder_properties
629{
630  my $attributes = shift;
631
632  my $salt_len;
633  my $salt_buf;
634  my $iv_len;
635  my $iv_buf;
636  my $number_cycles_power;
637
638  # set some default values
639
640  $salt_len = 0;
641  $salt_buf = "";
642  $iv_len = length ($SEVEN_ZIP_DEFAULT_IV);
643  $iv_buf = $SEVEN_ZIP_DEFAULT_IV;
644  $number_cycles_power = $SEVEN_ZIP_DEFAULT_POWER;
645
646  # the most important information is encoded in first and second byte
647  # i.e. the salt/iv length, number cycle power
648
649  my $offset = 0;
650
651  my $first_byte = substr ($attributes, 0, 1);
652  $first_byte = ord ($first_byte);
653
654  $offset++;
655
656  $number_cycles_power = $first_byte & 0x3f;
657
658  if (($first_byte & 0xc0) == 0)
659  {
660    return ($salt_len, $salt_buf, $iv_len, $iv_buf, $number_cycles_power);
661  }
662
663  $salt_len = ($first_byte >> 7) & 1;
664  $iv_len   = ($first_byte >> 6) & 1;
665
666  # combine this info with the second byte
667
668  my $second_byte = substr ($attributes, 1, 1);
669  $second_byte = ord ($second_byte);
670
671  $offset++;
672
673  $salt_len += ($second_byte >> 4);
674  $iv_len   += ($second_byte & 0x0f);
675
676  $salt_buf = substr ($attributes, $offset, $salt_len);
677
678  $offset += $salt_len;
679
680  $iv_buf = substr ($attributes, $offset, $iv_len);
681
682  # pad the iv with zeros
683
684  my $iv_max_length = 16;
685
686  $iv_buf .= "\x00" x $iv_max_length;
687  $iv_buf = substr ($iv_buf, 0, $iv_max_length);
688
689  return ($salt_len, $salt_buf, $iv_len, $iv_buf, $number_cycles_power);
690}
691
692sub get_digest
693{
694  my $index = shift;
695
696  my $unpack_info = shift;
697  my $substreams_info = shift;
698
699  my $digest;
700
701  my $digests_unpack_info = $unpack_info->{'digests'};
702  my $digests_substreams_info = $substreams_info->{'digests'};
703
704  my $use_unpack_info = 0;
705  my $use_substreams_info = 0;
706
707  if (defined ($digests_unpack_info))
708  {
709    my $digests_unpack_info_size = 0;
710
711    if (@$digests_unpack_info)
712    {
713      $digests_unpack_info_size = scalar (@$digests_unpack_info);
714    }
715
716    if ($index < $digests_unpack_info_size)
717    {
718      if (ref (@$digests_unpack_info[$index]) eq "HASH")
719      {
720        $use_unpack_info = 1;
721      }
722    }
723  }
724
725  if (defined ($digests_substreams_info))
726  {
727    my $digests_substreams_info_size = 0;
728
729    if (@$digests_substreams_info)
730    {
731      $digests_substreams_info_size = scalar (@$digests_substreams_info);
732    }
733
734    if ($index < $digests_substreams_info_size)
735    {
736      if (ref (@$digests_substreams_info[$index]) eq "HASH")
737      {
738        $use_substreams_info = 1;
739      }
740    }
741  }
742
743  if ($use_unpack_info == 1)
744  {
745    $digest = @$digests_unpack_info[$index];
746  }
747  elsif ($use_substreams_info == 1)
748  {
749    $digest = @$digests_substreams_info[$index];
750  }
751
752  return $digest;
753}
754
755sub has_encrypted_header
756{
757  my $folder = shift;
758
759  my $encrypted;
760
761  # get first coder
762
763  my $coders = $folder->{'coders'};
764
765  # get attributes of the first coder
766
767  my $attributes = @$coders[0]->{'codec_id'};
768
769  if ($attributes eq $SEVEN_ZIP_AES)
770  {
771    $encrypted = 1;
772  }
773  else
774  {
775    $encrypted = 0;
776  }
777
778  return $encrypted;
779}
780
781sub lzma_properties_decode
782{
783  my $attributes = shift;
784
785  my $lclppb;
786
787  $lclppb = substr ($attributes, 0, 1);
788
789  my @data;
790
791  #data[0] is the lclppb value
792
793  $data[1] = ord (substr ($attributes, 1, 1));
794  $data[2] = ord (substr ($attributes, 2, 1));
795  $data[3] = ord (substr ($attributes, 3, 1));
796  $data[4] = ord (substr ($attributes, 4, 1));
797
798  my $dict_size = $data[1] | $data[2] << 8 | $data[3] << 16 | $data[4] << 24;
799
800  if ($dict_size < LZMA_DICT_SIZE_MIN)
801  {
802    $dict_size = LZMA_DICT_SIZE_MIN;
803  }
804
805  my $d = ord ($lclppb);
806
807  my $lc = int ($d % 9);
808     $d  = int ($d / 9);
809  my $pb = int ($d / 5);
810  my $lp = int ($d % 5);
811
812  return ($lclppb, $dict_size, $lc, $pb, $lp);
813}
814
815sub lzma_alone_header_field_encode
816{
817  my $num = shift;
818  my $length = shift;
819
820  my $value;
821
822  my $length_doubled = $length * 2;
823  my $big_endian_val = pack ("H*", sprintf ("%0${length_doubled}x", $num));
824
825  # what follows is just some easy way to convert endianess (there might be better ways of course)
826
827  $value = "";
828
829  for (my $i = $length - 1; $i >= 0; $i--)
830  {
831    $value .= substr ($big_endian_val, $i, 1);
832  }
833
834  return $value;
835}
836
837sub show_empty_streams_info_warning
838{
839  my $file_path = shift;
840
841  print STDERR "WARNING: the file '" . $file_path . "' does not contain any meaningful data (the so-called streams info), it might only contain a list of empty files.\n";
842}
843
844sub extract_hash_from_archive
845{
846  my $fp = shift;
847  my $archive = shift;
848  my $file_path = shift;
849
850  my $hash_buf = "";
851
852  # check if everything is defined/initialized
853  # and retrieve the single "objects"
854
855  return undef unless (defined ($archive));
856
857  my $parsed_header = $archive->{'parsed_header'};
858  return undef unless (defined ($parsed_header));
859
860  my $signature_header = $archive->{'signature_header'};
861  return undef unless (defined ($signature_header));
862
863  my $streams_info = $parsed_header->{'streams_info'};
864
865  if (! defined ($streams_info))
866  {
867    show_empty_streams_info_warning ($file_path);
868
869    return undef;
870  }
871
872  my $unpack_info = $streams_info->{'unpack_info'};
873  return undef unless (defined ($unpack_info));
874
875  my $substreams_info = $streams_info->{'substreams_info'};
876
877  my $digests = $unpack_info->{'digests'};
878  return undef unless (defined ($digests));
879
880  my $folders = $unpack_info->{'folders'};
881  return undef unless (defined ($folders));
882
883  my $pack_info = $streams_info->{'pack_info'};
884  return undef unless (defined ($pack_info));
885
886  # init file seek values
887
888  my $position_after_header = $signature_header->{'position_after_header'};
889  my $position_pack = $pack_info->{'pack_pos'};
890  my $current_seek_position = $position_after_header + $position_pack;
891
892  #
893  # start:
894  #
895
896  # get first folder/coder
897
898  my $folder_id = 0;
899
900  my $folder = @$folders[$folder_id];
901
902  my $number_coders = $folder->{'number_coders'};
903
904  # check if header is encrypted
905
906  my $has_encrypted_header = 0;
907
908  if ($number_coders > 1)
909  {
910    $has_encrypted_header = 0;
911  }
912  else
913  {
914    $has_encrypted_header = has_encrypted_header ($folder);
915  }
916
917  # get the first coder
918
919  my $coder_id = 0;
920
921  my $coder = $folder->{'coders'}[$coder_id];
922  return undef unless (defined ($coder));
923
924  my $codec_id = $coder->{'codec_id'};
925
926  # set index and seek to postition
927
928  my $current_index = 0;
929
930  my_seek ($fp, $current_seek_position, 0);
931
932  # if it is lzma compressed, we need to decompress it first
933
934  if ($codec_id eq $SEVEN_ZIP_LZMA1)
935  {
936    # get the sizes
937
938    my $unpack_size = $unpack_info->{'unpack_sizes'}[$current_index];
939
940    my $data_len = $pack_info->{'pack_sizes'}[$current_index];
941
942    # get the data
943
944    my $data = my_read ($fp, $data_len);
945
946    # lzma "header" stuff
947
948    my $attributes = $coder->{'attributes'};
949
950    my ($property_lclppb, $dict_size, $lc, $pb, $lp) = lzma_properties_decode ($attributes);
951
952    return undef unless (length ($property_lclppb) == 1);
953
954    # the alone-format header is defined like this:
955    #
956    #   +------------+----+----+----+----+--+--+--+--+--+--+--+--+
957    #   | Properties |  Dictionary Size  |   Uncompressed Size   |
958    #   +------------+----+----+----+----+--+--+--+--+--+--+--+--+
959    #
960
961    my $decompressed_header = "";
962
963    # we loop over this code section max. 2 times to try two variants of headers (with the correct/specific values and with default values)
964
965    for (my $try_number = 1; $try_number <= 2; $try_number++)
966    {
967      my ($dict_size_encoded, $uncompressed_size_encoded);
968      my $lz = new Compress::Raw::Lzma::AloneDecoder (AppendOutput => 1);
969
970      if ($try_number == 1)
971      {
972        $dict_size_encoded         = lzma_alone_header_field_encode ($dict_size,   4); # 4 bytes (the "Dictionary Size" field), little endian
973        $uncompressed_size_encoded = lzma_alone_header_field_encode ($unpack_size, 8); # 8 bytes (the "Uncompressed Size" field), little endian
974      }
975      else
976      {
977        # this is the fallback case (using some default values):
978
979        $dict_size_encoded         = pack ("H*", "00008000");         # "default" dictionary size (2^23 = 0x00800000)
980        $uncompressed_size_encoded = pack ("H*", "ffffffffffffffff"); # means: unknown uncompressed size
981      }
982
983      my $lzma_alone_format_header = $property_lclppb . $dict_size_encoded . $uncompressed_size_encoded;
984
985      my $lzma_header = $lzma_alone_format_header . $data;
986
987      my $status = $lz->code ($lzma_header, $decompressed_header);
988
989      if (length ($status) > 0)
990      {
991        if ($try_number == 2)
992        {
993          if ($status != LZMA_STREAM_END)
994          {
995            print STDERR "WARNING: the LZMA header decompression for the file '" . $file_path . "' failed with status: '" . $status . "'\n";
996
997            if ($status eq "Data is corrupt")
998            {
999              print STDERR "\n";
1000              print STDERR "INFO: for some reasons, for large LZMA buffers, we sometimes get a 'Data is corrupt' error.\n";
1001              print STDERR "      This is a known issue of this tool and needs to be investigated.\n";
1002
1003              print STDERR "\n";
1004              print STDERR "      The problem might have to do with this small paragraph hidden in the 7z documentation (quote):\n";
1005              print STDERR "      'The reference LZMA Decoder ignores the value of the \"Corrupted\" variable.\n";
1006              print STDERR "       So it continues to decode the stream, even if the corruption can be detected\n";
1007              print STDERR "       in the Range Decoder. To provide the full compatibility with output of the\n";
1008              print STDERR "       reference LZMA Decoder, another LZMA Decoder implementation must also\n";
1009              print STDERR "       ignore the value of the \"Corrupted\" variable.'\n";
1010              print STDERR "\n";
1011              print STDERR "      (taken from the DOC/lzma-specification.txt file of the 7z-SDK: see for instance:\n";
1012              print STDERR "       https://github.com/jljusten/LZMA-SDK/blob/master/DOC/lzma-specification.txt#L343-L347)\n";
1013            }
1014
1015            return undef;
1016          }
1017        }
1018      }
1019
1020      last if (length ($decompressed_header) > 0); # if we got some output it seems that it worked just fine
1021    }
1022
1023    return undef unless (length ($decompressed_header) > 0);
1024
1025    # in theory we should also check that the length is correct
1026    # return undef unless (length ($decompressed_header) == $unpack_size);
1027
1028    # check the decompressed 7zip header
1029
1030    $memory_buffer_read_offset = 0; # decompressed_header is a new memory buffer (which uses a offset to speed things up)
1031
1032    my $id = read_id (\$decompressed_header);
1033
1034    return undef unless ($id eq $SEVEN_ZIP_HEADER);
1035
1036    my $header = read_seven_zip_header (\$decompressed_header);
1037
1038    # override the "old" archive object
1039
1040    $archive = {
1041      "signature_header" => $signature_header,
1042      "parsed_header" => $header
1043    };
1044
1045    $parsed_header = $archive->{'parsed_header'};
1046    return "" unless (defined ($parsed_header));
1047
1048    # this didn't change at all
1049    # $signature_header = $archive->{'signature_header'};
1050    # return undef unless (defined ($signature_header));
1051
1052    $streams_info = $parsed_header->{'streams_info'};
1053
1054    if (! defined ($streams_info))
1055    {
1056      show_empty_streams_info_warning ($file_path);
1057
1058      return "";
1059    }
1060
1061    $unpack_info = $streams_info->{'unpack_info'};
1062    return "" unless (defined ($unpack_info));
1063
1064    $substreams_info = $streams_info->{'substreams_info'};
1065
1066    $digests = $unpack_info->{'digests'};
1067    return "" unless (defined ($digests));
1068
1069    $folders = $unpack_info->{'folders'};
1070    return "" unless (defined ($folders));
1071
1072    my $number_folders = $unpack_info->{'number_folders'};
1073
1074    $pack_info = $streams_info->{'pack_info'};
1075    return "" unless (defined ($pack_info));
1076
1077    # loop over all folders/coders to check if we find an AES encrypted stream
1078
1079    $position_pack = $pack_info->{'pack_pos'};
1080    $current_seek_position = $position_after_header + $position_pack; # reset the seek position
1081
1082    for (my $folder_pos = 0; $folder_pos < $number_folders; $folder_pos++)
1083    {
1084      $folder = @$folders[$folder_pos];
1085      last unless (defined ($folder));
1086
1087      $number_coders = $folder->{'number_coders'};
1088
1089      my $num_pack_sizes = scalar (@{$pack_info->{'pack_sizes'}});
1090
1091      for (my $coder_pos = 0; $coder_pos < $number_coders; $coder_pos++)
1092      {
1093        $coder = $folder->{'coders'}[$coder_pos];
1094        last unless (defined ($coder));
1095
1096        $coder_id = $coder_pos; # Attention: coder_id != codec_id !
1097
1098        $codec_id = $coder->{'codec_id'};
1099
1100        # we stop after first AES found, but in theory we could also deal
1101        # with several different AES streams (in that case we would need
1102        # to print several hash buffers, but this is a very special case)
1103
1104        last if ($codec_id eq $SEVEN_ZIP_AES);
1105
1106        # ELSE: update seek position and index:
1107
1108        if ($current_index < $num_pack_sizes) # not all pack_sizes always need to be known (final ones can be skipped)
1109        {
1110          my $pack_size = $pack_info->{'pack_sizes'}[$current_index];
1111
1112          $current_seek_position += $pack_size;
1113        }
1114
1115        $current_index++;
1116      }
1117
1118      last if ($codec_id eq $SEVEN_ZIP_AES);
1119
1120      last unless (defined ($coder));
1121    }
1122
1123    # we unfortunately can't do anything if no AES encrypted data was found
1124
1125    if ($codec_id ne $SEVEN_ZIP_AES)
1126    {
1127      print STDERR "WARNING: no AES data found in the 7z file '" . $file_path . "'\n";
1128
1129      return "";
1130    }
1131  }
1132  elsif ($codec_id eq $SEVEN_ZIP_LZMA2)
1133  {
1134    print STDERR "WARNING: lzma2 compression found within '" . $file_path . "' is currently not supported, ";
1135    print STDERR "but could be probably added easily\n";
1136
1137    return "";
1138  }
1139  elsif ($codec_id ne $SEVEN_ZIP_AES)
1140  {
1141    print STDERR "WARNING: unsupported coder with codec id '" . unpack ("H*", $codec_id) . "' in file '" . $file_path . "' found.\n";
1142    print STDERR "If you think this codec method from DOC/Methods.txt of the 7-Zip source code ";
1143    print STDERR "should be supported, please file a problem report/feature request\n";
1144
1145    return "";
1146  }
1147
1148  #
1149  # finally: fill hash_buf
1150  #
1151
1152  # first get the data with help of pack info
1153
1154  my $unpack_size = $unpack_info->{'unpack_sizes'}[$current_index];
1155
1156  my $data_len = $pack_info->{'pack_sizes'}[$current_index];
1157
1158  my $digests_index = $current_index; # correct ?
1159
1160  # reset the file pointer to the position after signature header and get the data
1161
1162  my_seek ($fp, $current_seek_position, 0);
1163
1164  # get remaining hash info (iv, number cycles power)
1165
1166  my $digest = get_digest ($digests_index, $unpack_info, $substreams_info);
1167
1168  return undef unless ((defined ($digest)) && ($digest->{'defined'} == 1));
1169
1170  my $attributes = $coder->{'attributes'};
1171
1172  my ($salt_len, $salt_buf, $iv_len, $iv_buf, $number_cycles_power) = get_decoder_properties ($attributes);
1173
1174  my $crc = $digest->{'crc'};
1175
1176  # special case: we can truncate the data_len and use 32 bytes in total for both iv + data (last 32 bytes of data)
1177
1178  my $is_truncated = 0;
1179  my $padding_attack_possible = 0;
1180
1181  my $data;
1182
1183  if ($has_encrypted_header == 0)
1184  {
1185    my $length_difference = $data_len - $unpack_size;
1186
1187    if ($length_difference > 3)
1188    {
1189      if ($data_len > ($PASSWORD_RECOVERY_TOOL_DATA_LIMIT / 2))
1190      {
1191        if ($PASSWORD_RECOVERY_TOOL_SUPPORT_PADDING_ATTACK == 1)
1192        {
1193          my_seek ($fp, $data_len - 32, 1);
1194
1195          $iv_buf = my_read ($fp, 16);
1196          $iv_len = 16;
1197
1198          $data = my_read ($fp, 16);
1199          $data_len = 16;
1200
1201          $unpack_size %= 16;
1202
1203          $is_truncated = 1;
1204        }
1205      }
1206
1207      $padding_attack_possible = 1;
1208    }
1209  }
1210
1211  my $type_of_compression    = $SEVEN_ZIP_UNCOMPRESSED;
1212  my $compression_attributes = "";
1213
1214  for (my $coder_pos = $coder_id + 1; $coder_pos < $number_coders; $coder_pos++)
1215  {
1216    $coder = $folder->{'coders'}[$coder_pos];
1217    last unless (defined ($coder));
1218
1219    $codec_id = $coder->{'codec_id'};
1220
1221    if ($codec_id eq $SEVEN_ZIP_LZMA1)
1222    {
1223      $type_of_compression = $SEVEN_ZIP_LZMA1_COMPRESSED;
1224    }
1225    elsif ($codec_id eq $SEVEN_ZIP_LZMA2)
1226    {
1227      $type_of_compression = $SEVEN_ZIP_LZMA2_COMPRESSED;
1228    }
1229    elsif ($codec_id eq $SEVEN_ZIP_PPMD)
1230    {
1231      $type_of_compression = $SEVEN_ZIP_PPMD_COMPRESSED;
1232    }
1233    elsif ($codec_id eq $SEVEN_ZIP_BCJ)
1234    {
1235      $type_of_compression = $SEVEN_ZIP_BCJ_COMPRESSED;
1236    }
1237    elsif ($codec_id eq $SEVEN_ZIP_BCJ2)
1238    {
1239      $type_of_compression = $SEVEN_ZIP_BCJ2_COMPRESSED;
1240    }
1241    elsif ($codec_id eq $SEVEN_ZIP_BZIP2)
1242    {
1243      $type_of_compression = $SEVEN_ZIP_BZIP2_COMPRESSED;
1244    }
1245    elsif ($codec_id eq $SEVEN_ZIP_DEFLATE)
1246    {
1247      $type_of_compression = $SEVEN_ZIP_DEFLATE_COMPRESSED;
1248    }
1249
1250    if ($type_of_compression != $SEVEN_ZIP_UNCOMPRESSED)
1251    {
1252      if (defined ($coder->{'attributes'}))
1253      {
1254        $compression_attributes = unpack ("H*", $coder->{'attributes'});
1255      }
1256
1257      last; # no need to continue looping, we found what we needed (and 2+ compressions are never combined by the 7z format)
1258    }
1259  }
1260
1261  # show a warning if the decompression algorithm is currently not supported by the cracker
1262
1263  if ($SHOW_LZMA_DECOMPRESS_AFTER_DECRYPT_WARNING == 1)
1264  {
1265    if ($type_of_compression != $SEVEN_ZIP_UNCOMPRESSED)
1266    {
1267      if ($is_truncated == 0)
1268      {
1269        if (grep (/^$type_of_compression$/, @PASSWORD_RECOVERY_TOOL_SUPPORTED_DECOMPRESSORS) == 0)
1270        {
1271          print STDERR "WARNING: to correctly verify the CRC checksum of the data contained within the file '". $file_path . "',\n";
1272          print STDERR "the data must be decompressed using " . $SEVEN_ZIP_COMPRESSOR_NAMES{$type_of_compression};
1273          print STDERR " after the decryption step.\n";
1274          print STDERR "\n";
1275          print STDERR "$PASSWORD_RECOVERY_TOOL_NAME currently does not support this particular decompression algorithm.\n";
1276          print STDERR "\n";
1277
1278          if ($padding_attack_possible == 1)
1279          {
1280            print STDERR "INFO: However there is also some good news in this particular case.\n";
1281            print STDERR "Since AES-CBC is used by the 7z algorithm and the data length of this file allows a padding attack,\n";
1282            print STDERR "the password recovery tool might be able to use that to verify the correctness of password candidates.\n";
1283            print STDERR "By using this attack there might of course be a higher probability of false positives.\n";
1284            print STDERR "\n";
1285          }
1286          elsif ($type_of_compression == $SEVEN_ZIP_LZMA2_COMPRESSED) # this special case should only work for LZMA2
1287          {
1288            if ($data_len <= $LZMA2_MIN_COMPRESSED_LEN)
1289            {
1290              print STDERR "INFO: it might still be possible to crack the password of this archive since the data part seems\n";
1291              print STDERR "to be very short and therefore it might use the LZMA2 uncompressed chunk feature\n";
1292              print STDERR "\n";
1293            }
1294          }
1295        }
1296      }
1297    }
1298  }
1299
1300  my $type_of_data = $SEVEN_ZIP_UNCOMPRESSED; # this variable will hold the "number" after the "$7z$" hash signature
1301
1302  if ($is_truncated == 1)
1303  {
1304    $type_of_data = $SEVEN_ZIP_TRUNCATED; # note: this means that we neither need the crc_len, nor the coder attributes
1305  }
1306  else
1307  {
1308    $type_of_data = $type_of_compression;
1309  }
1310
1311  my $crc_len = 0;
1312
1313  if (($type_of_data != $SEVEN_ZIP_UNCOMPRESSED) && ($type_of_data != $SEVEN_ZIP_TRUNCATED))
1314  {
1315    if (scalar ($substreams_info->{'unpack_sizes'}) > 0)
1316    {
1317      $crc_len = $substreams_info->{'unpack_sizes'}[0]; # default: use the first file of the first stream
1318    }
1319  }
1320
1321  if (! defined ($data))
1322  {
1323    if (($type_of_data != $SEVEN_ZIP_UNCOMPRESSED) && ($type_of_data != $SEVEN_ZIP_TRUNCATED))
1324    {
1325      if ($ANALYZE_ALL_STREAMS_TO_FIND_SHORTEST_DATA_BUF == 1)
1326      {
1327        my $number_file_indices = scalar (@{$substreams_info->{'unpack_sizes'}});
1328        my $number_streams      = scalar (@{$substreams_info->{'unpack_stream_numbers'}});
1329        my $number_pack_info    = scalar (@{$pack_info->{'pack_sizes'}}); # same as $pack_info->{'number_pack_streams'}
1330        my $number_folders      = scalar (@{$folders}); # same as $unpack_info->{'number_folders'}
1331
1332        # check if there is a stream with a smaller first file than the first file of the first stream
1333        # (this is just a clever approach to produce shorter hashes)
1334
1335        my $file_idx    = 0;
1336        my $data_offset = 0;
1337
1338        my $data_offset_tmp = 0;
1339
1340        # sanity checks (otherwise we might overflow):
1341
1342        if ($number_pack_info < $number_streams) # should never happen (they should be equal)
1343        {
1344          $number_streams = $number_pack_info;
1345        }
1346
1347        if ($number_folders < $number_streams) # should never happen (they should be equal)
1348        {
1349          $number_streams = $number_folders;
1350        }
1351
1352        for (my $stream_idx = 0; $stream_idx < $number_streams; $stream_idx++)
1353        {
1354          my $next_file_idx = $substreams_info->{'unpack_stream_numbers'}[$stream_idx];
1355
1356          my $length_first_file = $substreams_info->{'unpack_sizes'}[$file_idx];
1357
1358          my $length_compressed = $pack_info->{'pack_sizes'}[$stream_idx];
1359
1360          if ($SHOW_LIST_OF_ALL_STREAMS == 1)
1361          {
1362            print STDERR sprintf ("DEBUG: new stream found with first file consisting of %9d bytes of %10d bytes total stream length\n", $length_first_file, $length_compressed);
1363          }
1364
1365          if ($length_first_file < $crc_len)
1366          {
1367            my $digest = get_digest ($file_idx, $unpack_info, $substreams_info);
1368
1369            next unless ((defined ($digest)) && ($digest->{'defined'} == 1));
1370
1371            # get new AES settings (salt, iv, costs):
1372
1373            my $coders = @$folders[$stream_idx]->{'coders'};
1374
1375            my $aes_coder_idx   = 0;
1376            my $aes_coder_found = 0;
1377
1378            for (my $coders_idx = 0; $coders_idx < $number_coders; $coders_idx++)
1379            {
1380              my $codec_id = @$coders[$coders_idx]->{'codec_id'};
1381
1382              if ($codec_id eq $SEVEN_ZIP_AES)
1383              {
1384                $aes_coder_idx = $coders_idx;
1385
1386                $aes_coder_found = 1;
1387              }
1388              elsif (defined (@$coders[$coders_idx]->{'attributes'}))
1389              {
1390                $compression_attributes = unpack ("H*", @$coders[$coders_idx]->{'attributes'});
1391              }
1392            }
1393
1394            next unless ($aes_coder_found == 1);
1395
1396            $attributes = @$coders[$aes_coder_idx]->{'attributes'};
1397
1398            #
1399            # set the "new" hash properties (for this specific/better stream with smaller first file):
1400            #
1401
1402            ($salt_len, $salt_buf, $iv_len, $iv_buf, $number_cycles_power) = get_decoder_properties ($attributes);
1403
1404            $crc = $digest->{'crc'};
1405
1406            $crc_len  = $length_first_file;
1407
1408            $data_len = $length_compressed;
1409
1410            $unpack_size = $length_first_file;
1411
1412            $data_offset = $data_offset_tmp;
1413
1414            # we assume that $type_of_data and $type_of_compression didn't change between the streams
1415            # (this should/could be checked too to avoid any strange problems)
1416          }
1417
1418          $file_idx += $next_file_idx;
1419
1420          if ($file_idx >= $number_file_indices) # should never happen
1421          {
1422            last;
1423          }
1424
1425          $data_offset_tmp += $length_compressed;
1426        }
1427
1428        if ($SHOW_LIST_OF_ALL_STREAMS == 1)
1429        {
1430          print STDERR sprintf ("DEBUG: shortest file at the beginning of a stream consists of %d bytes (offset: %d bytes)\n", $crc_len, $data_offset);
1431        }
1432
1433        if ($data_offset > 0)
1434        {
1435          my_seek ($fp, $data_offset, 1);
1436        }
1437      }
1438
1439      if ($SHORTEN_HASH_LENGTH_TO_CRC_LENGTH == 1)
1440      {
1441        my $aes_len = int ($SHORTEN_HASH_FIXED_HEADER + $crc_len + $SHORTEN_HASH_EXTRA_PERCENT / 100 * $crc_len);
1442
1443        my $AES_BLOCK_SIZE = 16;
1444
1445        $aes_len += $AES_BLOCK_SIZE - 1; # add these bytes to be sure to always include the last "block" too (round up and cast)
1446
1447        $aes_len = int ($aes_len / $AES_BLOCK_SIZE) * $AES_BLOCK_SIZE;
1448
1449        if ($aes_len < $data_len)
1450        {
1451          $data_len    = $aes_len;
1452          $unpack_size = $aes_len;
1453        }
1454      }
1455    }
1456
1457    $data = my_read ($fp, $data_len); # NOTE: we shouldn't read a very huge data buffer directly into memory
1458                                      # improvement: read the data in chunks of several MBs and keep printing it
1459                                      # directly to stdout (by also not returning a string from this function)
1460                                      # that would help to achieve minimal RAM consumption (even for very large hashes)
1461  }
1462
1463  return undef unless (length ($data) == $data_len);
1464
1465  if ($data_len > ($PASSWORD_RECOVERY_TOOL_DATA_LIMIT / 2))
1466  {
1467    print STDERR "WARNING: the file '". $file_path . "' unfortunately can't be used with $PASSWORD_RECOVERY_TOOL_NAME since the data length\n";
1468    print STDERR "in this particular case is too long ($data_len of the maximum allowed " .($PASSWORD_RECOVERY_TOOL_DATA_LIMIT / 2). " bytes).\n";
1469
1470    if ($PASSWORD_RECOVERY_TOOL_SUPPORT_PADDING_ATTACK == 1)
1471    {
1472      print STDERR "Furthermore, it could not be truncated. This should only happen in very rare cases.\n";
1473    }
1474
1475    return "";
1476  }
1477
1478  $hash_buf = sprintf ("%s:%s%u\$%u\$%u\$%s\$%u\$%s\$%u\$%u\$%u\$%s",
1479    basename($file_path),
1480    $SEVEN_ZIP_HASH_SIGNATURE,
1481    $type_of_data,
1482    $number_cycles_power,
1483    $salt_len,
1484    unpack ("H*", $salt_buf),
1485    $iv_len,
1486    unpack ("H*", $iv_buf),
1487    $crc,
1488    $data_len,
1489    $unpack_size,
1490    unpack ("H*", $data) # could be very large. We could/should avoid loading/copying this data into memory
1491  );
1492
1493  return $hash_buf if ($type_of_data == $SEVEN_ZIP_UNCOMPRESSED);
1494  return $hash_buf if ($type_of_data == $SEVEN_ZIP_TRUNCATED);
1495
1496  $hash_buf .= sprintf ("\$%u\$%s",
1497    $crc_len,
1498    $compression_attributes
1499  );
1500
1501  return $hash_buf;
1502}
1503
1504sub read_seven_zip_signature_header
1505{
1506  my $fp = shift;
1507
1508  my $signature;
1509
1510  # ArchiveVersion
1511
1512  my $major_version = my_read ($fp, 1);
1513
1514  $major_version = ord ($major_version);
1515
1516  my $minor_version = my_read ($fp, 1);
1517
1518  $minor_version = ord ($minor_version);
1519
1520  # StartHeaderCRC
1521
1522  my_read ($fp, 4); # skip start header CRC
1523
1524  # StartHeader
1525
1526  my $next_header_offset = get_uint64 ($fp);
1527  my $next_header_size   = get_uint64 ($fp);
1528
1529  my_read ($fp, 4); # next header CRC
1530
1531  my $position_after_header = my_tell ($fp);
1532
1533  $signature = {
1534    "major_version" => $major_version,
1535    "minor_version" => $minor_version,
1536    "next_header_offset" => $next_header_offset,
1537    "next_header_size" => $next_header_size,
1538    "position_after_header" => $position_after_header
1539  };
1540
1541  return $signature;
1542}
1543
1544sub skip_seven_zip_data
1545{
1546  my $fp = shift;
1547
1548  # determine the length to skip
1549
1550  my $len = my_read ($fp, 1);
1551
1552  # do skip len bytes
1553
1554  $len = ord ($len);
1555
1556  my_read ($fp, $len);
1557}
1558
1559sub wait_for_seven_zip_id
1560{
1561  my $fp = shift;
1562  my $id = shift;
1563
1564  while (1)
1565  {
1566    my $new_id = read_id ($fp);
1567
1568    if ($new_id eq $id)
1569    {
1570      return 1;
1571    }
1572    elsif ($new_id eq $SEVEN_ZIP_END)
1573    {
1574      return 0;
1575    }
1576
1577    skip_seven_zip_data ($fp);
1578  }
1579
1580  return 0;
1581}
1582
1583sub read_seven_zip_digests
1584{
1585  my $fp = shift;
1586
1587  my $number_items = shift;
1588
1589  my @digests;
1590
1591  # init
1592
1593  for (my $i = 0; $i < $number_items; $i++)
1594  {
1595    my $digest = {
1596      "crc" => "",
1597      "defined" => 0
1598    };
1599
1600    push (@digests, $digest)
1601  }
1602
1603  # get number of items
1604
1605  my @digests_defined = get_boolean_vector_check_all ($fp, $number_items);
1606
1607  # for each number of item, get a digest
1608
1609  for (my $i = 0; $i < $number_items; $i++)
1610  {
1611    my $crc = 0;
1612
1613    for (my $i = 0; $i < 4; $i++)
1614    {
1615      my $val = my_read ($fp, 1);
1616
1617      $val = ord ($val);
1618
1619      $crc |= ($val << (8 * $i));
1620    }
1621
1622    $digests[$i]->{'crc'} = $crc;
1623    $digests[$i]->{'defined'} = $digests_defined[$i];
1624  }
1625
1626  return @digests;
1627}
1628
1629sub read_seven_zip_pack_info
1630{
1631  my $fp = shift;
1632
1633  my $pack_info;
1634
1635  # PackPos
1636
1637  my $pack_pos = read_number  ($fp);
1638
1639  # NumPackStreams
1640
1641  my $number_pack_streams = read_number ($fp);
1642
1643  # must be "size" id
1644
1645  if (! wait_for_seven_zip_id ($fp, $SEVEN_ZIP_SIZE))
1646  {
1647    return undef;
1648  }
1649
1650  my @pack_sizes = (0) x $number_pack_streams;
1651
1652  for (my $i = 0; $i < $number_pack_streams; $i++)
1653  {
1654    $pack_sizes[$i] = read_number ($fp);
1655  }
1656
1657  $pack_info = {
1658    "number_pack_streams" => $number_pack_streams,
1659    "pack_pos" => $pack_pos,
1660    "pack_sizes" => \@pack_sizes
1661  };
1662
1663  # read remaining data
1664
1665  while (1)
1666  {
1667    my $id = read_id ($fp);
1668
1669    if ($id eq $SEVEN_ZIP_END)
1670    {
1671      return $pack_info;
1672    }
1673    elsif ($id eq $SEVEN_ZIP_CRC)
1674    {
1675      my $digests = read_seven_zip_digests ($fp, $number_pack_streams);
1676
1677      # we do not need those digests, ignore them
1678      # (but we need to read them from the stream)
1679
1680      next;
1681    }
1682
1683    skip_seven_zip_data ($fp);
1684  }
1685
1686  # something went wrong
1687
1688  return undef;
1689}
1690
1691sub read_seven_zip_folders
1692{
1693  my $fp = shift;
1694
1695  my $folder;
1696
1697  my @coders = ();
1698  my @bindpairs = ();
1699  my $index_main_stream = 0;
1700  my $sum_input_streams  = 0;
1701  my $sum_output_streams = 0;
1702  my $sum_packed_streams = 1;
1703
1704  # NumCoders
1705
1706  my $number_coders = read_number ($fp);
1707
1708  # loop
1709
1710  for (my $i = 0; $i < $number_coders; $i++)
1711  {
1712    my $main_byte = my_read ($fp, 1);
1713
1714    $main_byte = ord ($main_byte);
1715
1716    if ($main_byte & 0xC0)
1717    {
1718      return undef;
1719    }
1720
1721    my $codec_id_size = $main_byte & 0xf;
1722
1723    if ($codec_id_size > 8)
1724    {
1725      return undef;
1726    }
1727
1728    # the codec id (very important info for us):
1729    # codec_id: 06F10701 -> AES-256 + SHA-256
1730    # codec_id: 030101   -> lzma  (we need to decompress - k_LZMA)
1731    # codec_id: 21       -> lzma2 (we need to decompress - k_LZMA2)
1732
1733    my $codec_id = my_read ($fp, $codec_id_size);
1734
1735    # NumInStreams
1736
1737    my $number_input_streams = 1;
1738
1739    # NumOutStreams
1740
1741    my $number_output_streams = 1;
1742
1743    if (($main_byte & 0x10) != 0)
1744    {
1745      $number_input_streams  = read_number ($fp);
1746      $number_output_streams = read_number ($fp);
1747    }
1748
1749    $sum_input_streams  += $number_input_streams;
1750    $sum_output_streams += $number_output_streams;
1751
1752    # attributes
1753
1754    my $attributes;
1755
1756    if (($main_byte & 0x020) != 0)
1757    {
1758      my $property_size = read_number ($fp);
1759
1760      $attributes = my_read ($fp, $property_size);
1761    }
1762
1763    $coders[$i] = {
1764      "codec_id" => $codec_id,
1765      "number_input_streams" => $number_input_streams,
1766      "number_output_streams" => $number_output_streams,
1767      "attributes" => $attributes
1768    };
1769  }
1770
1771  if (($sum_input_streams != 1) || ($sum_output_streams != 1))
1772  {
1773    # InStreamUsed / OutStreamUsed
1774
1775    my @input_stream_used  = (0) x $sum_input_streams;
1776    my @output_stream_used = (0) x $sum_output_streams;
1777
1778    # BindPairs
1779
1780    my $number_bindpairs = $sum_output_streams - 1;
1781
1782    for (my $i = 0; $i < $number_bindpairs; $i++)
1783    {
1784      # input
1785
1786      my $index_input = read_number ($fp);
1787
1788      if ($input_stream_used[$index_input] == 1)
1789      {
1790        return undef; # the stream is used already, shouldn't happen at all
1791      }
1792
1793      $input_stream_used[$index_input] = 1;
1794
1795      # output
1796
1797      my $index_output = read_number ($fp);
1798
1799      if ($output_stream_used[$index_output] == 1)
1800      {
1801        return undef;
1802      }
1803
1804      $output_stream_used[$index_output] = 1;
1805
1806      my @new_bindpair = ($index_input, $index_output);
1807
1808      push (@bindpairs, \@new_bindpair);
1809    }
1810
1811    # PackedStreams
1812
1813    $sum_packed_streams = $sum_input_streams - $number_bindpairs;
1814
1815    if ($sum_packed_streams != 1)
1816    {
1817      for (my $i = 0; $i < $sum_packed_streams; $i++)
1818      {
1819        # we can ignore this
1820
1821        read_number ($fp); # my $index = read_number ($fp);
1822      }
1823    }
1824
1825    # determine the main stream
1826
1827    $index_main_stream = -1;
1828
1829    for (my $i = 0; $i < $sum_output_streams; $i++)
1830    {
1831      if ($output_stream_used[$i] == 0)
1832      {
1833        $index_main_stream = $i;
1834
1835        last;
1836      }
1837    }
1838
1839    if ($index_main_stream == -1)
1840    {
1841      return undef; # should not happen
1842    }
1843  }
1844
1845  $folder = {
1846    "number_coders" => $number_coders,
1847    "coders" => \@coders,
1848    "bindpairs" => \@bindpairs,
1849    "index_main_stream"  => $index_main_stream,
1850    "sum_input_streams"  => $sum_input_streams,
1851    "sum_output_streams" => $sum_output_streams,
1852    "sum_packed_streams" => $sum_packed_streams,
1853  };
1854
1855  return $folder;
1856}
1857
1858sub read_seven_zip_unpack_info
1859{
1860  my $fp = shift;
1861
1862  my $unpack_info;
1863
1864  my $number_folders = 0;
1865  my @folders = ();
1866  my @datastream_indices = ();
1867  my @unpack_sizes;
1868  my @digests;
1869  my @main_unpack_size_index;
1870  my @coder_unpack_sizes;
1871
1872  # check until we see the "folder" id
1873
1874  if (! wait_for_seven_zip_id ($fp, $SEVEN_ZIP_FOLDER))
1875  {
1876    return undef;
1877  }
1878
1879  # NumFolders
1880
1881  $number_folders = read_number ($fp);
1882
1883  # External
1884
1885  my $external = my_read ($fp, 1);
1886
1887  # loop
1888
1889  my $sum_coders_output_streams = 0;
1890  my $sum_folders = 0;
1891
1892  for (my $i = 0; $i < $number_folders; $i++)
1893  {
1894    if ($external eq $SEVEN_ZIP_NOT_EXTERNAL)
1895    {
1896      my $folder = read_seven_zip_folders ($fp);
1897
1898      $folders[$i] = $folder;
1899
1900      $main_unpack_size_index[$i] = $folder->{'index_main_stream'};
1901      $coder_unpack_sizes[$i] = $sum_coders_output_streams;
1902
1903      $sum_coders_output_streams += $folder->{'sum_output_streams'};
1904
1905      $sum_folders++;
1906    }
1907    elsif ($external eq $SEVEN_ZIP_EXTERNAL)
1908    {
1909      $datastream_indices[$i] = read_number ($fp);
1910    }
1911    else
1912    {
1913      return undef;
1914    }
1915  }
1916
1917  if (!wait_for_seven_zip_id ($fp, $SEVEN_ZIP_UNPACK_SIZE))
1918  {
1919    return undef;
1920  }
1921
1922  for (my $i = 0; $i < $sum_coders_output_streams; $i++)
1923  {
1924    $unpack_sizes[$i] = read_number ($fp);
1925  }
1926
1927  # read remaining data
1928
1929  while (1)
1930  {
1931    my $id = read_id ($fp);
1932
1933    if ($id eq $SEVEN_ZIP_END)
1934    {
1935      $unpack_info = {
1936        "number_folders" => $number_folders,
1937        "folders" => \@folders,
1938        "datastream_indices" => \@datastream_indices,
1939        "digests" => \@digests,
1940        "unpack_sizes" => \@unpack_sizes,
1941        "main_unpack_size_index" => \@main_unpack_size_index,
1942        "coder_unpack_sizes" => \@coder_unpack_sizes
1943      };
1944
1945      return $unpack_info;
1946    }
1947    elsif ($id eq $SEVEN_ZIP_CRC)
1948    {
1949      my @new_digests = read_seven_zip_digests ($fp, $sum_folders);
1950
1951      for (my $i = 0; $i < $sum_folders; $i++)
1952      {
1953        $digests[$i]->{'defined'} = $new_digests[$i]->{'defined'};
1954        $digests[$i]->{'crc'} = $new_digests[$i]->{'crc'};
1955      }
1956
1957      next;
1958    }
1959
1960    skip_seven_zip_data ($fp);
1961  }
1962
1963  # something went wrong
1964
1965  return undef;
1966}
1967
1968sub get_folder_unpack_size
1969{
1970  my $unpack_info  = shift;
1971  my $folder_index = shift;
1972
1973  my $index = $unpack_info->{'coder_unpack_sizes'}[$folder_index] + $unpack_info->{'main_unpack_size_index'}[$folder_index];
1974
1975  return $unpack_info->{'unpack_sizes'}[$index];
1976}
1977
1978sub has_valid_folder_crc
1979{
1980  my $digests = shift;
1981  my $index   = shift;
1982
1983  if (! defined (@$digests[$index]))
1984  {
1985    return 0;
1986  }
1987
1988  my $digest = @$digests[$index];
1989
1990  if ($digest->{'defined'} != 1)
1991  {
1992    return 0;
1993  }
1994
1995  if (length ($digest->{'crc'}) < 1)
1996  {
1997    return 0;
1998  }
1999
2000  return 1;
2001}
2002
2003sub read_seven_zip_substreams_info
2004{
2005  my $fp = shift;
2006
2007  my $unpack_info = shift;
2008
2009  my $number_folders = $unpack_info->{'number_folders'};
2010  my $folders = $unpack_info->{'folders'};
2011
2012  my $folders_digests = $unpack_info->{'digests'};
2013
2014  my $substreams_info;
2015  my @number_unpack_streams = (1) x $number_folders;
2016  my @unpack_sizes;
2017  my @digests;
2018
2019  # get the numbers of unpack streams
2020
2021  my $id;
2022
2023  while (1)
2024  {
2025    $id = read_id ($fp);
2026
2027    if ($id eq $SEVEN_ZIP_NUM_UNPACK_STREAM)
2028    {
2029      for (my $i = 0; $i < $number_folders; $i++)
2030      {
2031        $number_unpack_streams[$i] = read_number ($fp);
2032      }
2033
2034      next;
2035    }
2036    elsif ($id eq $SEVEN_ZIP_CRC)
2037    {
2038      last;
2039    }
2040    elsif ($id eq $SEVEN_ZIP_SIZE)
2041    {
2042      last;
2043    }
2044    elsif ($id eq $SEVEN_ZIP_END)
2045    {
2046      last;
2047    }
2048
2049    skip_seven_zip_data ($fp);
2050  }
2051
2052  if ($id eq $SEVEN_ZIP_SIZE)
2053  {
2054    for (my $i = 0; $i < $number_folders; $i++)
2055    {
2056      my $number_substreams = $number_unpack_streams[$i];
2057
2058      if ($number_substreams == 0)
2059      {
2060        next;
2061      }
2062
2063      my $sum_unpack_sizes = 0;
2064
2065      for (my $j = 1; $j < $number_substreams; $j++)
2066      {
2067        my $size = read_number ($fp);
2068
2069        push (@unpack_sizes, $size);
2070
2071        $sum_unpack_sizes += $size;
2072      }
2073
2074      # add the folder unpack size itself
2075
2076      my $folder_unpack_size = get_folder_unpack_size ($unpack_info, $i);
2077
2078      if ($folder_unpack_size < $sum_unpack_sizes)
2079      {
2080        return undef;
2081      }
2082
2083      my $size = $folder_unpack_size - $sum_unpack_sizes;
2084
2085      push (@unpack_sizes, $size);
2086    }
2087
2088    $id = read_id ($fp);
2089  }
2090  else
2091  {
2092    for (my $i = 0; $i < $number_folders; $i++)
2093    {
2094      my $number_substreams = $number_unpack_streams[$i];
2095
2096      if ($number_substreams > 1)
2097      {
2098        return undef;
2099      }
2100
2101      if ($number_substreams == 1)
2102      {
2103        push (@unpack_sizes, get_folder_unpack_size ($unpack_info, $i));
2104      }
2105    }
2106  }
2107
2108  my $number_digests = 0;
2109
2110  for (my $i = 0; $i < $number_folders; $i++)
2111  {
2112    my $number_substreams = $number_unpack_streams[$i];
2113
2114    if (($number_substreams != 1) || (has_valid_folder_crc ($folders_digests, $i) == 0))
2115    {
2116      $number_digests += $number_substreams;
2117    }
2118  }
2119
2120  while (1)
2121  {
2122    if ($id eq $SEVEN_ZIP_END)
2123    {
2124      last;
2125    }
2126    elsif ($id eq $SEVEN_ZIP_CRC)
2127    {
2128      my @is_digest_defined = get_boolean_vector_check_all ($fp, $number_digests);
2129
2130      my $k  = 0;
2131      my $k2 = 0;
2132
2133      for (my $i = 0; $i < $number_folders; $i++)
2134      {
2135        my $number_substreams = $number_unpack_streams[$i];
2136
2137        if (($number_substreams == 1) && (has_valid_folder_crc ($folders_digests, $i)))
2138        {
2139          $digests[$k]->{'defined'} = 1;
2140          $digests[$k]->{'crc'} = @$folders_digests[$i]->{'crc'};
2141
2142          $k++;
2143        }
2144        else
2145        {
2146          for (my $j = 0; $j < $number_substreams; $j++)
2147          {
2148            my $defined = $is_digest_defined[$k2];
2149
2150            # increase k2
2151
2152            $k2++;
2153
2154            if ($defined == 1)
2155            {
2156              my $digest = 0;
2157
2158              for (my $i = 0; $i < 4; $i++)
2159              {
2160                my $val = my_read ($fp, 1);
2161
2162                $val = ord ($val);
2163
2164                $digest |= ($val << (8 * $i));
2165              }
2166
2167              $digests[$k]->{'defined'} = 1;
2168              $digests[$k]->{'crc'} = $digest;
2169            }
2170            else
2171            {
2172              $digests[$k]->{'defined'} = 0;
2173              $digests[$k]->{'crc'} = 0;
2174            }
2175
2176            $k++;
2177          }
2178        }
2179      }
2180    }
2181    else
2182    {
2183      skip_seven_zip_data ($fp);
2184    }
2185
2186    $id = read_id ($fp);
2187  }
2188
2189  my $len_defined = scalar (@digests);
2190  my $len_unpack_sizes = scalar (@unpack_sizes);
2191
2192  if ($len_defined != $len_unpack_sizes)
2193  {
2194    my $k = 0;
2195
2196    for (my $i = 0; $i < $number_folders; $i++)
2197    {
2198      my $number_substreams = $number_unpack_streams[$i];
2199
2200      if (($number_substreams == 1) && (has_valid_folder_crc ($folders_digests, $i)))
2201      {
2202        $digests[$k]->{'defined'} = 1;
2203        $digests[$k]->{'crc'} = @$folders_digests[$i]->{'crc'};
2204
2205        $k++;
2206      }
2207      else
2208      {
2209        for (my $j = 0; $j < $number_substreams; $j++)
2210        {
2211          $digests[$k]->{'defined'} = 0;
2212          $digests[$k]->{'crc'} = 0;
2213
2214          $k++;
2215        }
2216      }
2217    }
2218  }
2219
2220  $substreams_info = {
2221    "unpack_stream_numbers" => \@number_unpack_streams,
2222    "unpack_sizes" => \@unpack_sizes,
2223    "number_digests" => $number_digests,
2224    "digests" => \@digests
2225  };
2226
2227  return $substreams_info;
2228}
2229
2230sub read_seven_zip_streams_info
2231{
2232  my $fp = shift;
2233
2234  my $streams_info;
2235
2236  my $pack_info;
2237  my $unpack_info;
2238  my $substreams_info;
2239
2240  # get the type of streams info (id)
2241
2242  my $id = read_id ($fp);
2243
2244  if ($id eq $SEVEN_ZIP_PACK_INFO)
2245  {
2246    $pack_info = read_seven_zip_pack_info ($fp);
2247
2248    return undef unless (defined ($pack_info));
2249
2250    $id = read_id ($fp);
2251  }
2252
2253  if ($id eq $SEVEN_ZIP_UNPACK_INFO)
2254  {
2255    $unpack_info = read_seven_zip_unpack_info ($fp);
2256
2257    return undef unless (defined ($unpack_info));
2258
2259    $id = read_id ($fp);
2260  }
2261
2262  if ($id eq $SEVEN_ZIP_SUBSTREAMS_INFO)
2263  {
2264    $substreams_info = read_seven_zip_substreams_info ($fp, $unpack_info);
2265
2266    return undef unless (defined ($substreams_info));
2267
2268    $id = read_id ($fp);
2269  }
2270  else
2271  {
2272    my @number_unpack_streams = ();
2273    my @unpack_sizes = ();
2274    my $number_digests = 0;
2275    my $digests;
2276
2277    if (defined ($unpack_info))
2278    {
2279      my $folders = $unpack_info->{'folders'};
2280
2281      my $number_folders = $unpack_info->{'number_folders'};
2282
2283      for (my $i = 0; $i < $number_folders; $i++)
2284      {
2285        $number_unpack_streams[$i] = 1;
2286
2287        my $folder_unpack_size = get_folder_unpack_size ($unpack_info, $i);
2288
2289        push (@unpack_sizes, $folder_unpack_size);
2290      }
2291    }
2292
2293    $substreams_info = {
2294      "unpack_stream_numbers" => \@number_unpack_streams,
2295      "unpack_sizes" => \@unpack_sizes,
2296      "number_digests" => $number_digests,
2297      "digests" => $digests
2298    };
2299  }
2300
2301  $streams_info = {
2302    "pack_info" => $pack_info,
2303    "unpack_info" => $unpack_info,
2304    "substreams_info" => $substreams_info
2305  };
2306
2307  return $streams_info;
2308}
2309
2310sub folder_seven_zip_decode
2311{
2312  my $streams_info = shift;
2313
2314  my $number_coders = 0;
2315
2316  for (my $i = 0; $i < $number_coders; $i++)
2317  {
2318  }
2319  #parse_folder ();
2320
2321  return;
2322}
2323
2324sub read_seven_zip_archive_properties
2325{
2326  my $fp = shift;
2327
2328  # also the 7-Zip source code just skip data from the archive property entry
2329
2330  while (1)
2331  {
2332    my $id = read_id ($fp);
2333
2334    if ($id eq $SEVEN_ZIP_END)
2335    {
2336      return 1;
2337    }
2338
2339    skip_seven_zip_data ($fp);
2340  }
2341
2342  # something went wrong
2343
2344  return 0;
2345}
2346
2347sub get_uint64_defined_vector
2348{
2349  my $fp = shift;
2350
2351  my $number_items = shift;
2352
2353  my @values;
2354
2355  # first check if the values are defined
2356
2357  my @defines = get_boolean_vector_check_all ($fp, $number_items);
2358
2359  my $external = my_read ($fp, 1);
2360
2361  if ($external eq $SEVEN_ZIP_EXTERNAL)
2362  {
2363    # ignored for now
2364  }
2365
2366  for (my $i = 0; $i < $number_items; $i++)
2367  {
2368    my $defined = $defines[$i];
2369
2370    my $value = 0;
2371
2372    if ($defined != 0)
2373    {
2374      $value = get_uint64 ($fp);
2375    }
2376
2377    $values[$i] = $value;
2378  }
2379
2380  return @values;
2381}
2382
2383sub read_seven_zip_files_info
2384{
2385  my $fp = shift;
2386
2387  my $streams_info = shift;
2388
2389  my $files_info;
2390
2391  my @files;
2392
2393  # NumFiles
2394
2395  my $number_files = read_number ($fp);
2396
2397  # init file
2398
2399  for (my $i = 0; $i < $number_files; $i++)
2400  {
2401    $files[$i]->{'name_utf16'} = "";
2402    $files[$i]->{'attribute_defined'} = 0;
2403    $files[$i]->{'attribute'} = 0;
2404    $files[$i]->{'is_empty_stream'} = 0;
2405    $files[$i]->{'start_position'} = 0;
2406    $files[$i]->{'creation_time'} = 0;
2407    $files[$i]->{'access_time'} = 0;
2408    $files[$i]->{'modification_time'} = 0;
2409    $files[$i]->{'size'} = 0;
2410    $files[$i]->{'has_stream'} = 0;
2411    $files[$i]->{'is_dir'} = 0;
2412    $files[$i]->{'crc_defined'} = 0;
2413    $files[$i]->{'crc'} = "";
2414  }
2415
2416  my $number_empty_streams = 0;
2417
2418  my @empty_streams = (0) x $number_files;
2419  my @empty_files   = (0) x $number_files;
2420  my @anti_files    = (0) x $number_files;
2421
2422  # loop over all properties
2423
2424  while (1)
2425  {
2426    my $property_type_val = read_number ($fp);
2427
2428    my $property_type = num_to_id ($property_type_val);
2429
2430    if ($property_type eq $SEVEN_ZIP_END)
2431    {
2432      last;
2433    }
2434
2435    # Size
2436
2437    my $size = read_number ($fp);
2438
2439    # check and act according to the type of property found
2440
2441    my $is_known_type = 1;
2442
2443    if ($property_type_val > $SEVEN_ZIP_MAX_PROPERTY_TYPE)
2444    {
2445      # ignore (isKnownType false in 7-Zip source code)
2446
2447      my_read ($fp, $size);
2448    }
2449    else
2450    {
2451      if ($property_type eq $SEVEN_ZIP_NAME)
2452      {
2453        my $external = my_read ($fp, 1);
2454
2455        if ($external eq $SEVEN_ZIP_EXTERNAL)
2456        {
2457          # TODO: not implemented yet
2458
2459          return undef;
2460        }
2461
2462        my $files_size = scalar (@files);
2463
2464        for (my $i = 0; $i < $files_size; $i++)
2465        {
2466          my $name = "";
2467
2468          while (1)
2469          {
2470            my $name_part = my_read ($fp, 2);
2471
2472            if ($name_part eq $SEVEN_ZIP_FILE_NAME_END)
2473            {
2474              last;
2475            }
2476            else
2477            {
2478              $name .= $name_part;
2479            }
2480          }
2481
2482          $files[$i]->{'name_utf16'} = $name;
2483        }
2484      }
2485      elsif ($property_type eq $SEVEN_ZIP_WIN_ATTRIBUTE)
2486      {
2487        my $files_size = scalar (@files);
2488
2489        my @booleans = get_boolean_vector_check_all ($fp, $number_files);
2490
2491        my $external = my_read ($fp, 1);
2492
2493        if ($external eq $SEVEN_ZIP_EXTERNAL)
2494        {
2495          # TODO: not implemented yet
2496
2497          return undef;
2498        }
2499
2500        for (my $i = 0; $i < $number_files; $i++)
2501        {
2502          my $defined = $booleans[$i];
2503
2504          $files[$i]->{'attribute_defined'} = $defined;
2505
2506          if ($defined)
2507          {
2508            my $attributes = get_uint32 ($fp);
2509
2510            $files[$i]->{'attribute'} = $attributes;
2511          }
2512        }
2513      }
2514      elsif ($property_type eq $SEVEN_ZIP_EMPTY_STREAM)
2515      {
2516        @empty_streams = get_boolean_vector ($fp, $number_files);
2517
2518        $number_empty_streams = 0;
2519
2520        # loop over all boolean and set the files attribute + empty/anti stream vector
2521
2522        my $number_booleans = scalar (@empty_streams);
2523
2524        for (my $i = 0; $i < $number_booleans; $i++)
2525        {
2526          my $boolean = $empty_streams[$i];
2527
2528          $files[$i]->{'is_empty_stream'} = $boolean;
2529
2530          if ($boolean)
2531          {
2532            $number_empty_streams++;
2533          }
2534        }
2535
2536        for (my $i = 0; $i < $number_empty_streams; $i++)
2537        {
2538          $empty_files[$i] = 0;
2539          $anti_files[$i]  = 0;
2540        }
2541      }
2542      elsif ($property_type eq $SEVEN_ZIP_EMPTY_FILE)
2543      {
2544        @empty_files = get_boolean_vector ($fp, $number_empty_streams);
2545      }
2546      elsif ($property_type eq $SEVEN_ZIP_ANTI_FILE)
2547      {
2548        @anti_files = get_boolean_vector ($fp, $number_empty_streams);
2549      }
2550      elsif ($property_type eq $SEVEN_ZIP_START_POS)
2551      {
2552        my @start_positions = get_uint64_defined_vector ($fp, $number_files);
2553
2554        my $number_start_positions = scalar (@start_positions);
2555
2556        for (my $i = 0; $i < $number_start_positions; $i++)
2557        {
2558          $files[$i]->{'start_position'} = $start_positions[$i];
2559        }
2560      }
2561      elsif ($property_type eq $SEVEN_ZIP_CREATION_TIME)
2562      {
2563        my @creation_times = get_uint64_defined_vector ($fp, $number_files);
2564
2565        my $number_creation_times = scalar (@creation_times);
2566
2567        for (my $i = 0; $i < $number_creation_times; $i++)
2568        {
2569          $files[$i]->{'creation_time'} = $creation_times[$i];
2570        }
2571      }
2572      elsif ($property_type eq $SEVEN_ZIP_ACCESS_TIME)
2573      {
2574        my @access_times = get_uint64_defined_vector ($fp, $number_files);
2575
2576        my $number_access_times = scalar (@access_times);
2577
2578        for (my $i = 0; $i < $number_access_times; $i++)
2579        {
2580          $files[$i]->{'access_time'} = $access_times[$i];
2581        }
2582      }
2583      elsif ($property_type eq $SEVEN_ZIP_MODIFICATION_TIME)
2584      {
2585        my @modification_times = get_uint64_defined_vector ($fp, $number_files);
2586
2587        my $number_modification_times = scalar (@modification_times);
2588
2589        for (my $i = 0; $i < $number_modification_times; $i++)
2590        {
2591          $files[$i]->{'modification_time'} = $modification_times[$i];
2592        }
2593      }
2594      elsif ($property_type eq $SEVEN_ZIP_DUMMY)
2595      {
2596        my $compare_bytes = "\x00" x $size;
2597
2598        my $bytes = my_read ($fp, $size);
2599
2600        if ($bytes ne $compare_bytes)
2601        {
2602          return undef;
2603        }
2604      }
2605      else
2606      {
2607        # ignore (isKnownType also in 7-Zip source code)
2608
2609        my_read ($fp, $size);
2610      }
2611    }
2612  }
2613
2614  # next id should be SEVEN_ZIP_END, but we (and 7-ZIP source code too) do not care
2615
2616  my $id = read_id ($fp);
2617
2618  # check anti files
2619
2620  my $number_anti_items = 0;
2621
2622  for (my $i = 0; $i < $number_empty_streams; $i++)
2623  {
2624    if ($anti_files[$i] != 0)
2625    {
2626      $number_anti_items++;
2627    }
2628  }
2629
2630  # set digests depending on empty/anti files
2631
2632  my $index_sizes = 0;
2633  my $index_empty_files = 0;
2634
2635  my $unpack_info = $streams_info->{'unpack_info'};
2636  my $substreams_info = $streams_info->{'substreams_info'};
2637
2638  for (my $i = 0; $i < $number_files; $i++)
2639  {
2640    my $is_anti = 0;
2641    my $has_stream = 1;
2642
2643    if ($empty_streams[$i] == 1)
2644    {
2645      $has_stream = 0;
2646    }
2647
2648    $files[$i]->{'has_stream'} = $has_stream;
2649    $files[$i]->{'crc'} = "";
2650
2651    if ($has_stream == 1)
2652    {
2653      $is_anti = 0;
2654
2655      $files[$i]->{'is_dir'} = 0;
2656      $files[$i]->{'size'} = $unpack_info->{'unpack_sizes'}[$index_sizes];
2657
2658      $files[$i]->{'crc_defined'} = 0;
2659      $files[$i]->{'crc'} = "";
2660
2661      my $is_crc_defined = has_valid_folder_crc ($unpack_info->{'digests'}, $index_sizes);
2662
2663      if ($is_crc_defined == 1)
2664      {
2665        $files[$i]->{'crc_defined'} = 1;
2666
2667        my $crc_item = $unpack_info->{'digests'}[$index_sizes];
2668
2669        $files[$i]->{'crc'} = $crc_item->{'crc'};
2670      }
2671      else
2672      {
2673        # can we really do this too?
2674
2675        $is_crc_defined = has_valid_folder_crc ($substreams_info->{'digests'}, $index_sizes);
2676
2677        if ($is_crc_defined == 1)
2678        {
2679          $files[$i]->{'crc_defined'} = 1;
2680
2681          my $crc_item = $substreams_info->{'digests'}[$index_sizes];
2682
2683          $files[$i]->{'crc'} = $crc_item->{'crc'};
2684        }
2685      }
2686
2687      $index_sizes++;
2688    }
2689    else
2690    {
2691      my $is_dir = 0;
2692
2693      if ($empty_files[$index_empty_files] == 0)
2694      {
2695        $files[$i]->{'is_dir'} = 1;
2696      }
2697      else
2698      {
2699        $files[$i]->{'is_dir'} = 0;
2700      }
2701
2702      $files[$i]->{'size'} = 0;
2703
2704      $files[$i]->{'crc_defined'} = 0;
2705      $files[$i]->{'crc'} = "";
2706
2707      $index_empty_files++;
2708    }
2709  }
2710
2711  $files_info = {
2712    "number_files" => $number_files,
2713    "files" => \@files
2714  };
2715
2716  return $files_info;
2717}
2718
2719sub read_seven_zip_header
2720{
2721  my $fp = shift;
2722
2723  my $header;
2724
2725  my $additional_streams_info;
2726  my $streams_info;
2727  my $files_info;
2728
2729  # get the type of header
2730
2731  my $id = read_id ($fp);
2732
2733  if ($id eq $SEVEN_ZIP_ARCHIVE_PROPERTIES)
2734  {
2735    # we just ignore the data here (but we need to read it from the stream!)
2736
2737    if (! read_seven_zip_archive_properties ($fp))
2738    {
2739      return undef;
2740    }
2741
2742    $id = read_id ($fp);
2743  }
2744
2745  if ($id eq $SEVEN_ZIP_ADD_STREAMS_INFO)
2746  {
2747    $additional_streams_info = read_and_decode_seven_zip_packed_stream ($fp);
2748
2749    return undef unless (defined ($additional_streams_info));
2750
2751    # do we need to change the start position here ?
2752
2753    $id = read_id ($fp);
2754  }
2755
2756  if ($id eq $SEVEN_ZIP_MAIN_STREAMS_INFO)
2757  {
2758    $streams_info = read_seven_zip_streams_info ($fp);
2759
2760    return undef unless (defined ($streams_info));
2761
2762    $id = read_id ($fp);
2763  }
2764
2765  if ($id eq $SEVEN_ZIP_FILES_INFO)
2766  {
2767    $files_info = read_seven_zip_files_info ($fp, $streams_info);
2768
2769    return undef unless (defined ($files_info));
2770  }
2771
2772  $header = {
2773    "additional_streams_info" => $additional_streams_info,
2774    "streams_info" => $streams_info,
2775    "files_info" => $files_info,
2776    "type" => "raw"
2777  };
2778
2779  return $header;
2780}
2781
2782sub read_and_decode_seven_zip_packed_stream
2783{
2784  my $fp = shift;
2785
2786  my $packed_stream;
2787
2788  $packed_stream = read_seven_zip_streams_info ($fp);
2789
2790  # for each folder, get the decoder and decode the data
2791
2792  return $packed_stream;
2793}
2794
2795sub parse_seven_zip_header
2796{
2797  my $fp = shift;
2798
2799  my $header;
2800  my $streams_info;
2801
2802  # get the type of the header (id)
2803
2804  my $id = read_id ($fp);
2805
2806  # check if either encoded/packed or encrypted: to get the details we need to check the method
2807
2808  if ($id ne $SEVEN_ZIP_HEADER)
2809  {
2810    if ($id ne $SEVEN_ZIP_ENCODED_HEADER)
2811    {
2812      # when we reach this code section we probably found an invalid 7z file (just ignore it!)
2813      # print STDERR "WARNING: only encoded headers are allowed if no raw header is present\n";
2814
2815      return undef;
2816    }
2817
2818    $streams_info = read_and_decode_seven_zip_packed_stream ($fp);
2819
2820    return undef unless (defined ($streams_info));
2821
2822    $header = {
2823      "additional_streams_info" => undef,
2824      "streams_info" => $streams_info,
2825      "files_info" => undef,
2826      "type" => "encoded"
2827    }
2828
2829    # Note: now the 7-Zip code normally parses the header (which we got from the decode operation above)
2830    # but we do not really need to do this here. Skip
2831  }
2832  else
2833  {
2834    $header = read_seven_zip_header ($fp);
2835  }
2836
2837  return $header;
2838}
2839
2840sub read_seven_zip_next_header
2841{
2842  my $fp = shift;
2843
2844  my $header_size   = shift;
2845  my $header_offset = shift;
2846
2847  my $header;
2848
2849  # get the header of size header_size at relative position header_offset
2850
2851  my_seek ($fp, $header_offset, 1);
2852
2853  # read the header
2854
2855  $header = parse_seven_zip_header ($fp);
2856
2857  return $header;
2858}
2859
2860sub read_seven_zip_archive
2861{
2862  my $fp = shift;
2863
2864  my $archive;
2865
2866  # SignatureHeader
2867
2868  my $signature = read_seven_zip_signature_header ($fp);
2869
2870  return undef unless (defined ($signature));
2871
2872  # parse the header
2873
2874  my $parsed_header = read_seven_zip_next_header ($fp, $signature->{'next_header_size'}, $signature->{'next_header_offset'});
2875
2876  return undef unless (defined ($parsed_header));
2877
2878  $archive = {
2879    "signature_header" => $signature,
2880    "parsed_header" => $parsed_header
2881  };
2882
2883  return $archive;
2884}
2885
2886sub seven_zip_get_hash
2887{
2888  my $file_path = shift;
2889
2890  my $hash_buf = "";
2891
2892  # open file for reading
2893
2894  my $seven_zip_file;
2895
2896  if (! open ($seven_zip_file, "<$file_path"))
2897  {
2898    print STDERR "WARNING: could not open the file '$file_path' for reading\n";
2899
2900    return $hash_buf;
2901  }
2902
2903  binmode ($seven_zip_file);
2904
2905  # check if valid and supported 7z file
2906
2907  if (! is_supported_seven_zip_file ($seven_zip_file))
2908  {
2909    return sfx_get_hash ($seven_zip_file, $file_path);
2910  }
2911
2912  my $archive = read_seven_zip_archive ($seven_zip_file);
2913
2914  $hash_buf = extract_hash_from_archive ($seven_zip_file, $archive, $file_path);
2915
2916  # cleanup
2917
2918  close ($seven_zip_file);
2919
2920  return $hash_buf;
2921}
2922
2923#
2924# SFX related helper functions
2925#
2926
2927# The strategy here is as follows:
2928# 1. only use sfx-checks whenever the 7z header is not at start (i.e. if parsing of a "regular" 7z failed)
2929# 2. try to read PE
2930# 3. try to search for $SEVEN_ZIP_MAGIC within the 512 bytes bounderies
2931# 4. try to do a full scan ($SEVEN_ZIP_MAGIC_LEN bytes at a time)
2932
2933# sfx_7z_pe_search () searches for the 7z signature by seeking to the correct offset in the PE file
2934# (e.g. after the PE stub aka the executable part)
2935
2936sub sfx_7z_pe_search
2937{
2938  my $fp = shift;
2939
2940  my $found = 0;
2941
2942
2943  # 1. DOS header (e_lfanew)
2944  # 2. Portable executable (PE) headers (NumberOfSections)
2945  # 3. Section headers (PointerToRawData + SizeOfRawData)
2946
2947  # we assume that the file is a common/standard PE executable, we will do some checks:
2948
2949  # DOS header
2950
2951  # we should have a MS-DOS MZ executable
2952
2953  my $bytes = my_read ($fp, 2);
2954
2955  return 0 if (length ($bytes) != 2);
2956  return 0 if ($bytes ne "MZ"); # 0x5a4d
2957  return 0 if (length (my_read ($fp, 58)) != 58);
2958
2959  $bytes = my_read ($fp, 4);
2960
2961  return 0 if (length ($bytes) != 4);
2962
2963  my $e_lfanew = unpack ("L", $bytes);
2964
2965  my_seek ($fp, $e_lfanew, 0);
2966
2967  # PE header
2968
2969  $bytes = my_read ($fp, 4); # PE0000 signature after DOS part
2970
2971  return 0 if (length ($bytes) != 4);
2972  return 0 if ($bytes ne "PE\x00\x00");
2973  return 0 if (length (my_read ($fp, 2)) != 2); # skip FileHeader.Machine
2974
2975  $bytes = my_read ($fp, 2);
2976
2977  return 0 if (length ($bytes) != 2);
2978
2979  my $num_sections = unpack ("S", $bytes);
2980
2981  return 0 if ($num_sections < 1);
2982
2983  return 0 if (length (my_read ($fp,  16)) !=  16); # skip rest of FileHeader
2984  return 0 if (length (my_read ($fp, 224)) != 224); # skip OptionalHeader
2985
2986  my $section_farthest = 0;
2987  my $pos_after_farthest_section = 0;
2988
2989  for (my $i = 0; $i < $num_sections; $i++)
2990  {
2991    # we loop through all the section headers
2992
2993    #my $name = my_read ($fp, 8); return 0 if (length (my_read ($fp, 8)) != 8);
2994    return 0 if (length (my_read ($fp, 16)) != 16); # skip Name, Misc, VirtualAddress, SizeOfRawData
2995
2996    # SizeOfRawData
2997
2998    $bytes = my_read ($fp, 4);
2999
3000    return 0 if (length ($bytes) != 4);
3001
3002    my $size_of_raw_data = unpack ("L", $bytes);
3003
3004    # PointerToRawData
3005
3006    $bytes = my_read ($fp, 4);
3007
3008    return 0 if (length ($bytes) != 4);
3009
3010    my $pointer_to_raw_data = unpack ("L", $bytes);
3011
3012    # the sections are not quaranteed to be ordered (=> compare all of them!)
3013
3014    if ($pointer_to_raw_data > $section_farthest)
3015    {
3016      $section_farthest = $pointer_to_raw_data;
3017
3018      $pos_after_farthest_section = $pointer_to_raw_data + $size_of_raw_data;
3019    }
3020
3021
3022    # loop to next SectionTable entry
3023
3024    return 0 if (length (my_read ($fp, 16)) != 16); # skip rest of SectionHeader
3025  }
3026
3027  # check if 7z signature found (after stub)
3028
3029  my_seek ($fp, $pos_after_farthest_section, 0);
3030
3031  $bytes = my_read ($fp, $SEVEN_ZIP_MAGIC_LEN);
3032
3033  return 0 if (length ($bytes) != $SEVEN_ZIP_MAGIC_LEN);
3034
3035  if ($bytes eq $SEVEN_ZIP_MAGIC)
3036  {
3037    $found = 1;
3038  }
3039
3040  return $found;
3041}
3042
3043# sfx_7z_512_search () searches for the 7z signature by only looking at every 512 byte boundery
3044
3045sub sfx_7z_512_search
3046{
3047  my $fp = shift;
3048
3049  my $found = 0;
3050
3051
3052  my $seek_skip = 512 - $SEVEN_ZIP_MAGIC_LEN;
3053
3054  my $bytes = my_read ($fp, $SEVEN_ZIP_MAGIC_LEN);
3055
3056  my $len_bytes = length ($bytes);
3057
3058  while ($len_bytes == $SEVEN_ZIP_MAGIC_LEN)
3059  {
3060    if ($bytes eq $SEVEN_ZIP_MAGIC)
3061    {
3062      $found = 1;
3063
3064      last;
3065    }
3066
3067    my_seek ($fp, $seek_skip, 1);
3068
3069    $bytes = my_read ($fp, $SEVEN_ZIP_MAGIC_LEN);
3070
3071    $len_bytes = length ($bytes);
3072  }
3073
3074  return $found;
3075}
3076
3077# sfx_7z_full_search () searches for the 7z signature by looking at every byte in the file
3078# (this type of search should only be performed if no other variant worked)
3079
3080sub sfx_7z_full_search
3081{
3082  my $fp = shift;
3083
3084  my $found = 0;
3085
3086  my $idx_into_magic = 0;
3087  my $prev_idx_into_magic = 0;
3088
3089  my $len_bytes = $SEVEN_ZIP_MAGIC_LEN;
3090
3091  while ($len_bytes == $SEVEN_ZIP_MAGIC_LEN)
3092  {
3093    my $bytes = my_read ($fp, $SEVEN_ZIP_MAGIC_LEN);
3094
3095    last if (length  ($bytes) == 0);
3096
3097    $prev_idx_into_magic = $idx_into_magic;
3098
3099    if ($bytes eq $SEVEN_ZIP_MAGIC)
3100    {
3101      $found = 1;
3102
3103      last;
3104    }
3105
3106    for (my $i = 0; $i < length ($bytes); $i++)
3107    {
3108      my $c = substr ($bytes, $i, 1);
3109
3110      if ($c ne substr ($SEVEN_ZIP_MAGIC, $idx_into_magic, 1))
3111      {
3112        $idx_into_magic = 0; #reset
3113      }
3114      else
3115      {
3116        $idx_into_magic++;
3117
3118        if ($idx_into_magic == $SEVEN_ZIP_MAGIC_LEN)
3119        {
3120          $found = 1;
3121
3122          last;
3123        }
3124      }
3125    }
3126
3127    last if ($found == 1);
3128
3129    $len_bytes = length ($bytes);
3130  }
3131
3132  return ($found, $prev_idx_into_magic);
3133}
3134
3135sub sfx_get_hash
3136{
3137  my $fp = shift;
3138  my $file_path = shift;
3139
3140  my $hash_buf = "";
3141
3142
3143  my %db_positions_analysed = (); # holds a list of offsets that we already tried to parse
3144
3145  # we make the assumption that there is max one .7z file within the .sfx!
3146
3147  # Variant 1 (PE file structure parsing)
3148
3149  my_seek ($fp, 0, 0);
3150
3151  if (sfx_7z_pe_search ($fp))
3152  {
3153    my $cur_pos = my_tell ($fp);
3154
3155    $db_positions_analysed{$cur_pos} = 1; # mark it as analyzed
3156
3157    my $archive = read_seven_zip_archive ($fp);
3158
3159    $hash_buf = extract_hash_from_archive ($fp, $archive, $file_path);
3160
3161    if (defined ($hash_buf))
3162    {
3163      if (length ($hash_buf) > 0)
3164      {
3165        return $hash_buf;
3166      }
3167    }
3168  }
3169
3170  # Variant 2 (search only at the 512 bytes bounderies)
3171
3172  my_seek ($fp, 512, 0);
3173
3174  while (sfx_7z_512_search ($fp) != 0)
3175  {
3176    my $cur_pos = my_tell ($fp);
3177
3178    if (! exists ($db_positions_analysed{$cur_pos}))
3179    {
3180      $db_positions_analysed{$cur_pos} = 1; # mark it as analyzed
3181
3182      my $archive = read_seven_zip_archive ($fp);
3183
3184      $hash_buf = extract_hash_from_archive ($fp, $archive, $file_path);
3185
3186      if (defined ($hash_buf))
3187      {
3188        if (length ($hash_buf) > 0)
3189        {
3190          return $hash_buf;
3191        }
3192      }
3193    }
3194
3195    last if (my_seek ($fp, $cur_pos + 512 - $SEVEN_ZIP_MAGIC_LEN, 0) != 1);
3196  }
3197
3198  # Variant 3 (full search - worst case - shouldn't happen at all with a standard .sfx)
3199
3200  my_seek ($fp, 0, 2);
3201
3202  my $file_size = my_tell ($fp);
3203
3204  if ($file_size > 8 * 1024 * 1024) # let's say that 8 MiB is already a huge file
3205  {
3206    print STDERR "WARNING: searching for the 7z signature in a $file_size bytes long file ('";
3207    print STDERR $file_path . "') might take some time\n";
3208  }
3209
3210  my_seek ($fp, 1, 0); # we do no that the signature is not at position 0, so we start at 1
3211
3212  my ($full_search_found, $full_search_idx) = sfx_7z_full_search ($fp);
3213
3214  while ($full_search_found != 0)
3215  {
3216    my $cur_pos = my_tell ($fp);
3217
3218    $cur_pos -= $full_search_idx;
3219
3220    my_seek ($fp, $cur_pos, 0); # we might not be there yet (depends if $full_search_idx != 0)
3221
3222    if (! exists ($db_positions_analysed{$cur_pos}))
3223    {
3224      # we can skip the database updates because it's our last try to find the 7z file
3225      # $db_positions_analysed{$cur_pos} = 1;
3226
3227      my $archive = read_seven_zip_archive ($fp);
3228
3229      $hash_buf = extract_hash_from_archive ($fp, $archive, $file_path);
3230
3231      if (defined ($hash_buf))
3232      {
3233        if (length ($hash_buf) > 0)
3234        {
3235          return $hash_buf;
3236        }
3237      }
3238    }
3239
3240    my_seek ($fp, $cur_pos, 0); # seek back to position JUST AFTER the previously found signature
3241
3242    ($full_search_found, $full_search_idx) = sfx_7z_full_search ($fp);
3243  }
3244
3245  # in theory if we reach this code section we already know that parsing the file failed (but let's confirm it)
3246
3247  my $sfx_successfully_parsed = 0;
3248
3249  if (defined ($hash_buf))
3250  {
3251    if (length ($hash_buf) > 0)
3252    {
3253      $sfx_successfully_parsed = 1;
3254    }
3255  }
3256
3257  if ($sfx_successfully_parsed == 0)
3258  {
3259    print STDERR "WARNING: the file '$file_path' is neither a supported 7-Zip file nor a supported SFX file\n";
3260  }
3261
3262  # cleanup
3263
3264  close ($fp);
3265
3266  return $hash_buf;
3267}
3268
3269sub globbing_on_windows
3270{
3271  my @file_list = @_;
3272
3273  my $os = $^O;
3274
3275  if (($os eq "MSWin32") || ($os eq "Win32"))
3276  {
3277    my $windows_globbing_module = "File::Glob";
3278    my $windows_globbing = "bsd_glob";
3279
3280    if (eval "require $windows_globbing_module")
3281    {
3282      no strict 'refs';
3283
3284      $windows_globbing_module->import ($windows_globbing);
3285
3286      my @new_file_list = ();
3287
3288      foreach my $item (@file_list)
3289      {
3290        push (@new_file_list, $windows_globbing-> ($item));
3291      }
3292
3293      @file_list = @new_file_list;
3294    }
3295  }
3296
3297  return @file_list;
3298}
3299
3300sub get_splitted_archive_raw_name
3301{
3302  my $full_name = shift;
3303
3304  my $name_idx = rindex ($full_name, ".");
3305
3306  my $name = substr ($full_name, 0, $name_idx);
3307
3308  return $name;
3309}
3310
3311sub get_ordered_splitted_file_list
3312{
3313  my @files = @_;
3314
3315  return () unless (scalar (@files) > 0); # never the case (already checked)
3316
3317  my $failed = 0;
3318  my $num_probably_splitted_files = 0;
3319
3320  my $file_prefix = "";
3321  my $extension_length = 0;
3322
3323  foreach my $file_name (@files)
3324  {
3325    my $idx_extension = rindex ($file_name, ".");
3326
3327    if ($idx_extension == -1)
3328    {
3329      $failed = 1;
3330      last;
3331    }
3332
3333    my $prefix    = substr ($file_name, 0, $idx_extension);
3334    my $extension = substr ($file_name, $idx_extension + 1);
3335
3336    if (length ($prefix) == 0)
3337    {
3338      $failed = 1;
3339      last;
3340    }
3341
3342    # detect change in file prefix (the actual "name")
3343
3344    if (length ($file_prefix) == 0) #init
3345    {
3346      $file_prefix = $prefix;
3347    }
3348
3349    if ($prefix ne $file_prefix)
3350    {
3351      $failed = 1;
3352      last;
3353    }
3354
3355    # check extensions (should be numbers only)
3356
3357    if ($extension !~ /^[0-9]*$/)
3358    {
3359      $failed = 1;
3360      last;
3361    }
3362
3363    if ($extension_length == 0) # init
3364    {
3365      $extension_length = length ($extension);
3366    }
3367
3368    if (length ($extension) != $extension_length)
3369    {
3370      $failed = 1;
3371      last;
3372    }
3373
3374    $num_probably_splitted_files++;
3375  }
3376
3377  return () unless (length ($file_prefix) > 0);
3378  return () unless ($extension_length > 0);
3379
3380  if ($failed == 1)
3381  {
3382    if ($num_probably_splitted_files > 1)
3383    {
3384      print STDERR "WARNING: it seems that some files could be part of a splitted 7z archive named '$file_prefix'\n";
3385      print STDERR "ATTENTION: make sure to only specify the files belonging to the splitted archive (do not combine them with other archives)\n";
3386    }
3387
3388    return ();
3389  }
3390
3391  # sort the list and check if there is no missing file
3392  # (at this point in time we can't verify if the last file is really the last one)
3393
3394  my @sorted_file_list = sort (@files);
3395
3396  my $max = scalar (@sorted_file_list);
3397
3398  return () if ($max != scalar (@files));
3399
3400  for (my $count = 0; $count < $max; $count++)
3401  {
3402    my $current_extension = sprintf ("%0${extension_length}i", $count + 1); # the splits start with .001, .002, ...
3403
3404    return () if ($sorted_file_list[$count] ne "$file_prefix.$current_extension");
3405  }
3406
3407  return @sorted_file_list;
3408}
3409
3410sub get_file_sizes_list
3411{
3412  my @files = @_;
3413
3414  my %files_with_sizes = ();
3415
3416  my $accumulated_size = 0;
3417
3418  for (my $count = 0; $count < scalar (@files); $count++)
3419  {
3420    my $file = $files[$count];
3421
3422    my @file_stat = stat ($file);
3423
3424    if (scalar (@file_stat) < 1)
3425    {
3426      print STDERR "ERROR: could not get the file size of the file '$file'\n";
3427
3428      exit (1);
3429    }
3430
3431    $files_with_sizes{0}{'fh'} = undef; # the file handle
3432    $files_with_sizes{0}{'num'} = 0;
3433
3434    $files_with_sizes{$count + 1}{'name'}  = $file;
3435    $files_with_sizes{$count + 1}{'size'}  = $file_stat[7];
3436    $files_with_sizes{$count + 1}{'start'} = $accumulated_size;
3437
3438    $accumulated_size += $file_stat[7];
3439  }
3440
3441  return %files_with_sizes;
3442}
3443
3444sub splitted_seven_zip_open
3445{
3446  my @files = @_;
3447
3448  my @sorted_file_list = get_ordered_splitted_file_list (@files);
3449
3450  return 0 if (scalar (@sorted_file_list) < 1);
3451
3452  my %file_list_with_sizes = get_file_sizes_list (@sorted_file_list);
3453
3454  # start to parse the file list
3455
3456  $memory_buffer_read_offset = 0; # just to be safe
3457
3458  my $first_splitted_file = $file_list_with_sizes{1}{'name'};
3459
3460  my $hash_buf = "";
3461
3462  # open file for reading
3463
3464  my $seven_zip_file;
3465
3466  if (! open ($seven_zip_file, "<$first_splitted_file"))
3467  {
3468    print STDERR "ERROR: could not open the the splitted archive file '$first_splitted_file' for reading\n";
3469
3470    exit (1);
3471  }
3472
3473  binmode ($seven_zip_file);
3474
3475  $file_list_with_sizes{0}{'fh'}  = $seven_zip_file;
3476  $file_list_with_sizes{0}{'num'} = 1; # meaning is: "first file"
3477
3478  # check if valid and supported 7z file
3479
3480  if (! is_supported_seven_zip_file (\%file_list_with_sizes))
3481  {
3482    print STDERR "ERROR: the splitted archive file '$first_splitted_file' is not a valid 7z file\n";
3483
3484    exit (1);
3485  }
3486
3487  my $archive = read_seven_zip_archive (\%file_list_with_sizes);
3488
3489  $hash_buf = extract_hash_from_archive (\%file_list_with_sizes, $archive, $first_splitted_file);
3490
3491  # cleanup
3492
3493  close ($seven_zip_file);
3494
3495  if (defined ($hash_buf))
3496  {
3497    if (length ($hash_buf) > 0)
3498    {
3499      print $hash_buf . "\n";
3500    }
3501  }
3502
3503  return 1;
3504}
3505
3506#
3507# Start
3508#
3509
3510if (scalar (@ARGV) lt 1)
3511{
3512  usage ($0);
3513
3514  exit (1);
3515}
3516
3517my @file_list = globbing_on_windows (@ARGV);
3518
3519# try to handle this special case: splitted .7z files (.7z.001, .7z.002, .7z.003, ...)
3520# ATTENTION: there is one restriction here: splitted archives shouldn't be combined with other
3521# splitted or non-splitted archives
3522
3523my $was_splitted = splitted_seven_zip_open (@file_list);
3524
3525if ($was_splitted == 1)
3526{
3527  exit (0);
3528}
3529
3530# "non-splitted" file list:
3531
3532foreach my $file_name (@file_list)
3533{
3534  if (! -e $file_name)
3535  {
3536    print STDERR "WARNING: could not open file '$file_name'\n";
3537
3538    next;
3539  }
3540
3541  my $hash_buf = seven_zip_get_hash ($file_name);
3542
3543  next unless (defined ($hash_buf));
3544  next unless (length ($hash_buf) > 0);
3545
3546  print $hash_buf . "\n";
3547}
3548
3549exit (0);
3550