1#!/usr/local/bin/perl 2 3use strict; 4use warnings; 5 6use Compress::Raw::Lzma qw (LZMA_STREAM_END LZMA_DICT_SIZE_MIN); 7use File::Basename; 8 9# author: 10# philsmd 11# magnum (adapt to JtR use) 12 13# version: 14# 1.3 15 16# date released: 17# April 2015 18 19# date last updated: 20# 28th Nov 2018 21 22# dependencies: 23# Compress::Raw::Lzma 24 25# supported file types: 26# - is able to identify and parse .7z files 27# - is able to identify and parse splitted .7z files (.7z.001, .7z.002, ...) 28# - is able to identify and parse regular (non-packed) .sfx files 29 30# install dependencies like this: 31# sudo cpan Compress::Raw::Lzma 32# or sudo apt-get install libcompress-raw-lzma-perl 33# or sudo perl -MCPAN -e 'install Compress::Raw::Lzma' 34 35 36# 37# Explanation of the "hash" format: 38# 39 40# the fields of this format are separate by the dollar ("$") sign: 41# ("xyz" means that the string xyz is used literally, brackets indicate variables) 42# 43# "$" 44# "7z" 45# "$" 46# [data type indicator] # see "Explanation of the data type indicator" below 47# "$" 48# [cost factor] # means: 2 ^ [cost factor] iterations 49# "$" 50# [length of salt] 51# "$" 52# [salt] 53# "$" 54# [length of iv] # the initialization vector length 55# "$" 56# [iv] # the initialization vector itself 57# "$" 58# [CRC32] # the actual "hash" 59# "$" 60# [length of encrypted data] # the encrypted data length in bytes 61# "$" 62# [length of decrypted data] # the decrypted data length in bytes 63# "$" 64# [encrypted data] # the encrypted (and possibly also compressed) data 65 66# in case the data was not truncated and a decompression step is needed to verify the CRC32, these fields are appended: 67# "$" 68# [length of data for CRC32] # the length of the first "file" needed to verify the CRC32 checksum 69# "$" 70# [coder attributes] # most of the coders/decompressors need some attributes (e.g. encoded lc, pb, lp, dictSize values); 71 72# 73# Explanation of the data type indicator 74# 75 76# This field is the first field after the hash signature (i.e. after "$7z$). 77# Whenever the data was longer than the value of PASSWORD_RECOVERY_TOOL_DATA_LIMIT and the data could be truncated due to the padding attack, 78# the value of this field will be set to 128. 79# 80# If no truncation is used: 81# - the value will be 0 if the data doesn't need to be decompressed to check the CRC32 checksum 82# - all values different from 128, but greater than 0, indicate that the data must be decompressed as follows: 83# - 1 means that the data must be decompressed using the LZMA1 decompressor 84# - 2 means that the data must be decompressed using the LZMA2 decompressor 85# - 3 means that the data must be decompressed using the PPMD decompressor 86# - 4 means that the data must be decompressed using the BCJ decompressor 87# - 5 means that the data must be decompressed using the BCJ2 decompressor 88# - 6 means that the data must be decompressed using the BZIP2 decompressor 89# - 7 means that the data must be decompressed using the DEFLATE decompressor 90 91# Truncated data can only be verified using the padding attack and therefore combinations between truncation + a compressor are not allowed. 92# Therefore, whenever the value is 128 or 0, neither coder attributes nor the length of the data for the CRC32 check is within the output. 93# On the other hand, for all values above or equal 1 and smaller than 128, both coder attributes and the length for CRC32 check is in the output. 94 95# 96# Constants 97# 98 99# cracker specific stuff 100 101my $ANALYZE_ALL_STREAMS_TO_FIND_SHORTEST_DATA_BUF = 1; 102 103my $SHOW_LIST_OF_ALL_STREAMS = 0; # $ANALYZE_ALL_STREAMS_TO_FIND_SHORTEST_DATA_BUF must be set to 1 to list/debug all streams 104my $SHOW_LZMA_DECOMPRESS_AFTER_DECRYPT_WARNING = 1; 105 106my $SHORTEN_HASH_LENGTH_TO_CRC_LENGTH = 1; # only output the bytes needed for the checksum of the first file (plus a fixed length 107 # header at the very beginning of the stream; plus additional +5% to cover the exception 108 # that the compressed file is slightly longer than the raw file) 109 110my $SHORTEN_HASH_FIXED_HEADER = 32.5; # at the beginning of the compressed stream we have some header info 111 # (shortened hash can't really be shorter than the metadata needed for decompression) 112 # the extra +0.5 is used to round up (we use integer numbers) 113my $SHORTEN_HASH_EXTRA_PERCENT = 5; # the compressed stream could be slightly longer than the underlying data (special cases) 114 # in percent: i.e. x % == (x / 100) 115 116my $PASSWORD_RECOVERY_TOOL_NAME = "john"; 117my $PASSWORD_RECOVERY_TOOL_DATA_LIMIT = 0x80000000; # hexadecimal output value. This value should always be >= 64 118my $PASSWORD_RECOVERY_TOOL_SUPPORT_PADDING_ATTACK = 1; # does the cracker support the AES-CBC padding attack (0 means no, 1 means yes) 119my @PASSWORD_RECOVERY_TOOL_SUPPORTED_DECOMPRESSORS = (1, 2); # within this list we only need values ranging from 1 to 7 120 # i.e. SEVEN_ZIP_LZMA1_COMPRESSED to SEVEN_ZIP_DEFLATE_COMPRESSED 121 122# 7-zip specific stuff 123 124my $LZMA2_MIN_COMPRESSED_LEN = 16; # the raw data (decrypted) needs to be at least: 3 + 1 + 1, header (start + size) + at least one byte of data + end 125 # therefore we need to have at least one AES BLOCK (128 bits = 16 bytes) 126 127# header 128 129my $SEVEN_ZIP_MAGIC = "7z\xbc\xaf\x27\x1c"; 130my $SEVEN_ZIP_MAGIC_LEN = 6; # fixed length of $SEVEN_ZIP_MAGIC 131 132my $SEVEN_ZIP_END = "\x00"; 133my $SEVEN_ZIP_HEADER = "\x01"; 134my $SEVEN_ZIP_ARCHIVE_PROPERTIES = "\x02"; 135my $SEVEN_ZIP_ADD_STREAMS_INFO = "\x03"; 136my $SEVEN_ZIP_MAIN_STREAMS_INFO = "\x04"; 137my $SEVEN_ZIP_FILES_INFO = "\x05"; 138my $SEVEN_ZIP_PACK_INFO = "\x06"; 139my $SEVEN_ZIP_UNPACK_INFO = "\x07"; 140my $SEVEN_ZIP_SUBSTREAMS_INFO = "\x08"; 141my $SEVEN_ZIP_SIZE = "\x09"; 142my $SEVEN_ZIP_CRC = "\x0a"; 143my $SEVEN_ZIP_FOLDER = "\x0b"; 144my $SEVEN_ZIP_UNPACK_SIZE = "\x0c"; 145my $SEVEN_ZIP_NUM_UNPACK_STREAM = "\x0d"; 146my $SEVEN_ZIP_EMPTY_STREAM = "\x0e"; 147my $SEVEN_ZIP_EMPTY_FILE = "\x0f"; 148my $SEVEN_ZIP_ANTI_FILE = "\x10"; 149my $SEVEN_ZIP_NAME = "\x11"; 150my $SEVEN_ZIP_CREATION_TIME = "\x12"; 151my $SEVEN_ZIP_ACCESS_TIME = "\x13"; 152my $SEVEN_ZIP_MODIFICATION_TIME = "\x14"; 153my $SEVEN_ZIP_WIN_ATTRIBUTE = "\x15"; 154my $SEVEN_ZIP_ENCODED_HEADER = "\x17"; 155my $SEVEN_ZIP_START_POS = "\x18"; 156my $SEVEN_ZIP_DUMMY = "\x19"; 157 158my $SEVEN_ZIP_MAX_PROPERTY_TYPE = 2 ** 30; # 1073741824 159my $SEVEN_ZIP_NOT_EXTERNAL = "\x00"; 160my $SEVEN_ZIP_EXTERNAL = "\x01"; 161my $SEVEN_ZIP_ALL_DEFINED = "\x01"; 162my $SEVEN_ZIP_FILE_NAME_END = "\x00\x00"; 163 164# codec 165 166my $SEVEN_ZIP_AES = "\x06\xf1\x07\x01"; # all the following codec values are from CPP/7zip/Archive/7z/7zHeader.h 167 168my $SEVEN_ZIP_LZMA1 = "\x03\x01\x01"; 169my $SEVEN_ZIP_LZMA2 = "\x21"; 170my $SEVEN_ZIP_PPMD = "\x03\x04\x01"; 171my $SEVEN_ZIP_BCJ = "\x03\x03\x01\x03"; 172my $SEVEN_ZIP_BCJ2 = "\x03\x03\x01\x1b"; 173my $SEVEN_ZIP_BZIP2 = "\x04\x02\x02"; 174my $SEVEN_ZIP_DEFLATE = "\x04\x01\x08"; 175 176# hash format 177 178my $SEVEN_ZIP_HASH_SIGNATURE = "\$7z\$"; 179my $SEVEN_ZIP_DEFAULT_POWER = 19; 180my $SEVEN_ZIP_DEFAULT_IV = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"; 181 182my $SEVEN_ZIP_UNCOMPRESSED = 0; 183my $SEVEN_ZIP_LZMA1_COMPRESSED = 1; 184my $SEVEN_ZIP_LZMA2_COMPRESSED = 2; 185my $SEVEN_ZIP_PPMD_COMPRESSED = 3; 186my $SEVEN_ZIP_BCJ_COMPRESSED = 4; 187my $SEVEN_ZIP_BCJ2_COMPRESSED = 5; 188my $SEVEN_ZIP_BZIP2_COMPRESSED = 6; 189my $SEVEN_ZIP_DEFLATE_COMPRESSED = 7; 190my $SEVEN_ZIP_TRUNCATED = 128; # (0x80 or 0b10000000) 191 192my %SEVEN_ZIP_COMPRESSOR_NAMES = (1 => "LZMA1", 2 => "LZMA2", 3 => "PPMD", 4 => "BCJ", 5 => "BCJ2", 6 => "BZIP2", 193 7 => "DEFLATE"); 194 195# 196# Helper functions 197# 198 199sub usage 200{ 201 my $prog_name = shift; 202 203 print STDERR "Usage: $prog_name <7-Zip file>...\n"; 204} 205 206my $memory_buffer_read_offset = 0; 207 208sub my_read 209{ 210 my $input = shift; 211 my $length = shift; 212 213 my $type_of_input = ref ($input); 214 215 my $output_buffer = ""; 216 217 if ($type_of_input eq "GLOB") 218 { 219 read $input, $output_buffer, $length; 220 } 221 elsif ($type_of_input eq "HASH") 222 { 223 my $cur_file_handle = $$input{0}{'fh'}; 224 my $cur_file_number = $$input{0}{'num'}; 225 226 my $bytes_read = 0; 227 228 while ($bytes_read != $length) 229 { 230 my $name = $$input{$cur_file_number}{'name'}; 231 my $start = $$input{$cur_file_number}{'start'}; 232 my $size = $$input{$cur_file_number}{'size'}; 233 234 my $cur_file_bytes_avail = ($start + $size) - $memory_buffer_read_offset; 235 236 if ($cur_file_bytes_avail < 1) 237 { 238 print STDERR "ERROR: failed to get the correct file offsets of splitted archive file '$name'\n"; 239 240 exit (1); 241 } 242 243 my $total_bytes_to_read = $length - $bytes_read; 244 my $bytes_to_read = $total_bytes_to_read; 245 246 if ($bytes_to_read > $cur_file_bytes_avail) 247 { 248 $bytes_to_read = $cur_file_bytes_avail; 249 } 250 251 # append the current bytes read from the file to the overall output buffer 252 253 my $temp_output_buffer = ""; 254 255 my $bytes = read ($cur_file_handle, $temp_output_buffer, $bytes_to_read); 256 257 $output_buffer .= $temp_output_buffer; 258 259 if ($bytes != $bytes_to_read) 260 { 261 print STDERR "ERROR: could not read from splitted 7z file '$name'\n"; 262 263 exit (1); 264 } 265 266 $bytes_read += $bytes_to_read; 267 $memory_buffer_read_offset += $bytes_to_read; 268 269 # the following case only happens if we need to read across 2 or more files 270 271 if ($bytes_read != $length) 272 { 273 # we exhausted the current file, move to the next one! 274 275 close ($cur_file_handle); 276 277 $cur_file_number++; 278 279 if (! exists ($$input{$cur_file_number})) 280 { 281 my $name_prefix = get_splitted_archive_raw_name ($name); 282 283 print STDERR "ERROR: could not open part #$cur_file_number of the splitted archive file '$name_prefix'\n"; 284 285 exit (1); 286 } 287 288 my $name = $$input{$cur_file_number}{'name'}; 289 290 if (! open ($cur_file_handle, "<$name")) 291 { 292 print STDERR "ERROR: could not open the splitted archive file '$name' for reading\n"; 293 294 exit (1); 295 } 296 297 $$input{0}{'fh'} = $cur_file_handle; 298 $$input{0}{'num'} = $cur_file_number; 299 } 300 } 301 } 302 else 303 { 304 $output_buffer = substr ($$input, $memory_buffer_read_offset, $length); 305 306 $memory_buffer_read_offset += $length; 307 } 308 309 return $output_buffer; 310} 311 312sub my_tell 313{ 314 my $input = shift; 315 316 my $res = 0; 317 318 my $type_of_input = ref ($input); 319 320 if ($type_of_input eq "HASH") 321 { 322 $res = $memory_buffer_read_offset; 323 } 324 else 325 { 326 $res = tell ($input); 327 } 328 329 return $res; 330} 331 332sub my_seek 333{ 334 my $input = shift; 335 my $offset = shift; 336 my $whence = shift; 337 338 my $res = 0; 339 340 my $type_of_input = ref ($input); 341 342 if ($type_of_input eq "HASH") 343 { 344 # get total number of files and total/accumulated file size 345 346 my $number_of_files= 1; 347 348 # we assume that $$input{1} exists (we did already check that beforehand) 349 350 my $end = 0; 351 352 while (exists ($$input{$number_of_files})) 353 { 354 $end = $$input{$number_of_files}{'start'} + $$input{$number_of_files}{'size'}; 355 356 $number_of_files++; 357 } 358 359 my $new_offset = 0; 360 361 # absolute (from start) 362 if ($whence == 0) 363 { 364 $new_offset = $offset; 365 } 366 # relative (depending on current position) 367 elsif ($whence == 1) 368 { 369 $new_offset = $memory_buffer_read_offset + $offset; 370 } 371 # offset from the end of the file 372 else 373 { 374 $new_offset = $end + $offset; 375 } 376 377 # sanity check 378 379 if (($new_offset < 0) || ($new_offset > $end)) 380 { 381 my $name = get_splitted_archive_raw_name ($$input{1}{'name'}); 382 383 print STDERR "ERROR: could not seek within the splitted archive '$name'\n"; 384 385 exit (1); 386 } 387 388 $memory_buffer_read_offset = $new_offset; 389 390 # check if the correct file is open 391 # 1. determine the correct file 392 # 2. if the "incorrect" file is open, close it and open the correct one 393 394 my $cur_file_number = 1; 395 my $file_was_found = 0; 396 397 my $start = 0; 398 my $size = 0; 399 400 while (exists ($$input{$cur_file_number})) 401 { 402 $start = $$input{$cur_file_number}{'start'}; 403 $size = $$input{$cur_file_number}{'size'}; 404 405 my $end = $start + $size; 406 407 if ($memory_buffer_read_offset >= $start) 408 { 409 if ($memory_buffer_read_offset < $end) 410 { 411 $file_was_found = 1; 412 413 last; 414 } 415 } 416 417 $cur_file_number++; 418 } 419 420 if ($file_was_found == 0) 421 { 422 my $name = get_splitted_archive_raw_name ($$input{1}{'name'}); 423 424 print STDERR "ERROR: could not read the splitted archive '$name' (maybe some parts are missing?)\n"; 425 426 exit (1); 427 } 428 429 if ($$input{0}{'num'} != $cur_file_number) 430 { 431 # if we enter this block, we definitely need to "change" to another file 432 433 close ($$input{0}{'fh'}); 434 435 my $name = $$input{$cur_file_number}{'name'}; 436 437 my $seven_zip_file; 438 439 if (! open ($seven_zip_file, "<$name")) 440 { 441 print STDERR "ERROR: could not open the file '$name' for reading\n"; 442 443 exit (1); 444 } 445 446 $$input{0}{'fh'} = $seven_zip_file; 447 $$input{0}{'num'} = $cur_file_number; 448 } 449 450 # always seek w/ absolute positions within the splitted part! 451 $res = seek ($$input{0}{'fh'}, $memory_buffer_read_offset - $start, 0); 452 } 453 else 454 { 455 $res = seek ($input, $offset, $whence); 456 } 457 458 return $res; 459} 460 461sub get_uint32 462{ 463 my $fp = shift; 464 465 my $bytes = my_read ($fp, 4); 466 467 return (0, 0) if (length ($bytes) != 4); 468 469 my $num = unpack ("L", $bytes); 470 471 return $num; 472} 473 474sub get_uint64 475{ 476 my $fp = shift; 477 478 my $bytes = my_read ($fp, 8); 479 480 return (0, 0) if (length ($bytes) != 8); 481 482 my ($uint1, $uint2) = unpack ("LL<", $bytes); 483 484 my $num = $uint2 << 32 | $uint1; 485 486 return $bytes, $num; 487} 488 489sub read_number 490{ 491 my $fp = shift; 492 493 my $b = ord (my_read ($fp, 1)); 494 495 if (($b & 0x80) == 0) 496 { 497 return $b; 498 } 499 500 my $value = ord (my_read ($fp, 1)); 501 502 for (my $i = 1; $i < 8; $i++) 503 { 504 my $mask = 0x80 >> $i; 505 506 if (($b & $mask) == 0) 507 { 508 my $high = $b & ($mask - 1); 509 510 $value |= ($high << ($i * 8)); 511 512 return $value; 513 } 514 515 my $next = ord (my_read ($fp, 1)); 516 517 $value |= ($next << ($i * 8)); 518 } 519 520 return $value; 521} 522 523sub num_to_id 524{ 525 my $num = shift; 526 527 # special case: 528 529 return "\x00" if ($num == 0); 530 531 # normal case: 532 533 my $id = ""; 534 535 while ($num > 0) 536 { 537 my $value = $num & 0xff; 538 539 $id = chr ($value) . $id; 540 541 $num >>= 8; 542 } 543 544 return $id; 545} 546 547sub read_id 548{ 549 my $fp = shift; 550 551 my $id; 552 553 my $num = read_number ($fp); 554 555 # convert number to their ASCII code correspondent byte 556 557 return num_to_id ($num); 558} 559 560sub get_boolean_vector 561{ 562 my $fp = shift; 563 564 my $number_items = shift; 565 566 my @booleans; 567 568 # get the values 569 570 my $v = 0; 571 my $mask = 0; 572 573 for (my $i = 0; $i < $number_items; $i++) 574 { 575 if ($mask == 0) 576 { 577 my $byte = my_read ($fp, 1); 578 579 $v = ord ($byte); 580 $mask = 0x80; 581 } 582 583 my $val = ($v & $mask) != 0; 584 585 push (@booleans, $val); 586 587 $mask >>= 1; 588 } 589 590 return @booleans; 591} 592 593sub get_boolean_vector_check_all 594{ 595 my $fp = shift; 596 597 my $number_items = shift; 598 599 my @booleans; 600 601 # check first byte to see if all are defined 602 603 my $all_defined = my_read ($fp, 1); 604 605 if ($all_defined eq $SEVEN_ZIP_ALL_DEFINED) 606 { 607 @booleans = (1) x $number_items; 608 } 609 else 610 { 611 @booleans = get_boolean_vector ($fp, $number_items); 612 } 613 614 return @booleans; 615} 616 617sub is_supported_seven_zip_file 618{ 619 my $fp = shift; 620 621 my $magic_len = length ($SEVEN_ZIP_MAGIC); 622 623 my $signature = my_read ($fp, $magic_len); 624 625 return $signature eq $SEVEN_ZIP_MAGIC; 626} 627 628sub get_decoder_properties 629{ 630 my $attributes = shift; 631 632 my $salt_len; 633 my $salt_buf; 634 my $iv_len; 635 my $iv_buf; 636 my $number_cycles_power; 637 638 # set some default values 639 640 $salt_len = 0; 641 $salt_buf = ""; 642 $iv_len = length ($SEVEN_ZIP_DEFAULT_IV); 643 $iv_buf = $SEVEN_ZIP_DEFAULT_IV; 644 $number_cycles_power = $SEVEN_ZIP_DEFAULT_POWER; 645 646 # the most important information is encoded in first and second byte 647 # i.e. the salt/iv length, number cycle power 648 649 my $offset = 0; 650 651 my $first_byte = substr ($attributes, 0, 1); 652 $first_byte = ord ($first_byte); 653 654 $offset++; 655 656 $number_cycles_power = $first_byte & 0x3f; 657 658 if (($first_byte & 0xc0) == 0) 659 { 660 return ($salt_len, $salt_buf, $iv_len, $iv_buf, $number_cycles_power); 661 } 662 663 $salt_len = ($first_byte >> 7) & 1; 664 $iv_len = ($first_byte >> 6) & 1; 665 666 # combine this info with the second byte 667 668 my $second_byte = substr ($attributes, 1, 1); 669 $second_byte = ord ($second_byte); 670 671 $offset++; 672 673 $salt_len += ($second_byte >> 4); 674 $iv_len += ($second_byte & 0x0f); 675 676 $salt_buf = substr ($attributes, $offset, $salt_len); 677 678 $offset += $salt_len; 679 680 $iv_buf = substr ($attributes, $offset, $iv_len); 681 682 # pad the iv with zeros 683 684 my $iv_max_length = 16; 685 686 $iv_buf .= "\x00" x $iv_max_length; 687 $iv_buf = substr ($iv_buf, 0, $iv_max_length); 688 689 return ($salt_len, $salt_buf, $iv_len, $iv_buf, $number_cycles_power); 690} 691 692sub get_digest 693{ 694 my $index = shift; 695 696 my $unpack_info = shift; 697 my $substreams_info = shift; 698 699 my $digest; 700 701 my $digests_unpack_info = $unpack_info->{'digests'}; 702 my $digests_substreams_info = $substreams_info->{'digests'}; 703 704 my $use_unpack_info = 0; 705 my $use_substreams_info = 0; 706 707 if (defined ($digests_unpack_info)) 708 { 709 my $digests_unpack_info_size = 0; 710 711 if (@$digests_unpack_info) 712 { 713 $digests_unpack_info_size = scalar (@$digests_unpack_info); 714 } 715 716 if ($index < $digests_unpack_info_size) 717 { 718 if (ref (@$digests_unpack_info[$index]) eq "HASH") 719 { 720 $use_unpack_info = 1; 721 } 722 } 723 } 724 725 if (defined ($digests_substreams_info)) 726 { 727 my $digests_substreams_info_size = 0; 728 729 if (@$digests_substreams_info) 730 { 731 $digests_substreams_info_size = scalar (@$digests_substreams_info); 732 } 733 734 if ($index < $digests_substreams_info_size) 735 { 736 if (ref (@$digests_substreams_info[$index]) eq "HASH") 737 { 738 $use_substreams_info = 1; 739 } 740 } 741 } 742 743 if ($use_unpack_info == 1) 744 { 745 $digest = @$digests_unpack_info[$index]; 746 } 747 elsif ($use_substreams_info == 1) 748 { 749 $digest = @$digests_substreams_info[$index]; 750 } 751 752 return $digest; 753} 754 755sub has_encrypted_header 756{ 757 my $folder = shift; 758 759 my $encrypted; 760 761 # get first coder 762 763 my $coders = $folder->{'coders'}; 764 765 # get attributes of the first coder 766 767 my $attributes = @$coders[0]->{'codec_id'}; 768 769 if ($attributes eq $SEVEN_ZIP_AES) 770 { 771 $encrypted = 1; 772 } 773 else 774 { 775 $encrypted = 0; 776 } 777 778 return $encrypted; 779} 780 781sub lzma_properties_decode 782{ 783 my $attributes = shift; 784 785 my $lclppb; 786 787 $lclppb = substr ($attributes, 0, 1); 788 789 my @data; 790 791 #data[0] is the lclppb value 792 793 $data[1] = ord (substr ($attributes, 1, 1)); 794 $data[2] = ord (substr ($attributes, 2, 1)); 795 $data[3] = ord (substr ($attributes, 3, 1)); 796 $data[4] = ord (substr ($attributes, 4, 1)); 797 798 my $dict_size = $data[1] | $data[2] << 8 | $data[3] << 16 | $data[4] << 24; 799 800 if ($dict_size < LZMA_DICT_SIZE_MIN) 801 { 802 $dict_size = LZMA_DICT_SIZE_MIN; 803 } 804 805 my $d = ord ($lclppb); 806 807 my $lc = int ($d % 9); 808 $d = int ($d / 9); 809 my $pb = int ($d / 5); 810 my $lp = int ($d % 5); 811 812 return ($lclppb, $dict_size, $lc, $pb, $lp); 813} 814 815sub lzma_alone_header_field_encode 816{ 817 my $num = shift; 818 my $length = shift; 819 820 my $value; 821 822 my $length_doubled = $length * 2; 823 my $big_endian_val = pack ("H*", sprintf ("%0${length_doubled}x", $num)); 824 825 # what follows is just some easy way to convert endianess (there might be better ways of course) 826 827 $value = ""; 828 829 for (my $i = $length - 1; $i >= 0; $i--) 830 { 831 $value .= substr ($big_endian_val, $i, 1); 832 } 833 834 return $value; 835} 836 837sub show_empty_streams_info_warning 838{ 839 my $file_path = shift; 840 841 print STDERR "WARNING: the file '" . $file_path . "' does not contain any meaningful data (the so-called streams info), it might only contain a list of empty files.\n"; 842} 843 844sub extract_hash_from_archive 845{ 846 my $fp = shift; 847 my $archive = shift; 848 my $file_path = shift; 849 850 my $hash_buf = ""; 851 852 # check if everything is defined/initialized 853 # and retrieve the single "objects" 854 855 return undef unless (defined ($archive)); 856 857 my $parsed_header = $archive->{'parsed_header'}; 858 return undef unless (defined ($parsed_header)); 859 860 my $signature_header = $archive->{'signature_header'}; 861 return undef unless (defined ($signature_header)); 862 863 my $streams_info = $parsed_header->{'streams_info'}; 864 865 if (! defined ($streams_info)) 866 { 867 show_empty_streams_info_warning ($file_path); 868 869 return undef; 870 } 871 872 my $unpack_info = $streams_info->{'unpack_info'}; 873 return undef unless (defined ($unpack_info)); 874 875 my $substreams_info = $streams_info->{'substreams_info'}; 876 877 my $digests = $unpack_info->{'digests'}; 878 return undef unless (defined ($digests)); 879 880 my $folders = $unpack_info->{'folders'}; 881 return undef unless (defined ($folders)); 882 883 my $pack_info = $streams_info->{'pack_info'}; 884 return undef unless (defined ($pack_info)); 885 886 # init file seek values 887 888 my $position_after_header = $signature_header->{'position_after_header'}; 889 my $position_pack = $pack_info->{'pack_pos'}; 890 my $current_seek_position = $position_after_header + $position_pack; 891 892 # 893 # start: 894 # 895 896 # get first folder/coder 897 898 my $folder_id = 0; 899 900 my $folder = @$folders[$folder_id]; 901 902 my $number_coders = $folder->{'number_coders'}; 903 904 # check if header is encrypted 905 906 my $has_encrypted_header = 0; 907 908 if ($number_coders > 1) 909 { 910 $has_encrypted_header = 0; 911 } 912 else 913 { 914 $has_encrypted_header = has_encrypted_header ($folder); 915 } 916 917 # get the first coder 918 919 my $coder_id = 0; 920 921 my $coder = $folder->{'coders'}[$coder_id]; 922 return undef unless (defined ($coder)); 923 924 my $codec_id = $coder->{'codec_id'}; 925 926 # set index and seek to postition 927 928 my $current_index = 0; 929 930 my_seek ($fp, $current_seek_position, 0); 931 932 # if it is lzma compressed, we need to decompress it first 933 934 if ($codec_id eq $SEVEN_ZIP_LZMA1) 935 { 936 # get the sizes 937 938 my $unpack_size = $unpack_info->{'unpack_sizes'}[$current_index]; 939 940 my $data_len = $pack_info->{'pack_sizes'}[$current_index]; 941 942 # get the data 943 944 my $data = my_read ($fp, $data_len); 945 946 # lzma "header" stuff 947 948 my $attributes = $coder->{'attributes'}; 949 950 my ($property_lclppb, $dict_size, $lc, $pb, $lp) = lzma_properties_decode ($attributes); 951 952 return undef unless (length ($property_lclppb) == 1); 953 954 # the alone-format header is defined like this: 955 # 956 # +------------+----+----+----+----+--+--+--+--+--+--+--+--+ 957 # | Properties | Dictionary Size | Uncompressed Size | 958 # +------------+----+----+----+----+--+--+--+--+--+--+--+--+ 959 # 960 961 my $decompressed_header = ""; 962 963 # we loop over this code section max. 2 times to try two variants of headers (with the correct/specific values and with default values) 964 965 for (my $try_number = 1; $try_number <= 2; $try_number++) 966 { 967 my ($dict_size_encoded, $uncompressed_size_encoded); 968 my $lz = new Compress::Raw::Lzma::AloneDecoder (AppendOutput => 1); 969 970 if ($try_number == 1) 971 { 972 $dict_size_encoded = lzma_alone_header_field_encode ($dict_size, 4); # 4 bytes (the "Dictionary Size" field), little endian 973 $uncompressed_size_encoded = lzma_alone_header_field_encode ($unpack_size, 8); # 8 bytes (the "Uncompressed Size" field), little endian 974 } 975 else 976 { 977 # this is the fallback case (using some default values): 978 979 $dict_size_encoded = pack ("H*", "00008000"); # "default" dictionary size (2^23 = 0x00800000) 980 $uncompressed_size_encoded = pack ("H*", "ffffffffffffffff"); # means: unknown uncompressed size 981 } 982 983 my $lzma_alone_format_header = $property_lclppb . $dict_size_encoded . $uncompressed_size_encoded; 984 985 my $lzma_header = $lzma_alone_format_header . $data; 986 987 my $status = $lz->code ($lzma_header, $decompressed_header); 988 989 if (length ($status) > 0) 990 { 991 if ($try_number == 2) 992 { 993 if ($status != LZMA_STREAM_END) 994 { 995 print STDERR "WARNING: the LZMA header decompression for the file '" . $file_path . "' failed with status: '" . $status . "'\n"; 996 997 if ($status eq "Data is corrupt") 998 { 999 print STDERR "\n"; 1000 print STDERR "INFO: for some reasons, for large LZMA buffers, we sometimes get a 'Data is corrupt' error.\n"; 1001 print STDERR " This is a known issue of this tool and needs to be investigated.\n"; 1002 1003 print STDERR "\n"; 1004 print STDERR " The problem might have to do with this small paragraph hidden in the 7z documentation (quote):\n"; 1005 print STDERR " 'The reference LZMA Decoder ignores the value of the \"Corrupted\" variable.\n"; 1006 print STDERR " So it continues to decode the stream, even if the corruption can be detected\n"; 1007 print STDERR " in the Range Decoder. To provide the full compatibility with output of the\n"; 1008 print STDERR " reference LZMA Decoder, another LZMA Decoder implementation must also\n"; 1009 print STDERR " ignore the value of the \"Corrupted\" variable.'\n"; 1010 print STDERR "\n"; 1011 print STDERR " (taken from the DOC/lzma-specification.txt file of the 7z-SDK: see for instance:\n"; 1012 print STDERR " https://github.com/jljusten/LZMA-SDK/blob/master/DOC/lzma-specification.txt#L343-L347)\n"; 1013 } 1014 1015 return undef; 1016 } 1017 } 1018 } 1019 1020 last if (length ($decompressed_header) > 0); # if we got some output it seems that it worked just fine 1021 } 1022 1023 return undef unless (length ($decompressed_header) > 0); 1024 1025 # in theory we should also check that the length is correct 1026 # return undef unless (length ($decompressed_header) == $unpack_size); 1027 1028 # check the decompressed 7zip header 1029 1030 $memory_buffer_read_offset = 0; # decompressed_header is a new memory buffer (which uses a offset to speed things up) 1031 1032 my $id = read_id (\$decompressed_header); 1033 1034 return undef unless ($id eq $SEVEN_ZIP_HEADER); 1035 1036 my $header = read_seven_zip_header (\$decompressed_header); 1037 1038 # override the "old" archive object 1039 1040 $archive = { 1041 "signature_header" => $signature_header, 1042 "parsed_header" => $header 1043 }; 1044 1045 $parsed_header = $archive->{'parsed_header'}; 1046 return "" unless (defined ($parsed_header)); 1047 1048 # this didn't change at all 1049 # $signature_header = $archive->{'signature_header'}; 1050 # return undef unless (defined ($signature_header)); 1051 1052 $streams_info = $parsed_header->{'streams_info'}; 1053 1054 if (! defined ($streams_info)) 1055 { 1056 show_empty_streams_info_warning ($file_path); 1057 1058 return ""; 1059 } 1060 1061 $unpack_info = $streams_info->{'unpack_info'}; 1062 return "" unless (defined ($unpack_info)); 1063 1064 $substreams_info = $streams_info->{'substreams_info'}; 1065 1066 $digests = $unpack_info->{'digests'}; 1067 return "" unless (defined ($digests)); 1068 1069 $folders = $unpack_info->{'folders'}; 1070 return "" unless (defined ($folders)); 1071 1072 my $number_folders = $unpack_info->{'number_folders'}; 1073 1074 $pack_info = $streams_info->{'pack_info'}; 1075 return "" unless (defined ($pack_info)); 1076 1077 # loop over all folders/coders to check if we find an AES encrypted stream 1078 1079 $position_pack = $pack_info->{'pack_pos'}; 1080 $current_seek_position = $position_after_header + $position_pack; # reset the seek position 1081 1082 for (my $folder_pos = 0; $folder_pos < $number_folders; $folder_pos++) 1083 { 1084 $folder = @$folders[$folder_pos]; 1085 last unless (defined ($folder)); 1086 1087 $number_coders = $folder->{'number_coders'}; 1088 1089 my $num_pack_sizes = scalar (@{$pack_info->{'pack_sizes'}}); 1090 1091 for (my $coder_pos = 0; $coder_pos < $number_coders; $coder_pos++) 1092 { 1093 $coder = $folder->{'coders'}[$coder_pos]; 1094 last unless (defined ($coder)); 1095 1096 $coder_id = $coder_pos; # Attention: coder_id != codec_id ! 1097 1098 $codec_id = $coder->{'codec_id'}; 1099 1100 # we stop after first AES found, but in theory we could also deal 1101 # with several different AES streams (in that case we would need 1102 # to print several hash buffers, but this is a very special case) 1103 1104 last if ($codec_id eq $SEVEN_ZIP_AES); 1105 1106 # ELSE: update seek position and index: 1107 1108 if ($current_index < $num_pack_sizes) # not all pack_sizes always need to be known (final ones can be skipped) 1109 { 1110 my $pack_size = $pack_info->{'pack_sizes'}[$current_index]; 1111 1112 $current_seek_position += $pack_size; 1113 } 1114 1115 $current_index++; 1116 } 1117 1118 last if ($codec_id eq $SEVEN_ZIP_AES); 1119 1120 last unless (defined ($coder)); 1121 } 1122 1123 # we unfortunately can't do anything if no AES encrypted data was found 1124 1125 if ($codec_id ne $SEVEN_ZIP_AES) 1126 { 1127 print STDERR "WARNING: no AES data found in the 7z file '" . $file_path . "'\n"; 1128 1129 return ""; 1130 } 1131 } 1132 elsif ($codec_id eq $SEVEN_ZIP_LZMA2) 1133 { 1134 print STDERR "WARNING: lzma2 compression found within '" . $file_path . "' is currently not supported, "; 1135 print STDERR "but could be probably added easily\n"; 1136 1137 return ""; 1138 } 1139 elsif ($codec_id ne $SEVEN_ZIP_AES) 1140 { 1141 print STDERR "WARNING: unsupported coder with codec id '" . unpack ("H*", $codec_id) . "' in file '" . $file_path . "' found.\n"; 1142 print STDERR "If you think this codec method from DOC/Methods.txt of the 7-Zip source code "; 1143 print STDERR "should be supported, please file a problem report/feature request\n"; 1144 1145 return ""; 1146 } 1147 1148 # 1149 # finally: fill hash_buf 1150 # 1151 1152 # first get the data with help of pack info 1153 1154 my $unpack_size = $unpack_info->{'unpack_sizes'}[$current_index]; 1155 1156 my $data_len = $pack_info->{'pack_sizes'}[$current_index]; 1157 1158 my $digests_index = $current_index; # correct ? 1159 1160 # reset the file pointer to the position after signature header and get the data 1161 1162 my_seek ($fp, $current_seek_position, 0); 1163 1164 # get remaining hash info (iv, number cycles power) 1165 1166 my $digest = get_digest ($digests_index, $unpack_info, $substreams_info); 1167 1168 return undef unless ((defined ($digest)) && ($digest->{'defined'} == 1)); 1169 1170 my $attributes = $coder->{'attributes'}; 1171 1172 my ($salt_len, $salt_buf, $iv_len, $iv_buf, $number_cycles_power) = get_decoder_properties ($attributes); 1173 1174 my $crc = $digest->{'crc'}; 1175 1176 # special case: we can truncate the data_len and use 32 bytes in total for both iv + data (last 32 bytes of data) 1177 1178 my $is_truncated = 0; 1179 my $padding_attack_possible = 0; 1180 1181 my $data; 1182 1183 if ($has_encrypted_header == 0) 1184 { 1185 my $length_difference = $data_len - $unpack_size; 1186 1187 if ($length_difference > 3) 1188 { 1189 if ($data_len > ($PASSWORD_RECOVERY_TOOL_DATA_LIMIT / 2)) 1190 { 1191 if ($PASSWORD_RECOVERY_TOOL_SUPPORT_PADDING_ATTACK == 1) 1192 { 1193 my_seek ($fp, $data_len - 32, 1); 1194 1195 $iv_buf = my_read ($fp, 16); 1196 $iv_len = 16; 1197 1198 $data = my_read ($fp, 16); 1199 $data_len = 16; 1200 1201 $unpack_size %= 16; 1202 1203 $is_truncated = 1; 1204 } 1205 } 1206 1207 $padding_attack_possible = 1; 1208 } 1209 } 1210 1211 my $type_of_compression = $SEVEN_ZIP_UNCOMPRESSED; 1212 my $compression_attributes = ""; 1213 1214 for (my $coder_pos = $coder_id + 1; $coder_pos < $number_coders; $coder_pos++) 1215 { 1216 $coder = $folder->{'coders'}[$coder_pos]; 1217 last unless (defined ($coder)); 1218 1219 $codec_id = $coder->{'codec_id'}; 1220 1221 if ($codec_id eq $SEVEN_ZIP_LZMA1) 1222 { 1223 $type_of_compression = $SEVEN_ZIP_LZMA1_COMPRESSED; 1224 } 1225 elsif ($codec_id eq $SEVEN_ZIP_LZMA2) 1226 { 1227 $type_of_compression = $SEVEN_ZIP_LZMA2_COMPRESSED; 1228 } 1229 elsif ($codec_id eq $SEVEN_ZIP_PPMD) 1230 { 1231 $type_of_compression = $SEVEN_ZIP_PPMD_COMPRESSED; 1232 } 1233 elsif ($codec_id eq $SEVEN_ZIP_BCJ) 1234 { 1235 $type_of_compression = $SEVEN_ZIP_BCJ_COMPRESSED; 1236 } 1237 elsif ($codec_id eq $SEVEN_ZIP_BCJ2) 1238 { 1239 $type_of_compression = $SEVEN_ZIP_BCJ2_COMPRESSED; 1240 } 1241 elsif ($codec_id eq $SEVEN_ZIP_BZIP2) 1242 { 1243 $type_of_compression = $SEVEN_ZIP_BZIP2_COMPRESSED; 1244 } 1245 elsif ($codec_id eq $SEVEN_ZIP_DEFLATE) 1246 { 1247 $type_of_compression = $SEVEN_ZIP_DEFLATE_COMPRESSED; 1248 } 1249 1250 if ($type_of_compression != $SEVEN_ZIP_UNCOMPRESSED) 1251 { 1252 if (defined ($coder->{'attributes'})) 1253 { 1254 $compression_attributes = unpack ("H*", $coder->{'attributes'}); 1255 } 1256 1257 last; # no need to continue looping, we found what we needed (and 2+ compressions are never combined by the 7z format) 1258 } 1259 } 1260 1261 # show a warning if the decompression algorithm is currently not supported by the cracker 1262 1263 if ($SHOW_LZMA_DECOMPRESS_AFTER_DECRYPT_WARNING == 1) 1264 { 1265 if ($type_of_compression != $SEVEN_ZIP_UNCOMPRESSED) 1266 { 1267 if ($is_truncated == 0) 1268 { 1269 if (grep (/^$type_of_compression$/, @PASSWORD_RECOVERY_TOOL_SUPPORTED_DECOMPRESSORS) == 0) 1270 { 1271 print STDERR "WARNING: to correctly verify the CRC checksum of the data contained within the file '". $file_path . "',\n"; 1272 print STDERR "the data must be decompressed using " . $SEVEN_ZIP_COMPRESSOR_NAMES{$type_of_compression}; 1273 print STDERR " after the decryption step.\n"; 1274 print STDERR "\n"; 1275 print STDERR "$PASSWORD_RECOVERY_TOOL_NAME currently does not support this particular decompression algorithm.\n"; 1276 print STDERR "\n"; 1277 1278 if ($padding_attack_possible == 1) 1279 { 1280 print STDERR "INFO: However there is also some good news in this particular case.\n"; 1281 print STDERR "Since AES-CBC is used by the 7z algorithm and the data length of this file allows a padding attack,\n"; 1282 print STDERR "the password recovery tool might be able to use that to verify the correctness of password candidates.\n"; 1283 print STDERR "By using this attack there might of course be a higher probability of false positives.\n"; 1284 print STDERR "\n"; 1285 } 1286 elsif ($type_of_compression == $SEVEN_ZIP_LZMA2_COMPRESSED) # this special case should only work for LZMA2 1287 { 1288 if ($data_len <= $LZMA2_MIN_COMPRESSED_LEN) 1289 { 1290 print STDERR "INFO: it might still be possible to crack the password of this archive since the data part seems\n"; 1291 print STDERR "to be very short and therefore it might use the LZMA2 uncompressed chunk feature\n"; 1292 print STDERR "\n"; 1293 } 1294 } 1295 } 1296 } 1297 } 1298 } 1299 1300 my $type_of_data = $SEVEN_ZIP_UNCOMPRESSED; # this variable will hold the "number" after the "$7z$" hash signature 1301 1302 if ($is_truncated == 1) 1303 { 1304 $type_of_data = $SEVEN_ZIP_TRUNCATED; # note: this means that we neither need the crc_len, nor the coder attributes 1305 } 1306 else 1307 { 1308 $type_of_data = $type_of_compression; 1309 } 1310 1311 my $crc_len = 0; 1312 1313 if (($type_of_data != $SEVEN_ZIP_UNCOMPRESSED) && ($type_of_data != $SEVEN_ZIP_TRUNCATED)) 1314 { 1315 if (scalar ($substreams_info->{'unpack_sizes'}) > 0) 1316 { 1317 $crc_len = $substreams_info->{'unpack_sizes'}[0]; # default: use the first file of the first stream 1318 } 1319 } 1320 1321 if (! defined ($data)) 1322 { 1323 if (($type_of_data != $SEVEN_ZIP_UNCOMPRESSED) && ($type_of_data != $SEVEN_ZIP_TRUNCATED)) 1324 { 1325 if ($ANALYZE_ALL_STREAMS_TO_FIND_SHORTEST_DATA_BUF == 1) 1326 { 1327 my $number_file_indices = scalar (@{$substreams_info->{'unpack_sizes'}}); 1328 my $number_streams = scalar (@{$substreams_info->{'unpack_stream_numbers'}}); 1329 my $number_pack_info = scalar (@{$pack_info->{'pack_sizes'}}); # same as $pack_info->{'number_pack_streams'} 1330 my $number_folders = scalar (@{$folders}); # same as $unpack_info->{'number_folders'} 1331 1332 # check if there is a stream with a smaller first file than the first file of the first stream 1333 # (this is just a clever approach to produce shorter hashes) 1334 1335 my $file_idx = 0; 1336 my $data_offset = 0; 1337 1338 my $data_offset_tmp = 0; 1339 1340 # sanity checks (otherwise we might overflow): 1341 1342 if ($number_pack_info < $number_streams) # should never happen (they should be equal) 1343 { 1344 $number_streams = $number_pack_info; 1345 } 1346 1347 if ($number_folders < $number_streams) # should never happen (they should be equal) 1348 { 1349 $number_streams = $number_folders; 1350 } 1351 1352 for (my $stream_idx = 0; $stream_idx < $number_streams; $stream_idx++) 1353 { 1354 my $next_file_idx = $substreams_info->{'unpack_stream_numbers'}[$stream_idx]; 1355 1356 my $length_first_file = $substreams_info->{'unpack_sizes'}[$file_idx]; 1357 1358 my $length_compressed = $pack_info->{'pack_sizes'}[$stream_idx]; 1359 1360 if ($SHOW_LIST_OF_ALL_STREAMS == 1) 1361 { 1362 print STDERR sprintf ("DEBUG: new stream found with first file consisting of %9d bytes of %10d bytes total stream length\n", $length_first_file, $length_compressed); 1363 } 1364 1365 if ($length_first_file < $crc_len) 1366 { 1367 my $digest = get_digest ($file_idx, $unpack_info, $substreams_info); 1368 1369 next unless ((defined ($digest)) && ($digest->{'defined'} == 1)); 1370 1371 # get new AES settings (salt, iv, costs): 1372 1373 my $coders = @$folders[$stream_idx]->{'coders'}; 1374 1375 my $aes_coder_idx = 0; 1376 my $aes_coder_found = 0; 1377 1378 for (my $coders_idx = 0; $coders_idx < $number_coders; $coders_idx++) 1379 { 1380 my $codec_id = @$coders[$coders_idx]->{'codec_id'}; 1381 1382 if ($codec_id eq $SEVEN_ZIP_AES) 1383 { 1384 $aes_coder_idx = $coders_idx; 1385 1386 $aes_coder_found = 1; 1387 } 1388 elsif (defined (@$coders[$coders_idx]->{'attributes'})) 1389 { 1390 $compression_attributes = unpack ("H*", @$coders[$coders_idx]->{'attributes'}); 1391 } 1392 } 1393 1394 next unless ($aes_coder_found == 1); 1395 1396 $attributes = @$coders[$aes_coder_idx]->{'attributes'}; 1397 1398 # 1399 # set the "new" hash properties (for this specific/better stream with smaller first file): 1400 # 1401 1402 ($salt_len, $salt_buf, $iv_len, $iv_buf, $number_cycles_power) = get_decoder_properties ($attributes); 1403 1404 $crc = $digest->{'crc'}; 1405 1406 $crc_len = $length_first_file; 1407 1408 $data_len = $length_compressed; 1409 1410 $unpack_size = $length_first_file; 1411 1412 $data_offset = $data_offset_tmp; 1413 1414 # we assume that $type_of_data and $type_of_compression didn't change between the streams 1415 # (this should/could be checked too to avoid any strange problems) 1416 } 1417 1418 $file_idx += $next_file_idx; 1419 1420 if ($file_idx >= $number_file_indices) # should never happen 1421 { 1422 last; 1423 } 1424 1425 $data_offset_tmp += $length_compressed; 1426 } 1427 1428 if ($SHOW_LIST_OF_ALL_STREAMS == 1) 1429 { 1430 print STDERR sprintf ("DEBUG: shortest file at the beginning of a stream consists of %d bytes (offset: %d bytes)\n", $crc_len, $data_offset); 1431 } 1432 1433 if ($data_offset > 0) 1434 { 1435 my_seek ($fp, $data_offset, 1); 1436 } 1437 } 1438 1439 if ($SHORTEN_HASH_LENGTH_TO_CRC_LENGTH == 1) 1440 { 1441 my $aes_len = int ($SHORTEN_HASH_FIXED_HEADER + $crc_len + $SHORTEN_HASH_EXTRA_PERCENT / 100 * $crc_len); 1442 1443 my $AES_BLOCK_SIZE = 16; 1444 1445 $aes_len += $AES_BLOCK_SIZE - 1; # add these bytes to be sure to always include the last "block" too (round up and cast) 1446 1447 $aes_len = int ($aes_len / $AES_BLOCK_SIZE) * $AES_BLOCK_SIZE; 1448 1449 if ($aes_len < $data_len) 1450 { 1451 $data_len = $aes_len; 1452 $unpack_size = $aes_len; 1453 } 1454 } 1455 } 1456 1457 $data = my_read ($fp, $data_len); # NOTE: we shouldn't read a very huge data buffer directly into memory 1458 # improvement: read the data in chunks of several MBs and keep printing it 1459 # directly to stdout (by also not returning a string from this function) 1460 # that would help to achieve minimal RAM consumption (even for very large hashes) 1461 } 1462 1463 return undef unless (length ($data) == $data_len); 1464 1465 if ($data_len > ($PASSWORD_RECOVERY_TOOL_DATA_LIMIT / 2)) 1466 { 1467 print STDERR "WARNING: the file '". $file_path . "' unfortunately can't be used with $PASSWORD_RECOVERY_TOOL_NAME since the data length\n"; 1468 print STDERR "in this particular case is too long ($data_len of the maximum allowed " .($PASSWORD_RECOVERY_TOOL_DATA_LIMIT / 2). " bytes).\n"; 1469 1470 if ($PASSWORD_RECOVERY_TOOL_SUPPORT_PADDING_ATTACK == 1) 1471 { 1472 print STDERR "Furthermore, it could not be truncated. This should only happen in very rare cases.\n"; 1473 } 1474 1475 return ""; 1476 } 1477 1478 $hash_buf = sprintf ("%s:%s%u\$%u\$%u\$%s\$%u\$%s\$%u\$%u\$%u\$%s", 1479 basename($file_path), 1480 $SEVEN_ZIP_HASH_SIGNATURE, 1481 $type_of_data, 1482 $number_cycles_power, 1483 $salt_len, 1484 unpack ("H*", $salt_buf), 1485 $iv_len, 1486 unpack ("H*", $iv_buf), 1487 $crc, 1488 $data_len, 1489 $unpack_size, 1490 unpack ("H*", $data) # could be very large. We could/should avoid loading/copying this data into memory 1491 ); 1492 1493 return $hash_buf if ($type_of_data == $SEVEN_ZIP_UNCOMPRESSED); 1494 return $hash_buf if ($type_of_data == $SEVEN_ZIP_TRUNCATED); 1495 1496 $hash_buf .= sprintf ("\$%u\$%s", 1497 $crc_len, 1498 $compression_attributes 1499 ); 1500 1501 return $hash_buf; 1502} 1503 1504sub read_seven_zip_signature_header 1505{ 1506 my $fp = shift; 1507 1508 my $signature; 1509 1510 # ArchiveVersion 1511 1512 my $major_version = my_read ($fp, 1); 1513 1514 $major_version = ord ($major_version); 1515 1516 my $minor_version = my_read ($fp, 1); 1517 1518 $minor_version = ord ($minor_version); 1519 1520 # StartHeaderCRC 1521 1522 my_read ($fp, 4); # skip start header CRC 1523 1524 # StartHeader 1525 1526 my $next_header_offset = get_uint64 ($fp); 1527 my $next_header_size = get_uint64 ($fp); 1528 1529 my_read ($fp, 4); # next header CRC 1530 1531 my $position_after_header = my_tell ($fp); 1532 1533 $signature = { 1534 "major_version" => $major_version, 1535 "minor_version" => $minor_version, 1536 "next_header_offset" => $next_header_offset, 1537 "next_header_size" => $next_header_size, 1538 "position_after_header" => $position_after_header 1539 }; 1540 1541 return $signature; 1542} 1543 1544sub skip_seven_zip_data 1545{ 1546 my $fp = shift; 1547 1548 # determine the length to skip 1549 1550 my $len = my_read ($fp, 1); 1551 1552 # do skip len bytes 1553 1554 $len = ord ($len); 1555 1556 my_read ($fp, $len); 1557} 1558 1559sub wait_for_seven_zip_id 1560{ 1561 my $fp = shift; 1562 my $id = shift; 1563 1564 while (1) 1565 { 1566 my $new_id = read_id ($fp); 1567 1568 if ($new_id eq $id) 1569 { 1570 return 1; 1571 } 1572 elsif ($new_id eq $SEVEN_ZIP_END) 1573 { 1574 return 0; 1575 } 1576 1577 skip_seven_zip_data ($fp); 1578 } 1579 1580 return 0; 1581} 1582 1583sub read_seven_zip_digests 1584{ 1585 my $fp = shift; 1586 1587 my $number_items = shift; 1588 1589 my @digests; 1590 1591 # init 1592 1593 for (my $i = 0; $i < $number_items; $i++) 1594 { 1595 my $digest = { 1596 "crc" => "", 1597 "defined" => 0 1598 }; 1599 1600 push (@digests, $digest) 1601 } 1602 1603 # get number of items 1604 1605 my @digests_defined = get_boolean_vector_check_all ($fp, $number_items); 1606 1607 # for each number of item, get a digest 1608 1609 for (my $i = 0; $i < $number_items; $i++) 1610 { 1611 my $crc = 0; 1612 1613 for (my $i = 0; $i < 4; $i++) 1614 { 1615 my $val = my_read ($fp, 1); 1616 1617 $val = ord ($val); 1618 1619 $crc |= ($val << (8 * $i)); 1620 } 1621 1622 $digests[$i]->{'crc'} = $crc; 1623 $digests[$i]->{'defined'} = $digests_defined[$i]; 1624 } 1625 1626 return @digests; 1627} 1628 1629sub read_seven_zip_pack_info 1630{ 1631 my $fp = shift; 1632 1633 my $pack_info; 1634 1635 # PackPos 1636 1637 my $pack_pos = read_number ($fp); 1638 1639 # NumPackStreams 1640 1641 my $number_pack_streams = read_number ($fp); 1642 1643 # must be "size" id 1644 1645 if (! wait_for_seven_zip_id ($fp, $SEVEN_ZIP_SIZE)) 1646 { 1647 return undef; 1648 } 1649 1650 my @pack_sizes = (0) x $number_pack_streams; 1651 1652 for (my $i = 0; $i < $number_pack_streams; $i++) 1653 { 1654 $pack_sizes[$i] = read_number ($fp); 1655 } 1656 1657 $pack_info = { 1658 "number_pack_streams" => $number_pack_streams, 1659 "pack_pos" => $pack_pos, 1660 "pack_sizes" => \@pack_sizes 1661 }; 1662 1663 # read remaining data 1664 1665 while (1) 1666 { 1667 my $id = read_id ($fp); 1668 1669 if ($id eq $SEVEN_ZIP_END) 1670 { 1671 return $pack_info; 1672 } 1673 elsif ($id eq $SEVEN_ZIP_CRC) 1674 { 1675 my $digests = read_seven_zip_digests ($fp, $number_pack_streams); 1676 1677 # we do not need those digests, ignore them 1678 # (but we need to read them from the stream) 1679 1680 next; 1681 } 1682 1683 skip_seven_zip_data ($fp); 1684 } 1685 1686 # something went wrong 1687 1688 return undef; 1689} 1690 1691sub read_seven_zip_folders 1692{ 1693 my $fp = shift; 1694 1695 my $folder; 1696 1697 my @coders = (); 1698 my @bindpairs = (); 1699 my $index_main_stream = 0; 1700 my $sum_input_streams = 0; 1701 my $sum_output_streams = 0; 1702 my $sum_packed_streams = 1; 1703 1704 # NumCoders 1705 1706 my $number_coders = read_number ($fp); 1707 1708 # loop 1709 1710 for (my $i = 0; $i < $number_coders; $i++) 1711 { 1712 my $main_byte = my_read ($fp, 1); 1713 1714 $main_byte = ord ($main_byte); 1715 1716 if ($main_byte & 0xC0) 1717 { 1718 return undef; 1719 } 1720 1721 my $codec_id_size = $main_byte & 0xf; 1722 1723 if ($codec_id_size > 8) 1724 { 1725 return undef; 1726 } 1727 1728 # the codec id (very important info for us): 1729 # codec_id: 06F10701 -> AES-256 + SHA-256 1730 # codec_id: 030101 -> lzma (we need to decompress - k_LZMA) 1731 # codec_id: 21 -> lzma2 (we need to decompress - k_LZMA2) 1732 1733 my $codec_id = my_read ($fp, $codec_id_size); 1734 1735 # NumInStreams 1736 1737 my $number_input_streams = 1; 1738 1739 # NumOutStreams 1740 1741 my $number_output_streams = 1; 1742 1743 if (($main_byte & 0x10) != 0) 1744 { 1745 $number_input_streams = read_number ($fp); 1746 $number_output_streams = read_number ($fp); 1747 } 1748 1749 $sum_input_streams += $number_input_streams; 1750 $sum_output_streams += $number_output_streams; 1751 1752 # attributes 1753 1754 my $attributes; 1755 1756 if (($main_byte & 0x020) != 0) 1757 { 1758 my $property_size = read_number ($fp); 1759 1760 $attributes = my_read ($fp, $property_size); 1761 } 1762 1763 $coders[$i] = { 1764 "codec_id" => $codec_id, 1765 "number_input_streams" => $number_input_streams, 1766 "number_output_streams" => $number_output_streams, 1767 "attributes" => $attributes 1768 }; 1769 } 1770 1771 if (($sum_input_streams != 1) || ($sum_output_streams != 1)) 1772 { 1773 # InStreamUsed / OutStreamUsed 1774 1775 my @input_stream_used = (0) x $sum_input_streams; 1776 my @output_stream_used = (0) x $sum_output_streams; 1777 1778 # BindPairs 1779 1780 my $number_bindpairs = $sum_output_streams - 1; 1781 1782 for (my $i = 0; $i < $number_bindpairs; $i++) 1783 { 1784 # input 1785 1786 my $index_input = read_number ($fp); 1787 1788 if ($input_stream_used[$index_input] == 1) 1789 { 1790 return undef; # the stream is used already, shouldn't happen at all 1791 } 1792 1793 $input_stream_used[$index_input] = 1; 1794 1795 # output 1796 1797 my $index_output = read_number ($fp); 1798 1799 if ($output_stream_used[$index_output] == 1) 1800 { 1801 return undef; 1802 } 1803 1804 $output_stream_used[$index_output] = 1; 1805 1806 my @new_bindpair = ($index_input, $index_output); 1807 1808 push (@bindpairs, \@new_bindpair); 1809 } 1810 1811 # PackedStreams 1812 1813 $sum_packed_streams = $sum_input_streams - $number_bindpairs; 1814 1815 if ($sum_packed_streams != 1) 1816 { 1817 for (my $i = 0; $i < $sum_packed_streams; $i++) 1818 { 1819 # we can ignore this 1820 1821 read_number ($fp); # my $index = read_number ($fp); 1822 } 1823 } 1824 1825 # determine the main stream 1826 1827 $index_main_stream = -1; 1828 1829 for (my $i = 0; $i < $sum_output_streams; $i++) 1830 { 1831 if ($output_stream_used[$i] == 0) 1832 { 1833 $index_main_stream = $i; 1834 1835 last; 1836 } 1837 } 1838 1839 if ($index_main_stream == -1) 1840 { 1841 return undef; # should not happen 1842 } 1843 } 1844 1845 $folder = { 1846 "number_coders" => $number_coders, 1847 "coders" => \@coders, 1848 "bindpairs" => \@bindpairs, 1849 "index_main_stream" => $index_main_stream, 1850 "sum_input_streams" => $sum_input_streams, 1851 "sum_output_streams" => $sum_output_streams, 1852 "sum_packed_streams" => $sum_packed_streams, 1853 }; 1854 1855 return $folder; 1856} 1857 1858sub read_seven_zip_unpack_info 1859{ 1860 my $fp = shift; 1861 1862 my $unpack_info; 1863 1864 my $number_folders = 0; 1865 my @folders = (); 1866 my @datastream_indices = (); 1867 my @unpack_sizes; 1868 my @digests; 1869 my @main_unpack_size_index; 1870 my @coder_unpack_sizes; 1871 1872 # check until we see the "folder" id 1873 1874 if (! wait_for_seven_zip_id ($fp, $SEVEN_ZIP_FOLDER)) 1875 { 1876 return undef; 1877 } 1878 1879 # NumFolders 1880 1881 $number_folders = read_number ($fp); 1882 1883 # External 1884 1885 my $external = my_read ($fp, 1); 1886 1887 # loop 1888 1889 my $sum_coders_output_streams = 0; 1890 my $sum_folders = 0; 1891 1892 for (my $i = 0; $i < $number_folders; $i++) 1893 { 1894 if ($external eq $SEVEN_ZIP_NOT_EXTERNAL) 1895 { 1896 my $folder = read_seven_zip_folders ($fp); 1897 1898 $folders[$i] = $folder; 1899 1900 $main_unpack_size_index[$i] = $folder->{'index_main_stream'}; 1901 $coder_unpack_sizes[$i] = $sum_coders_output_streams; 1902 1903 $sum_coders_output_streams += $folder->{'sum_output_streams'}; 1904 1905 $sum_folders++; 1906 } 1907 elsif ($external eq $SEVEN_ZIP_EXTERNAL) 1908 { 1909 $datastream_indices[$i] = read_number ($fp); 1910 } 1911 else 1912 { 1913 return undef; 1914 } 1915 } 1916 1917 if (!wait_for_seven_zip_id ($fp, $SEVEN_ZIP_UNPACK_SIZE)) 1918 { 1919 return undef; 1920 } 1921 1922 for (my $i = 0; $i < $sum_coders_output_streams; $i++) 1923 { 1924 $unpack_sizes[$i] = read_number ($fp); 1925 } 1926 1927 # read remaining data 1928 1929 while (1) 1930 { 1931 my $id = read_id ($fp); 1932 1933 if ($id eq $SEVEN_ZIP_END) 1934 { 1935 $unpack_info = { 1936 "number_folders" => $number_folders, 1937 "folders" => \@folders, 1938 "datastream_indices" => \@datastream_indices, 1939 "digests" => \@digests, 1940 "unpack_sizes" => \@unpack_sizes, 1941 "main_unpack_size_index" => \@main_unpack_size_index, 1942 "coder_unpack_sizes" => \@coder_unpack_sizes 1943 }; 1944 1945 return $unpack_info; 1946 } 1947 elsif ($id eq $SEVEN_ZIP_CRC) 1948 { 1949 my @new_digests = read_seven_zip_digests ($fp, $sum_folders); 1950 1951 for (my $i = 0; $i < $sum_folders; $i++) 1952 { 1953 $digests[$i]->{'defined'} = $new_digests[$i]->{'defined'}; 1954 $digests[$i]->{'crc'} = $new_digests[$i]->{'crc'}; 1955 } 1956 1957 next; 1958 } 1959 1960 skip_seven_zip_data ($fp); 1961 } 1962 1963 # something went wrong 1964 1965 return undef; 1966} 1967 1968sub get_folder_unpack_size 1969{ 1970 my $unpack_info = shift; 1971 my $folder_index = shift; 1972 1973 my $index = $unpack_info->{'coder_unpack_sizes'}[$folder_index] + $unpack_info->{'main_unpack_size_index'}[$folder_index]; 1974 1975 return $unpack_info->{'unpack_sizes'}[$index]; 1976} 1977 1978sub has_valid_folder_crc 1979{ 1980 my $digests = shift; 1981 my $index = shift; 1982 1983 if (! defined (@$digests[$index])) 1984 { 1985 return 0; 1986 } 1987 1988 my $digest = @$digests[$index]; 1989 1990 if ($digest->{'defined'} != 1) 1991 { 1992 return 0; 1993 } 1994 1995 if (length ($digest->{'crc'}) < 1) 1996 { 1997 return 0; 1998 } 1999 2000 return 1; 2001} 2002 2003sub read_seven_zip_substreams_info 2004{ 2005 my $fp = shift; 2006 2007 my $unpack_info = shift; 2008 2009 my $number_folders = $unpack_info->{'number_folders'}; 2010 my $folders = $unpack_info->{'folders'}; 2011 2012 my $folders_digests = $unpack_info->{'digests'}; 2013 2014 my $substreams_info; 2015 my @number_unpack_streams = (1) x $number_folders; 2016 my @unpack_sizes; 2017 my @digests; 2018 2019 # get the numbers of unpack streams 2020 2021 my $id; 2022 2023 while (1) 2024 { 2025 $id = read_id ($fp); 2026 2027 if ($id eq $SEVEN_ZIP_NUM_UNPACK_STREAM) 2028 { 2029 for (my $i = 0; $i < $number_folders; $i++) 2030 { 2031 $number_unpack_streams[$i] = read_number ($fp); 2032 } 2033 2034 next; 2035 } 2036 elsif ($id eq $SEVEN_ZIP_CRC) 2037 { 2038 last; 2039 } 2040 elsif ($id eq $SEVEN_ZIP_SIZE) 2041 { 2042 last; 2043 } 2044 elsif ($id eq $SEVEN_ZIP_END) 2045 { 2046 last; 2047 } 2048 2049 skip_seven_zip_data ($fp); 2050 } 2051 2052 if ($id eq $SEVEN_ZIP_SIZE) 2053 { 2054 for (my $i = 0; $i < $number_folders; $i++) 2055 { 2056 my $number_substreams = $number_unpack_streams[$i]; 2057 2058 if ($number_substreams == 0) 2059 { 2060 next; 2061 } 2062 2063 my $sum_unpack_sizes = 0; 2064 2065 for (my $j = 1; $j < $number_substreams; $j++) 2066 { 2067 my $size = read_number ($fp); 2068 2069 push (@unpack_sizes, $size); 2070 2071 $sum_unpack_sizes += $size; 2072 } 2073 2074 # add the folder unpack size itself 2075 2076 my $folder_unpack_size = get_folder_unpack_size ($unpack_info, $i); 2077 2078 if ($folder_unpack_size < $sum_unpack_sizes) 2079 { 2080 return undef; 2081 } 2082 2083 my $size = $folder_unpack_size - $sum_unpack_sizes; 2084 2085 push (@unpack_sizes, $size); 2086 } 2087 2088 $id = read_id ($fp); 2089 } 2090 else 2091 { 2092 for (my $i = 0; $i < $number_folders; $i++) 2093 { 2094 my $number_substreams = $number_unpack_streams[$i]; 2095 2096 if ($number_substreams > 1) 2097 { 2098 return undef; 2099 } 2100 2101 if ($number_substreams == 1) 2102 { 2103 push (@unpack_sizes, get_folder_unpack_size ($unpack_info, $i)); 2104 } 2105 } 2106 } 2107 2108 my $number_digests = 0; 2109 2110 for (my $i = 0; $i < $number_folders; $i++) 2111 { 2112 my $number_substreams = $number_unpack_streams[$i]; 2113 2114 if (($number_substreams != 1) || (has_valid_folder_crc ($folders_digests, $i) == 0)) 2115 { 2116 $number_digests += $number_substreams; 2117 } 2118 } 2119 2120 while (1) 2121 { 2122 if ($id eq $SEVEN_ZIP_END) 2123 { 2124 last; 2125 } 2126 elsif ($id eq $SEVEN_ZIP_CRC) 2127 { 2128 my @is_digest_defined = get_boolean_vector_check_all ($fp, $number_digests); 2129 2130 my $k = 0; 2131 my $k2 = 0; 2132 2133 for (my $i = 0; $i < $number_folders; $i++) 2134 { 2135 my $number_substreams = $number_unpack_streams[$i]; 2136 2137 if (($number_substreams == 1) && (has_valid_folder_crc ($folders_digests, $i))) 2138 { 2139 $digests[$k]->{'defined'} = 1; 2140 $digests[$k]->{'crc'} = @$folders_digests[$i]->{'crc'}; 2141 2142 $k++; 2143 } 2144 else 2145 { 2146 for (my $j = 0; $j < $number_substreams; $j++) 2147 { 2148 my $defined = $is_digest_defined[$k2]; 2149 2150 # increase k2 2151 2152 $k2++; 2153 2154 if ($defined == 1) 2155 { 2156 my $digest = 0; 2157 2158 for (my $i = 0; $i < 4; $i++) 2159 { 2160 my $val = my_read ($fp, 1); 2161 2162 $val = ord ($val); 2163 2164 $digest |= ($val << (8 * $i)); 2165 } 2166 2167 $digests[$k]->{'defined'} = 1; 2168 $digests[$k]->{'crc'} = $digest; 2169 } 2170 else 2171 { 2172 $digests[$k]->{'defined'} = 0; 2173 $digests[$k]->{'crc'} = 0; 2174 } 2175 2176 $k++; 2177 } 2178 } 2179 } 2180 } 2181 else 2182 { 2183 skip_seven_zip_data ($fp); 2184 } 2185 2186 $id = read_id ($fp); 2187 } 2188 2189 my $len_defined = scalar (@digests); 2190 my $len_unpack_sizes = scalar (@unpack_sizes); 2191 2192 if ($len_defined != $len_unpack_sizes) 2193 { 2194 my $k = 0; 2195 2196 for (my $i = 0; $i < $number_folders; $i++) 2197 { 2198 my $number_substreams = $number_unpack_streams[$i]; 2199 2200 if (($number_substreams == 1) && (has_valid_folder_crc ($folders_digests, $i))) 2201 { 2202 $digests[$k]->{'defined'} = 1; 2203 $digests[$k]->{'crc'} = @$folders_digests[$i]->{'crc'}; 2204 2205 $k++; 2206 } 2207 else 2208 { 2209 for (my $j = 0; $j < $number_substreams; $j++) 2210 { 2211 $digests[$k]->{'defined'} = 0; 2212 $digests[$k]->{'crc'} = 0; 2213 2214 $k++; 2215 } 2216 } 2217 } 2218 } 2219 2220 $substreams_info = { 2221 "unpack_stream_numbers" => \@number_unpack_streams, 2222 "unpack_sizes" => \@unpack_sizes, 2223 "number_digests" => $number_digests, 2224 "digests" => \@digests 2225 }; 2226 2227 return $substreams_info; 2228} 2229 2230sub read_seven_zip_streams_info 2231{ 2232 my $fp = shift; 2233 2234 my $streams_info; 2235 2236 my $pack_info; 2237 my $unpack_info; 2238 my $substreams_info; 2239 2240 # get the type of streams info (id) 2241 2242 my $id = read_id ($fp); 2243 2244 if ($id eq $SEVEN_ZIP_PACK_INFO) 2245 { 2246 $pack_info = read_seven_zip_pack_info ($fp); 2247 2248 return undef unless (defined ($pack_info)); 2249 2250 $id = read_id ($fp); 2251 } 2252 2253 if ($id eq $SEVEN_ZIP_UNPACK_INFO) 2254 { 2255 $unpack_info = read_seven_zip_unpack_info ($fp); 2256 2257 return undef unless (defined ($unpack_info)); 2258 2259 $id = read_id ($fp); 2260 } 2261 2262 if ($id eq $SEVEN_ZIP_SUBSTREAMS_INFO) 2263 { 2264 $substreams_info = read_seven_zip_substreams_info ($fp, $unpack_info); 2265 2266 return undef unless (defined ($substreams_info)); 2267 2268 $id = read_id ($fp); 2269 } 2270 else 2271 { 2272 my @number_unpack_streams = (); 2273 my @unpack_sizes = (); 2274 my $number_digests = 0; 2275 my $digests; 2276 2277 if (defined ($unpack_info)) 2278 { 2279 my $folders = $unpack_info->{'folders'}; 2280 2281 my $number_folders = $unpack_info->{'number_folders'}; 2282 2283 for (my $i = 0; $i < $number_folders; $i++) 2284 { 2285 $number_unpack_streams[$i] = 1; 2286 2287 my $folder_unpack_size = get_folder_unpack_size ($unpack_info, $i); 2288 2289 push (@unpack_sizes, $folder_unpack_size); 2290 } 2291 } 2292 2293 $substreams_info = { 2294 "unpack_stream_numbers" => \@number_unpack_streams, 2295 "unpack_sizes" => \@unpack_sizes, 2296 "number_digests" => $number_digests, 2297 "digests" => $digests 2298 }; 2299 } 2300 2301 $streams_info = { 2302 "pack_info" => $pack_info, 2303 "unpack_info" => $unpack_info, 2304 "substreams_info" => $substreams_info 2305 }; 2306 2307 return $streams_info; 2308} 2309 2310sub folder_seven_zip_decode 2311{ 2312 my $streams_info = shift; 2313 2314 my $number_coders = 0; 2315 2316 for (my $i = 0; $i < $number_coders; $i++) 2317 { 2318 } 2319 #parse_folder (); 2320 2321 return; 2322} 2323 2324sub read_seven_zip_archive_properties 2325{ 2326 my $fp = shift; 2327 2328 # also the 7-Zip source code just skip data from the archive property entry 2329 2330 while (1) 2331 { 2332 my $id = read_id ($fp); 2333 2334 if ($id eq $SEVEN_ZIP_END) 2335 { 2336 return 1; 2337 } 2338 2339 skip_seven_zip_data ($fp); 2340 } 2341 2342 # something went wrong 2343 2344 return 0; 2345} 2346 2347sub get_uint64_defined_vector 2348{ 2349 my $fp = shift; 2350 2351 my $number_items = shift; 2352 2353 my @values; 2354 2355 # first check if the values are defined 2356 2357 my @defines = get_boolean_vector_check_all ($fp, $number_items); 2358 2359 my $external = my_read ($fp, 1); 2360 2361 if ($external eq $SEVEN_ZIP_EXTERNAL) 2362 { 2363 # ignored for now 2364 } 2365 2366 for (my $i = 0; $i < $number_items; $i++) 2367 { 2368 my $defined = $defines[$i]; 2369 2370 my $value = 0; 2371 2372 if ($defined != 0) 2373 { 2374 $value = get_uint64 ($fp); 2375 } 2376 2377 $values[$i] = $value; 2378 } 2379 2380 return @values; 2381} 2382 2383sub read_seven_zip_files_info 2384{ 2385 my $fp = shift; 2386 2387 my $streams_info = shift; 2388 2389 my $files_info; 2390 2391 my @files; 2392 2393 # NumFiles 2394 2395 my $number_files = read_number ($fp); 2396 2397 # init file 2398 2399 for (my $i = 0; $i < $number_files; $i++) 2400 { 2401 $files[$i]->{'name_utf16'} = ""; 2402 $files[$i]->{'attribute_defined'} = 0; 2403 $files[$i]->{'attribute'} = 0; 2404 $files[$i]->{'is_empty_stream'} = 0; 2405 $files[$i]->{'start_position'} = 0; 2406 $files[$i]->{'creation_time'} = 0; 2407 $files[$i]->{'access_time'} = 0; 2408 $files[$i]->{'modification_time'} = 0; 2409 $files[$i]->{'size'} = 0; 2410 $files[$i]->{'has_stream'} = 0; 2411 $files[$i]->{'is_dir'} = 0; 2412 $files[$i]->{'crc_defined'} = 0; 2413 $files[$i]->{'crc'} = ""; 2414 } 2415 2416 my $number_empty_streams = 0; 2417 2418 my @empty_streams = (0) x $number_files; 2419 my @empty_files = (0) x $number_files; 2420 my @anti_files = (0) x $number_files; 2421 2422 # loop over all properties 2423 2424 while (1) 2425 { 2426 my $property_type_val = read_number ($fp); 2427 2428 my $property_type = num_to_id ($property_type_val); 2429 2430 if ($property_type eq $SEVEN_ZIP_END) 2431 { 2432 last; 2433 } 2434 2435 # Size 2436 2437 my $size = read_number ($fp); 2438 2439 # check and act according to the type of property found 2440 2441 my $is_known_type = 1; 2442 2443 if ($property_type_val > $SEVEN_ZIP_MAX_PROPERTY_TYPE) 2444 { 2445 # ignore (isKnownType false in 7-Zip source code) 2446 2447 my_read ($fp, $size); 2448 } 2449 else 2450 { 2451 if ($property_type eq $SEVEN_ZIP_NAME) 2452 { 2453 my $external = my_read ($fp, 1); 2454 2455 if ($external eq $SEVEN_ZIP_EXTERNAL) 2456 { 2457 # TODO: not implemented yet 2458 2459 return undef; 2460 } 2461 2462 my $files_size = scalar (@files); 2463 2464 for (my $i = 0; $i < $files_size; $i++) 2465 { 2466 my $name = ""; 2467 2468 while (1) 2469 { 2470 my $name_part = my_read ($fp, 2); 2471 2472 if ($name_part eq $SEVEN_ZIP_FILE_NAME_END) 2473 { 2474 last; 2475 } 2476 else 2477 { 2478 $name .= $name_part; 2479 } 2480 } 2481 2482 $files[$i]->{'name_utf16'} = $name; 2483 } 2484 } 2485 elsif ($property_type eq $SEVEN_ZIP_WIN_ATTRIBUTE) 2486 { 2487 my $files_size = scalar (@files); 2488 2489 my @booleans = get_boolean_vector_check_all ($fp, $number_files); 2490 2491 my $external = my_read ($fp, 1); 2492 2493 if ($external eq $SEVEN_ZIP_EXTERNAL) 2494 { 2495 # TODO: not implemented yet 2496 2497 return undef; 2498 } 2499 2500 for (my $i = 0; $i < $number_files; $i++) 2501 { 2502 my $defined = $booleans[$i]; 2503 2504 $files[$i]->{'attribute_defined'} = $defined; 2505 2506 if ($defined) 2507 { 2508 my $attributes = get_uint32 ($fp); 2509 2510 $files[$i]->{'attribute'} = $attributes; 2511 } 2512 } 2513 } 2514 elsif ($property_type eq $SEVEN_ZIP_EMPTY_STREAM) 2515 { 2516 @empty_streams = get_boolean_vector ($fp, $number_files); 2517 2518 $number_empty_streams = 0; 2519 2520 # loop over all boolean and set the files attribute + empty/anti stream vector 2521 2522 my $number_booleans = scalar (@empty_streams); 2523 2524 for (my $i = 0; $i < $number_booleans; $i++) 2525 { 2526 my $boolean = $empty_streams[$i]; 2527 2528 $files[$i]->{'is_empty_stream'} = $boolean; 2529 2530 if ($boolean) 2531 { 2532 $number_empty_streams++; 2533 } 2534 } 2535 2536 for (my $i = 0; $i < $number_empty_streams; $i++) 2537 { 2538 $empty_files[$i] = 0; 2539 $anti_files[$i] = 0; 2540 } 2541 } 2542 elsif ($property_type eq $SEVEN_ZIP_EMPTY_FILE) 2543 { 2544 @empty_files = get_boolean_vector ($fp, $number_empty_streams); 2545 } 2546 elsif ($property_type eq $SEVEN_ZIP_ANTI_FILE) 2547 { 2548 @anti_files = get_boolean_vector ($fp, $number_empty_streams); 2549 } 2550 elsif ($property_type eq $SEVEN_ZIP_START_POS) 2551 { 2552 my @start_positions = get_uint64_defined_vector ($fp, $number_files); 2553 2554 my $number_start_positions = scalar (@start_positions); 2555 2556 for (my $i = 0; $i < $number_start_positions; $i++) 2557 { 2558 $files[$i]->{'start_position'} = $start_positions[$i]; 2559 } 2560 } 2561 elsif ($property_type eq $SEVEN_ZIP_CREATION_TIME) 2562 { 2563 my @creation_times = get_uint64_defined_vector ($fp, $number_files); 2564 2565 my $number_creation_times = scalar (@creation_times); 2566 2567 for (my $i = 0; $i < $number_creation_times; $i++) 2568 { 2569 $files[$i]->{'creation_time'} = $creation_times[$i]; 2570 } 2571 } 2572 elsif ($property_type eq $SEVEN_ZIP_ACCESS_TIME) 2573 { 2574 my @access_times = get_uint64_defined_vector ($fp, $number_files); 2575 2576 my $number_access_times = scalar (@access_times); 2577 2578 for (my $i = 0; $i < $number_access_times; $i++) 2579 { 2580 $files[$i]->{'access_time'} = $access_times[$i]; 2581 } 2582 } 2583 elsif ($property_type eq $SEVEN_ZIP_MODIFICATION_TIME) 2584 { 2585 my @modification_times = get_uint64_defined_vector ($fp, $number_files); 2586 2587 my $number_modification_times = scalar (@modification_times); 2588 2589 for (my $i = 0; $i < $number_modification_times; $i++) 2590 { 2591 $files[$i]->{'modification_time'} = $modification_times[$i]; 2592 } 2593 } 2594 elsif ($property_type eq $SEVEN_ZIP_DUMMY) 2595 { 2596 my $compare_bytes = "\x00" x $size; 2597 2598 my $bytes = my_read ($fp, $size); 2599 2600 if ($bytes ne $compare_bytes) 2601 { 2602 return undef; 2603 } 2604 } 2605 else 2606 { 2607 # ignore (isKnownType also in 7-Zip source code) 2608 2609 my_read ($fp, $size); 2610 } 2611 } 2612 } 2613 2614 # next id should be SEVEN_ZIP_END, but we (and 7-ZIP source code too) do not care 2615 2616 my $id = read_id ($fp); 2617 2618 # check anti files 2619 2620 my $number_anti_items = 0; 2621 2622 for (my $i = 0; $i < $number_empty_streams; $i++) 2623 { 2624 if ($anti_files[$i] != 0) 2625 { 2626 $number_anti_items++; 2627 } 2628 } 2629 2630 # set digests depending on empty/anti files 2631 2632 my $index_sizes = 0; 2633 my $index_empty_files = 0; 2634 2635 my $unpack_info = $streams_info->{'unpack_info'}; 2636 my $substreams_info = $streams_info->{'substreams_info'}; 2637 2638 for (my $i = 0; $i < $number_files; $i++) 2639 { 2640 my $is_anti = 0; 2641 my $has_stream = 1; 2642 2643 if ($empty_streams[$i] == 1) 2644 { 2645 $has_stream = 0; 2646 } 2647 2648 $files[$i]->{'has_stream'} = $has_stream; 2649 $files[$i]->{'crc'} = ""; 2650 2651 if ($has_stream == 1) 2652 { 2653 $is_anti = 0; 2654 2655 $files[$i]->{'is_dir'} = 0; 2656 $files[$i]->{'size'} = $unpack_info->{'unpack_sizes'}[$index_sizes]; 2657 2658 $files[$i]->{'crc_defined'} = 0; 2659 $files[$i]->{'crc'} = ""; 2660 2661 my $is_crc_defined = has_valid_folder_crc ($unpack_info->{'digests'}, $index_sizes); 2662 2663 if ($is_crc_defined == 1) 2664 { 2665 $files[$i]->{'crc_defined'} = 1; 2666 2667 my $crc_item = $unpack_info->{'digests'}[$index_sizes]; 2668 2669 $files[$i]->{'crc'} = $crc_item->{'crc'}; 2670 } 2671 else 2672 { 2673 # can we really do this too? 2674 2675 $is_crc_defined = has_valid_folder_crc ($substreams_info->{'digests'}, $index_sizes); 2676 2677 if ($is_crc_defined == 1) 2678 { 2679 $files[$i]->{'crc_defined'} = 1; 2680 2681 my $crc_item = $substreams_info->{'digests'}[$index_sizes]; 2682 2683 $files[$i]->{'crc'} = $crc_item->{'crc'}; 2684 } 2685 } 2686 2687 $index_sizes++; 2688 } 2689 else 2690 { 2691 my $is_dir = 0; 2692 2693 if ($empty_files[$index_empty_files] == 0) 2694 { 2695 $files[$i]->{'is_dir'} = 1; 2696 } 2697 else 2698 { 2699 $files[$i]->{'is_dir'} = 0; 2700 } 2701 2702 $files[$i]->{'size'} = 0; 2703 2704 $files[$i]->{'crc_defined'} = 0; 2705 $files[$i]->{'crc'} = ""; 2706 2707 $index_empty_files++; 2708 } 2709 } 2710 2711 $files_info = { 2712 "number_files" => $number_files, 2713 "files" => \@files 2714 }; 2715 2716 return $files_info; 2717} 2718 2719sub read_seven_zip_header 2720{ 2721 my $fp = shift; 2722 2723 my $header; 2724 2725 my $additional_streams_info; 2726 my $streams_info; 2727 my $files_info; 2728 2729 # get the type of header 2730 2731 my $id = read_id ($fp); 2732 2733 if ($id eq $SEVEN_ZIP_ARCHIVE_PROPERTIES) 2734 { 2735 # we just ignore the data here (but we need to read it from the stream!) 2736 2737 if (! read_seven_zip_archive_properties ($fp)) 2738 { 2739 return undef; 2740 } 2741 2742 $id = read_id ($fp); 2743 } 2744 2745 if ($id eq $SEVEN_ZIP_ADD_STREAMS_INFO) 2746 { 2747 $additional_streams_info = read_and_decode_seven_zip_packed_stream ($fp); 2748 2749 return undef unless (defined ($additional_streams_info)); 2750 2751 # do we need to change the start position here ? 2752 2753 $id = read_id ($fp); 2754 } 2755 2756 if ($id eq $SEVEN_ZIP_MAIN_STREAMS_INFO) 2757 { 2758 $streams_info = read_seven_zip_streams_info ($fp); 2759 2760 return undef unless (defined ($streams_info)); 2761 2762 $id = read_id ($fp); 2763 } 2764 2765 if ($id eq $SEVEN_ZIP_FILES_INFO) 2766 { 2767 $files_info = read_seven_zip_files_info ($fp, $streams_info); 2768 2769 return undef unless (defined ($files_info)); 2770 } 2771 2772 $header = { 2773 "additional_streams_info" => $additional_streams_info, 2774 "streams_info" => $streams_info, 2775 "files_info" => $files_info, 2776 "type" => "raw" 2777 }; 2778 2779 return $header; 2780} 2781 2782sub read_and_decode_seven_zip_packed_stream 2783{ 2784 my $fp = shift; 2785 2786 my $packed_stream; 2787 2788 $packed_stream = read_seven_zip_streams_info ($fp); 2789 2790 # for each folder, get the decoder and decode the data 2791 2792 return $packed_stream; 2793} 2794 2795sub parse_seven_zip_header 2796{ 2797 my $fp = shift; 2798 2799 my $header; 2800 my $streams_info; 2801 2802 # get the type of the header (id) 2803 2804 my $id = read_id ($fp); 2805 2806 # check if either encoded/packed or encrypted: to get the details we need to check the method 2807 2808 if ($id ne $SEVEN_ZIP_HEADER) 2809 { 2810 if ($id ne $SEVEN_ZIP_ENCODED_HEADER) 2811 { 2812 # when we reach this code section we probably found an invalid 7z file (just ignore it!) 2813 # print STDERR "WARNING: only encoded headers are allowed if no raw header is present\n"; 2814 2815 return undef; 2816 } 2817 2818 $streams_info = read_and_decode_seven_zip_packed_stream ($fp); 2819 2820 return undef unless (defined ($streams_info)); 2821 2822 $header = { 2823 "additional_streams_info" => undef, 2824 "streams_info" => $streams_info, 2825 "files_info" => undef, 2826 "type" => "encoded" 2827 } 2828 2829 # Note: now the 7-Zip code normally parses the header (which we got from the decode operation above) 2830 # but we do not really need to do this here. Skip 2831 } 2832 else 2833 { 2834 $header = read_seven_zip_header ($fp); 2835 } 2836 2837 return $header; 2838} 2839 2840sub read_seven_zip_next_header 2841{ 2842 my $fp = shift; 2843 2844 my $header_size = shift; 2845 my $header_offset = shift; 2846 2847 my $header; 2848 2849 # get the header of size header_size at relative position header_offset 2850 2851 my_seek ($fp, $header_offset, 1); 2852 2853 # read the header 2854 2855 $header = parse_seven_zip_header ($fp); 2856 2857 return $header; 2858} 2859 2860sub read_seven_zip_archive 2861{ 2862 my $fp = shift; 2863 2864 my $archive; 2865 2866 # SignatureHeader 2867 2868 my $signature = read_seven_zip_signature_header ($fp); 2869 2870 return undef unless (defined ($signature)); 2871 2872 # parse the header 2873 2874 my $parsed_header = read_seven_zip_next_header ($fp, $signature->{'next_header_size'}, $signature->{'next_header_offset'}); 2875 2876 return undef unless (defined ($parsed_header)); 2877 2878 $archive = { 2879 "signature_header" => $signature, 2880 "parsed_header" => $parsed_header 2881 }; 2882 2883 return $archive; 2884} 2885 2886sub seven_zip_get_hash 2887{ 2888 my $file_path = shift; 2889 2890 my $hash_buf = ""; 2891 2892 # open file for reading 2893 2894 my $seven_zip_file; 2895 2896 if (! open ($seven_zip_file, "<$file_path")) 2897 { 2898 print STDERR "WARNING: could not open the file '$file_path' for reading\n"; 2899 2900 return $hash_buf; 2901 } 2902 2903 binmode ($seven_zip_file); 2904 2905 # check if valid and supported 7z file 2906 2907 if (! is_supported_seven_zip_file ($seven_zip_file)) 2908 { 2909 return sfx_get_hash ($seven_zip_file, $file_path); 2910 } 2911 2912 my $archive = read_seven_zip_archive ($seven_zip_file); 2913 2914 $hash_buf = extract_hash_from_archive ($seven_zip_file, $archive, $file_path); 2915 2916 # cleanup 2917 2918 close ($seven_zip_file); 2919 2920 return $hash_buf; 2921} 2922 2923# 2924# SFX related helper functions 2925# 2926 2927# The strategy here is as follows: 2928# 1. only use sfx-checks whenever the 7z header is not at start (i.e. if parsing of a "regular" 7z failed) 2929# 2. try to read PE 2930# 3. try to search for $SEVEN_ZIP_MAGIC within the 512 bytes bounderies 2931# 4. try to do a full scan ($SEVEN_ZIP_MAGIC_LEN bytes at a time) 2932 2933# sfx_7z_pe_search () searches for the 7z signature by seeking to the correct offset in the PE file 2934# (e.g. after the PE stub aka the executable part) 2935 2936sub sfx_7z_pe_search 2937{ 2938 my $fp = shift; 2939 2940 my $found = 0; 2941 2942 2943 # 1. DOS header (e_lfanew) 2944 # 2. Portable executable (PE) headers (NumberOfSections) 2945 # 3. Section headers (PointerToRawData + SizeOfRawData) 2946 2947 # we assume that the file is a common/standard PE executable, we will do some checks: 2948 2949 # DOS header 2950 2951 # we should have a MS-DOS MZ executable 2952 2953 my $bytes = my_read ($fp, 2); 2954 2955 return 0 if (length ($bytes) != 2); 2956 return 0 if ($bytes ne "MZ"); # 0x5a4d 2957 return 0 if (length (my_read ($fp, 58)) != 58); 2958 2959 $bytes = my_read ($fp, 4); 2960 2961 return 0 if (length ($bytes) != 4); 2962 2963 my $e_lfanew = unpack ("L", $bytes); 2964 2965 my_seek ($fp, $e_lfanew, 0); 2966 2967 # PE header 2968 2969 $bytes = my_read ($fp, 4); # PE0000 signature after DOS part 2970 2971 return 0 if (length ($bytes) != 4); 2972 return 0 if ($bytes ne "PE\x00\x00"); 2973 return 0 if (length (my_read ($fp, 2)) != 2); # skip FileHeader.Machine 2974 2975 $bytes = my_read ($fp, 2); 2976 2977 return 0 if (length ($bytes) != 2); 2978 2979 my $num_sections = unpack ("S", $bytes); 2980 2981 return 0 if ($num_sections < 1); 2982 2983 return 0 if (length (my_read ($fp, 16)) != 16); # skip rest of FileHeader 2984 return 0 if (length (my_read ($fp, 224)) != 224); # skip OptionalHeader 2985 2986 my $section_farthest = 0; 2987 my $pos_after_farthest_section = 0; 2988 2989 for (my $i = 0; $i < $num_sections; $i++) 2990 { 2991 # we loop through all the section headers 2992 2993 #my $name = my_read ($fp, 8); return 0 if (length (my_read ($fp, 8)) != 8); 2994 return 0 if (length (my_read ($fp, 16)) != 16); # skip Name, Misc, VirtualAddress, SizeOfRawData 2995 2996 # SizeOfRawData 2997 2998 $bytes = my_read ($fp, 4); 2999 3000 return 0 if (length ($bytes) != 4); 3001 3002 my $size_of_raw_data = unpack ("L", $bytes); 3003 3004 # PointerToRawData 3005 3006 $bytes = my_read ($fp, 4); 3007 3008 return 0 if (length ($bytes) != 4); 3009 3010 my $pointer_to_raw_data = unpack ("L", $bytes); 3011 3012 # the sections are not quaranteed to be ordered (=> compare all of them!) 3013 3014 if ($pointer_to_raw_data > $section_farthest) 3015 { 3016 $section_farthest = $pointer_to_raw_data; 3017 3018 $pos_after_farthest_section = $pointer_to_raw_data + $size_of_raw_data; 3019 } 3020 3021 3022 # loop to next SectionTable entry 3023 3024 return 0 if (length (my_read ($fp, 16)) != 16); # skip rest of SectionHeader 3025 } 3026 3027 # check if 7z signature found (after stub) 3028 3029 my_seek ($fp, $pos_after_farthest_section, 0); 3030 3031 $bytes = my_read ($fp, $SEVEN_ZIP_MAGIC_LEN); 3032 3033 return 0 if (length ($bytes) != $SEVEN_ZIP_MAGIC_LEN); 3034 3035 if ($bytes eq $SEVEN_ZIP_MAGIC) 3036 { 3037 $found = 1; 3038 } 3039 3040 return $found; 3041} 3042 3043# sfx_7z_512_search () searches for the 7z signature by only looking at every 512 byte boundery 3044 3045sub sfx_7z_512_search 3046{ 3047 my $fp = shift; 3048 3049 my $found = 0; 3050 3051 3052 my $seek_skip = 512 - $SEVEN_ZIP_MAGIC_LEN; 3053 3054 my $bytes = my_read ($fp, $SEVEN_ZIP_MAGIC_LEN); 3055 3056 my $len_bytes = length ($bytes); 3057 3058 while ($len_bytes == $SEVEN_ZIP_MAGIC_LEN) 3059 { 3060 if ($bytes eq $SEVEN_ZIP_MAGIC) 3061 { 3062 $found = 1; 3063 3064 last; 3065 } 3066 3067 my_seek ($fp, $seek_skip, 1); 3068 3069 $bytes = my_read ($fp, $SEVEN_ZIP_MAGIC_LEN); 3070 3071 $len_bytes = length ($bytes); 3072 } 3073 3074 return $found; 3075} 3076 3077# sfx_7z_full_search () searches for the 7z signature by looking at every byte in the file 3078# (this type of search should only be performed if no other variant worked) 3079 3080sub sfx_7z_full_search 3081{ 3082 my $fp = shift; 3083 3084 my $found = 0; 3085 3086 my $idx_into_magic = 0; 3087 my $prev_idx_into_magic = 0; 3088 3089 my $len_bytes = $SEVEN_ZIP_MAGIC_LEN; 3090 3091 while ($len_bytes == $SEVEN_ZIP_MAGIC_LEN) 3092 { 3093 my $bytes = my_read ($fp, $SEVEN_ZIP_MAGIC_LEN); 3094 3095 last if (length ($bytes) == 0); 3096 3097 $prev_idx_into_magic = $idx_into_magic; 3098 3099 if ($bytes eq $SEVEN_ZIP_MAGIC) 3100 { 3101 $found = 1; 3102 3103 last; 3104 } 3105 3106 for (my $i = 0; $i < length ($bytes); $i++) 3107 { 3108 my $c = substr ($bytes, $i, 1); 3109 3110 if ($c ne substr ($SEVEN_ZIP_MAGIC, $idx_into_magic, 1)) 3111 { 3112 $idx_into_magic = 0; #reset 3113 } 3114 else 3115 { 3116 $idx_into_magic++; 3117 3118 if ($idx_into_magic == $SEVEN_ZIP_MAGIC_LEN) 3119 { 3120 $found = 1; 3121 3122 last; 3123 } 3124 } 3125 } 3126 3127 last if ($found == 1); 3128 3129 $len_bytes = length ($bytes); 3130 } 3131 3132 return ($found, $prev_idx_into_magic); 3133} 3134 3135sub sfx_get_hash 3136{ 3137 my $fp = shift; 3138 my $file_path = shift; 3139 3140 my $hash_buf = ""; 3141 3142 3143 my %db_positions_analysed = (); # holds a list of offsets that we already tried to parse 3144 3145 # we make the assumption that there is max one .7z file within the .sfx! 3146 3147 # Variant 1 (PE file structure parsing) 3148 3149 my_seek ($fp, 0, 0); 3150 3151 if (sfx_7z_pe_search ($fp)) 3152 { 3153 my $cur_pos = my_tell ($fp); 3154 3155 $db_positions_analysed{$cur_pos} = 1; # mark it as analyzed 3156 3157 my $archive = read_seven_zip_archive ($fp); 3158 3159 $hash_buf = extract_hash_from_archive ($fp, $archive, $file_path); 3160 3161 if (defined ($hash_buf)) 3162 { 3163 if (length ($hash_buf) > 0) 3164 { 3165 return $hash_buf; 3166 } 3167 } 3168 } 3169 3170 # Variant 2 (search only at the 512 bytes bounderies) 3171 3172 my_seek ($fp, 512, 0); 3173 3174 while (sfx_7z_512_search ($fp) != 0) 3175 { 3176 my $cur_pos = my_tell ($fp); 3177 3178 if (! exists ($db_positions_analysed{$cur_pos})) 3179 { 3180 $db_positions_analysed{$cur_pos} = 1; # mark it as analyzed 3181 3182 my $archive = read_seven_zip_archive ($fp); 3183 3184 $hash_buf = extract_hash_from_archive ($fp, $archive, $file_path); 3185 3186 if (defined ($hash_buf)) 3187 { 3188 if (length ($hash_buf) > 0) 3189 { 3190 return $hash_buf; 3191 } 3192 } 3193 } 3194 3195 last if (my_seek ($fp, $cur_pos + 512 - $SEVEN_ZIP_MAGIC_LEN, 0) != 1); 3196 } 3197 3198 # Variant 3 (full search - worst case - shouldn't happen at all with a standard .sfx) 3199 3200 my_seek ($fp, 0, 2); 3201 3202 my $file_size = my_tell ($fp); 3203 3204 if ($file_size > 8 * 1024 * 1024) # let's say that 8 MiB is already a huge file 3205 { 3206 print STDERR "WARNING: searching for the 7z signature in a $file_size bytes long file ('"; 3207 print STDERR $file_path . "') might take some time\n"; 3208 } 3209 3210 my_seek ($fp, 1, 0); # we do no that the signature is not at position 0, so we start at 1 3211 3212 my ($full_search_found, $full_search_idx) = sfx_7z_full_search ($fp); 3213 3214 while ($full_search_found != 0) 3215 { 3216 my $cur_pos = my_tell ($fp); 3217 3218 $cur_pos -= $full_search_idx; 3219 3220 my_seek ($fp, $cur_pos, 0); # we might not be there yet (depends if $full_search_idx != 0) 3221 3222 if (! exists ($db_positions_analysed{$cur_pos})) 3223 { 3224 # we can skip the database updates because it's our last try to find the 7z file 3225 # $db_positions_analysed{$cur_pos} = 1; 3226 3227 my $archive = read_seven_zip_archive ($fp); 3228 3229 $hash_buf = extract_hash_from_archive ($fp, $archive, $file_path); 3230 3231 if (defined ($hash_buf)) 3232 { 3233 if (length ($hash_buf) > 0) 3234 { 3235 return $hash_buf; 3236 } 3237 } 3238 } 3239 3240 my_seek ($fp, $cur_pos, 0); # seek back to position JUST AFTER the previously found signature 3241 3242 ($full_search_found, $full_search_idx) = sfx_7z_full_search ($fp); 3243 } 3244 3245 # in theory if we reach this code section we already know that parsing the file failed (but let's confirm it) 3246 3247 my $sfx_successfully_parsed = 0; 3248 3249 if (defined ($hash_buf)) 3250 { 3251 if (length ($hash_buf) > 0) 3252 { 3253 $sfx_successfully_parsed = 1; 3254 } 3255 } 3256 3257 if ($sfx_successfully_parsed == 0) 3258 { 3259 print STDERR "WARNING: the file '$file_path' is neither a supported 7-Zip file nor a supported SFX file\n"; 3260 } 3261 3262 # cleanup 3263 3264 close ($fp); 3265 3266 return $hash_buf; 3267} 3268 3269sub globbing_on_windows 3270{ 3271 my @file_list = @_; 3272 3273 my $os = $^O; 3274 3275 if (($os eq "MSWin32") || ($os eq "Win32")) 3276 { 3277 my $windows_globbing_module = "File::Glob"; 3278 my $windows_globbing = "bsd_glob"; 3279 3280 if (eval "require $windows_globbing_module") 3281 { 3282 no strict 'refs'; 3283 3284 $windows_globbing_module->import ($windows_globbing); 3285 3286 my @new_file_list = (); 3287 3288 foreach my $item (@file_list) 3289 { 3290 push (@new_file_list, $windows_globbing-> ($item)); 3291 } 3292 3293 @file_list = @new_file_list; 3294 } 3295 } 3296 3297 return @file_list; 3298} 3299 3300sub get_splitted_archive_raw_name 3301{ 3302 my $full_name = shift; 3303 3304 my $name_idx = rindex ($full_name, "."); 3305 3306 my $name = substr ($full_name, 0, $name_idx); 3307 3308 return $name; 3309} 3310 3311sub get_ordered_splitted_file_list 3312{ 3313 my @files = @_; 3314 3315 return () unless (scalar (@files) > 0); # never the case (already checked) 3316 3317 my $failed = 0; 3318 my $num_probably_splitted_files = 0; 3319 3320 my $file_prefix = ""; 3321 my $extension_length = 0; 3322 3323 foreach my $file_name (@files) 3324 { 3325 my $idx_extension = rindex ($file_name, "."); 3326 3327 if ($idx_extension == -1) 3328 { 3329 $failed = 1; 3330 last; 3331 } 3332 3333 my $prefix = substr ($file_name, 0, $idx_extension); 3334 my $extension = substr ($file_name, $idx_extension + 1); 3335 3336 if (length ($prefix) == 0) 3337 { 3338 $failed = 1; 3339 last; 3340 } 3341 3342 # detect change in file prefix (the actual "name") 3343 3344 if (length ($file_prefix) == 0) #init 3345 { 3346 $file_prefix = $prefix; 3347 } 3348 3349 if ($prefix ne $file_prefix) 3350 { 3351 $failed = 1; 3352 last; 3353 } 3354 3355 # check extensions (should be numbers only) 3356 3357 if ($extension !~ /^[0-9]*$/) 3358 { 3359 $failed = 1; 3360 last; 3361 } 3362 3363 if ($extension_length == 0) # init 3364 { 3365 $extension_length = length ($extension); 3366 } 3367 3368 if (length ($extension) != $extension_length) 3369 { 3370 $failed = 1; 3371 last; 3372 } 3373 3374 $num_probably_splitted_files++; 3375 } 3376 3377 return () unless (length ($file_prefix) > 0); 3378 return () unless ($extension_length > 0); 3379 3380 if ($failed == 1) 3381 { 3382 if ($num_probably_splitted_files > 1) 3383 { 3384 print STDERR "WARNING: it seems that some files could be part of a splitted 7z archive named '$file_prefix'\n"; 3385 print STDERR "ATTENTION: make sure to only specify the files belonging to the splitted archive (do not combine them with other archives)\n"; 3386 } 3387 3388 return (); 3389 } 3390 3391 # sort the list and check if there is no missing file 3392 # (at this point in time we can't verify if the last file is really the last one) 3393 3394 my @sorted_file_list = sort (@files); 3395 3396 my $max = scalar (@sorted_file_list); 3397 3398 return () if ($max != scalar (@files)); 3399 3400 for (my $count = 0; $count < $max; $count++) 3401 { 3402 my $current_extension = sprintf ("%0${extension_length}i", $count + 1); # the splits start with .001, .002, ... 3403 3404 return () if ($sorted_file_list[$count] ne "$file_prefix.$current_extension"); 3405 } 3406 3407 return @sorted_file_list; 3408} 3409 3410sub get_file_sizes_list 3411{ 3412 my @files = @_; 3413 3414 my %files_with_sizes = (); 3415 3416 my $accumulated_size = 0; 3417 3418 for (my $count = 0; $count < scalar (@files); $count++) 3419 { 3420 my $file = $files[$count]; 3421 3422 my @file_stat = stat ($file); 3423 3424 if (scalar (@file_stat) < 1) 3425 { 3426 print STDERR "ERROR: could not get the file size of the file '$file'\n"; 3427 3428 exit (1); 3429 } 3430 3431 $files_with_sizes{0}{'fh'} = undef; # the file handle 3432 $files_with_sizes{0}{'num'} = 0; 3433 3434 $files_with_sizes{$count + 1}{'name'} = $file; 3435 $files_with_sizes{$count + 1}{'size'} = $file_stat[7]; 3436 $files_with_sizes{$count + 1}{'start'} = $accumulated_size; 3437 3438 $accumulated_size += $file_stat[7]; 3439 } 3440 3441 return %files_with_sizes; 3442} 3443 3444sub splitted_seven_zip_open 3445{ 3446 my @files = @_; 3447 3448 my @sorted_file_list = get_ordered_splitted_file_list (@files); 3449 3450 return 0 if (scalar (@sorted_file_list) < 1); 3451 3452 my %file_list_with_sizes = get_file_sizes_list (@sorted_file_list); 3453 3454 # start to parse the file list 3455 3456 $memory_buffer_read_offset = 0; # just to be safe 3457 3458 my $first_splitted_file = $file_list_with_sizes{1}{'name'}; 3459 3460 my $hash_buf = ""; 3461 3462 # open file for reading 3463 3464 my $seven_zip_file; 3465 3466 if (! open ($seven_zip_file, "<$first_splitted_file")) 3467 { 3468 print STDERR "ERROR: could not open the the splitted archive file '$first_splitted_file' for reading\n"; 3469 3470 exit (1); 3471 } 3472 3473 binmode ($seven_zip_file); 3474 3475 $file_list_with_sizes{0}{'fh'} = $seven_zip_file; 3476 $file_list_with_sizes{0}{'num'} = 1; # meaning is: "first file" 3477 3478 # check if valid and supported 7z file 3479 3480 if (! is_supported_seven_zip_file (\%file_list_with_sizes)) 3481 { 3482 print STDERR "ERROR: the splitted archive file '$first_splitted_file' is not a valid 7z file\n"; 3483 3484 exit (1); 3485 } 3486 3487 my $archive = read_seven_zip_archive (\%file_list_with_sizes); 3488 3489 $hash_buf = extract_hash_from_archive (\%file_list_with_sizes, $archive, $first_splitted_file); 3490 3491 # cleanup 3492 3493 close ($seven_zip_file); 3494 3495 if (defined ($hash_buf)) 3496 { 3497 if (length ($hash_buf) > 0) 3498 { 3499 print $hash_buf . "\n"; 3500 } 3501 } 3502 3503 return 1; 3504} 3505 3506# 3507# Start 3508# 3509 3510if (scalar (@ARGV) lt 1) 3511{ 3512 usage ($0); 3513 3514 exit (1); 3515} 3516 3517my @file_list = globbing_on_windows (@ARGV); 3518 3519# try to handle this special case: splitted .7z files (.7z.001, .7z.002, .7z.003, ...) 3520# ATTENTION: there is one restriction here: splitted archives shouldn't be combined with other 3521# splitted or non-splitted archives 3522 3523my $was_splitted = splitted_seven_zip_open (@file_list); 3524 3525if ($was_splitted == 1) 3526{ 3527 exit (0); 3528} 3529 3530# "non-splitted" file list: 3531 3532foreach my $file_name (@file_list) 3533{ 3534 if (! -e $file_name) 3535 { 3536 print STDERR "WARNING: could not open file '$file_name'\n"; 3537 3538 next; 3539 } 3540 3541 my $hash_buf = seven_zip_get_hash ($file_name); 3542 3543 next unless (defined ($hash_buf)); 3544 next unless (length ($hash_buf) > 0); 3545 3546 print $hash_buf . "\n"; 3547} 3548 3549exit (0); 3550