1#!/usr/bin/perl 2 3# zipdetails 4# 5# Display info on the contents of a Zip file 6# 7 8use 5.010; # for unpack "Q<" 9 10my $NESTING_DEBUG = 0 ; 11 12BEGIN { 13 # Check for a 32-bit Perl 14 if (!eval { pack "Q", 1 }) { 15 warn "zipdetails requires 64 bit integers, ", 16 "this Perl has 32 bit integers.\n"; 17 exit(1); 18 } 19} 20 21BEGIN { pop @INC if $INC[-1] eq '.' } 22use strict; 23use warnings ; 24no warnings 'portable'; # for unpacking > 2^32 25use feature qw(state say); 26 27use IO::File; 28use Encode; 29use Getopt::Long; 30use List::Util qw(min max); 31 32my $VERSION = '4.004' ; 33 34sub fatal_tryWalk; 35sub fatal_truncated ; 36sub info ; 37sub warning ; 38sub error ; 39sub debug ; 40sub fatal ; 41sub topLevelFatal ; 42sub internalFatal; 43sub need ; 44sub decimalHex; 45 46use constant MAX64 => 0xFFFFFFFFFFFFFFFF ; 47use constant MAX32 => 0xFFFFFFFF ; 48use constant MAX16 => 0xFFFF ; 49 50# Compression types 51use constant ZIP_CM_STORE => 0 ; 52use constant ZIP_CM_IMPLODE => 6 ; 53use constant ZIP_CM_DEFLATE => 8 ; 54use constant ZIP_CM_BZIP2 => 12 ; 55use constant ZIP_CM_LZMA => 14 ; 56use constant ZIP_CM_PPMD => 98 ; 57 58# General Purpose Flag 59use constant ZIP_GP_FLAG_ENCRYPTED_MASK => (1 << 0) ; 60use constant ZIP_GP_FLAG_STREAMING_MASK => (1 << 3) ; 61use constant ZIP_GP_FLAG_PATCHED_MASK => (1 << 5) ; 62use constant ZIP_GP_FLAG_STRONG_ENCRYPTED_MASK => (1 << 6) ; 63use constant ZIP_GP_FLAG_LZMA_EOS_PRESENT => (1 << 1) ; 64use constant ZIP_GP_FLAG_LANGUAGE_ENCODING => (1 << 11) ; 65use constant ZIP_GP_FLAG_PKWARE_ENHANCED_COMP => (1 << 12) ; 66use constant ZIP_GP_FLAG_ENCRYPTED_CD => (1 << 13) ; 67 68# All the encryption flags 69use constant ZIP_GP_FLAG_ALL_ENCRYPT => (ZIP_GP_FLAG_ENCRYPTED_MASK | ZIP_GP_FLAG_STRONG_ENCRYPTED_MASK | ZIP_GP_FLAG_ENCRYPTED_CD ); 70 71# Internal File Attributes 72use constant ZIP_IFA_TEXT_MASK => 1; 73 74# Signatures for each of the headers 75use constant ZIP_LOCAL_HDR_SIG => 0x04034b50; 76use constant ZIP_DATA_HDR_SIG => 0x08074b50; 77use constant ZIP_CENTRAL_HDR_SIG => 0x02014b50; 78use constant ZIP_END_CENTRAL_HDR_SIG => 0x06054b50; 79use constant ZIP64_END_CENTRAL_REC_HDR_SIG => 0x06064b50; 80use constant ZIP64_END_CENTRAL_LOC_HDR_SIG => 0x07064b50; 81use constant ZIP_DIGITAL_SIGNATURE_SIG => 0x05054b50; 82use constant ZIP_ARCHIVE_EXTRA_DATA_RECORD_SIG => 0x08064b50; 83use constant ZIP_SINGLE_SEGMENT_MARKER => 0x30304b50; # APPNOTE 6.3.10, sec 8.5.4 84 85# Extra sizes 86use constant ZIP_EXTRA_HEADER_SIZE => 2 ; 87use constant ZIP_EXTRA_MAX_SIZE => 0xFFFF ; 88use constant ZIP_EXTRA_SUBFIELD_ID_SIZE => 2 ; 89use constant ZIP_EXTRA_SUBFIELD_LEN_SIZE => 2 ; 90use constant ZIP_EXTRA_SUBFIELD_HEADER_SIZE => ZIP_EXTRA_SUBFIELD_ID_SIZE + 91 ZIP_EXTRA_SUBFIELD_LEN_SIZE; 92use constant ZIP_EXTRA_SUBFIELD_MAX_SIZE => ZIP_EXTRA_MAX_SIZE - 93 ZIP_EXTRA_SUBFIELD_HEADER_SIZE; 94 95use constant ZIP_EOCD_MIN_SIZE => 22 ; 96 97 98use constant ZIP_LD_FILENAME_OFFSET => 30; 99use constant ZIP_CD_FILENAME_OFFSET => 46; 100 101my %ZIP_CompressionMethods = 102 ( 103 0 => 'Stored', 104 1 => 'Shrunk', 105 2 => 'Reduced compression factor 1', 106 3 => 'Reduced compression factor 2', 107 4 => 'Reduced compression factor 3', 108 5 => 'Reduced compression factor 4', 109 6 => 'Imploded', 110 7 => 'Reserved for Tokenizing compression algorithm', 111 8 => 'Deflated', 112 9 => 'Deflate64', 113 10 => 'PKWARE Data Compression Library Imploding', 114 11 => 'Reserved by PKWARE', 115 12 => 'BZIP2', 116 13 => 'Reserved by PKWARE', 117 14 => 'LZMA', 118 15 => 'Reserved by PKWARE', 119 16 => 'IBM z/OS CMPSC Compression', 120 17 => 'Reserved by PKWARE', 121 18 => 'IBM/TERSE or Xceed BWT', # APPNOTE has IBM/TERSE. Xceed reuses it unofficially 122 19 => 'IBM LZ77 z Architecture (PFS)', 123 20 => 'Ipaq8', # see https://encode.su/threads/1048-info-zip-lpaq8 124 92 => 'Reference', # Winzip Only from version 25 125 93 => 'Zstandard', 126 94 => 'MP3', 127 95 => 'XZ', 128 96 => 'WinZip JPEG Compression', 129 97 => 'WavPack compressed data', 130 98 => 'PPMd version I, Rev 1', 131 99 => 'AES Encryption', # Apple also use this code for LZFSE compression in IPA files 132 ); 133 134my %OS_Lookup = ( 135 0 => "MS-DOS", 136 1 => "Amiga", 137 2 => "OpenVMS", 138 3 => "Unix", 139 4 => "VM/CMS", 140 5 => "Atari ST", 141 6 => "HPFS (OS/2, NT 3.x)", 142 7 => "Macintosh", 143 8 => "Z-System", 144 9 => "CP/M", 145 10 => "Windows NTFS or TOPS-20", 146 11 => "MVS or NTFS", 147 12 => "VSE or SMS/QDOS", 148 13 => "Acorn RISC OS", 149 14 => "VFAT", 150 15 => "alternate MVS", 151 16 => "BeOS", 152 17 => "Tandem", 153 18 => "OS/400", 154 19 => "OS/X (Darwin)", 155 30 => "AtheOS/Syllable", 156 ); 157 158{ 159 package Signatures ; 160 161 my %Lookup = ( 162 # Map unpacked signature to 163 # decoder 164 # name 165 # central flag 166 167 # Core Signatures 168 ::ZIP_LOCAL_HDR_SIG, [ \&::LocalHeader, "Local File Header", 0 ], 169 ::ZIP_DATA_HDR_SIG, [ \&::DataDescriptor, "Data Descriptor", 0 ], 170 ::ZIP_CENTRAL_HDR_SIG, [ \&::CentralHeader, "Central Directory Header", 1 ], 171 ::ZIP_END_CENTRAL_HDR_SIG, [ \&::EndCentralHeader, "End Central Directory Record", 1 ], 172 ::ZIP_SINGLE_SEGMENT_MARKER, [ \&::SingleSegmentMarker, "Split Archive Single Segment Marker", 0], 173 174 # Zip64 175 ::ZIP64_END_CENTRAL_REC_HDR_SIG, [ \&::Zip64EndCentralHeader, "Zip64 End of Central Directory Record", 1 ], 176 ::ZIP64_END_CENTRAL_LOC_HDR_SIG, [ \&::Zip64EndCentralLocator, "Zip64 End of Central Directory Locator", 1 ], 177 178 # Digital signature (pkzip) 179 ::ZIP_DIGITAL_SIGNATURE_SIG, [ \&::DigitalSignature, "Digital Signature", 1 ], 180 181 # Archive Encryption Headers (pkzip) - never seen this one 182 ::ZIP_ARCHIVE_EXTRA_DATA_RECORD_SIG, [ \&::ArchiveExtraDataRecord, "Archive Extra Record", 1 ], 183 ); 184 185 sub decoder 186 { 187 my $signature = shift ; 188 189 return undef 190 unless exists $Lookup{$signature}; 191 192 return $Lookup{$signature}[0]; 193 } 194 195 sub name 196 { 197 my $signature = shift ; 198 199 return 'UNKNOWN' 200 unless exists $Lookup{$signature}; 201 202 return $Lookup{$signature}[1]; 203 } 204 205 sub titleName 206 { 207 my $signature = shift ; 208 209 uc name($signature); 210 } 211 212 sub hexValue 213 { 214 my $signature = shift ; 215 sprintf "0x%X", $signature ; 216 } 217 218 sub hexValue32 219 { 220 my $signature = shift ; 221 sprintf "0x%08X", $signature ; 222 } 223 224 sub hexValue16 225 { 226 my $signature = shift ; 227 sprintf "0x%04X", $signature ; 228 } 229 230 sub nameAndHex 231 { 232 my $signature = shift ; 233 234 return "'" . name($signature) . "' (" . hexValue32($signature) . ")" 235 } 236 237 sub isCentralHeader 238 { 239 my $signature = shift ; 240 241 return undef 242 unless exists $Lookup{$signature}; 243 244 return $Lookup{$signature}[2]; 245 } 246 #sub isValidSignature 247 #{ 248 # my $signature = shift ; 249 # return exists $Lookup{$signature}} 250 #} 251 252 sub getSigsForScan 253 { 254 my %sigs = 255 # map { $_ => 1 } 256 # map { substr($_->[0], 2, 2) => $_->[1] } # don't want the initial "PK" 257 map { substr(pack("V", $_), 2, 2) => $_ } 258 keys %Lookup ; 259 260 return %sigs; 261 } 262 263} 264 265my %Extras = ( 266 267 # Local Central 268 # ID Name Handler min size max size min size max size 269 0x0001, ['ZIP64', \&decode_Zip64, 0, 28, 0, 28], 270 0x0007, ['AV Info', undef], # TODO 271 0x0008, ['Extended Language Encoding', undef], # TODO 272 0x0009, ['OS/2 extended attributes', undef], # TODO 273 0x000a, ['NTFS FileTimes', \&decode_NTFS_Filetimes, 32, 32, 32, 32], 274 0x000c, ['OpenVMS', \&decode_OpenVMS, 4, undef, 4, undef], 275 0x000d, ['Unix', undef], 276 0x000e, ['Stream & Fork Descriptors', undef], # TODO 277 0x000f, ['Patch Descriptor', undef], 278 0x0014, ['PKCS#7 Store for X.509 Certificates', undef], 279 0x0015, ['X.509 Certificate ID and Signature for individual file', undef], 280 0x0016, ['X.509 Certificate ID for Central Directory', undef], 281 0x0017, ['Strong Encryption Header', \&decode_strong_encryption, 12, undef, 12, undef], 282 0x0018, ['Record Management Controls', undef], 283 0x0019, ['PKCS#7 Encryption Recipient Certificate List', undef], 284 0x0020, ['Reserved for Timestamp record', undef], 285 0x0021, ['Policy Decryption Key Record', undef], 286 0x0022, ['Smartcrypt Key Provider Record', undef], 287 0x0023, ['Smartcrypt Policy Key Data Record', undef], 288 289 # The Header ID mappings defined by Info-ZIP and third parties are: 290 291 0x0065, ['IBM S/390 attributes - uncompressed', \&decode_MVS, 4, undef, 4, undef], 292 0x0066, ['IBM S/390 attributes - compressed', undef], 293 0x07c8, ['Info-ZIP Macintosh (old, J. Lee)', undef], 294 0x10c5, ['Minizip CMS Signature', \&decode_Minizip_Signature, undef, undef, undef, undef], # https://github.com/zlib-ng/minizip-ng/blob/master/doc/mz_extrafield.md 295 0x1986, ['Pixar USD', undef], # TODO 296 0x1a51, ['Minizip Hash', \&decode_Minizip_Hash, 4, undef, 4, undef], # https://github.com/zlib-ng/minizip-ng/blob/master/doc/mz_extrafield.md 297 0x2605, ['ZipIt Macintosh (first version)', undef], 298 0x2705, ['ZipIt Macintosh v 1.3.5 and newer (w/o full filename)', undef], 299 0x2805, ['ZipIt Macintosh v 1.3.5 and newer', undef], 300 0x334d, ["Info-ZIP Macintosh (new, D. Haase's 'Mac3' field)", undef], # TODO 301 0x4154, ['Tandem NSK [TA]', undef], # TODO 302 0x4341, ['Acorn/SparkFS [AC]', undef], # TODO 303 0x4453, ['Windows NT security descriptor [SD]', \&decode_NT_security, 11, undef, 4, 4], # TODO 304 0x4690, ['POSZIP 4690', undef], 305 0x4704, ['VM/CMS', undef], 306 0x470f, ['MVS', undef], 307 0x4854, ['Theos [TH]', undef], 308 0x4b46, ['FWKCS MD5 [FK]', undef], 309 0x4c41, ['OS/2 access control list [AL]', undef], 310 0x4d49, ['Info-ZIP OpenVMS (obsolete) [IM]', undef], 311 0x4d63, ['Macintosh SmartZIP [cM]', undef], # TODO 312 0x4f4c, ['Xceed original location [LO]', undef], 313 0x5356, ['AOS/VS (binary ACL) [VS]', undef], 314 0x5455, ['Extended Timestamp [UT]', \&decode_UT, 1, 13, 1, 13], 315 0x554e, ['Xceed unicode extra field [UN]', \&decode_Xceed_unicode, 6, undef, 8, undef], 316 0x564B, ['Key-Value Pairs [KV]', \&decode_Key_Value_Pair, 13, undef, 13, undef],# TODO -- https://github.com/sozip/keyvaluepairs-spec/blob/master/zip_keyvalue_extra_field_specification.md 317 0x5855, ['Unix Extra type 1 [UX]', \&decode_UX, 12, 12, 8, 8], 318 0x5a4c, ['ZipArchive Unicode Filename [LZ]', undef], # https://www.artpol-software.com/ZipArchive 319 0x5a4d, ['ZipArchive Offsets Array [MZ]', undef], # https://www.artpol-software.com/ZipArchive 320 0x6375, ['Unicode Comment [uc]', \&decode_uc, 5, undef, 5, undef], 321 0x6542, ['BeOS/Haiku [Be]', undef], # TODO 322 0x6854, ['Theos [Th]', undef], 323 0x7075, ['Unicode Path [up]', \&decode_up, 5, undef, 5, undef], 324 0x756e, ['ASi Unix [un]', \&decode_ASi_Unix], # TODO 325 0x7441, ['AtheOS [At]', undef], 326 0x7855, ['Unix Extra type 2 [Ux]', \&decode_Ux, 4,4, 0, 0 ], 327 0x7875, ['Unix Extra type 3 [ux]', \&decode_ux, 3, undef, 3, undef], 328 0x9901, ['AES Encryption', \&decode_AES, 7, 7, 7, 7], 329 0x9903, ['Reference', \&decode_Reference, 20, 20, 20, 20], # Added in WinZip ver 25 330 0xa11e, ['Data Stream Alignment', \&decode_DataStreamAlignment, 2, undef, 2, undef ], 331 0xA220, ['Open Packaging Growth Hint', \&decode_GrowthHint, 4, undef, 4, undef ], 332 0xCAFE, ['Java Executable', \&decode_Java_exe, 0, 0, 0, 0], 333 0xCDCD, ['Minizip Central Directory', \&decode_Minizip_CD, 8, 8, 8, 8], # https://github.com/zlib-ng/minizip-ng/blob/master/doc/mz_extrafield.md 334 0xd935, ['Android APK Alignment', undef], # TODO 335 0xE57a, ['ALZip Codepage', undef], # TODO 336 0xfb4a, ['SMS/QDOS', undef], # TODO 337 ); 338 339 # Dummy entry only used in test harness, so only enable when ZIPDETAILS_TESTHARNESS is set 340 $Extras{0xFFFF} = 341 ['DUMMY', \&decode_DUMMY, undef, undef, undef, undef] 342 if $ENV{ZIPDETAILS_TESTHARNESS} ; 343 344sub extraFieldIdentifier 345{ 346 my $id = shift ; 347 348 my $name = $Extras{$id}[0] // "Unknown"; 349 350 return "Extra Field '$name' (ID " . hexValue16($id) .")"; 351} 352 353# Zip64EndCentralHeader version 2 354my %HashIDLookup = ( 355 0x0000 => 'none', 356 0x0001 => 'CRC32', 357 0x8003 => 'MD5', 358 0x8004 => 'SHA1', 359 0x8007 => 'RIPEMD160', 360 0x800C => 'SHA256', 361 0x800D => 'SHA384', 362 0x800E => 'SHA512', 363 ); 364 365 366# Zip64EndCentralHeader version 2, Strong Encryption Header & DecryptionHeader 367my %AlgIdLookup = ( 368 0x6601 => "DES", 369 0x6602 => "RC2 (version needed to extract < 5.2)", 370 0x6603 => "3DES 168", 371 0x6609 => "3DES 112", 372 0x660E => "AES 128", 373 0x660F => "AES 192", 374 0x6610 => "AES 256", 375 0x6702 => "RC2 (version needed to extract >= 5.2)", 376 0x6720 => "Blowfish", 377 0x6721 => "Twofish", 378 0x6801 => "RC4", 379 0xFFFF => "Unknown algorithm", 380 ); 381 382# Zip64EndCentralHeader version 2, Strong Encryption Header & DecryptionHeader 383my %FlagsLookup = ( 384 0x0001 => "Password required to decrypt", 385 0x0002 => "Certificates only", 386 0x0003 => "Password or certificate required to decrypt", 387 388 # Values > 0x0003 reserved for certificate processing 389 ); 390 391# Strong Encryption Header & DecryptionHeader 392my %HashAlgLookup = ( 393 0x8004 => 'SHA1', 394 ); 395 396my $FH; 397 398my $ZIP64 = 0 ; 399my $NIBBLES = 8; 400 401my $LocalHeaderCount = 0; 402my $CentralHeaderCount = 0; 403my $InfoCount = 0; 404my $WarningCount = 0; 405my $ErrorCount = 0; 406my $lastWasMessage = 0; 407 408my $fatalDisabled = 0; 409 410my $OFFSET = 0 ; 411 412# Prefix data 413my $POSSIBLE_PREFIX_DELTA = 0; 414my $PREFIX_DELTA = 0; 415 416my $TRAILING = 0 ; 417my $PAYLOADLIMIT = 256; 418my $ZERO = 0 ; 419my $APK = 0 ; 420my $START_APK = 0; 421my $APK_LEN = 0; 422 423my $CentralDirectory = CentralDirectory->new(); 424my $LocalDirectory = LocalDirectory->new(); 425my $HeaderOffsetIndex = HeaderOffsetIndex->new(); 426my $EOCD_Present = 0; 427 428sub prOff 429{ 430 my $offset = shift; 431 my $s = offset($OFFSET); 432 $OFFSET += $offset; 433 return $s; 434} 435 436sub offset 437{ 438 my $v = shift ; 439 440 sprintf("%0${NIBBLES}X", $v); 441} 442 443# Format variables 444my ($OFF, $ENDS_AT, $LENGTH, $CONTENT, $TEXT, $VALUE) ; 445 446my $FMT1 = 'STDOUT1'; 447my $FMT2 = 'STDOUT2'; 448 449sub setupFormat 450{ 451 my $wantVerbose = shift ; 452 my $nibbles = shift; 453 454 my $width = '@' . ('>' x ($nibbles -1)); 455 my $space = " " x length($width); 456 457 # See https://github.com/Perl/perl5/issues/14255 for issue with "^*" in perl < 5.22 458 # my $rightColumn = "^*" ; 459 my $rightColumn = "^" . ("<" x 132); 460 461 # Fill mode can split on space or newline chars 462 # Spliting on hyphen works differently from Perl 5.20 onwards 463 $: = " \n"; 464 465 my $fmt ; 466 467 if ($wantVerbose) { 468 469 eval "format $FMT1 = 470$width $width $width ^<<<<<<<<<<<^<<<<<<<<<<<<<<<<<<<< $rightColumn 471\$OFF, \$ENDS_AT, \$LENGTH, \$CONTENT, \$TEXT, \$VALUE 472$space $space $space ^<<<<<<<<<<<^<<<<<<<<<<<<<<<<<<<< $rightColumn~~ 473 \$CONTENT, \$TEXT, \$VALUE 474. 475"; 476 477 eval "format $FMT2 = 478$width $width $width ^<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<< $rightColumn 479\$OFF, \$ENDS_AT, \$LENGTH, \$CONTENT, \$TEXT, \$VALUE 480$space $space $space ^<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<< $rightColumn~~ 481 \$CONTENT, \$TEXT, \$VALUE 482. 483"; 484 485 } 486 else { 487 eval "format $FMT1 = 488$width ^<<<<<<<<<<<<<<<<<<<< $rightColumn 489\$OFF, \$TEXT, \$VALUE 490$space ^<<<<<<<<<<<<<<<<<<<< $rightColumn~~ 491 \$TEXT, \$VALUE 492. 493"; 494 495 eval "format $FMT2 = 496$width ^<<<<<<<<<<<<<<<<<< $rightColumn 497\$OFF, \$TEXT, \$VALUE 498$space ^<<<<<<<<<<<<<<<<<< $rightColumn~~ 499 \$TEXT, \$VALUE 500. 501" 502 } 503 504 no strict 'refs'; 505 open($FMT1, ">&", \*STDOUT); select $FMT1; $| = 1 ; 506 open($FMT2, ">&", \*STDOUT); select $FMT2; $| = 1 ; 507 508 select 'STDOUT'; 509 $| = 1; 510 511} 512 513sub mySpr 514{ 515 my $format = shift ; 516 517 return "" if ! defined $format; 518 return $format unless @_ ; 519 return sprintf $format, @_ ; 520} 521 522sub xDump 523{ 524 my $input = shift; 525 526 $input =~ tr/\0-\37\177-\377/./; 527 return $input; 528} 529 530sub hexDump 531{ 532 return uc join ' ', unpack('(H2)*', $_[0]); 533} 534 535sub hexDump16 536{ 537 return uc 538 join "\r", 539 map { join ' ', unpack('(H2)*', $_ ) } 540 unpack('(a16)*', $_[0]) ; 541} 542 543sub charDump2 544{ 545 sprintf "%v02X", $_[0]; 546} 547 548sub charDump 549{ 550 sprintf "%vX", $_[0]; 551} 552 553sub hexValue 554{ 555 return sprintf("0x%X", $_[0]); 556} 557 558sub hexValue32 559{ 560 return sprintf("0x%08X", $_[0]); 561} 562 563sub hexValue16 564{ 565 return sprintf("0x%04X", $_[0]); 566} 567 568sub outHexdump 569{ 570 my $size = shift; 571 my $text = shift; 572 my $limit = shift ; 573 574 return 0 575 if $size == 0; 576 577 # TODO - add a limit to data output 578 # if ($limit) 579 # { 580 # outSomeData($size, $text); 581 # } 582 # else 583 { 584 myRead(my $payload, $size); 585 out($payload, $text, hexDump16($payload)); 586 } 587 588 return $size; 589} 590 591sub decimalHex 592{ 593 sprintf("%0*X (%u)", $_[1] // 0, $_[0], $_[0]) 594} 595 596sub decimalHex0x 597{ 598 sprintf("0x%0*X (%u)", $_[1] // 0, $_[0], $_[0]) 599} 600 601sub decimalHex0xUndef 602{ 603 return 'Unknown' 604 if ! defined $_[0]; 605 606 return decimalHex0x @_; 607} 608 609sub out 610{ 611 my $data = shift; 612 my $text = shift; 613 my $format = shift; 614 615 my $size = length($data) ; 616 617 $ENDS_AT = offset($OFFSET + ($size ? $size - 1 : 0)) ; 618 $OFF = prOff($size); 619 $LENGTH = offset($size) ; 620 $CONTENT = hexDump($data); 621 $TEXT = $text; 622 $VALUE = mySpr $format, @_; 623 624 no warnings; 625 626 write $FMT1 ; 627 628 $lastWasMessage = 0; 629} 630 631sub out0 632{ 633 my $size = shift; 634 my $text = shift; 635 my $format = shift; 636 637 $ENDS_AT = offset($OFFSET + ($size ? $size - 1 : 0)) ; 638 $OFF = prOff($size); 639 $LENGTH = offset($size) ; 640 $CONTENT = '...'; 641 $TEXT = $text; 642 $VALUE = mySpr $format, @_; 643 644 write $FMT1; 645 646 skip($FH, $size); 647 648 $lastWasMessage = 0; 649} 650 651sub out1 652{ 653 my $text = shift; 654 my $format = shift; 655 656 $ENDS_AT = '' ; 657 $OFF = ''; 658 $LENGTH = '' ; 659 $CONTENT = ''; 660 $TEXT = $text; 661 $VALUE = mySpr $format, @_; 662 663 write $FMT1; 664 665 $lastWasMessage = 0; 666} 667 668sub out2 669{ 670 my $data = shift ; 671 my $text = shift ; 672 my $format = shift; 673 674 my $size = length($data) ; 675 $ENDS_AT = offset($OFFSET + ($size ? $size - 1 : 0)) ; 676 $OFF = prOff($size); 677 $LENGTH = offset($size); 678 $CONTENT = hexDump($data); 679 $TEXT = $text; 680 $VALUE = mySpr $format, @_; 681 682 no warnings; 683 write $FMT2; 684 685 $lastWasMessage = 0; 686} 687 688 689sub Value 690{ 691 my $letter = shift; 692 693 if ($letter eq 'C') 694 { return decimalHex($_[0], 2) } 695 elsif ($letter eq 'v') 696 { return decimalHex($_[0], 4) } 697 elsif ($letter eq 'V') 698 { return decimalHex($_[0], 8) } 699 elsif ($letter eq 'Q<') 700 { return decimalHex($_[0], 16) } 701 else 702 { internalFatal undef, "here letter $letter"} 703} 704 705sub outer 706{ 707 my $name = shift ; 708 my $unpack = shift ; 709 my $size = shift ; 710 my $cb1 = shift ; 711 my $cb2 = shift ; 712 713 714 myRead(my $buff, $size); 715 my (@value) = unpack $unpack, $buff; 716 my $hex = Value($unpack, @value); 717 718 if (defined $cb1) { 719 my $v ; 720 if (ref $cb1 eq 'CODE') { 721 $v = $cb1->(@value) ; 722 } 723 else { 724 $v = $cb1 ; 725 } 726 727 $v = "'" . $v unless $v =~ /^'/; 728 $v .= "'" unless $v =~ /'$/; 729 $hex .= " $v" ; 730 } 731 732 out $buff, $name, $hex ; 733 734 $cb2->(@value) 735 if defined $cb2 ; 736 737 return $value[0]; 738} 739 740sub out_C 741{ 742 my $name = shift ; 743 my $cb1 = shift ; 744 my $cb2 = shift ; 745 746 outer($name, 'C', 1, $cb1, $cb2); 747} 748 749sub out_v 750{ 751 my $name = shift ; 752 my $cb1 = shift ; 753 my $cb2 = shift ; 754 755 outer($name, 'v', 2, $cb1, $cb2); 756} 757 758sub out_V 759{ 760 my $name = shift ; 761 my $cb1 = shift ; 762 my $cb2 = shift ; 763 764 outer($name, 'V', 4, $cb1, $cb2); 765} 766 767sub out_Q 768{ 769 my $name = shift ; 770 my $cb1 = shift ; 771 my $cb2 = shift ; 772 773 outer($name, 'Q<', 8, $cb1, $cb2); 774} 775 776sub outSomeData 777{ 778 my $size = shift; 779 my $message = shift; 780 my $redact = shift ; 781 782 # return if $size == 0; 783 784 if ($size > 0) { 785 if ($size > $PAYLOADLIMIT) { 786 my $before = $FH->tell(); 787 out0 $size, $message; 788 } else { 789 myRead(my $buffer, $size ); 790 $buffer = "X" x $size 791 if $redact; 792 out $buffer, $message, xDump $buffer ; 793 } 794 } 795} 796 797sub outSomeDataParagraph 798{ 799 my $size = shift; 800 my $message = shift; 801 my $redact = shift ; 802 803 return if $size == 0; 804 805 print "\n"; 806 outSomeData($size, $message, $redact); 807 808} 809 810sub unpackValue_C 811{ 812 Value_v(unpack "C", $_[0]); 813} 814 815sub Value_C 816{ 817 return decimalHex($_[0], 2); 818} 819 820 821sub unpackValue_v 822{ 823 Value_v(unpack "v", $_[0]); 824} 825 826sub Value_v 827{ 828 return decimalHex($_[0], 4); 829} 830 831sub unpackValue_V 832{ 833 Value_V(unpack "V", $_[0]); 834} 835 836sub Value_V 837{ 838 return decimalHex($_[0] // 0, 8); 839} 840 841sub unpackValue_Q 842{ 843 my $v = unpack ("Q<", $_[0]); 844 Value_Q($v); 845} 846 847sub Value_Q 848{ 849 return decimalHex($_[0], 16); 850} 851 852sub read_Q 853{ 854 my $b ; 855 myRead($b, 8); 856 return ($b, unpack ("Q<" , $b)); 857} 858 859sub read_V 860{ 861 my $b ; 862 myRead($b, 4); 863 return ($b, unpack ("V", $b)); 864} 865 866sub read_v 867{ 868 my $b ; 869 myRead($b, 2); 870 return ($b, unpack "v", $b); 871} 872 873 874sub read_C 875{ 876 my $b ; 877 myRead($b, 1); 878 return ($b, unpack "C", $b); 879} 880 881sub seekTo 882{ 883 my $offset = shift ; 884 my $loc = shift ; 885 886 $loc = SEEK_SET 887 if ! defined $loc ; 888 889 $FH->seek($offset, $loc); 890 $OFFSET = $FH->tell(); 891} 892 893sub rewindRelative 894{ 895 my $offset = shift ; 896 897 $FH->seek(-$offset, SEEK_CUR); 898 # $OFFSET -= $offset; 899 $OFFSET = $FH->tell(); 900} 901 902sub deltaToNextSignature 903{ 904 my $start = $FH->tell(); 905 906 my $got = scanForSignature(1); 907 908 my $delta = $FH->tell() - $start ; 909 seekTo($start); 910 911 if ($got) 912 { 913 return $delta ; 914 } 915 916 return 0 ; 917} 918 919sub scanForSignature 920{ 921 my $walk = shift // 0; 922 923 # $count is only used to when 'walk' is enabled. 924 # Want to scan for a PK header at the start of the file. 925 # All other PK headers are should be directly after the previous PK record. 926 state $count = 0; 927 $count += $walk; 928 929 my %sigs = Signatures::getSigsForScan(); 930 931 my $start = $FH->tell(); 932 933 # TODO -- Fix this? 934 if (1 || $count <= 1) { 935 936 my $last = ''; 937 my $offset = 0; 938 my $buffer ; 939 940 BUFFER: 941 while ($FH->read($buffer, 1024 * 1000)) 942 { 943 my $combine = $last . $buffer ; 944 945 my $ix = 0; 946 while (1) 947 { 948 $ix = index($combine, "PK", $ix) ; 949 950 if ($ix == -1) 951 { 952 $last = ''; 953 next BUFFER; 954 } 955 956 my $rest = substr($combine, $ix + 2, 2); 957 958 if (! $sigs{$rest}) 959 { 960 $ix += 2; 961 next; 962 } 963 964 # possible match 965 my $here = $FH->tell(); 966 seekTo($here - length($combine) + $ix); 967 968 my $name = Signatures::name($sigs{$rest}); 969 return $sigs{$rest}; 970 } 971 972 $last = substr($combine, $ix+4); 973 } 974 } 975 else { 976 die "FIX THIS"; 977 return ! $FH->eof(); 978 } 979 980 # printf("scanForSignature %X\t%X (%X)\t%s\n", $start, $FH->tell(), $FH->tell() - $start, 'NO MATCH') ; 981 982 return 0; 983} 984 985my $is64In32 = 0; 986 987my $opt_verbose = 0; 988my $opt_scan = 0; 989my $opt_walk = 0; 990my $opt_Redact = 0; 991my $opt_utc = 0; 992my $opt_want_info_mesages = 1; 993my $opt_want_warning_mesages = 1; 994my $opt_want_error_mesages = 1; 995my $opt_want_message_exit_status = 0; 996my $exit_status_code = 0; 997my $opt_help =0; 998 999$Getopt::Long::bundling = 1 ; 1000 1001TextEncoding::setDefaults(); 1002 1003GetOptions("h|help" => \$opt_help, 1004 "v" => \$opt_verbose, 1005 "scan" => \$opt_scan, 1006 "walk" => \$opt_walk, 1007 "redact" => \$opt_Redact, 1008 "utc" => \$opt_utc, 1009 "version" => sub { print "$VERSION\n"; exit }, 1010 1011 # Filename/comment encoding 1012 "encoding=s" => \&TextEncoding::parseEncodingOption, 1013 "no-encoding" => \&TextEncoding::NoEncoding, 1014 "debug-encoding" => \&TextEncoding::debugEncoding, 1015 "output-encoding=s" => \&TextEncoding::parseEncodingOption, 1016 "language-encoding!" => \&TextEncoding::LanguageEncodingFlag, 1017 1018 # Message control 1019 "exit-bitmask!" => \$opt_want_message_exit_status, 1020 "messages!" => sub { 1021 my ($opt_name, $opt_value) = @_; 1022 $opt_want_info_mesages = 1023 $opt_want_warning_mesages = 1024 $opt_want_error_mesages = $opt_value; 1025 }, 1026 ) 1027 or exit 255 ; 1028 1029Usage() 1030 if $opt_help; 1031 1032die("No zipfile\n") 1033 unless @ARGV == 1; 1034 1035die("Cannot specify both '--walk' and '--scan'\n") 1036 if $opt_walk && $opt_scan ; 1037 1038my $filename = shift @ARGV; 1039 1040topLevelFatal "No such file" 1041 unless -e $filename ; 1042 1043topLevelFatal "'$filename' is a directory" 1044 if -d $filename ; 1045 1046topLevelFatal "'$filename' is not a standard file" 1047 unless -f $filename ; 1048 1049$FH = IO::File->new( "<$filename" ) 1050 or topLevelFatal "Cannot open '$filename': $!"; 1051binmode($FH); 1052 1053displayFileInfo($filename); 1054TextEncoding::encodingInfo(); 1055 1056my $FILELEN = -s $filename ; 1057$TRAILING = -s $filename ; 1058$NIBBLES = nibbles(-s $filename) ; 1059 1060topLevelFatal "'$filename' is empty" 1061 if $FILELEN == 0 ; 1062 1063topLevelFatal "file is too short to be a zip file" 1064 if $FILELEN < ZIP_EOCD_MIN_SIZE ; 1065 1066setupFormat($opt_verbose, $NIBBLES); 1067 1068my @Messages = (); 1069 1070if ($opt_scan || $opt_walk) 1071{ 1072 # Main loop for walk/scan processing 1073 1074 my $foundZipRecords = 0; 1075 my $foundCentralHeader = 0; 1076 my $lastEndsAt = 0; 1077 my $lastSignature = 0; 1078 my $lastHeader = {}; 1079 1080 $CentralDirectory->{alreadyScanned} = 1 ; 1081 1082 my $output_encryptedCD = 0; 1083 1084 reportPrefixData(); 1085 while(my $s = scanForSignature($opt_walk)) 1086 { 1087 my $here = $FH->tell(); 1088 my $delta = $here - $lastEndsAt ; 1089 1090 # delta can only be negative when '--scan' is used 1091 if ($delta < 0 ) 1092 { 1093 # nested or overlap 1094 # check if nested 1095 # remember & check if matching entry in CD 1096 # printf("### WARNING: OVERLAP/NESTED Record found 0x%X 0x%X $delta\n", $here, $lastEndsAt) ; 1097 } 1098 elsif ($here != $lastEndsAt) 1099 { 1100 # scanForSignature had to skip bytes to find the next signature 1101 1102 # some special cases that don't have signatures need to be checked first 1103 1104 seekTo($lastEndsAt); 1105 1106 if (! $output_encryptedCD && $CentralDirectory->isEncryptedCD()) 1107 { 1108 displayEncryptedCD(); 1109 $output_encryptedCD = 1; 1110 $lastEndsAt = $FH->tell(); 1111 next; 1112 } 1113 elsif ($lastSignature == ZIP_LOCAL_HDR_SIG && $lastHeader->{'streamed'} ) 1114 { 1115 # Check for size of possibe malformed Data Descriptor before outputting payload 1116 if (! $lastHeader->{'gotDataDescriptorSize'}) 1117 { 1118 my $hdrSize = checkForBadlyFormedDataDescriptor($lastHeader, $delta) ; 1119 1120 if ($hdrSize) 1121 { 1122 # remove size of Data Descriptor from payload 1123 $delta -= $hdrSize; 1124 $lastHeader->{'gotDataDescriptorSize'} = $hdrSize; 1125 } 1126 } 1127 1128 if(defined($lastHeader->{'payloadOutput'}) && ($lastEndsAt = BadlyFormedDataDescriptor($lastHeader, $delta))) 1129 { 1130 $HeaderOffsetIndex->rewindIndex(); 1131 $lastHeader->{entry}->readDataDescriptor(1) ; 1132 next; 1133 } 1134 1135 # Assume we have the payload when streaming is enabled 1136 outSomeData($delta, "PAYLOAD", $opt_Redact) ; 1137 $lastHeader->{'payloadOutput'} = 1; 1138 $lastEndsAt = $FH->tell(); 1139 1140 next; 1141 } 1142 elsif (Signatures::isCentralHeader($s) && $foundCentralHeader == 0) 1143 { 1144 # check for an APK header directly before the first central header 1145 $foundCentralHeader = 1; 1146 1147 ($START_APK, $APK, $APK_LEN) = chckForAPKSigningBlock($FH, $here, 0) ; 1148 1149 if ($START_APK) 1150 { 1151 seekTo($lastEndsAt+4); 1152 1153 scanApkBlock(); 1154 $lastEndsAt = $FH->tell(); 1155 next; 1156 } 1157 1158 seekTo($lastEndsAt); 1159 } 1160 1161 # Not a special case, so output generic padding message 1162 if ($delta > 0) 1163 { 1164 reportPrefixData($delta) 1165 if $lastEndsAt == 0 ; 1166 outSomeDataParagraph($delta, "UNEXPECTED PADDING"); 1167 info $FH->tell() - $delta, decimalHex0x($delta) . " Unexpected Padding bytes" 1168 if $FH->tell() - $delta ; 1169 $POSSIBLE_PREFIX_DELTA = $delta 1170 if $lastEndsAt == 0; 1171 $lastEndsAt = $FH->tell(); 1172 next; 1173 } 1174 else 1175 { 1176 seekTo($here); 1177 } 1178 1179 } 1180 1181 my ($buffer, $signature) = read_V(); 1182 1183 $lastSignature = $signature; 1184 1185 my $handler = Signatures::decoder($signature); 1186 if (!defined $handler) { 1187 internalFatal undef, "xxx"; 1188 } 1189 1190 $foundZipRecords = 1; 1191 $lastHeader = $handler->($signature, $buffer, $FH->tell() - 4) // {'streamed' => 0}; 1192 1193 $lastEndsAt = $FH->tell(); 1194 1195 seekTo($here + 4) 1196 if $opt_scan; 1197 } 1198 1199 topLevelFatal "'$filename' is not a zip file" 1200 unless $foundZipRecords ; 1201 1202} 1203else 1204{ 1205 # Main loop for non-walk/scan processing 1206 1207 # check for prefix data 1208 my $s = scanForSignature(); 1209 if ($s && $FH->tell() != 0) 1210 { 1211 $POSSIBLE_PREFIX_DELTA = $FH->tell(); 1212 } 1213 1214 seekTo(0); 1215 1216 scanCentralDirectory($FH); 1217 1218 fatal_tryWalk undef, "No Zip metadata found at end of file" 1219 if ! $CentralDirectory->exists() && ! $EOCD_Present ; 1220 1221 $CentralDirectory->{alreadyScanned} = 1 ; 1222 1223 Nesting::clearStack(); 1224 1225 # $HeaderOffsetIndex->dump(); 1226 1227 $OFFSET = 0 ; 1228 $FH->seek(0, SEEK_SET) ; 1229 1230 my $expectedOffset = 0; 1231 my $expectedSignature = 0; 1232 my $expectedBuffer = 0; 1233 my $foundCentralHeader = 0; 1234 my $processedAPK = 0; 1235 my $processedECD = 0; 1236 my $lastHeader ; 1237 1238 # my $lastWasLocalHeader = 0; 1239 # my $inCentralHeader = 0; 1240 1241 while (1) 1242 { 1243 last if $FH->eof(); 1244 1245 my $here = $FH->tell(); 1246 1247 if ($here >= $TRAILING) { 1248 my $delta = $FILELEN - $TRAILING; 1249 outSomeDataParagraph($delta, "TRAILING DATA"); 1250 info $FH->tell(), "Unexpected Trailing Data: " . decimalHex0x($delta) . " bytes"; 1251 1252 last; 1253 } 1254 1255 my ($buffer, $signature) = read_V(); 1256 1257 $expectedOffset = undef; 1258 $expectedSignature = undef; 1259 1260 # Check for split archive marker at start of file 1261 if ($here == 0 && $signature == ZIP_SINGLE_SEGMENT_MARKER) 1262 { 1263 # let it drop through 1264 $expectedSignature = ZIP_SINGLE_SEGMENT_MARKER; 1265 $expectedOffset = 0; 1266 } 1267 else 1268 { 1269 my $expectedEntry = $HeaderOffsetIndex->getNextIndex() ; 1270 if ($expectedEntry) 1271 { 1272 $expectedOffset = $expectedEntry->offset(); 1273 $expectedSignature = $expectedEntry->signature(); 1274 $expectedBuffer = pack "V", $expectedSignature ; 1275 } 1276 } 1277 1278 my $delta = $expectedOffset - $here ; 1279 1280 # if ($here != $expectedOffset && $signature != ZIP_DATA_HDR_SIG) 1281 # { 1282 # rewindRelative(4); 1283 # my $delta = $expectedOffset - $here ; 1284 # outSomeDataParagraph($delta, "UNEXPECTED PADDING"); 1285 # $HeaderOffsetIndex->rewindIndex(); 1286 # next; 1287 # } 1288 1289 # Need to check for use-case where 1290 # * there is a ZIP_DATA_HDR_SIG directly after a ZIP_LOCAL_HDR_SIG. 1291 # The HeaderOffsetIndex object doesn't have visibility of it. 1292 # * APK header directly before the CD 1293 # * zipbomb 1294 1295 if (defined $expectedOffset && $here != $expectedOffset && ( $CentralDirectory->exists() || $EOCD_Present) ) 1296 { 1297 if ($here > $expectedOffset) 1298 { 1299 # Probable zipbomb 1300 1301 # Cursor $OFFSET need to rewind 1302 $OFFSET = $expectedOffset; 1303 $FH->seek($OFFSET + 4, SEEK_SET) ; 1304 1305 $signature = $expectedSignature; 1306 $buffer = $expectedBuffer ; 1307 } 1308 1309 # If get here then $here is less than $expectedOffset 1310 1311 1312 # check for an APK header directly before the first central header 1313 # Make sure not to miss a streaming data descriptor 1314 if ($signature != ZIP_DATA_HDR_SIG && Signatures::isCentralHeader($expectedSignature) && $START_APK && ! $processedAPK ) 1315 { 1316 seekTo($here+4); 1317 # rewindRelative(4); 1318 scanApkBlock(); 1319 $HeaderOffsetIndex->rewindIndex(); 1320 $processedAPK = 1; 1321 next; 1322 } 1323 1324 # Check Encrypted Central Directory 1325 # if ($CentralHeaderSignatures{$expectedSignature} && $CentralDirectory->isEncryptedCD() && ! $processedECD) 1326 # { 1327 # # rewind the invalid signature 1328 # seekTo($here); 1329 # # rewindRelative(4); 1330 # displayEncryptedCD(); 1331 # $processedECD = 1; 1332 # next; 1333 # } 1334 1335 if ($signature != ZIP_DATA_HDR_SIG && $delta >= 0) 1336 { 1337 rewindRelative(4); 1338 if($lastHeader->{'streamed'} && BadlyFormedDataDescriptor($lastHeader, $delta)) 1339 { 1340 $lastHeader->{entry}->readDataDescriptor(1) ; 1341 $HeaderOffsetIndex->rewindIndex(); 1342 next; 1343 } 1344 1345 reportPrefixData($delta) 1346 if $here == 0; 1347 outSomeDataParagraph($delta, "UNEXPECTED PADDING"); 1348 info $FH->tell() - $delta, decimalHex0x($delta) . " Unexpected Padding bytes" 1349 if $FH->tell() - $delta ; 1350 $HeaderOffsetIndex->rewindIndex(); 1351 next; 1352 } 1353 1354 # ZIP_DATA_HDR_SIG drops through 1355 } 1356 1357 my $handler = Signatures::decoder($signature); 1358 1359 if (!defined $handler) 1360 { 1361 # if ($CentralDirectory->exists()) { 1362 1363 # # Should be at offset that central directory says 1364 # my $locOffset = $CentralDirectory->getNextLocalOffset(); 1365 # my $delta = $locOffset - $here ; 1366 1367 # if ($here + 4 == $locOffset ) { 1368 # for (0 .. 3) { 1369 # $FH->ungetc(ord(substr($buffer, $_, 1))) 1370 # } 1371 # outSomeData($delta, "UNEXPECTED PADDING"); 1372 # next; 1373 # } 1374 # } 1375 1376 1377 # if ($here == $CentralDirectory->{CentralDirectoryOffset} && $EOCD_Present && $CentralDirectory->isEncryptedCD()) 1378 # { 1379 # # rewind the invalid signature 1380 # rewindRelative(4); 1381 # displayEncryptedCD(); 1382 # next; 1383 # } 1384 # elsif ($here < $CentralDirectory->{CentralDirectoryOffset}) 1385 # { 1386 # # next 1387 # # if scanForSignature() ; 1388 1389 # my $skippedFrom = $FH->tell() ; 1390 # my $skippedContent = $CentralDirectory->{CentralDirectoryOffset} - $skippedFrom ; 1391 1392 # printf "\nWARNING!\nExpected Zip header not found at offset 0x%X\n", $here; 1393 # printf "Skipping 0x%X bytes to Central Directory...\n", $skippedContent; 1394 1395 # push @Messages, 1396 # sprintf("Expected Zip header not found at offset 0x%X, ", $skippedFrom) . 1397 # sprintf("skipped 0x%X bytes\n", $skippedContent); 1398 1399 # seekTo($CentralDirectory->{CentralDirectoryOffset}); 1400 1401 # next; 1402 # } 1403 # else 1404 { 1405 fatal $here, sprintf "Unexpected Zip Signature '%s' at offset %s", Value_V($signature), decimalHex0x($here) ; 1406 last; 1407 } 1408 } 1409 1410 $ZIP64 = 0 if $signature != ZIP_DATA_HDR_SIG ; 1411 $lastHeader = $handler->($signature, $buffer, $FH->tell() - 4); 1412 # $lastWasLocalHeader = $signature == ZIP_LOCAL_HDR_SIG ; 1413 $HeaderOffsetIndex->rewindIndex() 1414 if $signature == ZIP_DATA_HDR_SIG ; 1415 } 1416} 1417 1418 1419dislayMessages() 1420 if $opt_want_error_mesages ; 1421 1422exit $exit_status_code ; 1423 1424sub dislayMessages 1425{ 1426 1427 # Compare Central & Local for discrepencies 1428 1429 if ($CentralDirectory->isMiniZipEncrypted) 1430 { 1431 # don't compare local & central entries when minizip-ng encryption is in play 1432 info undef, "Zip file uses minizip-ng central directory encryption" 1433 } 1434 1435 elsif ($CentralDirectory->exists() && $LocalDirectory->exists()) 1436 { 1437 # TODO check number of entries matches eocd 1438 # TODO check header length matches reality 1439 1440 # Nesting::dump(); 1441 1442 $LocalDirectory->sortByLocalOffset(); 1443 my %cleanCentralEntries = %{ $CentralDirectory->{byCentralOffset} }; 1444 1445 if ($NESTING_DEBUG) 1446 { 1447 if (Nesting::encapsulationCount()) 1448 { 1449 say "# ENCAPSULATIONS"; 1450 1451 for my $index (sort { $a <=> $b } keys %{ Nesting::encapsulations() }) 1452 { 1453 my $outer = Nesting::entryByIndex($index) ; 1454 1455 say "# Nesting " . $outer->outputFilename . " " . $outer->offsetStart . " " . $outer->offsetEnd ; 1456 1457 for my $inner (sort { $a <=> $b } @{ Nesting::encapsulations()->{$index} } ) 1458 { 1459 say "# " . $inner->outputFilename . " " . $inner->offsetStart . " " . $inner->offsetEnd ;; 1460 } 1461 } 1462 } 1463 } 1464 1465 { 1466 # check for Local Directory orphans 1467 1468 my %orphans = map { $_->localHeaderOffset => $_->outputFilename } 1469 grep { $_->entryType == ZIP_LOCAL_HDR_SIG && # Want Local Headers 1470 ! $_->encapsulated && 1471 @{ $_->getCdEntries } == 0 1472 } 1473 values %{ Nesting::getEntriesByOffset() }; 1474 1475 1476 if (keys %orphans) 1477 { 1478 error undef, "Orphan Local Headers found: " . scalar(keys %orphans) ; 1479 1480 my $table = new SimpleTable; 1481 $table->addHeaderRow('Offset', 'Filename'); 1482 $table->addDataRow(decimalHex0x($_), $orphans{$_}) 1483 for sort { $a <=> $b } keys %orphans ; 1484 1485 $table->display(); 1486 } 1487 } 1488 1489 { 1490 # check for Central Directory orphans 1491 # probably only an issue with --walk & a zipbomb 1492 1493 my %orphans = map { $_->centralHeaderOffset => $_ } 1494 grep { $_->entryType == ZIP_CENTRAL_HDR_SIG # Want Central Headers 1495 && ! $_->ldEntry # Filter out orphans 1496 && ! $_->encapsulated # Not encapsulated 1497 } 1498 values %{ Nesting::getEntriesByOffset() }; 1499 1500 if (keys %orphans) 1501 { 1502 error undef, "Possible zipbomb -- Orphan Central Headers found: " . scalar(keys %orphans) ; 1503 1504 my $table = new SimpleTable; 1505 $table->addHeaderRow('Offset', 'Filename'); 1506 for (sort { $a <=> $b } keys %orphans ) 1507 { 1508 $table->addDataRow(decimalHex0x($_), $orphans{$_}{filename}); 1509 delete $cleanCentralEntries{ $_ }; 1510 } 1511 1512 $table->display(); 1513 } 1514 } 1515 1516 if (Nesting::encapsulationCount()) 1517 { 1518 # Benign Nested zips 1519 # This is the use-case where a zip file is "stored" in another zip file. 1520 # NOT a zipbomb -- want the benign nested entries 1521 1522 # Note: this is only active when scan is used 1523 1524 my %outerEntries = map { $_->localHeaderOffset => $_->outputFilename } 1525 grep { 1526 $_->entryType == ZIP_CENTRAL_HDR_SIG && 1527 ! $_->encapsulated && # not encapsulated 1528 $_->ldEntry && # central header has a local sibling 1529 $_->ldEntry->childrenCount && # local entry has embedded entries 1530 ! Nesting::childrenInCentralDir($_->ldEntry) 1531 } 1532 values %{ Nesting::getEntriesByOffset() }; 1533 1534 if (keys %outerEntries) 1535 { 1536 my $count = scalar keys %outerEntries; 1537 info undef, "Nested Zip files found: $count"; 1538 1539 my $table = new SimpleTable; 1540 $table->addHeaderRow('Offset', 'Filename'); 1541 $table->addDataRow(decimalHex0x($_), $outerEntries{$_}) 1542 for sort { $a <=> $b } keys %outerEntries ; 1543 1544 $table->display(); 1545 } 1546 } 1547 1548 if ($LocalDirectory->anyStreamedEntries) 1549 { 1550 # Check for a missing Data Descriptors 1551 1552 my %missingDataDescriptor = map { $_->localHeaderOffset => $_->outputFilename } 1553 grep { $_->entryType == ZIP_LOCAL_HDR_SIG && 1554 $_->streamed && 1555 ! $_->readDataDescriptor 1556 } 1557 values %{ Nesting::getEntriesByOffset() }; 1558 1559 1560 for my $offset (sort keys %missingDataDescriptor) 1561 { 1562 my $filename = $missingDataDescriptor{$offset}; 1563 error $offset, "Filename '$filename': Missing 'Data Descriptor'" ; 1564 } 1565 } 1566 1567 { 1568 # compare local & central for duplicate entries (CD entries point to same local header) 1569 1570 my %ByLocalOffset = map { $_->localHeaderOffset => $_ } 1571 grep { 1572 $_->entryType == ZIP_LOCAL_HDR_SIG # Want Local Headers 1573 && ! $_->encapsulated # Not encapsulated 1574 && @{ $_->getCdEntries } > 1 1575 } 1576 values %{ Nesting::getEntriesByOffset() }; 1577 1578 for my $offset (sort keys %ByLocalOffset) 1579 { 1580 my @entries = @{ $ByLocalOffset{$offset}->getCdEntries }; 1581 if (@entries > 1) 1582 { 1583 # found duplicates 1584 my $localEntry = $LocalDirectory->getByLocalOffset($offset) ; 1585 if ($localEntry) 1586 { 1587 error undef, "Possible zipbomb -- Duplicate Central Headers referring to one Local header for '" . $localEntry->outputFilename . "' at offset " . decimalHex0x($offset); 1588 } 1589 else 1590 { 1591 error undef, "Possible zipbomb -- Duplicate Central Headers referring to one Local header at offset " . decimalHex0x($offset); 1592 } 1593 1594 my $table = new SimpleTable; 1595 $table->addHeaderRow('Offset', 'Filename'); 1596 for (sort { $a->centralHeaderOffset <=> $b->centralHeaderOffset } @entries) 1597 { 1598 $table->addDataRow(decimalHex0x($_->centralHeaderOffset), $_->outputFilename); 1599 delete $cleanCentralEntries{ $_->centralHeaderOffset }; 1600 } 1601 1602 $table->display(); 1603 } 1604 } 1605 } 1606 1607 if (Nesting::encapsulationCount()) 1608 { 1609 # compare local & central for nested entries 1610 1611 # get the local offsets referenced in the CD 1612 # this deliberately ignores any valid nested local entries 1613 my @localOffsets = sort { $a <=> $b } keys %{ $CentralDirectory->{byLocalOffset} }; 1614 1615 # now check for nesting 1616 1617 my %nested ; 1618 my %bomb; 1619 1620 for my $offset (@localOffsets) 1621 { 1622 my $innerEntry = $LocalDirectory->{byLocalOffset}{$offset}; 1623 if ($innerEntry) 1624 { 1625 my $outerLocalEntry = Nesting::getOuterEncapsulation($innerEntry); 1626 if (defined $outerLocalEntry) 1627 { 1628 my $outerOffset = $outerLocalEntry->localHeaderOffset(); 1629 if ($CentralDirectory->{byLocalOffset}{ $offset }) 1630 { 1631 push @{ $bomb{ $outerOffset } }, $offset ; 1632 } 1633 else 1634 { 1635 push @{ $nested{ $outerOffset } }, $offset ; 1636 } 1637 } 1638 } 1639 } 1640 1641 if (keys %nested) 1642 { 1643 # The real central directory at eof does not know about these. 1644 # likely to be a zip file stored in another zip file 1645 warning undef, "Nested Local Entries found"; 1646 for my $loc (sort keys %nested) 1647 { 1648 my $count = scalar @{ $nested{$loc} }; 1649 my $outerEntry = $LocalDirectory->getByLocalOffset($loc); 1650 say "Local Header for '" . $outerEntry->outputFilename . "' at offset " . decimalHex0x($loc) . " has $count nested Local Headers"; 1651 for my $n ( @{ $nested{$loc} } ) 1652 { 1653 my $innerEntry = $LocalDirectory->getByLocalOffset($n); 1654 1655 say "# Nested Local Header for filename '" . $innerEntry->outputFilename . "' is at Offset " . decimalHex0x($n) ; 1656 } 1657 } 1658 } 1659 1660 if (keys %bomb) 1661 { 1662 # Central Directory knows about these, so this is a zipbomb 1663 1664 error undef, "Possible zipbomb -- Nested Local Entries found"; 1665 for my $loc (sort keys %bomb) 1666 { 1667 my $count = scalar @{ $bomb{$loc} }; 1668 my $outerEntry = $LocalDirectory->getByLocalOffset($loc); 1669 say "# Local Header for '" . $outerEntry->outputFilename . "' at offset " . decimalHex0x($loc) . " has $count nested Local Headers"; 1670 1671 my $table = new SimpleTable; 1672 $table->addHeaderRow('Offset', 'Filename'); 1673 $table->addDataRow(decimalHex0x($_), $LocalDirectory->getByLocalOffset($_)->outputFilename) 1674 for sort @{ $bomb{$loc} } ; 1675 1676 $table->display(); 1677 1678 delete $cleanCentralEntries{ $_ } 1679 for grep { defined $_ } 1680 map { $CentralDirectory->{byLocalOffset}{$_}{centralHeaderOffset} } 1681 @{ $bomb{$loc} } ; 1682 } 1683 } 1684 } 1685 1686 # Check if contents of local headers match with central headers 1687 # 1688 # When central header encryption is used the local header values are masked (see APPNOTE 6.3.10, sec 4) 1689 # In this usecase the central header will appear to be absent 1690 # 1691 # key fields 1692 # filename, compressed/uncompessed lengths, crc, compression method 1693 { 1694 for my $centralEntry ( sort { $a->centralHeaderOffset() <=> $b->centralHeaderOffset() } values %cleanCentralEntries ) 1695 { 1696 my $localOffset = $centralEntry->localHeaderOffset; 1697 my $localEntry = $LocalDirectory->getByLocalOffset($localOffset); 1698 1699 next 1700 unless $localEntry; 1701 1702 state $fields = [ 1703 # field name offset display name stringify 1704 ['filename', ZIP_CD_FILENAME_OFFSET, 1705 'Filename', undef, ], 1706 ['extractVersion', 7, 'Extract Zip Spec', sub { decimalHex0xUndef($_[0]) . " " . decodeZipVer($_[0]) }, ], 1707 ['generalPurposeFlags', 8, 'General Purpose Flag', \&decimalHex0xUndef, ], 1708 ['compressedMethod', 10, 'Compression Method', sub { decimalHex0xUndef($_[0]) . " " . getcompressionMethodName($_[0]) }, ], 1709 ['lastModDateTime', 12, 'Modification Time', sub { decimalHex0xUndef($_[0]) . " " . LastModTime($_[0]) }, ], 1710 ['crc32', 16, 'CRC32', \&decimalHex0xUndef, ], 1711 ['compressedSize', 20, 'Compressed Size', \&decimalHex0xUndef, ], 1712 ['uncompressedSize', 24, 'Uncompressed Size', \&decimalHex0xUndef, ], 1713 1714 ] ; 1715 1716 my $table = new SimpleTable; 1717 $table->addHeaderRow('Field Name', 'Central Offset', 'Central Value', 'Local Offset', 'Local Value'); 1718 1719 for my $data (@$fields) 1720 { 1721 my ($field, $offset, $name, $stringify) = @$data; 1722 # if the local header uses streaming and we are running a scan/walk, the compressed/uncompressed sizes will not be known 1723 my $localValue = $localEntry->{$field} ; 1724 my $centralValue = $centralEntry->{$field}; 1725 1726 if (($localValue // '-1') ne ($centralValue // '-2')) 1727 { 1728 if ($stringify) 1729 { 1730 $localValue = $stringify->($localValue); 1731 $centralValue = $stringify->($centralValue); 1732 } 1733 1734 $table->addDataRow($name, 1735 decimalHex0xUndef($centralEntry->centralHeaderOffset() + $offset), 1736 $centralValue, 1737 decimalHex0xUndef($localOffset+$offset), 1738 $localValue); 1739 } 1740 } 1741 1742 my $badFields = $table->hasData; 1743 if ($badFields) 1744 { 1745 error undef, "Found $badFields Field Mismatch for Filename '". $centralEntry->outputFilename . "'"; 1746 $table->display(); 1747 } 1748 } 1749 } 1750 1751 } 1752 elsif ($CentralDirectory->exists()) 1753 { 1754 my @messages = "Central Directory exists, but Local Directory not found" ; 1755 push @messages , "Try running with --walk' or '--scan' options" 1756 unless $opt_scan || $opt_walk ; 1757 error undef, @messages; 1758 } 1759 elsif ($LocalDirectory->exists()) 1760 { 1761 if ($CentralDirectory->isEncryptedCD()) 1762 { 1763 warning undef, "Local Directory exists, but Central Directory is encrypted" 1764 } 1765 else 1766 { 1767 error undef, "Local Directory exists, but Central Directory not found" 1768 } 1769 1770 } 1771 1772 if ($ErrorCount ||$WarningCount || $InfoCount ) 1773 { 1774 say "#" 1775 unless $lastWasMessage ; 1776 1777 say "# Error Count: $ErrorCount" 1778 if $ErrorCount; 1779 say "# Warning Count: $WarningCount" 1780 if $WarningCount; 1781 say "# Info Count: $InfoCount" 1782 if $InfoCount; 1783 } 1784 1785 if (@Messages) 1786 { 1787 my $count = scalar @Messages ; 1788 say "#\nWARNINGS"; 1789 say "# * $_\n" for @Messages ; 1790 } 1791 1792 say "#\n# Done"; 1793} 1794 1795sub checkForBadlyFormedDataDescriptor 1796{ 1797 my $lastHeader = shift; 1798 my $delta = shift // 0; 1799 1800 # check size of delta - a DATA HDR without a signature can only be 1801 # 12 bytes for 32-bit 1802 # 20 bytes for 64-bit 1803 1804 my $here = $FH->tell(); 1805 1806 my $localEntry = $lastHeader->{entry}; 1807 1808 return 0 1809 unless $opt_scan || $opt_walk ; 1810 1811 # delta can be the actual payload + a data descriptor without a sig 1812 1813 my $signature = unpack "V", peekAtOffset($here + $delta, 4); 1814 1815 if ($signature == ZIP_DATA_HDR_SIG) 1816 { 1817 return 0; 1818 } 1819 1820 my $cl32 = unpack "V", peekAtOffset($here + $delta - 8, 4); 1821 my $cl64 = unpack "Q<", peekAtOffset($here + $delta - 16, 8); 1822 1823 if ($cl32 == $delta - 12) 1824 { 1825 return 12; 1826 } 1827 1828 if ($cl64 == $delta - 20) 1829 { 1830 return 20 ; 1831 } 1832 1833 return 0; 1834} 1835 1836 1837sub BadlyFormedDataDescriptor 1838{ 1839 my $lastHeader= shift; 1840 my $delta = shift; 1841 1842 # check size of delta - a DATA HDR without a signature can only be 1843 # 12 bytes for 32-bit 1844 # 20 bytes for 64-bit 1845 1846 my $here = $FH->tell(); 1847 1848 my $localEntry = $lastHeader->{entry}; 1849 my $compressedSize = $lastHeader->{payloadLength} ; 1850 1851 my $sigName = Signatures::titleName(ZIP_DATA_HDR_SIG); 1852 1853 if ($opt_scan || $opt_walk) 1854 { 1855 # delta can be the actual payload + a data descriptor without a sig 1856 1857 if ($lastHeader->{'gotDataDescriptorSize'} == 12) 1858 { 1859 # seekTo($FH->tell() + $delta - 12) ; 1860 1861 # outSomeData($delta - 12, "PAYLOAD", $opt_Redact) ; 1862 1863 print "\n"; 1864 out1 "Missing $sigName Signature", Value_V(ZIP_DATA_HDR_SIG); 1865 1866 error $FH->tell(), "Missimg $sigName Signature"; 1867 $localEntry->crc32( out_V "CRC"); 1868 $localEntry->compressedSize( out_V "Compressed Size"); 1869 $localEntry->uncompressedSize( out_V "Uncompressed Size"); 1870 1871 if ($localEntry->zip64) 1872 { 1873 error $here, "'$sigName': expected 64-bit values, got 32-bit"; 1874 } 1875 1876 return $FH->tell(); 1877 } 1878 1879 if ($lastHeader->{'gotDataDescriptorSize'} == 20) 1880 { 1881 # seekTo($FH->tell() + $delta - 20) ; 1882 1883 # outSomeData($delta - 20, "PAYLOAD", $opt_Redact) ; 1884 1885 print "\n"; 1886 out1 "Missing $sigName Signature", Value_V(ZIP_DATA_HDR_SIG); 1887 1888 error $FH->tell(), "Missimg $sigName Signature"; 1889 $localEntry->crc32( out_V "CRC"); 1890 $localEntry->compressedSize( out_Q "Compressed Size"); 1891 $localEntry->uncompressedSize( out_Q "Uncompressed Size"); 1892 1893 if (! $localEntry->zip64) 1894 { 1895 error $here, "'$sigName': expected 32-bit values, got 64-bit"; 1896 } 1897 1898 return $FH->tell(); 1899 } 1900 1901 error 0, "MISSING $sigName"; 1902 1903 seekTo($here); 1904 return 0; 1905 } 1906 1907 my $cdEntry = $localEntry->getCdEntry; 1908 1909 if ($delta == 12) 1910 { 1911 $FH->seek($lastHeader->{payloadOffset} + $lastHeader->{payloadLength}, SEEK_SET) ; 1912 1913 my $cl = unpack "V", peekAtOffset($FH->tell() + 4, 4); 1914 if ($cl == $compressedSize) 1915 { 1916 print "\n"; 1917 out1 "Missing $sigName Signature", Value_V(ZIP_DATA_HDR_SIG); 1918 1919 error $FH->tell(), "Missimg $sigName Signature"; 1920 $localEntry->crc32( out_V "CRC"); 1921 $localEntry->compressedSize( out_V "Compressed Size"); 1922 $localEntry->uncompressedSize( out_V "Uncompressed Size"); 1923 1924 if ($localEntry->zip64) 1925 { 1926 error $here, "'$sigName': expected 64-bit values, got 32-bit"; 1927 } 1928 1929 return $FH->tell(); 1930 } 1931 } 1932 1933 if ($delta == 20) 1934 { 1935 $FH->seek($lastHeader->{payloadOffset} + $lastHeader->{payloadLength}, SEEK_SET) ; 1936 1937 my $cl = unpack "Q<", peekAtOffset($FH->tell() + 4, 8); 1938 1939 if ($cl == $compressedSize) 1940 { 1941 print "\n"; 1942 out1 "Missing $sigName Signature", Value_V(ZIP_DATA_HDR_SIG); 1943 1944 error $FH->tell(), "Missimg $sigName Signature"; 1945 $localEntry->crc32( out_V "CRC"); 1946 $localEntry->compressedSize( out_Q "Compressed Size"); 1947 $localEntry->uncompressedSize( out_Q "Uncompressed Size"); 1948 1949 if (! $localEntry->zip64 && ( $cdEntry && ! $cdEntry->zip64)) 1950 { 1951 error $here, "'$sigName': expected 32-bit values, got 64-bit"; 1952 } 1953 1954 return $FH->tell(); 1955 } 1956 } 1957 1958 seekTo($here); 1959 1960 error $here, "Missing $sigName"; 1961 return 0; 1962} 1963 1964sub getcompressionMethodName 1965{ 1966 my $id = shift ; 1967 " '" . ($ZIP_CompressionMethods{$id} || "Unknown Method") . "'" ; 1968} 1969 1970sub compressionMethod 1971{ 1972 my $id = shift ; 1973 Value_v($id) . getcompressionMethodName($id); 1974} 1975 1976sub LocalHeader 1977{ 1978 my $signature = shift ; 1979 my $data = shift ; 1980 my $startRecordOffset = shift ; 1981 1982 my $locHeaderOffset = $FH->tell() -4 ; 1983 1984 ++ $LocalHeaderCount; 1985 print "\n"; 1986 out $data, "LOCAL HEADER #$LocalHeaderCount" , Value_V($signature); 1987 1988 need 26, Signatures::name($signature); 1989 1990 my $buffer; 1991 my $orphan = 0; 1992 1993 my ($loc, $CDcompressedSize, $cdZip64, $zip64Sizes, $cdIndex, $cdEntryOffset) ; 1994 my $CentralEntryExists = $CentralDirectory->localOffset($startRecordOffset); 1995 my $localEntry = LocalDirectoryEntry->new(); 1996 1997 my $cdEntry; 1998 1999 if (! $opt_scan && ! $opt_walk && $CentralEntryExists) 2000 { 2001 $cdEntry = $CentralDirectory->getByLocalOffset($startRecordOffset); 2002 2003 if (! $cdEntry) 2004 { 2005 out1 "Orphan Entry: No matching central directory" ; 2006 $orphan = 1 ; 2007 } 2008 2009 $cdZip64 = $cdEntry->zip64ExtraPresent; 2010 $zip64Sizes = $cdEntry->zip64SizesPresent; 2011 $cdEntryOffset = $cdEntry->centralHeaderOffset ; 2012 $localEntry->addCdEntry($cdEntry) ; 2013 2014 if ($cdIndex && $cdIndex != $LocalHeaderCount) 2015 { 2016 # fatal undef, "$cdIndex != $LocalHeaderCount" 2017 } 2018 } 2019 2020 my $extractVer = out_C "Extract Zip Spec", \&decodeZipVer; 2021 out_C "Extract OS", \&decodeOS; 2022 2023 my ($bgp, $gpFlag) = read_v(); 2024 my ($bcm, $compressedMethod) = read_v(); 2025 2026 out $bgp, "General Purpose Flag", Value_v($gpFlag) ; 2027 GeneralPurposeBits($compressedMethod, $gpFlag); 2028 my $LanguageEncodingFlag = $gpFlag & ZIP_GP_FLAG_LANGUAGE_ENCODING ; 2029 my $streaming = $gpFlag & ZIP_GP_FLAG_STREAMING_MASK ; 2030 $localEntry->languageEncodingFlag($LanguageEncodingFlag) ; 2031 2032 out $bcm, "Compression Method", compressionMethod($compressedMethod) ; 2033 info $FH->tell() - 2, "Unknown 'Compression Method' ID " . decimalHex0x($compressedMethod, 2) 2034 if ! defined $ZIP_CompressionMethods{$compressedMethod} ; 2035 2036 my $lastMod = out_V "Modification Time", sub { LastModTime($_[0]) }; 2037 2038 my $crc = out_V "CRC"; 2039 warning $FH->tell() - 4, "CRC field should be zero when streaming is enabled" 2040 if $streaming && $crc != 0 ; 2041 2042 my $compressedSize = out_V "Compressed Size"; 2043 # warning $FH->tell(), "Compressed Size should be zero when streaming is enabled"; 2044 2045 my $uncompressedSize = out_V "Uncompressed Size"; 2046 # warning $FH->tell(), "Uncompressed Size should be zero when streaming is enabled"; 2047 2048 my $filenameLength = out_v "Filename Length"; 2049 2050 if ($filenameLength == 0) 2051 { 2052 info $FH->tell()- 2, "Zero Length filename"; 2053 } 2054 2055 my $extraLength = out_v "Extra Length"; 2056 2057 my $filename = ''; 2058 if ($filenameLength) 2059 { 2060 need $filenameLength, Signatures::name($signature), 'Filename'; 2061 2062 myRead(my $raw_filename, $filenameLength); 2063 $localEntry->filename($raw_filename) ; 2064 $filename = outputFilename($raw_filename, $LanguageEncodingFlag); 2065 $localEntry->outputFilename($filename); 2066 } 2067 2068 $localEntry->localHeaderOffset($locHeaderOffset) ; 2069 $localEntry->offsetStart($locHeaderOffset) ; 2070 $localEntry->compressedSize($compressedSize) ; 2071 $localEntry->uncompressedSize($uncompressedSize) ; 2072 $localEntry->extractVersion($extractVer); 2073 $localEntry->generalPurposeFlags($gpFlag); 2074 $localEntry->lastModDateTime($lastMod); 2075 $localEntry->crc32($crc) ; 2076 $localEntry->zip64ExtraPresent($cdZip64) ; 2077 $localEntry->zip64SizesPresent($zip64Sizes) ; 2078 2079 $localEntry->compressedMethod($compressedMethod) ; 2080 $localEntry->streamed($gpFlag & ZIP_GP_FLAG_STREAMING_MASK) ; 2081 2082 $localEntry->std_localHeaderOffset($locHeaderOffset + $PREFIX_DELTA) ; 2083 $localEntry->std_compressedSize($compressedSize) ; 2084 $localEntry->std_uncompressedSize($uncompressedSize) ; 2085 $localEntry->std_diskNumber(0) ; 2086 2087 if ($extraLength) 2088 { 2089 need $extraLength, Signatures::name($signature), 'Extra'; 2090 walkExtra($extraLength, $localEntry); 2091 } 2092 2093 # APPNOTE 6.3.10, sec 4.3.8 2094 warning $FH->tell - $filenameLength, "Directory '$filename' must not have a payload" 2095 if ! $streaming && $filename =~ m#/$# && $localEntry->uncompressedSize ; 2096 2097 my @msg ; 2098 # if ($cdZip64 && ! $ZIP64) 2099 # { 2100 # # Central directory said this was Zip64 2101 # # some zip files don't have the Zip64 field in the local header 2102 # # seems to be a streaming issue. 2103 # push @msg, "Missing Zip64 extra field in Local Header #$hexHdrCount\n"; 2104 2105 # if (! $zip64Sizes) 2106 # { 2107 # # Central has a ZIP64 entry that doesn't have sizes 2108 # # Local doesn't have a Zip 64 at all 2109 # push @msg, "Unzip may complain about 'overlapped components' #$hexHdrCount\n"; 2110 # } 2111 # else 2112 # { 2113 # $ZIP64 = 1 2114 # } 2115 # } 2116 2117 2118 my $minizip_encrypted = $localEntry->minizip_secure; 2119 my $pk_encrypted = ($gpFlag & ZIP_GP_FLAG_STRONG_ENCRYPTED_MASK) && $compressedMethod != 99 && ! $minizip_encrypted; 2120 2121 # Detecting PK strong encryption from a local header is a bit convoluted. 2122 # Cannot just use ZIP_GP_FLAG_ENCRYPTED_CD because minizip also uses this bit. 2123 # so jump through some hoops 2124 # extract ver is >= 5.0' 2125 # all the encryption flags are set in gpflags 2126 # TODO - add zero lengths for crc, compresssed & uncompressed 2127 2128 if (($gpFlag & ZIP_GP_FLAG_ALL_ENCRYPT) == ZIP_GP_FLAG_ALL_ENCRYPT && $extractVer >= 0x32 ) 2129 { 2130 $CentralDirectory->setPkEncryptedCD() 2131 } 2132 2133 my $size = 0; 2134 2135 # If no CD scanned, get compressed Size from local header. 2136 # Zip64 extra field takes priority 2137 my $cdl = defined $cdEntry 2138 ? $cdEntry->compressedSize() 2139 : undef; 2140 2141 $CDcompressedSize = $localEntry->compressedSize ; 2142 $CDcompressedSize = $cdl 2143 if defined $cdl && $gpFlag & ZIP_GP_FLAG_STREAMING_MASK; 2144 2145 my $cdu = defined $CentralDirectory->{byLocalOffset}{$locHeaderOffset} 2146 ? $CentralDirectory->{byLocalOffset}{$locHeaderOffset}{uncompressedSize} 2147 : undef; 2148 my $CDuncompressedSize = $localEntry->uncompressedSize ; 2149 2150 $CDuncompressedSize = $cdu 2151 if defined $cdu && $gpFlag & ZIP_GP_FLAG_STREAMING_MASK; 2152 2153 my $fullCompressedSize = $CDcompressedSize; 2154 2155 my $payloadOffset = $FH->tell(); 2156 $localEntry->payloadOffset($payloadOffset) ; 2157 $localEntry->offsetEnd($payloadOffset + $fullCompressedSize -1) ; 2158 2159 if ($CDcompressedSize) 2160 { 2161 # check if enough left in file for the payload 2162 my $available = $FILELEN - $FH->tell; 2163 if ($available < $CDcompressedSize ) 2164 { 2165 error $FH->tell, 2166 "file truncated while reading 'PAYLOAD'", 2167 expectedMessage($CDcompressedSize, $available); 2168 2169 $CDcompressedSize = $available; 2170 } 2171 } 2172 2173 # Next block can decrement the CDcompressedSize 2174 # possiblty to zero. Need to remember if it started out 2175 # as a non-zero value 2176 my $haveCDcompressedSize = $CDcompressedSize; 2177 2178 if ($compressedMethod == 99 && $localEntry->aesValid) # AES Encryption 2179 { 2180 $CDcompressedSize -= printAes($localEntry) 2181 } 2182 elsif (($gpFlag & ZIP_GP_FLAG_ALL_ENCRYPT) == 0) 2183 { 2184 if ($compressedMethod == ZIP_CM_LZMA) 2185 { 2186 2187 $size = printLzmaProperties() 2188 } 2189 2190 $CDcompressedSize -= $size; 2191 } 2192 elsif ($pk_encrypted) 2193 { 2194 $CDcompressedSize -= DecryptionHeader(); 2195 } 2196 2197 if ($haveCDcompressedSize) { 2198 2199 if ($compressedMethod == 92 && $CDcompressedSize == 20) { 2200 # Payload for a Reference is the SHA-1 hash of the uncompressed content 2201 myRead(my $sha1, 20); 2202 out $sha1, "PAYLOAD", "SHA-1 Hash: " . hexDump($sha1); 2203 } 2204 elsif ($compressedMethod == 99 && $localEntry->aesValid ) { 2205 outSomeData($CDcompressedSize, "PAYLOAD", $opt_Redact) ; 2206 my $auth ; 2207 myRead($auth, 10); 2208 out $auth, "AES Auth", hexDump16($auth); 2209 } 2210 else { 2211 outSomeData($CDcompressedSize, "PAYLOAD", $opt_Redact) ; 2212 } 2213 } 2214 2215 print "WARNING: $_" 2216 for @msg; 2217 2218 push @Messages, @msg ; 2219 2220 $LocalDirectory->addEntry($localEntry); 2221 2222 return { 2223 'localHeader' => 1, 2224 'streamed' => $gpFlag & ZIP_GP_FLAG_STREAMING_MASK, 2225 'offset' => $startRecordOffset, 2226 'length' => $FH->tell() - $startRecordOffset, 2227 'payloadLength' => $fullCompressedSize, 2228 'payloadOffset' => $payloadOffset, 2229 'entry' => $localEntry, 2230 } ; 2231} 2232 2233use constant Pack_ZIP_DIGITAL_SIGNATURE_SIG => pack("V", ZIP_DIGITAL_SIGNATURE_SIG); 2234 2235sub findDigitalSignature 2236{ 2237 my $cdSize = shift; 2238 2239 my $here = $FH->tell(); 2240 2241 my $data ; 2242 myRead($data, $cdSize); 2243 2244 seekTo($here); 2245 2246 # find SIG 2247 my $ix = index($data, Pack_ZIP_DIGITAL_SIGNATURE_SIG); 2248 if ($ix > -1) 2249 { 2250 # check size of signature meaans it is directly after the encrypted CD 2251 my $sigSize = unpack "v", substr($data, $ix+4, 2); 2252 if ($ix + 4 + 2 + $sigSize == $cdSize) 2253 { 2254 # return size of digital signature record 2255 return 4 + 2 + $sigSize ; 2256 } 2257 } 2258 2259 return 0; 2260} 2261 2262sub displayEncryptedCD 2263{ 2264 # First thing in the encrypted CD is the Decryption Header 2265 my $decryptHeaderSize = DecryptionHeader(1); 2266 2267 # Check for digital signature record in the CD 2268 # It needs to be the very last thing in the CD 2269 2270 my $delta = deltaToNextSignature(); 2271 print "\n"; 2272 outSomeData($delta, "ENCRYPTED CENTRAL DIRECTORY") 2273 if $delta; 2274} 2275 2276sub DecryptionHeader 2277{ 2278 # APPNOTE 6.3.10, sec 7.2.4 2279 2280 # -Decryption Header: 2281 # Value Size Description 2282 # ----- ---- ----------- 2283 # IVSize 2 bytes Size of initialization vector (IV) 2284 # IVData IVSize Initialization vector for this file 2285 # Size 4 bytes Size of remaining decryption header data 2286 # Format 2 bytes Format definition for this record 2287 # AlgID 2 bytes Encryption algorithm identifier 2288 # Bitlen 2 bytes Bit length of encryption key 2289 # Flags 2 bytes Processing flags 2290 # ErdSize 2 bytes Size of Encrypted Random Data 2291 # ErdData ErdSize Encrypted Random Data 2292 # Reserved1 4 bytes Reserved certificate processing data 2293 # Reserved2 (var) Reserved for certificate processing data 2294 # VSize 2 bytes Size of password validation data 2295 # VData VSize-4 Password validation data 2296 # VCRC32 4 bytes Standard ZIP CRC32 of password validation data 2297 2298 my $central = shift ; 2299 2300 if ($central) 2301 { 2302 print "\n"; 2303 out "", "CENTRAL HEADER DECRYPTION RECORD"; 2304 2305 } 2306 else 2307 { 2308 print "\n"; 2309 out "", "DECRYPTION HEADER RECORD"; 2310 } 2311 2312 my $bytecount = 2; 2313 2314 my $IVSize = out_v "IVSize"; 2315 outHexdump($IVSize, "IVData"); 2316 $bytecount += $IVSize; 2317 2318 my $Size = out_V "Size"; 2319 $bytecount += $Size + 4; 2320 2321 out_v "Format"; 2322 out_v "AlgId", sub { $AlgIdLookup{ $_[0] } // "Unknown algorithm" } ; 2323 out_v "BitLen"; 2324 out_v "Flags", sub { $FlagsLookup{ $_[0] } // "Reserved for certificate processing" } ; 2325 2326 my $ErdSize = out_v "ErdSize"; 2327 outHexdump($ErdSize, "ErdData"); 2328 2329 my $Reserved1_RCount = out_V "RCount"; 2330 Reserved2($Reserved1_RCount); 2331 2332 my $VSize = out_v "VSize"; 2333 outHexdump($VSize-4, "VData"); 2334 2335 out_V "VCRC32"; 2336 2337 return $bytecount ; 2338} 2339 2340sub Reserved2 2341{ 2342 # APPNOTE 6.3.10, sec 7.4.3 & 7.4.4 2343 2344 my $recipients = shift; 2345 2346 return 0 2347 if $recipients == 0; 2348 2349 out_v "HashAlg", sub { $HashAlgLookup{ $_[0] } // "Unknown algorithm" } ; 2350 my $HSize = out_v "HSize" ; 2351 2352 my $ix = 1; 2353 for (0 .. $recipients-1) 2354 { 2355 my $hex = sprintf("Key #%X", $ix) ; 2356 my $RESize = out_v "RESize $hex"; 2357 2358 outHexdump($HSize, "REHData $hex"); 2359 outHexdump($RESize - $HSize, "REKData $hex"); 2360 2361 ++ $ix; 2362 } 2363} 2364 2365sub redactData 2366{ 2367 my $data = shift; 2368 2369 # Redact everything apart from directory seperators 2370 $data =~ s(.)(X)g 2371 if $opt_Redact; 2372 2373 return $data; 2374} 2375 2376sub redactFilename 2377{ 2378 my $filename = shift; 2379 2380 # Redact everything apart from directory seperators 2381 $filename =~ s(.)(X)g 2382 if $opt_Redact; 2383 2384 return $filename; 2385} 2386 2387sub validateDirectory 2388{ 2389 # Check that Directries are stored correctly 2390 # 2391 # 1. Filename MUST end with a "/" 2392 # see APPNOTE 6.3.10, sec 4.3.8 2393 # 2. Uncompressed size == 0 2394 # see APPNOTE 6.3.10, sec 4.3.8 2395 # 3. warn if compressed size > 0 and Uncompressed size == 0 2396 # 4. check for presence of DOS directory attrib in External Attributes 2397 # 5. Check for Unix extrnal attribute S_IFDIR 2398 2399 my $offset = shift ; 2400 my $filename = shift ; 2401 my $extractVersion = shift; 2402 my $versionMadeBy = shift; 2403 my $compressedSize = shift; 2404 my $uncompressedSize = shift; 2405 my $externalAttributes = shift; 2406 2407 my $dosAttributes = $externalAttributes & 0xFFFF; 2408 my $otherAttributes = ($externalAttributes >> 16 ) & 0xFFFF; 2409 2410 my $probablyDirectory = 0; 2411 my $filenameOK = 0; 2412 my $attributesSet = 0; 2413 my $dosAttributeSet = 0; 2414 my $unixAttributeSet = 0; 2415 2416 if ($filename =~ m#/$#) 2417 { 2418 # filename claims it is a directory. 2419 $probablyDirectory = 1; 2420 $filenameOK = 1; 2421 } 2422 2423 if ($dosAttributes & 0x0010) # ATTR_DIRECTORY 2424 { 2425 $probablyDirectory = 1; 2426 $attributesSet = 1 ; 2427 $dosAttributeSet = 1 ; 2428 } 2429 2430 if ($versionMadeBy == 3 && $otherAttributes & 0x4000) # Unix & S_IFDIR 2431 { 2432 $probablyDirectory = 1; 2433 $attributesSet = 1; 2434 $unixAttributeSet = 1; 2435 } 2436 2437 return 2438 unless $probablyDirectory ; 2439 2440 error $offset + CentralDirectoryEntry::Offset_Filename(), 2441 "Directory '$filename' must end in a '/'", 2442 "'External Attributes' flag this as a directory" 2443 if ! $filenameOK && $uncompressedSize == 0; 2444 2445 info $offset + CentralDirectoryEntry::Offset_ExternalAttributes(), 2446 "DOS Directory flag not set in 'External Attributes' for Directory '$filename'" 2447 if $filenameOK && ! $dosAttributeSet; 2448 2449 info $offset + CentralDirectoryEntry::Offset_ExternalAttributes(), 2450 "Unix Directory flag not set in 'External Attributes' for Directory '$filename'" 2451 if $filenameOK && $versionMadeBy == 3 && ! $unixAttributeSet; 2452 2453 if ($uncompressedSize != 0) 2454 { 2455 # APPNOTE 6.3.10, sec 4.3.8 2456 error $offset + CentralDirectoryEntry::Offset_UncompressedSize(), 2457 "Directory '$filename' must not have a payload" 2458 } 2459 elsif ($compressedSize != 0) 2460 { 2461 2462 info $offset + CentralDirectoryEntry::Offset_CompressedSize(), 2463 "Directory '$filename' has compressed payload that uncompresses to nothing" 2464 } 2465 2466 if ($extractVersion < 20) 2467 { 2468 # APPNOTE 6.3.10, sec 4.4.3.2 2469 my $got = decodeZipVer($extractVersion); 2470 warning $offset + CentralDirectoryEntry::Offset_VersionNeededToExtract(), 2471 "'Extract Zip Spec' is '$got'. Need value >= '2.0' for Directory '$filename'" 2472 } 2473} 2474 2475sub validateFilename 2476{ 2477 my $filename = shift ; 2478 2479 return "Zero length filename" 2480 if $filename eq '' ; 2481 2482 # TODO 2483 # - check length of filename 2484 # getconf NAME_MAX . and getconf PATH_MAX . on Linux 2485 2486 # Start with APPNOTE restrictions 2487 2488 # APPNOTE 6.3.10, sec 4.4.17.1 2489 # 2490 # No absolute path 2491 # No backslash delimeters 2492 # No drive letters 2493 2494 return "Filename must not be an absolute path" 2495 if $filename =~ m#^/#; 2496 2497 return ["Backslash detected in filename", "Possible Windows path."] 2498 if $filename =~ m#\\#; 2499 2500 return "Windows Drive Letter '$1' not allowed in filename" 2501 if $filename =~ /^([a-z]:)/i ; 2502 2503 # Slip Vulnerability with use of ".." in a relative path 2504 # https://security.snyk.io/research/zip-slip-vulnerability 2505 return ["Use of '..' in filename is a Zip Slip Vulnerability", 2506 "See https://security.snyk.io/research/zip-slip-vulnerability" ] 2507 if $filename =~ m#^\.\./# || $filename =~ m#/\.\./# || $filename =~ m#/\.\.# ; 2508 2509 # Cannot have "." or ".." as the full filename 2510 return "Use of current-directory filename '.' may not unzip correctly" 2511 if $filename eq '.' ; 2512 2513 return "Use of parent-directory filename '..' may not unzip correctly" 2514 if $filename eq '..' ; 2515 2516 # Portability (mostly with Windows) 2517 2518 { 2519 # see https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file 2520 state $badDosFilename = join '|', map { quotemeta } 2521 qw(CON PRN AUX NUL 2522 COM1 COM2 COM3 COM4 COM5 COM6 COM7 COM8 COM9 2523 LPT1 LPT2 LPT3 LPT4 LPT5 LPT6 LPT7 LPT8 LPT9 2524 ) ; 2525 2526 # if $filename contains any invalid codepoints, we will get a warning like this 2527 # 2528 # Operation "pattern match (m//)" returns its argument for non-Unicode code point 2529 # 2530 # so silence it for now. 2531 2532 no warnings; 2533 2534 return "Portability Issue: '$1' is a reserved Windows device name" 2535 if $filename =~ /^($badDosFilename)$/io ; 2536 2537 # Can't have the device name with an extension either 2538 return "Portability Issue: '$1' is a reserved Windows device name" 2539 if $filename =~ /^($badDosFilename)\./io ; 2540 } 2541 2542 state $illegal_windows_chars = join '|', map { quotemeta } qw( < > : " | ? * ); 2543 return "Portability Issue: Windows filename cannot contain '$1'" 2544 if $filename =~ /($illegal_windows_chars)/o ; 2545 2546 return "Portability Issue: Null character '\\x00' is not allowed in a Windows or Linux filename" 2547 if $filename =~ /\x00/ ; 2548 2549 return sprintf "Portability Issue: Control character '\\x%02X' is not allowed in a Windows filename", ord($1) 2550 if $filename =~ /([\x00-\x1F])/ ; 2551 2552 return undef; 2553} 2554 2555sub getOutputFilename 2556{ 2557 my $raw_filename = shift; 2558 my $LanguageEncodingFlag = shift; 2559 my $message = shift // "Filename"; 2560 2561 my $filename ; 2562 my $decoded_filename; 2563 2564 if ($raw_filename eq '') 2565 { 2566 if ($message eq 'Filename') 2567 { 2568 warning $FH->tell() , 2569 "Filename ''", 2570 "Zero Length Filename" ; 2571 } 2572 2573 return '', '', 0; 2574 } 2575 elsif ($opt_Redact) 2576 { 2577 return redactFilename($raw_filename), '', 0 ; 2578 } 2579 else 2580 { 2581 $decoded_filename = TextEncoding::decode($raw_filename, $message, $LanguageEncodingFlag) ; 2582 $filename = TextEncoding::encode($decoded_filename, $message, $LanguageEncodingFlag) ; 2583 } 2584 2585 return $filename, $decoded_filename, $filename ne $raw_filename ; 2586} 2587 2588sub outputFilename 2589{ 2590 my $raw_filename = shift; 2591 my $LanguageEncodingFlag = shift; 2592 my $message = shift // "Filename"; 2593 2594 my ($filename, $decoded_filename, $modified) = getOutputFilename($raw_filename, $LanguageEncodingFlag); 2595 2596 out $raw_filename, $message, "'". $filename . "'"; 2597 2598 if (! $opt_Redact && TextEncoding::debugEncoding()) 2599 { 2600 # use Devel::Peek; 2601 # print "READ " ; Dump($raw_filename); 2602 # print "INTERNAL " ; Dump($decoded_filename); 2603 # print "OUTPUT " ; Dump($filename); 2604 2605 debug $FH->tell() - length($raw_filename), 2606 "$message Encoding Change" 2607 if $modified ; 2608 2609 # use Unicode::Normalize; 2610 # my $NormaizedForm ; 2611 # if (defined $decoded_filename) 2612 # { 2613 # $NormaizedForm .= Unicode::Normalize::checkNFD $decoded_filename ? 'NFD ' : ''; 2614 # $NormaizedForm .= Unicode::Normalize::checkNFC $decoded_filename ? 'NFC ' : ''; 2615 # $NormaizedForm .= Unicode::Normalize::checkNFKD $decoded_filename ? 'NFKD ' : ''; 2616 # $NormaizedForm .= Unicode::Normalize::checkNFKC $decoded_filename ? 'NFKC ' : ''; 2617 # $NormaizedForm .= Unicode::Normalize::checkFCD $decoded_filename ? 'FCD ' : ''; 2618 # $NormaizedForm .= Unicode::Normalize::checkFCC $decoded_filename ? 'FCC ' : ''; 2619 # } 2620 2621 debug $FH->tell() - length($raw_filename), 2622 "Encoding Debug for $message", 2623 "Octets Read from File [$raw_filename][" . length($raw_filename). "] [" . charDump2($raw_filename) . "]", 2624 "Via Unicode Codepoints [$decoded_filename][" . length($decoded_filename) . "] [" . charDump($decoded_filename) . "]", 2625 # "Unicode Normalization $NormaizedForm", 2626 "Octets Written [$filename][" . length($filename). "] [" . charDump2($filename) . "]"; 2627 } 2628 2629 if ($message eq 'Filename' && $opt_want_warning_mesages) 2630 { 2631 # Check for bad, unsafe & not portable filenames 2632 my $v = validateFilename($decoded_filename); 2633 2634 if ($v) 2635 { 2636 my @v = ref $v eq 'ARRAY' 2637 ? @$v 2638 : $v; 2639 2640 warning $FH->tell() - length($raw_filename), 2641 "Filename '$filename'", 2642 @v 2643 } 2644 } 2645 2646 return $filename; 2647} 2648 2649sub CentralHeader 2650{ 2651 my $signature = shift ; 2652 my $data = shift ; 2653 my $startRecordOffset = shift ; 2654 2655 my $cdEntryOffset = $FH->tell() - 4 ; 2656 2657 ++ $CentralHeaderCount; 2658 2659 print "\n"; 2660 out $data, "CENTRAL HEADER #$CentralHeaderCount", Value_V($signature); 2661 my $buffer; 2662 2663 need 42, Signatures::name($signature); 2664 2665 out_C "Created Zip Spec", \&decodeZipVer; 2666 my $made_by = out_C "Created OS", \&decodeOS; 2667 my $extractVer = out_C "Extract Zip Spec", \&decodeZipVer; 2668 out_C "Extract OS", \&decodeOS; 2669 2670 my ($bgp, $gpFlag) = read_v(); 2671 my ($bcm, $compressedMethod) = read_v(); 2672 2673 my $cdEntry = CentralDirectoryEntry->new($cdEntryOffset); 2674 2675 out $bgp, "General Purpose Flag", Value_v($gpFlag) ; 2676 GeneralPurposeBits($compressedMethod, $gpFlag); 2677 my $LanguageEncodingFlag = $gpFlag & ZIP_GP_FLAG_LANGUAGE_ENCODING ; 2678 $cdEntry->languageEncodingFlag($LanguageEncodingFlag) ; 2679 2680 out $bcm, "Compression Method", compressionMethod($compressedMethod) ; 2681 info $FH->tell() - 2, "Unknown 'Compression Method' ID " . decimalHex0x($compressedMethod, 2) 2682 if ! defined $ZIP_CompressionMethods{$compressedMethod} ; 2683 2684 my $lastMod = out_V "Modification Time", sub { LastModTime($_[0]) }; 2685 2686 my $crc = out_V "CRC"; 2687 my $compressedSize = out_V "Compressed Size"; 2688 my $std_compressedSize = $compressedSize; 2689 my $uncompressedSize = out_V "Uncompressed Size"; 2690 my $std_uncompressedSize = $uncompressedSize; 2691 my $filenameLength = out_v "Filename Length"; 2692 if ($filenameLength == 0) 2693 { 2694 info $FH->tell()- 2, "Zero Length filename"; 2695 } 2696 my $extraLength = out_v "Extra Length"; 2697 my $comment_length = out_v "Comment Length"; 2698 my $disk_start = out_v "Disk Start"; 2699 my $std_disk_start = $disk_start; 2700 2701 my $int_file_attrib = out_v "Int File Attributes"; 2702 out1 "[Bit 0]", $int_file_attrib & 1 ? "1 'Text Data'" : "0 'Binary Data'"; 2703 out1 "[Bits 1-15]", Value_v($int_file_attrib & 0xFE) . " 'Unknown'" 2704 if $int_file_attrib & 0xFE ; 2705 2706 my $ext_file_attrib = out_V "Ext File Attributes"; 2707 2708 { 2709 # MS-DOS Attributes are bottom two bytes 2710 my $dos_attrib = $ext_file_attrib & 0xFFFF; 2711 2712 # See https://learn.microsoft.com/en-us/windows/win32/fileio/file-attribute-constants 2713 # and https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-smb/65e0c225-5925-44b0-8104-6b91339c709f 2714 2715 out1 "[Bit 0]", "Read-Only" if $dos_attrib & 0x0001 ; 2716 out1 "[Bit 1]", "Hidden" if $dos_attrib & 0x0002 ; 2717 out1 "[Bit 2]", "System" if $dos_attrib & 0x0004 ; 2718 out1 "[Bit 3]", "Label" if $dos_attrib & 0x0008 ; 2719 out1 "[Bit 4]", "Directory" if $dos_attrib & 0x0010 ; 2720 out1 "[Bit 5]", "Archive" if $dos_attrib & 0x0020 ; 2721 out1 "[Bit 6]", "Device" if $dos_attrib & 0x0040 ; 2722 out1 "[Bit 7]", "Normal" if $dos_attrib & 0x0080 ; 2723 out1 "[Bit 8]", "Temporary" if $dos_attrib & 0x0100 ; 2724 out1 "[Bit 9]", "Sparse" if $dos_attrib & 0x0200 ; 2725 out1 "[Bit 10]", "Reparse Point" if $dos_attrib & 0x0400 ; 2726 out1 "[Bit 11]", "Compressed" if $dos_attrib & 0x0800 ; 2727 2728 out1 "[Bit 12]", "Offline" if $dos_attrib & 0x1000 ; 2729 out1 "[Bit 13]", "Not Indexed" if $dos_attrib & 0x2000 ; 2730 2731 # Zip files created on Mac seem to set this bit. Not clear why. 2732 out1 "[Bit 14]", "Possible Mac Flag" if $dos_attrib & 0x4000 ; 2733 2734 # p7Zip & 7z set this bit to flag that the high 16-bits are Unix attributes 2735 out1 "[Bit 15]", "Possible p7zip/7z Unix Flag" if $dos_attrib & 0x8000 ; 2736 2737 } 2738 2739 my $native_attrib = ($ext_file_attrib >> 16 ) & 0xFFFF; 2740 2741 if ($made_by == 3) # Unix 2742 { 2743 2744 state $mask = { 2745 0 => '---', 2746 1 => '--x', 2747 2 => '-w-', 2748 3 => '-wx', 2749 4 => 'r--', 2750 5 => 'r-x', 2751 6 => 'rw-', 2752 7 => 'rwx', 2753 } ; 2754 2755 my $rwx = ($native_attrib & 0777); 2756 2757 if ($rwx) 2758 { 2759 my $output = ''; 2760 $output .= $mask->{ ($rwx >> 6) & 07 } ; 2761 $output .= $mask->{ ($rwx >> 3) & 07 } ; 2762 $output .= $mask->{ ($rwx >> 0) & 07 } ; 2763 2764 out1 "[Bits 16-24]", Value_v($rwx) . " 'Unix attrib: $output'" ; 2765 out1 "[Bit 25]", "1 'Sticky'" 2766 if $rwx & 0x200 ; 2767 out1 "[Bit 26]", "1 'Set GID'" 2768 if $rwx & 0x400 ; 2769 out1 "[Bit 27]", "1 'Set UID'" 2770 if $rwx & 0x800 ; 2771 2772 my $not_rwx = (($native_attrib >> 12) & 0xF); 2773 if ($not_rwx) 2774 { 2775 state $masks = { 2776 0x0C => 'Socket', # 0x0C 0b1100 2777 0x0A => 'Symbolic Link', # 0x0A 0b1010 2778 0x08 => 'Regular File', # 0x08 0b1000 2779 0x06 => 'Block Device', # 0x06 0b0110 2780 0x04 => 'Directory', # 0x04 0b0100 2781 0x02 => 'Character Device', # 0x02 0b0010 2782 0x01 => 'FIFO', # 0x01 0b0001 2783 }; 2784 2785 my $got = $masks->{$not_rwx} // 'Unknown Unix attrib' ; 2786 out1 "[Bits 28-31]", Value_C($not_rwx) . " '$got'" 2787 } 2788 } 2789 } 2790 elsif ($native_attrib) 2791 { 2792 out1 "[Bits 24-31]", Value_v($native_attrib) . " 'Unknown attributes for OS ID $made_by'" 2793 } 2794 2795 my ($d, $locHeaderOffset) = read_V(); 2796 my $out = Value_V($locHeaderOffset); 2797 my $std_localHeaderOffset = $locHeaderOffset; 2798 2799 if ($locHeaderOffset != MAX32) 2800 { 2801 testPossiblePrefix($locHeaderOffset, ZIP_LOCAL_HDR_SIG); 2802 if ($PREFIX_DELTA) 2803 { 2804 $out .= " [Actual Offset is " . Value_V($locHeaderOffset + $PREFIX_DELTA) . "]" 2805 } 2806 } 2807 2808 out $d, "Local Header Offset", $out; 2809 2810 if ($locHeaderOffset != MAX32) 2811 { 2812 my $commonMessage = "'Local Header Offset' field in '" . Signatures::name($signature) . "' is invalid"; 2813 $locHeaderOffset = checkOffsetValue($locHeaderOffset, $startRecordOffset, 0, $commonMessage, $startRecordOffset + CentralDirectoryEntry::Offset_RelativeOffsetToLocal(), ZIP_LOCAL_HDR_SIG) ; 2814 } 2815 2816 my $filename = ''; 2817 if ($filenameLength) 2818 { 2819 need $filenameLength, Signatures::name($signature), 'Filename'; 2820 2821 myRead(my $raw_filename, $filenameLength); 2822 $cdEntry->filename($raw_filename) ; 2823 $filename = outputFilename($raw_filename, $LanguageEncodingFlag); 2824 $cdEntry->outputFilename($filename); 2825 } 2826 2827 $cdEntry->centralHeaderOffset($cdEntryOffset) ; 2828 $cdEntry->localHeaderOffset($locHeaderOffset) ; 2829 $cdEntry->compressedSize($compressedSize) ; 2830 $cdEntry->uncompressedSize($uncompressedSize) ; 2831 $cdEntry->zip64ExtraPresent(undef) ; #$cdZip64; ### FIX ME 2832 $cdEntry->zip64SizesPresent(undef) ; # $zip64Sizes; ### FIX ME 2833 $cdEntry->extractVersion($extractVer); 2834 $cdEntry->generalPurposeFlags($gpFlag); 2835 $cdEntry->compressedMethod($compressedMethod) ; 2836 $cdEntry->lastModDateTime($lastMod); 2837 $cdEntry->crc32($crc) ; 2838 $cdEntry->inCentralDir(1) ; 2839 2840 $cdEntry->std_localHeaderOffset($std_localHeaderOffset) ; 2841 $cdEntry->std_compressedSize($std_compressedSize) ; 2842 $cdEntry->std_uncompressedSize($std_uncompressedSize) ; 2843 $cdEntry->std_diskNumber($std_disk_start) ; 2844 2845 if ($extraLength) 2846 { 2847 need $extraLength, Signatures::name($signature), 'Extra'; 2848 2849 walkExtra($extraLength, $cdEntry); 2850 } 2851 2852 # $cdEntry->endCentralHeaderOffset($FH->tell() - 1); 2853 2854 # Can only validate for directory after zip64 data is read 2855 validateDirectory($cdEntryOffset, $filename, $extractVer, $made_by, 2856 $cdEntry->compressedSize, $cdEntry->uncompressedSize, $ext_file_attrib); 2857 2858 if ($comment_length) 2859 { 2860 need $comment_length, Signatures::name($signature), 'Comment'; 2861 2862 my $comment ; 2863 myRead($comment, $comment_length); 2864 outputFilename $comment, $LanguageEncodingFlag, "Comment"; 2865 $cdEntry->comment($comment); 2866 } 2867 2868 $cdEntry->offsetStart($cdEntryOffset) ; 2869 $cdEntry->offsetEnd($FH->tell() - 1) ; 2870 2871 $CentralDirectory->addEntry($cdEntry); 2872 2873 return { 'encapsulated' => $cdEntry ? $cdEntry->encapsulated() : 0}; 2874} 2875 2876sub decodeZipVer 2877{ 2878 my $ver = shift ; 2879 2880 return "" 2881 if ! defined $ver; 2882 2883 my $sHi = int($ver /10) ; 2884 my $sLo = $ver % 10 ; 2885 2886 "$sHi.$sLo"; 2887} 2888 2889sub decodeOS 2890{ 2891 my $ver = shift ; 2892 2893 $OS_Lookup{$ver} || "Unknown" ; 2894} 2895 2896sub Zip64EndCentralHeader 2897{ 2898 # Extra ID is 0x0001 2899 2900 # APPNOTE 6.3.10, section 4.3.14, 7.3.3, 7.3.4 & APPENDIX C 2901 2902 # TODO - APPNOTE allows an extensible data sector at end of this record (see APPNOTE 6.3.10, section 4.3.14.4) 2903 # The code below does NOT take this into account. 2904 2905 my $signature = shift ; 2906 my $data = shift ; 2907 my $startRecordOffset = shift ; 2908 2909 print "\n"; 2910 out $data, "ZIP64 END CENTRAL DIR RECORD", Value_V($signature); 2911 2912 need 8, Signatures::name($signature); 2913 2914 my $size = out_Q "Size of record"; 2915 2916 need $size, Signatures::name($signature); 2917 2918 out_C "Created Zip Spec", \&decodeZipVer; 2919 out_C "Created OS", \&decodeOS; 2920 my $extractSpec = out_C "Extract Zip Spec", \&decodeZipVer; 2921 out_C "Extract OS", \&decodeOS; 2922 my $diskNumber = out_V "Number of this disk"; 2923 my $cdDiskNumber = out_V "Central Dir Disk no"; 2924 my $entriesOnThisDisk = out_Q "Entries in this disk"; 2925 my $totalEntries = out_Q "Total Entries"; 2926 my $centralDirSize = out_Q "Size of Central Dir"; 2927 2928 my ($d, $centralDirOffset) = read_Q(); 2929 my $out = Value_Q($centralDirOffset); 2930 testPossiblePrefix($centralDirOffset, ZIP_CENTRAL_HDR_SIG); 2931 2932 $out .= " [Actual Offset is " . Value_Q($centralDirOffset + $PREFIX_DELTA) . "]" 2933 if $PREFIX_DELTA ; 2934 out $d, "Offset to Central dir", $out; 2935 2936 if (! emptyArchive($startRecordOffset, $diskNumber, $cdDiskNumber, $entriesOnThisDisk, $totalEntries, $centralDirSize, $centralDirOffset)) 2937 { 2938 my $commonMessage = "'Offset to Central Directory' field in '" . Signatures::name($signature) . "' is invalid"; 2939 $centralDirOffset = checkOffsetValue($centralDirOffset, $startRecordOffset, $centralDirSize, $commonMessage, $startRecordOffset + 48, ZIP_CENTRAL_HDR_SIG, 0, $extractSpec < 0x3E) ; 2940 } 2941 2942 # Length of 44 means typical version 1 header 2943 return 2944 if $size == 44 ; 2945 2946 my $remaining = $size - 44; 2947 2948 # pkzip sets the extract zip spec to 6.2 (0x3E) to signal a v2 record 2949 # See APPNOTE 6.3.10, section, 7.3.3 2950 2951 if ($extractSpec >= 0x3E) 2952 { 2953 # Version 2 header (see APPNOTE 6.3.7, section 7.3.4, ) 2954 # Can use version 2 header to infer presence of encrypted CD 2955 $CentralDirectory->setPkEncryptedCD(); 2956 2957 2958 # Compression Method 2 bytes Method used to compress the 2959 # Central Directory 2960 # Compressed Size 8 bytes Size of the compressed data 2961 # Original Size 8 bytes Original uncompressed size 2962 # AlgId 2 bytes Encryption algorithm ID 2963 # BitLen 2 bytes Encryption key length 2964 # Flags 2 bytes Encryption flags 2965 # HashID 2 bytes Hash algorithm identifier 2966 # Hash Length 2 bytes Length of hash data 2967 # Hash Data (variable) Hash data 2968 2969 my ($bcm, $compressedMethod) = read_v(); 2970 out $bcm, "Compression Method", compressionMethod($compressedMethod) ; 2971 info $FH->tell() - 2, "Unknown 'Compression Method' ID " . decimalHex0x($compressedMethod, 2) 2972 if ! defined $ZIP_CompressionMethods{$compressedMethod} ; 2973 out_Q "Compressed Size"; 2974 out_Q "Uncompressed Size"; 2975 out_v "AlgId", sub { $AlgIdLookup{ $_[0] } // "Unknown algorithm" } ; 2976 out_v "BitLen"; 2977 out_v "Flags", sub { $FlagsLookup{ $_[0] } // "reserved for certificate processing" } ; 2978 out_v "HashID", sub { $HashIDLookup{ $_[0] } // "Unknown ID" } ; 2979 2980 my $hashLen = out_v "Hash Length "; 2981 outHexdump($hashLen, "Hash Data"); 2982 2983 $remaining -= $hashLen + 28; 2984 } 2985 2986 my $entry = Zip64EndCentralHeaderEntry->new(); 2987 2988 if ($remaining) 2989 { 2990 # Handle 'zip64 extensible data sector' here 2991 # See APPNOTE 6.3.10, section 4.3.14.3, 4.3.14.4 & APPENDIX C 2992 # Not seen a real example of this. Tested with hand crafted files. 2993 walkExtra($remaining, $entry); 2994 } 2995 2996 return {}; 2997} 2998 2999 3000sub Zip64EndCentralLocator 3001{ 3002 # APPNOTE 6.3.10, sec 4.3.15 3003 3004 my $signature = shift ; 3005 my $data = shift ; 3006 my $startRecordOffset = shift ; 3007 3008 print "\n"; 3009 out $data, "ZIP64 END CENTRAL DIR LOCATOR", Value_V($signature); 3010 3011 need 16, Signatures::name($signature); 3012 3013 # my ($nextRecord, $deltaActuallyAvailable) = $HeaderOffsetIndex->checkForOverlap(16); 3014 3015 # if ($deltaActuallyAvailable) 3016 # { 3017 # fatal_truncated_record( 3018 # sprintf("ZIP64 END CENTRAL DIR LOCATOR \@%X truncated", $FH->tell() - 4), 3019 # sprintf("Need 0x%X bytes, have 0x%X available", 16, $deltaActuallyAvailable), 3020 # sprintf("Next Record is %s \@0x%X", $nextRecord->name(), $nextRecord->offset()) 3021 # ) 3022 # } 3023 3024 # TODO - check values for traces of multi-part + crazy offsets 3025 out_V "Central Dir Disk no"; 3026 3027 my ($d, $zip64EndCentralDirOffset) = read_Q(); 3028 my $out = Value_Q($zip64EndCentralDirOffset); 3029 testPossiblePrefix($zip64EndCentralDirOffset, ZIP64_END_CENTRAL_REC_HDR_SIG); 3030 3031 $out .= " [Actual Offset is " . Value_Q($zip64EndCentralDirOffset + $PREFIX_DELTA) . "]" 3032 if $PREFIX_DELTA ; 3033 out $d, "Offset to Zip64 EOCD", $out; 3034 3035 my $totalDisks = out_V "Total no of Disks"; 3036 3037 if ($totalDisks > 0) 3038 { 3039 my $commonMessage = "'Offset to Zip64 End of Central Directory Record' field in '" . Signatures::name($signature) . "' is invalid"; 3040 $zip64EndCentralDirOffset = checkOffsetValue($zip64EndCentralDirOffset, $startRecordOffset, 0, $commonMessage, $FH->tell() - 12, ZIP64_END_CENTRAL_REC_HDR_SIG) ; 3041 } 3042 3043 return {}; 3044} 3045 3046sub needZip64EOCDLocator 3047{ 3048 # zip64 end of central directory field needed if any of the fields 3049 # in the End Central Header record are maxed out 3050 3051 my $diskNumber = shift ; 3052 my $cdDiskNumber = shift ; 3053 my $entriesOnThisDisk = shift ; 3054 my $totalEntries = shift ; 3055 my $centralDirSize = shift ; 3056 my $centralDirOffset = shift ; 3057 3058 return (full16($diskNumber) || # 4.4.19 3059 full16($cdDiskNumber) || # 4.4.20 3060 full16($entriesOnThisDisk) || # 4.4.21 3061 full16($totalEntries) || # 4.4.22 3062 full32($centralDirSize) || # 4.4.23 3063 full32($centralDirOffset) # 4.4.24 3064 ) ; 3065} 3066 3067sub emptyArchive 3068{ 3069 my $offset = shift; 3070 my $diskNumber = shift ; 3071 my $cdDiskNumber = shift ; 3072 my $entriesOnThisDisk = shift ; 3073 my $totalEntries = shift ; 3074 my $centralDirSize = shift ; 3075 my $centralDirOffset = shift ; 3076 3077 return (#$offset == 0 && 3078 $diskNumber == 0 && 3079 $cdDiskNumber == 0 && 3080 $entriesOnThisDisk == 0 && 3081 $totalEntries == 0 && 3082 $centralDirSize == 0 && 3083 $centralDirOffset== 0 3084 ) ; 3085} 3086 3087sub EndCentralHeader 3088{ 3089 # APPNOTE 6.3.10, sec 4.3.16 3090 3091 my $signature = shift ; 3092 my $data = shift ; 3093 my $startRecordOffset = shift ; 3094 3095 print "\n"; 3096 out $data, "END CENTRAL HEADER", Value_V($signature); 3097 3098 need 18, Signatures::name($signature); 3099 3100 # TODO - check values for traces of multi-part + crazy values 3101 my $diskNumber = out_v "Number of this disk"; 3102 my $cdDiskNumber = out_v "Central Dir Disk no"; 3103 my $entriesOnThisDisk = out_v "Entries in this disk"; 3104 my $totalEntries = out_v "Total Entries"; 3105 my $centralDirSize = out_V "Size of Central Dir"; 3106 3107 my ($d, $centralDirOffset) = read_V(); 3108 my $out = Value_V($centralDirOffset); 3109 testPossiblePrefix($centralDirOffset, ZIP_CENTRAL_HDR_SIG); 3110 3111 $out .= " [Actual Offset is " . Value_V($centralDirOffset + $PREFIX_DELTA) . "]" 3112 if $PREFIX_DELTA && $centralDirOffset != MAX32 ; 3113 out $d, "Offset to Central Dir", $out; 3114 3115 my $comment_length = out_v "Comment Length"; 3116 3117 if ($comment_length) 3118 { 3119 my $here = $FH->tell() ; 3120 my $available = $FILELEN - $here ; 3121 if ($available < $comment_length) 3122 { 3123 error $here, 3124 "file truncated while reading 'Comment' field in '" . Signatures::name($signature) . "'", 3125 expectedMessage($comment_length, $available); 3126 $comment_length = $available; 3127 } 3128 3129 if ($comment_length) 3130 { 3131 my $comment ; 3132 myRead($comment, $comment_length); 3133 outputFilename $comment, 0, "Comment"; 3134 } 3135 } 3136 3137 if ( ! Nesting::isNested($startRecordOffset, $FH->tell() -1)) 3138 { 3139 # Not nested 3140 if (! needZip64EOCDLocator($diskNumber, $cdDiskNumber, $entriesOnThisDisk, $totalEntries, $centralDirSize, $centralDirOffset) && 3141 ! emptyArchive($startRecordOffset, $diskNumber, $cdDiskNumber, $entriesOnThisDisk, $totalEntries, $centralDirSize, $centralDirOffset)) 3142 { 3143 my $commonMessage = "'Offset to Central Directory' field in '" . Signatures::name($signature) . "' is invalid"; 3144 $centralDirOffset = checkOffsetValue($centralDirOffset, $startRecordOffset, $centralDirSize, $commonMessage, $startRecordOffset + 16, ZIP_CENTRAL_HDR_SIG) ; 3145 } 3146 } 3147 # else do nothing 3148 3149 return {}; 3150} 3151 3152sub DataDescriptor 3153{ 3154 3155 # Data header record or Spanned archive marker. 3156 # 3157 3158 # ZIP_DATA_HDR_SIG at start of file flags a spanned zip file. 3159 # If it is a true marker, the next four bytes MUST be a ZIP_LOCAL_HDR_SIG 3160 # See APPNOTE 6.3.10, sec 8.5.3, 8.5.4 & 8.5.5 3161 3162 # If not at start of file, assume a Data Header Record 3163 # See APPNOTE 6.3.10, sec 4.3.9 & 4.3.9.3 3164 3165 my $signature = shift ; 3166 my $data = shift ; 3167 my $startRecordOffset = shift ; 3168 3169 my $here = $FH->tell(); 3170 3171 if ($here == 4) 3172 { 3173 # Spanned Archive Marker 3174 out $data, "SPLIT ARCHIVE MULTI-SEGMENT MARKER", Value_V($signature); 3175 return; 3176 3177 # my (undef, $next_sig) = read_V(); 3178 # seekTo(0); 3179 3180 # if ($next_sig == ZIP_LOCAL_HDR_SIG) 3181 # { 3182 # print "\n"; 3183 # out $data, "SPLIT ARCHIVE MULTI-SEGMENT MARKER", Value_V($signature); 3184 # seekTo($here); 3185 # return; 3186 # } 3187 } 3188 3189 my $sigName = Signatures::titleName(ZIP_DATA_HDR_SIG); 3190 3191 print "\n"; 3192 out $data, $sigName, Value_V($signature); 3193 3194 need 24, Signatures::name($signature); 3195 3196 # Ignore header payload if nested (assume 64-bit descriptor) 3197 if (Nesting::isNested( $here - 4, $here - 4 + 24 - 1)) 3198 { 3199 out "", "Skipping Nested Payload"; 3200 return {}; 3201 } 3202 3203 my $compressedSize; 3204 my $uncompressedSize; 3205 3206 my $localEntry = $LocalDirectory->lastStreamedEntryAdded(); 3207 my $centralEntry = $localEntry && $localEntry->getCdEntry ; 3208 3209 if (!$localEntry) 3210 { 3211 # found a Data Descriptor without a local header 3212 out "", "Skipping Data Descriptor", "No matching Local header with streaming bit set"; 3213 error $here - 4, "Orphan '$sigName' found", "No matching Local header with streaming bit set"; 3214 return {}; 3215 } 3216 3217 my $crc = out_V "CRC"; 3218 my $payloadLength = $here - 4 - $localEntry->payloadOffset; 3219 3220 my $deltaToNext = deltaToNextSignature(); 3221 my $cl32 = unpack "V", peekAtOffset($here + 4, 4); 3222 my $cl64 = unpack "Q<", peekAtOffset($here + 4, 8); 3223 3224 # use delta to next header & payload length 3225 # deals with use case where the payload length < 32 bit 3226 # will use a 32-bit value rather than the 64-bit value 3227 3228 # see if delta & payload size match 3229 if ($deltaToNext == 16 && $cl64 == $payloadLength) 3230 { 3231 if (! $localEntry->zip64 && ($centralEntry && ! $centralEntry->zip64)) 3232 { 3233 error $here, "'$sigName': expected 32-bit values, got 64-bit"; 3234 } 3235 3236 $compressedSize = out_Q "Compressed Size" ; 3237 $uncompressedSize = out_Q "Uncompressed Size" ; 3238 } 3239 elsif ($deltaToNext == 8 && $cl32 == $payloadLength) 3240 { 3241 if ($localEntry->zip64) 3242 { 3243 error $here, "'$sigName': expected 64-bit values, got 32-bit"; 3244 } 3245 3246 $compressedSize = out_V "Compressed Size" ; 3247 $uncompressedSize = out_V "Uncompressed Size" ; 3248 } 3249 3250 # Try matching juast payload lengths 3251 elsif ($cl32 == $payloadLength) 3252 { 3253 if ($localEntry->zip64) 3254 { 3255 error $here, "'$sigName': expected 64-bit values, got 32-bit"; 3256 } 3257 3258 $compressedSize = out_V "Compressed Size" ; 3259 $uncompressedSize = out_V "Uncompressed Size" ; 3260 3261 warning $here, "'$sigName': Zip Header not directly after Data Descriptor"; 3262 } 3263 elsif ($cl64 == $payloadLength) 3264 { 3265 if (! $localEntry->zip64 && ($centralEntry && ! $centralEntry->zip64)) 3266 { 3267 error $here, "'$sigName': expected 32-bit values, got 64-bit"; 3268 } 3269 3270 $compressedSize = out_Q "Compressed Size" ; 3271 $uncompressedSize = out_Q "Uncompressed Size" ; 3272 3273 warning $here, "'$sigName': Zip Header not directly after Data Descriptor"; 3274 } 3275 3276 # payloads don't match, so try delta 3277 elsif ($deltaToNext == 16) 3278 { 3279 if (! $localEntry->zip64 && ($centralEntry && ! $centralEntry->zip64)) 3280 { 3281 error $here, "'$sigName': expected 32-bit values, got 64-bit"; 3282 } 3283 3284 $compressedSize = out_Q "Compressed Size" ; 3285 # compressed size is wrong 3286 error $here, "'$sigName': Compressed size" . decimalHex0x($compressedSize) . " doesn't match with payload size " . decimalHex0x($payloadLength); 3287 3288 $uncompressedSize = out_Q "Uncompressed Size" ; 3289 } 3290 elsif ($deltaToNext == 8 ) 3291 { 3292 if ($localEntry->zip64) 3293 { 3294 error $here, "'$sigName': expected 64-bit values, got 32-bit"; 3295 } 3296 3297 $compressedSize = out_V "Compressed Size" ; 3298 # compressed size is wrong 3299 error $here, "'$sigName': Compressed Size " . decimalHex0x($compressedSize) . " doesn't match with payload size " . decimalHex0x($payloadLength); 3300 3301 $uncompressedSize = out_V "Uncompressed Size" ; 3302 } 3303 3304 # no payoad or delta match at all, so likely a false positive or data corruption 3305 else 3306 { 3307 warning $here, "Cannot determine size of Data Descriptor record"; 3308 } 3309 3310 # TODO - neither payload size or delta to next signature match 3311 3312 if ($localEntry) 3313 { 3314 $localEntry->readDataDescriptor(1) ; 3315 $localEntry->crc32($crc) ; 3316 $localEntry->compressedSize($compressedSize) ; 3317 $localEntry->uncompressedSize($uncompressedSize) ; 3318 } 3319 3320 # APPNOTE 6.3.10, sec 4.3.8 3321 my $filename = $localEntry->filename; 3322 warning undef, "Directory '$filename' must not have a payload" 3323 if $filename =~ m#/$# && $uncompressedSize ; 3324 3325 return { 3326 crc => $crc, 3327 compressedSize => $compressedSize, 3328 uncompressedSize => $uncompressedSize, 3329 }; 3330} 3331 3332sub SingleSegmentMarker 3333{ 3334 # ZIP_SINGLE_SEGMENT_MARKER at start of file flags a spanned zip file. 3335 # If this ia a true marker, the next four bytes MUST be a ZIP_LOCAL_HDR_SIG 3336 # See APPNOTE 6.3.10, sec 8.5.3, 8.5.4 & 8.5.5 3337 3338 my $signature = shift ; 3339 my $data = shift ; 3340 my $startRecordOffset = shift ; 3341 3342 my $here = $FH->tell(); 3343 3344 if ($here == 4) 3345 { 3346 my (undef, $next_sig) = read_V(); 3347 if ($next_sig == ZIP_LOCAL_HDR_SIG) 3348 { 3349 print "\n"; 3350 out $data, "SPLIT ARCHIVE SINGLE-SEGMENT MARKER", Value_V($signature); 3351 } 3352 seekTo($here); 3353 } 3354 3355 return {}; 3356} 3357 3358sub ArchiveExtraDataRecord 3359{ 3360 # TODO - not seen an example of this record 3361 3362 # APPNOTE 6.3.10, sec 4.3.11 3363 3364 my $signature = shift ; 3365 my $data = shift ; 3366 my $startRecordOffset = shift ; 3367 3368 out $data, "ARCHIVE EXTRA DATA RECORD", Value_V($signature); 3369 3370 need 2, Signatures::name($signature); 3371 3372 my $size = out_v "Size of record"; 3373 3374 need $size, Signatures::name($signature); 3375 3376 outHexdump($size, "Field data", 1); 3377 3378 return {}; 3379} 3380 3381sub DigitalSignature 3382{ 3383 my $signature = shift ; 3384 my $data = shift ; 3385 my $startRecordOffset = shift ; 3386 3387 print "\n"; 3388 out $data, "DIGITAL SIGNATURE RECORD", Value_V($signature); 3389 3390 need 2, Signatures::name($signature); 3391 my $Size = out_v "Size of record"; 3392 3393 need $Size, Signatures::name($signature); 3394 3395 3396 myRead(my $payload, $Size); 3397 out $payload, "Signature", hexDump16($payload); 3398 3399 return {}; 3400} 3401 3402sub GeneralPurposeBits 3403{ 3404 my $method = shift; 3405 my $gp = shift; 3406 3407 out1 "[Bit 0]", "1 'Encryption'" if $gp & ZIP_GP_FLAG_ENCRYPTED_MASK; 3408 3409 my %lookup = ( 3410 0 => "Normal Compression", 3411 1 => "Maximum Compression", 3412 2 => "Fast Compression", 3413 3 => "Super Fast Compression"); 3414 3415 3416 if ($method == ZIP_CM_DEFLATE) 3417 { 3418 my $mid = ($gp >> 1) & 0x03 ; 3419 3420 out1 "[Bits 1-2]", "$mid '$lookup{$mid}'"; 3421 } 3422 3423 if ($method == ZIP_CM_LZMA) 3424 { 3425 if ($gp & ZIP_GP_FLAG_LZMA_EOS_PRESENT) { 3426 out1 "[Bit 1]", "1 'LZMA EOS Marker Present'" ; 3427 } 3428 else { 3429 out1 "[Bit 1]", "0 'LZMA EOS Marker Not Present'" ; 3430 } 3431 } 3432 3433 if ($method == ZIP_CM_IMPLODE) # Imploding 3434 { 3435 out1 "[Bit 1]", ($gp & (1 << 1) ? "1 '8k" : "0 '4k") . " Sliding Dictionary'" ; 3436 out1 "[Bit 2]", ($gp & (2 << 1) ? "1 '3" : "0 '2" ) . " Shannon-Fano Trees'" ; 3437 } 3438 3439 out1 "[Bit 3]", "1 'Streamed'" if $gp & ZIP_GP_FLAG_STREAMING_MASK; 3440 out1 "[Bit 4]", "1 'Enhanced Deflating'" if $gp & 1 << 4; 3441 out1 "[Bit 5]", "1 'Compressed Patched'" if $gp & ZIP_GP_FLAG_PATCHED_MASK ; 3442 out1 "[Bit 6]", "1 'Strong Encryption'" if $gp & ZIP_GP_FLAG_STRONG_ENCRYPTED_MASK; 3443 out1 "[Bit 11]", "1 'Language Encoding'" if $gp & ZIP_GP_FLAG_LANGUAGE_ENCODING; 3444 out1 "[Bit 12]", "1 'Pkware Enhanced Compression'" if $gp & ZIP_GP_FLAG_PKWARE_ENHANCED_COMP ; 3445 out1 "[Bit 13]", "1 'Encrypted Central Dir'" if $gp & ZIP_GP_FLAG_ENCRYPTED_CD ; 3446 3447 return (); 3448} 3449 3450 3451sub seekSet 3452{ 3453 my $fh = $_[0] ; 3454 my $size = $_[1]; 3455 3456 use Fcntl qw(SEEK_SET); 3457 seek($fh, $size, SEEK_SET); 3458 3459} 3460 3461sub skip 3462{ 3463 my $fh = $_[0] ; 3464 my $size = $_[1]; 3465 3466 use Fcntl qw(SEEK_CUR); 3467 seek($fh, $size, SEEK_CUR); 3468 3469} 3470 3471 3472sub myRead 3473{ 3474 my $got = \$_[0] ; 3475 my $size = $_[1]; 3476 3477 my $wantSize = $size; 3478 $$got = ''; 3479 3480 if ($size == 0) 3481 { 3482 return ; 3483 } 3484 3485 if ($size > 0) 3486 { 3487 my $buff ; 3488 my $status = $FH->read($buff, $size); 3489 return $status 3490 if $status < 0; 3491 $$got .= $buff ; 3492 } 3493 3494 my $len = length $$got; 3495 # fatal undef, "Truncated file (got $len, wanted $wantSize): $!" 3496 fatal undef, "Unexpected zip file truncation", 3497 expectedMessage($wantSize, $len) 3498 if length $$got != $wantSize; 3499} 3500 3501sub expectedMessage 3502{ 3503 my $expected = shift; 3504 my $got = shift; 3505 return "Expected " . decimalHex0x($expected) . " bytes, but only " . decimalHex0x($got) . " available" 3506} 3507 3508sub need 3509{ 3510 my $byteCount = shift ; 3511 my $message = shift ; 3512 my $field = shift // ''; 3513 3514 # return $FILELEN - $FH->tell() >= $byteCount; 3515 my $here = $FH->tell() ; 3516 my $available = $FILELEN - $here ; 3517 if ($available < $byteCount) 3518 { 3519 my @message ; 3520 3521 if ($field) 3522 { 3523 push @message, "Unexpected zip file truncation while reading '$field' field in '$message'"; 3524 } 3525 else 3526 { 3527 push @message, "Unexpected zip file truncation while reading '$message'"; 3528 } 3529 3530 3531 push @message, expectedMessage($byteCount, $available); 3532 # push @message, sprintf("Expected 0x%X bytes, but only 0x%X available", $byteCount, $available); 3533 push @message, "Try running with --walk' or '--scan' options" 3534 if ! $opt_scan && ! $opt_walk ; 3535 3536 fatal $here, @message; 3537 } 3538} 3539 3540sub testPossiblePrefix 3541{ 3542 my $offset = shift; 3543 my $expectedSignature = shift ; 3544 3545 if (testPossiblePrefixNoPREFIX_DELTA($offset, $expectedSignature)) 3546 { 3547 $PREFIX_DELTA = $POSSIBLE_PREFIX_DELTA; 3548 $POSSIBLE_PREFIX_DELTA = 0; 3549 3550 reportPrefixData(); 3551 3552 return 1 3553 } 3554 3555 return 0 3556} 3557 3558sub testPossiblePrefixNoPREFIX_DELTA 3559{ 3560 my $offset = shift; 3561 my $expectedSignature = shift ; 3562 3563 return 0 3564 if $offset + 4 > $FILELEN || ! $POSSIBLE_PREFIX_DELTA || $PREFIX_DELTA; 3565 3566 my $currentOFFSET = $OFFSET; 3567 my $gotSig = readSignatureFromOffset($offset); 3568 3569 if ($gotSig == $expectedSignature) 3570 { 3571 # do have possible prefix data, but the offset is correct 3572 $POSSIBLE_PREFIX_DELTA = $PREFIX_DELTA = 0; 3573 $OFFSET = $currentOFFSET; 3574 3575 return 0; 3576 } 3577 3578 $gotSig = readSignatureFromOffset($offset + $POSSIBLE_PREFIX_DELTA); 3579 3580 $OFFSET = $currentOFFSET; 3581 3582 return ($gotSig == $expectedSignature) ; 3583} 3584 3585sub offsetIsValid 3586{ 3587 my $offset = shift; 3588 my $headerStart = shift; 3589 my $centralDirSize = shift; 3590 my $commonMessage = shift ; 3591 my $expectedSignature = shift ; 3592 my $dereferencePointer = shift; 3593 3594 my $must_point_back = 1; 3595 3596 my $delta = $offset - $FILELEN + 1 ; 3597 3598 $offset += $PREFIX_DELTA 3599 if $PREFIX_DELTA ; 3600 3601 return sprintf("value %s is %s bytes past EOF", decimalHex0x($offset), decimalHex0x($delta)) 3602 if $delta > 0 ; 3603 3604 return sprintf "value %s must be less that %s", decimalHex0x($offset), decimalHex0x($headerStart) 3605 if $must_point_back && $offset >= $headerStart; 3606 3607 if ($dereferencePointer) 3608 { 3609 my $actual = $headerStart - $centralDirSize; 3610 my $cdSizeOK = ($actual == $offset); 3611 my $possibleDelta = $actual - $offset; 3612 3613 if ($centralDirSize && ! $cdSizeOK && $possibleDelta > 0 && readSignatureFromOffset($possibleDelta) == ZIP_LOCAL_HDR_SIG) 3614 { 3615 # If testing end of central dir, check if the location of the first CD header 3616 # is consistent with the central dir size. 3617 # Common use case is a SFX zip file 3618 3619 my $gotSig = readSignatureFromOffset($actual); 3620 my $v = hexValue32($gotSig); 3621 return 'value @ ' . hexValue($actual) . " should decode to signature for " . Signatures::nameAndHex($expectedSignature) . ". Got $v" # . hexValue32($gotSig) 3622 if $gotSig != $expectedSignature ; 3623 3624 $PREFIX_DELTA = $possibleDelta; 3625 reportPrefixData(); 3626 3627 return undef; 3628 } 3629 else 3630 { 3631 my $gotSig = readSignatureFromOffset($offset); 3632 my $v = hexValue32($gotSig); 3633 return 'value @ ' . hexValue($offset) . " should decode to signature for " . Signatures::nameAndHex($expectedSignature) . ". Got $v" # . hexValue32($gotSig) 3634 if $gotSig != $expectedSignature ; 3635 } 3636 } 3637 3638 return undef ; 3639} 3640 3641sub checkOffsetValue 3642{ 3643 my $offset = shift; 3644 my $headerStart = shift; 3645 my $centralDirSize = shift; 3646 my $commonMessage = shift ; 3647 my $messageOffset = shift; 3648 my $expectedSignature = shift ; 3649 my $fatal = shift // 0; 3650 my $dereferencePointer = shift // 1; 3651 3652 my $keepOFFSET = $OFFSET ; 3653 3654 my $message = offsetIsValid($offset, $headerStart, $centralDirSize, $commonMessage, $expectedSignature, $dereferencePointer); 3655 if ($message) 3656 { 3657 fatal_tryWalk($messageOffset, $commonMessage, $message) 3658 if $fatal; 3659 3660 error $messageOffset, $commonMessage, $message 3661 if ! $fatal; 3662 } 3663 3664 $OFFSET = $keepOFFSET; 3665 3666 return $offset + $PREFIX_DELTA; 3667 3668} 3669 3670sub fatal_tryWalk 3671{ 3672 my $offset = shift ; 3673 my $message = shift; 3674 3675 fatal($offset, $message, @_, "Try running with --walk' or '--scan' options"); 3676} 3677 3678sub fatal 3679{ 3680 my $offset = shift ; 3681 my $message = shift; 3682 3683 return if $fatalDisabled; 3684 3685 if (defined $offset) 3686 { 3687 warn "#\n# FATAL: Offset " . hexValue($offset) . ": $message\n"; 3688 } 3689 else 3690 { 3691 warn "#\n# FATAL: $message\n"; 3692 } 3693 3694 warn "# $_ . \n" 3695 for @_; 3696 warn "#\n" ; 3697 3698 exit 1; 3699} 3700 3701sub disableFatal 3702{ 3703 $fatalDisabled = 1 ; 3704} 3705 3706sub enableFatal 3707{ 3708 $fatalDisabled = 0 ; 3709} 3710 3711sub topLevelFatal 3712{ 3713 my $message = shift ; 3714 3715 no warnings 'utf8'; 3716 3717 warn "FATAL: $message\n"; 3718 3719 warn "$_ . \n" 3720 for @_; 3721 3722 exit 1; 3723} 3724 3725sub internalFatal 3726{ 3727 my $offset = shift ; 3728 my $message = shift; 3729 3730 no warnings 'utf8'; 3731 3732 if (defined $offset) 3733 { 3734 warn "# FATAL: Offset " . hexValue($offset) . ": Internal Error: $message\n"; 3735 } 3736 else 3737 { 3738 warn "# FATAL: Internal Error: $message\n"; 3739 } 3740 3741 warn "# $_ \n" 3742 for @_; 3743 3744 warn "# Please report error at https://github.com/pmqs/zipdetails/issues\n"; 3745 exit 1; 3746} 3747 3748sub warning 3749{ 3750 my $offset = shift ; 3751 my $message = shift; 3752 3753 no warnings 'utf8'; 3754 3755 return 3756 unless $opt_want_warning_mesages ; 3757 3758 say "#" 3759 unless $lastWasMessage ++ ; 3760 3761 if (defined $offset) 3762 { 3763 say "# WARNING: Offset " . hexValue($offset) . ": $message"; 3764 } 3765 else 3766 { 3767 say "# WARNING: $message"; 3768 } 3769 3770 3771 say "# $_" for @_ ; 3772 say "#"; 3773 ++ $WarningCount ; 3774 3775 $exit_status_code |= 2 3776 if $opt_want_message_exit_status ; 3777} 3778 3779sub error 3780{ 3781 my $offset = shift ; 3782 my $message = shift; 3783 3784 no warnings 'utf8'; 3785 3786 return 3787 unless $opt_want_error_mesages ; 3788 3789 say "#" 3790 unless $lastWasMessage ++ ; 3791 3792 if (defined $offset) 3793 { 3794 say "# ERROR: Offset " . hexValue($offset) . ": $message"; 3795 } 3796 else 3797 { 3798 say "# ERROR: $message"; 3799 } 3800 3801 3802 say "# $_" for @_ ; 3803 say "#"; 3804 3805 ++ $ErrorCount ; 3806 3807 $exit_status_code |= 4 3808 if $opt_want_message_exit_status ; 3809} 3810 3811sub debug 3812{ 3813 my $offset = shift ; 3814 my $message = shift; 3815 3816 no warnings 'utf8'; 3817 3818 say "#" 3819 unless $lastWasMessage ++ ; 3820 3821 if (defined $offset) 3822 { 3823 say "# DEBUG: Offset " . hexValue($offset) . ": $message"; 3824 } 3825 else 3826 { 3827 say "# DEBUG: $message"; 3828 } 3829 3830 3831 say "# $_" for @_ ; 3832 say "#"; 3833} 3834 3835sub internalError 3836{ 3837 my $message = shift; 3838 3839 no warnings 'utf8'; 3840 3841 say "#"; 3842 say "# ERROR: $message"; 3843 say "# $_" for @_ ; 3844 say "# Please report error at https://github.com/pmqs/zipdetails/issues"; 3845 say "#"; 3846 3847 ++ $ErrorCount ; 3848} 3849 3850sub reportPrefixData 3851{ 3852 my $delta = shift // $PREFIX_DELTA ; 3853 state $reported = 0; 3854 return if $reported || $delta == 0; 3855 3856 info 0, "found " . decimalHex0x($delta) . " bytes before beginning of zipfile" ; 3857 $reported = 1; 3858} 3859 3860sub info 3861{ 3862 my $offset = shift; 3863 my $message = shift; 3864 3865 no warnings 'utf8'; 3866 3867 return 3868 unless $opt_want_info_mesages ; 3869 3870 say "#" 3871 unless $lastWasMessage ++ ; 3872 3873 if (defined $offset) 3874 { 3875 say "# INFO: Offset " . hexValue($offset) . ": $message"; 3876 } 3877 else 3878 { 3879 say "# INFO: $message"; 3880 } 3881 3882 say "# $_" for @_ ; 3883 say "#"; 3884 3885 ++ $InfoCount ; 3886 3887 $exit_status_code |= 1 3888 if $opt_want_message_exit_status ; 3889} 3890 3891sub walkExtra 3892{ 3893 # APPNOTE 6.3.10, sec 4.4.11, 4.4.28, 4.5 3894 my $XLEN = shift; 3895 my $entry = shift; 3896 3897 # Caller has determined that there are $XLEN bytes available to read 3898 3899 my $buff ; 3900 my $offset = 0 ; 3901 3902 my $id; 3903 my $subLen; 3904 my $payload ; 3905 3906 my $count = 0 ; 3907 my $endExtraOffset = $FH->tell() + $XLEN ; 3908 3909 while ($offset < $XLEN) { 3910 3911 ++ $count; 3912 3913 # Detect if there is not enough data for an extra ID and length. 3914 # Android zipalign and zipflinger are prime candidates for these 3915 # non-standard extra sub-fields. 3916 my $remaining = $XLEN - $offset; 3917 if ($remaining < ZIP_EXTRA_SUBFIELD_HEADER_SIZE) { 3918 # There is not enough left. 3919 # Consume whatever is there and return so parsing 3920 # can continue. 3921 3922 myRead($payload, $remaining); 3923 my $data = hexDump($payload); 3924 3925 if ($payload =~ /^\x00+$/) 3926 { 3927 # All nulls 3928 out $payload, "Null Padding in Extra"; 3929 info $FH->tell() - length($payload), decimalHex0x(length $payload) . " Null Padding Bytes in Extra Field" ; 3930 } 3931 else 3932 { 3933 out $payload, "Extra Data", $data; 3934 error $FH->tell() - length($payload), "'Extra Data' Malformed"; 3935 } 3936 3937 return undef; 3938 } 3939 3940 myRead($id, ZIP_EXTRA_SUBFIELD_ID_SIZE); 3941 $offset += ZIP_EXTRA_SUBFIELD_ID_SIZE; 3942 my $lookID = unpack "v", $id ; 3943 if ($lookID == 0) 3944 { 3945 # check for null padding at end of extra 3946 my $here = $FH->tell(); 3947 my $rest; 3948 myRead($rest, $XLEN - $offset); 3949 if ($rest =~ /^\x00+$/) 3950 { 3951 my $len = length ($id . $rest) ; 3952 out $id . $rest, "Null Padding in Extra"; 3953 info $FH->tell() - $len, decimalHex0x($len) . " Null Padding Bytes in Extra Field"; 3954 return undef; 3955 } 3956 3957 seekTo($here); 3958 } 3959 3960 my ($who, $decoder, $local_min, $local_max, $central_min, $central_max) = @{ $Extras{$lookID} // ['', undef, undef, undef, undef, undef ] }; 3961 3962 my $idString = Value_v($lookID) ; 3963 $idString .= " '$who'" 3964 if $who; 3965 3966 out $id, "Extra ID #$count", $idString ; 3967 info $FH->tell() - 2, "Unknown Extra ID $idString" 3968 if ! exists $Extras{$lookID} ; 3969 3970 myRead($buff, ZIP_EXTRA_SUBFIELD_LEN_SIZE); 3971 $offset += ZIP_EXTRA_SUBFIELD_LEN_SIZE; 3972 3973 $subLen = unpack("v", $buff); 3974 out2 $buff, "Length", Value_v($subLen) ; 3975 3976 $remaining = $XLEN - $offset; 3977 if ($subLen > $remaining ) 3978 { 3979 error $FH->tell() -2, 3980 extraFieldIdentifier($lookID) . ": 'Length' field invalid", 3981 sprintf("value %s > %s bytes remaining", decimalHex0x($subLen), decimalHex0x($remaining)); 3982 outSomeData $remaining, " Extra Payload"; 3983 return undef; 3984 } 3985 3986 if (! defined $decoder) 3987 { 3988 if ($subLen) 3989 { 3990 myRead($payload, $subLen); 3991 my $data = hexDump16($payload); 3992 3993 out2 $payload, "Extra Payload", $data; 3994 } 3995 } 3996 else 3997 { 3998 if (testExtraLimits($lookID, $subLen, $entry->inCentralDir)) 3999 { 4000 my $endExtraOffset = $FH->tell() + $subLen; 4001 $decoder->($lookID, $subLen, $entry) ; 4002 4003 # Belt & Braces - should now be at $endExtraOffset 4004 # error here means issue in an extra handler 4005 # should noy happen, but just in case 4006 # TODO -- need tests for this 4007 my $here = $FH->tell() ; 4008 if ($here > $endExtraOffset) 4009 { 4010 # gone too far, so need to bomb out now 4011 internalFatal $here, "Overflow processing " . extraFieldIdentifier($lookID) . ".", 4012 sprintf("Should be at offset %s, actually at %s", decimalHex0x($endExtraOffset), decimalHex0x($here)); 4013 } 4014 elsif ($here < $endExtraOffset) 4015 { 4016 # not gone far enough, can recover 4017 error $here, 4018 sprintf("Expected to be at offset %s after processing %s, actually at %s", decimalHex0x($endExtraOffset), extraFieldIdentifier($lookID), decimalHex0x($here)), 4019 "Skipping " . decimalHex0x($endExtraOffset - $here) . " bytes"; 4020 outSomeData $endExtraOffset - $here, " Extra Data"; 4021 } 4022 } 4023 } 4024 4025 $offset += $subLen ; 4026 } 4027 4028 return undef ; 4029} 4030 4031sub testExtraLimits 4032{ 4033 my $lookID = shift; 4034 my $size = shift; 4035 my $inCentralDir = shift; 4036 4037 my ($who, undef, $local_min, $local_max, $central_min, $central_max) = @{ $Extras{$lookID} // ['', undef, undef, undef, undef, undef ] }; 4038 4039 my ($min, $max) = $inCentralDir 4040 ? ($central_min, $central_max) 4041 : ($local_min, $local_max) ; 4042 4043 return 1 4044 if ! defined $min && ! defined $max ; 4045 4046 if (defined $min && defined $max) 4047 { 4048 # both the same 4049 if ($min == $max) 4050 { 4051 if ($size != $min) 4052 { 4053 error $FH->tell() -2, sprintf "%s: 'Length' field invalid: expected %s, got %s", extraFieldIdentifier($lookID), decimalHex0x($min), decimalHex0x($size); 4054 outSomeData $size, " Extra Payload" if $size; 4055 return 0; 4056 } 4057 } 4058 else # min != max 4059 { 4060 if ($size < $min || $size > $max) 4061 { 4062 error $FH->tell() -2, sprintf "%s: 'Length' field invalid: value must be betweem %s and %s, got %s", extraFieldIdentifier($lookID), decimalHex0x($min), decimalHex0x($max), decimalHex0x($size); 4063 outSomeData $size, " Extra Payload" if $size ; 4064 return 0; 4065 } 4066 } 4067 4068 } 4069 else # must be defined $min & undefined max 4070 { 4071 if ($size < $min) 4072 { 4073 error $FH->tell() -2, sprintf "%s: 'Length' field invalid: value must be at least %s, got %s", extraFieldIdentifier($lookID), decimalHex0x($min), decimalHex0x($size); 4074 outSomeData $size, " Extra Payload" if $size; 4075 return 0; 4076 } 4077 } 4078 4079 return 1; 4080 4081} 4082 4083sub full32 4084{ 4085 return ($_[0] // 0) == MAX32 ; 4086} 4087 4088sub full16 4089{ 4090 return ($_[0] // 0) == MAX16 ; 4091} 4092 4093sub decode_Zip64 4094{ 4095 my $extraID = shift ; 4096 my $len = shift; 4097 my $entry = shift; 4098 4099 myRead(my $payload, $len); 4100 if ($entry->inCentralDir() ) 4101 { 4102 walk_Zip64_in_CD($extraID, $payload, $entry, 1) ; 4103 } 4104 else 4105 { 4106 walk_Zip64_in_LD($extraID, $payload, $entry, 1) ; 4107 4108 } 4109} 4110 4111sub walk_Zip64_in_LD 4112{ 4113 my $extraID = shift ; 4114 my $zip64Extended = shift; 4115 my $entry = shift; 4116 my $display = shift // 1 ; 4117 4118 my $fieldStart = $FH->tell() - length $zip64Extended; 4119 my $fieldOffset = $fieldStart ; 4120 4121 $ZIP64 = 1; 4122 $entry->zip64(1); 4123 4124 if (length $zip64Extended == 0) 4125 { 4126 info $fieldOffset, extraFieldIdentifier($extraID) . ": Length is Zero"; 4127 return; 4128 } 4129 4130 my $assumeLengthsPresent = (length($zip64Extended) == 16) ; 4131 my $assumeAllFieldsPresent = (length($zip64Extended) == 28) ; 4132 4133 if ($assumeLengthsPresent || $assumeAllFieldsPresent || full32 $entry->std_uncompressedSize ) 4134 { 4135 # TODO defer a warning if in local header & central/local don't have std_uncompressedSizeset to 0xffffffff 4136 if (length $zip64Extended < 8) 4137 { 4138 my $message = extraFieldIdentifier($extraID) . ": Expected " . decimalHex0x(8) . " bytes for 'Uncompressed Size': only " . decimalHex0x(length $zip64Extended) . " bytes present"; 4139 error $fieldOffset, $message; 4140 out2 $zip64Extended, $message; 4141 return; 4142 } 4143 4144 $fieldOffset += 8; 4145 my $data = substr($zip64Extended, 0, 8, "") ; 4146 $entry->uncompressedSize(unpack "Q<", $data); 4147 out2 $data, "Uncompressed Size", Value_Q($entry->uncompressedSize) 4148 if $display; 4149 } 4150 4151 if ($assumeLengthsPresent || $assumeAllFieldsPresent || full32 $entry->std_compressedSize) 4152 { 4153 if (length $zip64Extended < 8) 4154 { 4155 my $message = extraFieldIdentifier($extraID) . ": Expected " . decimalHex0x(8) . " bytes for 'Compressed Size': only " . decimalHex0x(length $zip64Extended) . " bytes present"; 4156 error $fieldOffset, $message; 4157 out2 $zip64Extended, $message; 4158 return; 4159 } 4160 4161 $fieldOffset += 8; 4162 4163 my $data = substr($zip64Extended, 0, 8, "") ; 4164 $entry->compressedSize( unpack "Q<", $data); 4165 out2 $data, "Compressed Size", Value_Q($entry->compressedSize) 4166 if $display; 4167 } 4168 4169 # Zip64 in local header should not have localHeaderOffset or disk number 4170 # but some zip files do 4171 4172 if ($assumeAllFieldsPresent) 4173 { 4174 $fieldOffset += 8; 4175 4176 my $data = substr($zip64Extended, 0, 8, "") ; 4177 my $localHeaderOffset = unpack "Q<", $data; 4178 out2 $data, "Offset to Local Dir", Value_Q($localHeaderOffset) 4179 if $display; 4180 } 4181 4182 if ($assumeAllFieldsPresent) 4183 { 4184 $fieldOffset += 4; 4185 4186 my $data = substr($zip64Extended, 0, 4, "") ; 4187 my $diskNumber = unpack "v", $data; 4188 out2 $data, "Disk Number", Value_V($diskNumber) 4189 if $display; 4190 } 4191 4192 if (length $zip64Extended) 4193 { 4194 if ($display) 4195 { 4196 out2 $zip64Extended, "Unexpected Data", hexDump16 $zip64Extended ; 4197 info $fieldOffset, extraFieldIdentifier($extraID) . ": Unexpected Data: " . decimalHex0x(length $zip64Extended) . " bytes"; 4198 } 4199 } 4200 4201} 4202 4203sub walk_Zip64_in_CD 4204{ 4205 my $extraID = shift ; 4206 my $zip64Extended = shift; 4207 my $entry = shift; 4208 my $display = shift // 1 ; 4209 4210 my $fieldStart = $FH->tell() - length $zip64Extended; 4211 my $fieldOffset = $fieldStart ; 4212 4213 $ZIP64 = 1; 4214 $entry->zip64(1); 4215 4216 if (length $zip64Extended == 0) 4217 { 4218 info $fieldOffset, extraFieldIdentifier($extraID) . ": Length is Zero"; 4219 return; 4220 } 4221 4222 my $assumeAllFieldsPresent = (length($zip64Extended) == 28) ; 4223 4224 if ($assumeAllFieldsPresent || full32 $entry->std_uncompressedSize ) 4225 { 4226 if (length $zip64Extended < 8) 4227 { 4228 my $message = extraFieldIdentifier($extraID) . ": Expected " . decimalHex0x(8) . " bytes for 'Uncompressed Size': only " . decimalHex0x(length $zip64Extended) . " bytes present"; 4229 error $fieldOffset, $message; 4230 out2 $zip64Extended, $message; 4231 return; 4232 } 4233 4234 $fieldOffset += 8; 4235 my $data = substr($zip64Extended, 0, 8, "") ; 4236 $entry->uncompressedSize(unpack "Q<", $data); 4237 out2 $data, "Uncompressed Size", Value_Q($entry->uncompressedSize) 4238 if $display; 4239 } 4240 4241 if ($assumeAllFieldsPresent || full32 $entry->std_compressedSize) 4242 { 4243 if (length $zip64Extended < 8) 4244 { 4245 my $message = extraFieldIdentifier($extraID) . ": Expected " . decimalHex0x(8) . " bytes for 'Compressed Size': only " . decimalHex0x(length $zip64Extended) . " bytes present"; 4246 error $fieldOffset, $message; 4247 out2 $zip64Extended, $message; 4248 return; 4249 } 4250 4251 $fieldOffset += 8; 4252 4253 my $data = substr($zip64Extended, 0, 8, "") ; 4254 $entry->compressedSize(unpack "Q<", $data); 4255 out2 $data, "Compressed Size", Value_Q($entry->compressedSize) 4256 if $display; 4257 } 4258 4259 if ($assumeAllFieldsPresent || full32 $entry->std_localHeaderOffset) 4260 { 4261 if (length $zip64Extended < 8) 4262 { 4263 my $message = extraFieldIdentifier($extraID) . ": Expected " . decimalHex0x(8) . " bytes for 'Offset to Local Dir': only " . decimalHex0x(length $zip64Extended) . " bytes present"; 4264 error $fieldOffset, $message; 4265 out2 $zip64Extended, $message; 4266 return; 4267 } 4268 4269 $fieldOffset += 8; 4270 4271 my $here = $FH->tell(); 4272 my $data = substr($zip64Extended, 0, 8, "") ; 4273 $entry->localHeaderOffset(unpack "Q<", $data); 4274 out2 $data, "Offset to Local Dir", Value_Q($entry->localHeaderOffset) 4275 if $display; 4276 4277 my $commonMessage = "'Offset to Local Dir' field in 'Zip64 Extra Field' is invalid"; 4278 $entry->localHeaderOffset(checkOffsetValue($entry->localHeaderOffset, $fieldStart, 0, $commonMessage, $fieldStart, ZIP_LOCAL_HDR_SIG, 0) ); 4279 } 4280 4281 if ($assumeAllFieldsPresent || full16 $entry->std_diskNumber) 4282 { 4283 if (length $zip64Extended < 4) 4284 { 4285 my $message = extraFieldIdentifier($extraID) . ": Expected " . decimalHex0x(4) . " bytes for 'Disk Number': only " . decimalHex0x(length $zip64Extended) . " bytes present"; 4286 error $fieldOffset, $message; 4287 out2 $zip64Extended, $message; 4288 return; 4289 } 4290 4291 $fieldOffset += 4; 4292 4293 my $here = $FH->tell(); 4294 my $data = substr($zip64Extended, 0, 4, "") ; 4295 $entry->diskNumber(unpack "v", $data); 4296 out2 $data, "Disk Number", Value_V($entry->diskNumber) 4297 if $display; 4298 $entry->zip64_diskNumberPresent(1); 4299 } 4300 4301 if (length $zip64Extended) 4302 { 4303 if ($display) 4304 { 4305 out2 $zip64Extended, "Unexpected Data", hexDump16 $zip64Extended ; 4306 info $fieldOffset, extraFieldIdentifier($extraID) . ": Unexpected Data: " . decimalHex0x(length $zip64Extended) . " bytes"; 4307 } 4308 } 4309} 4310 4311sub Ntfs2Unix 4312{ 4313 my $m = shift; 4314 my $v = shift; 4315 4316 # NTFS offset is 19DB1DED53E8000 4317 4318 my $hex = Value_Q($v) ; 4319 4320 # Treat empty value as special case 4321 # Could decode to 1 Jan 1601 4322 return "$hex 'No Date/Time'" 4323 if $v == 0; 4324 4325 $v -= 0x19DB1DED53E8000 ; 4326 my $ns = ($v % 10000000) * 100; 4327 my $elapse = int ($v/10000000); 4328 return "$hex '" . getT($elapse) . 4329 " " . sprintf("%0dns'", $ns); 4330} 4331 4332sub decode_NTFS_Filetimes 4333{ 4334 my $extraID = shift ; 4335 my $len = shift; 4336 my $entry = shift; 4337 4338 out_V " Reserved"; 4339 out_v " Tag1"; 4340 out_v " Size1" ; 4341 4342 my ($m, $s1) = read_Q; 4343 out $m, " Mtime", Ntfs2Unix($m, $s1); 4344 4345 my ($a, $s3) = read_Q; 4346 out $a, " Atime", Ntfs2Unix($a, $s3); 4347 4348 my ($c, $s2) = read_Q; 4349 out $c, " Ctime", Ntfs2Unix($c, $s2); 4350} 4351 4352sub OpenVMS_DateTime 4353{ 4354 my $ix = shift; 4355 my $tag = shift; 4356 my $size = shift; 4357 4358 # VMS epoch is 17 Nov 1858 4359 # Offset to Unix Epoch is -0x7C95674C3DA5C0 (-35067168005400000) 4360 4361 my ($data, $value) = read_Q(); 4362 4363 my $datetime = "No Date Time'"; 4364 if ($value != 0) 4365 { 4366 my $v = $value - 0x007C95674C3DA5C0 ; 4367 my $ns = ($v % 10000000) * 100 ; 4368 my $seconds = int($v / 10000000) ; 4369 $datetime = getT($seconds) . 4370 " " . sprintf("%0dns'", $ns); 4371 } 4372 4373 out2 $data, " Attribute", Value_Q($value) . " '$datetime"; 4374} 4375 4376sub OpenVMS_DumpBytes 4377{ 4378 my $ix = shift; 4379 my $tag = shift; 4380 my $size = shift; 4381 4382 myRead(my $data, $size); 4383 4384 out($data, " Attribute", hexDump16($data)); 4385 4386} 4387 4388sub OpenVMS_4ByteValue 4389{ 4390 my $ix = shift; 4391 my $tag = shift; 4392 my $size = shift; 4393 4394 my ($data, $value) = read_V(); 4395 4396 out2 $data, " Attribute", Value_V($value); 4397} 4398 4399sub OpenVMS_UCHAR 4400{ 4401 my $ix = shift; 4402 my $tag = shift; 4403 my $size = shift; 4404 4405 state $FCH = { 4406 0 => 'FCH$M_WASCONTIG', 4407 1 => 'FCH$M_NOBACKUP', 4408 2 => 'FCH$M_WRITEBACK', 4409 3 => 'FCH$M_READCHECK', 4410 4 => 'FCH$M_WRITCHECK', 4411 5 => 'FCH$M_CONTIGB', 4412 6 => 'FCH$M_LOCKED', 4413 6 => 'FCH$M_CONTIG', 4414 11 => 'FCH$M_BADACL', 4415 12 => 'FCH$M_SPOOL', 4416 13 => 'FCH$M_DIRECTORY', 4417 14 => 'FCH$M_BADBLOCK', 4418 15 => 'FCH$M_MARKDEL', 4419 16 => 'FCH$M_NOCHARGE', 4420 17 => 'FCH$M_ERASE', 4421 18 => 'FCH$M_SHELVED', 4422 20 => 'FCH$M_SCRATCH', 4423 21 => 'FCH$M_NOMOVE', 4424 22 => 'FCH$M_NOSHELVABLE', 4425 } ; 4426 4427 my ($data, $value) = read_V(); 4428 4429 out2 $data, " Attribute", Value_V($value); 4430 4431 for my $bit ( sort { $a <=> $b } keys %{ $FCH } ) 4432 { 4433 # print "$bit\n"; 4434 if ($value & (1 << $bit) ) 4435 { 4436 out1 " [Bit $bit]", $FCH->{$bit} ; 4437 } 4438 } 4439} 4440 4441sub OpenVMS_2ByteValue 4442{ 4443 my $ix = shift; 4444 my $tag = shift; 4445 my $size = shift; 4446 4447 my ($data, $value) = read_v(); 4448 4449 out2 $data, " Attribute", Value_v($value); 4450} 4451 4452sub OpenVMS_revision 4453{ 4454 my $ix = shift; 4455 my $tag = shift; 4456 my $size = shift; 4457 4458 my ($data, $value) = read_v(); 4459 4460 out2 $data, " Attribute", Value_v($value) . "'Revision Count " . Value_v($value) . "'"; 4461} 4462 4463sub decode_OpenVMS 4464{ 4465 my $extraID = shift ; 4466 my $len = shift; 4467 my $entry = shift; 4468 4469 state $openVMS_tags = { 4470 0x04 => [ 'ATR$C_RECATTR', \&OpenVMS_DumpBytes ], 4471 0x03 => [ 'ATR$C_UCHAR', \&OpenVMS_UCHAR ], 4472 0x11 => [ 'ATR$C_CREDATE', \&OpenVMS_DateTime ], 4473 0x12 => [ 'ATR$C_REVDATE', \&OpenVMS_DateTime ], 4474 0x13 => [ 'ATR$C_EXPDATE', \&OpenVMS_DateTime ], 4475 0x14 => [ 'ATR$C_BAKDATE', \&OpenVMS_DateTime ], 4476 0x0D => [ 'ATR$C_ASCDATES', \&OpenVMS_revision ], 4477 0x15 => [ 'ATR$C_UIC', \&OpenVMS_4ByteValue ], 4478 0x16 => [ 'ATR$C_FPRO', \&OpenVMS_DumpBytes ], 4479 0x17 => [ 'ATR$C_RPRO', \&OpenVMS_2ByteValue ], 4480 0x1D => [ 'ATR$C_JOURNAL', \&OpenVMS_DumpBytes ], 4481 0x1F => [ 'ATR$C_ADDACLENT', \&OpenVMS_DumpBytes ], 4482 } ; 4483 4484 out_V " CRC"; 4485 $len -= 4; 4486 4487 my $ix = 1; 4488 while ($len) 4489 { 4490 my ($data, $tag) = read_v(); 4491 my $tagname = 'Unknown Tag'; 4492 my $decoder = undef; 4493 4494 if ($openVMS_tags->{$tag}) 4495 { 4496 ($tagname, $decoder) = @{ $openVMS_tags->{$tag} } ; 4497 } 4498 4499 out2 $data, "Tag #$ix", Value_v($tag) . " '" . $tagname . "'" ; 4500 my $size = out_v " Size"; 4501 4502 if (defined $decoder) 4503 { 4504 $decoder->($ix, $tag, $size) ; 4505 } 4506 else 4507 { 4508 outSomeData($size, " Attribute"); 4509 } 4510 4511 ++ $ix; 4512 $len -= $size + 2 + 2; 4513 } 4514 4515} 4516 4517sub getT 4518{ 4519 my $time = shift ; 4520 4521 if ($opt_utc) 4522 { return scalar gmtime($time) // 'Unknown'} 4523 else 4524 { return scalar localtime($time) // 'Unknown' } 4525} 4526 4527sub getTime 4528{ 4529 my $time = shift ; 4530 4531 return "'Invalid Date or Time'" 4532 if ! defined $time; 4533 4534 return "'" . getT($time) . "'"; 4535} 4536 4537sub LastModTime 4538{ 4539 my $value = shift ; 4540 4541 return "'No Date/Time'" 4542 if $value == 0; 4543 4544 return getTime(_dosToUnixTime($value)) 4545} 4546 4547sub _dosToUnixTime 4548{ 4549 my $dt = shift; 4550 4551 # Mozilla xpi files have empty datetime 4552 # This is not a valid Dos datetime value 4553 return 0 if $dt == 0 ; 4554 4555 my $year = ( ( $dt >> 25 ) & 0x7f ) + 80; 4556 my $mon = ( ( $dt >> 21 ) & 0x0f ) - 1; 4557 my $mday = ( ( $dt >> 16 ) & 0x1f ); 4558 4559 my $hour = ( ( $dt >> 11 ) & 0x1f ); 4560 my $min = ( ( $dt >> 5 ) & 0x3f ); 4561 my $sec = ( ( $dt << 1 ) & 0x3e ); 4562 4563 use Time::Local ; 4564 my $time_t; 4565 eval 4566 { 4567 # Use eval to catch crazy dates 4568 $time_t = Time::Local::timegm( $sec, $min, $hour, $mday, $mon, $year); 4569 } 4570 or do 4571 { 4572 my $dosDecode = $year+1900 . sprintf "-%02u-%02u %02u:%02u:%02u", $mon, $mday, $hour, $min, $sec; 4573 warning $FH->tell(), "'Modification Time' value " . decimalHex0x($dt, 4) . " decodes to '$dosDecode': not a valid DOS date/time" ; 4574 return undef 4575 }; 4576 4577 return $time_t; 4578 4579} 4580 4581sub decode_UT 4582{ 4583 # 0x5455 'UT: Extended Timestamp' 4584 4585 my $extraID = shift ; 4586 my $len = shift; 4587 my $entry = shift; 4588 4589 # Definition in IZ APPNOTE 4590 4591 # NOTE: Although the IZ appnote says that the central directory 4592 # doesn't store the Acces & Creation times, there are 4593 # some implementations that do poopulate the CD incorrectly. 4594 4595 # Caller has determined that at least one byte is available 4596 4597 # When $full is true assume all timestamps are present 4598 my $full = ($len == 13) ; 4599 4600 my $remaining = $len; 4601 4602 my ($data, $flags) = read_C(); 4603 4604 my $v = Value_C $flags; 4605 my @f ; 4606 push @f, "Modification" if $flags & 1; 4607 push @f, "Access" if $flags & 2; 4608 push @f, "Creation" if $flags & 4; 4609 $v .= " '" . join(' ', @f) . "'" 4610 if @f; 4611 4612 out $data, " Flags", $v; 4613 4614 info $FH->tell() - 1, extraFieldIdentifier($extraID) . ": Reserved bits set in 'Flags' field" 4615 if $flags & ~0x7; 4616 4617 -- $remaining; 4618 4619 if ($flags & 1 || $full) 4620 { 4621 if ($remaining == 0 ) 4622 { 4623 # Central Dir only has Modification Time 4624 error $FH->tell(), extraFieldIdentifier($extraID) . ": Missing field 'Modification Time'" ; 4625 return; 4626 } 4627 else 4628 { 4629 info $FH->tell(), extraFieldIdentifier($extraID) . ": Unexpected 'Modification Time' present" 4630 if ! ($flags & 1) ; 4631 4632 if ($remaining < 4) 4633 { 4634 outSomeData $remaining, " Extra Data"; 4635 error $FH->tell() - $remaining, 4636 extraFieldIdentifier($extraID) . ": Truncated reading 'Modification Time'", 4637 expectedMessage(4, $remaining); 4638 return; 4639 } 4640 4641 my ($data, $time) = read_V(); 4642 4643 out2 $data, "Modification Time", Value_V($time) . " " . getTime($time) ; 4644 4645 $remaining -= 4 ; 4646 } 4647 } 4648 4649 # The remaining sub-fields are only present in the Local Header 4650 4651 if ($flags & 2 || $full) 4652 { 4653 if ($remaining == 0 && $entry->inCentralDir) 4654 { 4655 # Central Dir doesn't have access time 4656 } 4657 else 4658 { 4659 info $FH->tell(), extraFieldIdentifier($extraID) . ": Unexpected 'Access Time' present" 4660 if ! ($flags & 2) || $entry->inCentralDir ; 4661 4662 if ($remaining < 4) 4663 { 4664 outSomeData $remaining, " Extra Data"; 4665 error $FH->tell() - $remaining, 4666 extraFieldIdentifier($extraID) . ": Truncated reading 'Access Time'" , 4667 expectedMessage(4, $remaining); 4668 4669 return; 4670 } 4671 4672 my ($data, $time) = read_V(); 4673 4674 out2 $data, "Access Time", Value_V($time) . " " . getTime($time) ; 4675 $remaining -= 4 ; 4676 } 4677 } 4678 4679 if ($flags & 4 || $full) 4680 { 4681 if ($remaining == 0 && $entry->inCentralDir) 4682 { 4683 # Central Dir doesn't have creation time 4684 } 4685 else 4686 { 4687 info $FH->tell(), extraFieldIdentifier($extraID) . ": Unexpected 'Creation Time' present" 4688 if ! ($flags & 4) || $entry->inCentralDir ; 4689 4690 if ($remaining < 4) 4691 { 4692 outSomeData $remaining, " Extra Data"; 4693 4694 error $FH->tell() - $remaining, 4695 extraFieldIdentifier($extraID) . ": Truncated reading 'Creation Time'" , 4696 expectedMessage(4, $remaining); 4697 4698 return; 4699 } 4700 4701 my ($data, $time) = read_V(); 4702 4703 out2 $data, "Creation Time", Value_V($time) . " " . getTime($time) ; 4704 } 4705 } 4706} 4707 4708 4709sub decode_Minizip_Signature 4710{ 4711 # 0x10c5 Minizip CMS Signature 4712 4713 my $extraID = shift ; 4714 my $len = shift; 4715 my $entry = shift; 4716 4717 # Definition in https://github.com/zlib-ng/minizip-ng/blob/master/doc/mz_extrafield.md#cms-signature-0x10c5 4718 4719 $CentralDirectory->setMiniZipEncrypted(); 4720 4721 if ($len == 0) 4722 { 4723 info $FH->tell() - 2, extraFieldIdentifier($extraID) . ": Zero length Signature"; 4724 return; 4725 } 4726 4727 outHexdump($len, " Signature"); 4728 4729} 4730 4731sub decode_Minizip_Hash 4732{ 4733 # 0x1a51 Minizip Hash 4734 # Definition in https://github.com/zlib-ng/minizip-ng/blob/master/doc/mz_extrafield.md#hash-0x1a51 4735 4736 # caller ckecks there are at least 4 bytes available 4737 my $extraID = shift ; 4738 my $len = shift; 4739 my $entry = shift; 4740 4741 state $Algorithm = { 4742 10 => 'MD5', 4743 20 => 'SHA1', 4744 23 => 'SHA256', 4745 }; 4746 4747 my $remaining = $len; 4748 4749 $CentralDirectory->setMiniZipEncrypted(); 4750 4751 my ($data, $alg) = read_v(); 4752 my $algorithm = $Algorithm->{$alg} // "Unknown"; 4753 4754 out $data, " Algorithm", Value_v($alg) . " '$algorithm'"; 4755 if (! exists $Algorithm->{$alg}) 4756 { 4757 info $FH->tell() - 2, extraFieldIdentifier($extraID) . ": Unknown algorithm ID " .Value_v($alg); 4758 } 4759 4760 my ($d, $digestSize) = read_v(); 4761 out $d, " Digest Size", Value_v($digestSize); 4762 4763 $remaining -= 4; 4764 4765 if ($digestSize == 0) 4766 { 4767 info $FH->tell() - 2, extraFieldIdentifier($extraID) . ": Zero length Digest"; 4768 } 4769 elsif ($digestSize > $remaining) 4770 { 4771 error $FH->tell() - 2, extraFieldIdentifier($extraID) . ": Digest Size " . decimalHex0x($digestSize) . " > " . decimalHex0x($remaining) . " bytes remaining in extra field" ; 4772 $digestSize = $remaining ; 4773 } 4774 4775 outHexdump($digestSize, " Digest"); 4776 4777 $remaining -= $digestSize; 4778 4779 if ($remaining) 4780 { 4781 outHexdump($remaining, " Unexpected Data"); 4782 error $FH->tell() - $remaining, extraFieldIdentifier($extraID) . ": " . decimalHex0x($remaining) . " unexpected trailing bytes" ; 4783 } 4784} 4785 4786sub decode_Minizip_CD 4787{ 4788 # 0xcdcd Minizip Central Directory 4789 # Definition in https://github.com/zlib-ng/minizip-ng/blob/master/doc/mz_extrafield.md#central-directory-0xcdcd 4790 4791 my $extraID = shift ; 4792 my $len = shift; 4793 my $entry = shift; 4794 4795 $entry->minizip_secure(1); 4796 $CentralDirectory->setMiniZipEncrypted(); 4797 4798 my $size = out_Q " Entries"; 4799 4800 } 4801 4802sub decode_AES 4803{ 4804 # ref https://www.winzip.com/en/support/aes-encryption/ 4805 # Document version: 1.04 4806 # Last modified: January 30, 2009 4807 4808 my $extraID = shift ; 4809 my $len = shift; 4810 my $entry = shift; 4811 4812 return if $len == 0 ; 4813 4814 my $validAES = 1; 4815 4816 state $lookup = { 1 => "AE-1", 2 => "AE-2" }; 4817 my $vendorVersion = out_v " Vendor Version", sub { $lookup->{$_[0]} || "Unknown" } ; 4818 if (! $lookup->{$vendorVersion}) 4819 { 4820 $validAES = 0; 4821 warning $FH->tell() - 2, extraFieldIdentifier($extraID) . ": Unknown 'Vendor Version' $vendorVersion. Valid values are 1,2" 4822 } 4823 4824 my $id ; 4825 myRead($id, 2); 4826 my $idValue = out $id, " Vendor ID", unpackValue_v($id) . " '$id'"; 4827 4828 if ($id ne 'AE') 4829 { 4830 $validAES = 0; 4831 warning $FH->tell() - 2, extraFieldIdentifier($extraID) . ": Unknown 'Vendor ID' '$idValue'. Valid value is 'AE'" 4832 } 4833 4834 state $strengths = {1 => "128-bit encryption key", 4835 2 => "192-bit encryption key", 4836 3 => "256-bit encryption key", 4837 }; 4838 4839 my $strength = out_C " Encryption Strength", sub {$strengths->{$_[0]} || "Unknown" } ; 4840 4841 if (! $strengths->{$strength}) 4842 { 4843 $validAES = 0; 4844 warning $FH->tell() - 1, extraFieldIdentifier($extraID) . ": Unknown 'Encryption Strength' $strength. Valid values are 1,2,3" 4845 } 4846 4847 my ($bmethod, $method) = read_v(); 4848 out $bmethod, " Compression Method", compressionMethod($method) ; 4849 if (! defined $ZIP_CompressionMethods{$method}) 4850 { 4851 $validAES = 0; 4852 warning $FH->tell() - 2, extraFieldIdentifier($extraID) . ": Unknown 'Compression Method' ID " . decimalHex0x($method, 2) 4853 } 4854 4855 $entry->aesStrength($strength) ; 4856 $entry->aesValid($validAES) ; 4857} 4858 4859sub decode_Reference 4860{ 4861 # ref https://www.winzip.com/en/support/compression-methods/ 4862 4863 my $len = shift; 4864 my $entry = shift; 4865 4866 out_V " CRC"; 4867 myRead(my $uuid, 16); 4868 # UUID is big endian 4869 out2 $uuid, "UUID", 4870 unpack('H*', substr($uuid, 0, 4)) . '-' . 4871 unpack('H*', substr($uuid, 4, 2)) . '-' . 4872 unpack('H*', substr($uuid, 6, 2)) . '-' . 4873 unpack('H*', substr($uuid, 8, 2)) . '-' . 4874 unpack('H*', substr($uuid, 10, 6)) ; 4875} 4876 4877sub decode_DUMMY 4878{ 4879 my $extraID = shift ; 4880 my $len = shift; 4881 my $entry = shift; 4882 4883 out_V " Data"; 4884} 4885 4886sub decode_GrowthHint 4887{ 4888 # APPNOTE 6.3.10, sec 4.6.10 4889 4890 my $extraID = shift ; 4891 my $len = shift; 4892 my $entry = shift; 4893 4894 # caller has checked that 4 bytes are available, 4895 # so can output values without checking available space 4896 out_v " Signature" ; 4897 out_v " Initial Value"; 4898 4899 my $padding; 4900 myRead($padding, $len - 4); 4901 4902 out2 $padding, "Padding", hexDump16($padding); 4903 4904 if ($padding !~ /^\x00+$/) 4905 { 4906 info $FH->tell(), extraFieldIdentifier($extraID) . ": 'Padding' is not all NULL bytes"; 4907 } 4908} 4909 4910sub decode_DataStreamAlignment 4911{ 4912 # APPNOTE 6.3.10, sec 4.6.11 4913 4914 my $extraID = shift ; 4915 my $len = shift; 4916 my $entry = shift; 4917 4918 my $inCentralHdr = $entry->inCentralDir ; 4919 4920 return if $len == 0 ; 4921 4922 my ($data, $alignment) = read_v(); 4923 4924 out $data, " Alignment", Value_v($alignment) ; 4925 4926 my $recompress_value = $alignment & 0x8000 ? 1 : 0; 4927 4928 my $recompressing = $recompress_value ? "True" : "False"; 4929 $alignment &= 0x7FFF ; 4930 my $hexAl = sprintf("%X", $alignment); 4931 4932 out1 " [Bit 15]", "$recompress_value 'Recompress $recompressing'"; 4933 out1 " [Bits 0-14]", "$hexAl 'Minimal Alignment $alignment'"; 4934 4935 if (! $inCentralHdr && $len - 2 > 0) 4936 { 4937 my $padding; 4938 myRead($padding, $len - 2); 4939 4940 out2 $padding, "Padding", hexDump16($padding); 4941 } 4942} 4943 4944 4945sub decode_UX 4946{ 4947 my $extraID = shift ; 4948 my $len = shift; 4949 my $entry = shift; 4950 4951 my $inCentralHdr = $entry->inCentralDir ; 4952 4953 return if $len == 0 ; 4954 4955 my ($data, $time) = read_V(); 4956 out2 $data, "Access Time", Value_V($time) . " " . getTime($time) ; 4957 4958 ($data, $time) = read_V(); 4959 out2 $data, "Modification Time", Value_V($time) . " " . getTime($time) ; 4960 4961 if (! $inCentralHdr ) { 4962 out_v " UID" ; 4963 out_v " GID"; 4964 } 4965} 4966 4967sub decode_Ux 4968{ 4969 my $extraID = shift ; 4970 my $len = shift; 4971 my $entry = shift; 4972 4973 return if $len == 0 ; 4974 out_v " UID" ; 4975 out_v " GID"; 4976} 4977 4978sub decodeLitteEndian 4979{ 4980 my $value = shift ; 4981 4982 if (length $value == 8) 4983 { 4984 return unpackValueQ ($value) 4985 } 4986 elsif (length $value == 4) 4987 { 4988 return unpackValue_V ($value) 4989 } 4990 elsif (length $value == 2) 4991 { 4992 return unpackValue_v ($value) 4993 } 4994 elsif (length $value == 1) 4995 { 4996 return unpackValue_C ($value) 4997 } 4998 else { 4999 # TODO - fix this 5000 internalFatal undef, "unsupported decodeLitteEndian length '" . length ($value) . "'"; 5001 } 5002} 5003 5004sub decode_ux 5005{ 5006 my $extraID = shift ; 5007 my $len = shift; 5008 my $entry = shift; 5009 5010 # caller has checked that 3 bytes are available 5011 5012 return if $len == 0 ; 5013 5014 my $version = out_C " Version" ; 5015 info $FH->tell() - 1, extraFieldIdentifier($extraID) . ": 'Version' should be " . decimalHex0x(1) . ", got " . decimalHex0x($version, 1) 5016 if $version != 1 ; 5017 5018 my $available = $len - 1 ; 5019 5020 my $uidSize = out_C " UID Size"; 5021 $available -= 1; 5022 5023 if ($uidSize) 5024 { 5025 if ($available < $uidSize) 5026 { 5027 outSomeData($available, " Bad Extra Data"); 5028 error $FH->tell() - $available, 5029 extraFieldIdentifier($extraID) . ": truncated reading 'UID'", 5030 expectedMessage($uidSize, $available); 5031 return; 5032 } 5033 5034 myRead(my $data, $uidSize); 5035 out2 $data, "UID", decodeLitteEndian($data); 5036 $available -= $uidSize ; 5037 } 5038 5039 if ($available < 1) 5040 { 5041 error $FH->tell(), 5042 extraFieldIdentifier($extraID) . ": truncated reading 'GID Size'", 5043 expectedMessage($uidSize, $available); 5044 return ; 5045 } 5046 5047 my $gidSize = out_C " GID Size"; 5048 $available -= 1 ; 5049 if ($gidSize) 5050 { 5051 if ($available < $gidSize) 5052 { 5053 outSomeData($available, " Bad Extra Data"); 5054 error $FH->tell() - $available, 5055 extraFieldIdentifier($extraID) . ": truncated reading 'GID'", 5056 expectedMessage($gidSize, $available); 5057 return; 5058 } 5059 5060 myRead(my $data, $gidSize); 5061 out2 $data, "GID", decodeLitteEndian($data); 5062 $available -= $gidSize ; 5063 } 5064 5065} 5066 5067sub decode_Java_exe 5068{ 5069 my $extraID = shift ; 5070 my $len = shift; 5071 my $entry = shift; 5072 5073} 5074 5075sub decode_up 5076{ 5077 # APPNOTE 6.3.10, sec 4.6.9 5078 5079 my $extraID = shift ; 5080 my $len = shift; 5081 my $entry = shift; 5082 5083 out_C " Version"; 5084 out_V " NameCRC32"; 5085 5086 if ($len - 5 > 0) 5087 { 5088 myRead(my $data, $len - 5); 5089 5090 outputFilename($data, 1, " UnicodeName"); 5091 } 5092} 5093 5094sub decode_ASi_Unix 5095{ 5096 my $extraID = shift ; 5097 my $len = shift; 5098 my $entry = shift; 5099 5100 # https://stackoverflow.com/questions/76581811/why-does-unzip-ignore-my-zip64-end-of-central-directory-record 5101 5102 out_V " CRC"; 5103 my $native_attrib = out_v " Mode"; 5104 5105 # TODO - move to separate sub & tidy 5106 if (1) # Unix 5107 { 5108 5109 state $mask = { 5110 0 => '---', 5111 1 => '--x', 5112 2 => '-w-', 5113 3 => '-wx', 5114 4 => 'r--', 5115 5 => 'r-x', 5116 6 => 'rw-', 5117 7 => 'rwx', 5118 } ; 5119 5120 my $rwx = ($native_attrib & 0777); 5121 5122 if ($rwx) 5123 { 5124 my $output = ''; 5125 $output .= $mask->{ ($rwx >> 6) & 07 } ; 5126 $output .= $mask->{ ($rwx >> 3) & 07 } ; 5127 $output .= $mask->{ ($rwx >> 0) & 07 } ; 5128 5129 out1 " [Bits 0-8]", Value_v($rwx) . " 'Unix attrib: $output'" ; 5130 out1 " [Bit 9]", "1 'Sticky'" 5131 if $rwx & 0x200 ; 5132 out1 " [Bit 10]", "1 'Set GID'" 5133 if $rwx & 0x400 ; 5134 out1 " [Bit 11]", "1 'Set UID'" 5135 if $rwx & 0x800 ; 5136 5137 my $not_rwx = (($native_attrib >> 12) & 0xF); 5138 if ($not_rwx) 5139 { 5140 state $masks = { 5141 0x0C => 'Socket', # 0x0C 0b1100 5142 0x0A => 'Symbolic Link', # 0x0A 0b1010 5143 0x08 => 'Regular File', # 0x08 0b1000 5144 0x06 => 'Block Device', # 0x06 0b0110 5145 0x04 => 'Directory', # 0x04 0b0100 5146 0x02 => 'Character Device', # 0x02 0b0010 5147 0x01 => 'FIFO', # 0x01 0b0001 5148 }; 5149 5150 my $got = $masks->{$not_rwx} // 'Unknown Unix attrib' ; 5151 out1 " [Bits 12-15]", Value_C($not_rwx) . " '$got'" 5152 } 5153 } 5154 } 5155 5156 5157 my $s = out_V " SizDev"; 5158 out_v " UID"; 5159 out_v " GID"; 5160 5161} 5162 5163sub decode_uc 5164{ 5165 # APPNOTE 6.3.10, sec 4.6.8 5166 5167 my $extraID = shift ; 5168 my $len = shift; 5169 my $entry = shift; 5170 5171 out_C " Version"; 5172 out_V " ComCRC32"; 5173 5174 if ($len - 5 > 0) 5175 { 5176 myRead(my $data, $len - 5); 5177 5178 outputFilename($data, 1, " UnicodeCom"); 5179 } 5180} 5181 5182sub decode_Xceed_unicode 5183{ 5184 # 0x554e 5185 5186 my $extraID = shift ; 5187 my $len = shift; 5188 my $entry = shift; 5189 5190 my $data ; 5191 my $remaining = $len; 5192 5193 # No public definition available, so reverse engineer the content. 5194 5195 # See https://github.com/pmqs/zipdetails/issues/13 for C# source that populates 5196 # this field. 5197 5198 # Fiddler https://www.telerik.com/fiddler) creates this field. 5199 5200 # Local Header only has UTF16LE filename 5201 # 5202 # Field definition 5203 # 4 bytes Signature always XCUN 5204 # 2 bytes Filename Length (divided by 2) 5205 # Filename 5206 5207 # Central has UTF16LE filename & comment 5208 # 5209 # Field definition 5210 # 4 bytes Signature always XCUN 5211 # 2 bytes Filename Length (divided by 2) 5212 # 2 bytes Comment Length (divided by 2) 5213 # Filename 5214 # Comment 5215 5216 # First 4 bytes appear to be little-endian "XCUN" all the time 5217 # Just double check 5218 my ($idb, $id) = read_V(); 5219 $remaining -= 4; 5220 5221 my $outid = decimalHex0x($id); 5222 $outid .= " 'XCUN'" 5223 if $idb eq 'NUCX'; 5224 5225 out $idb, " ID", $outid; 5226 5227 # Next 2 bytes contains a count of the filename length divided by 2 5228 # Dividing by 2 gives the number of UTF-16 characters. 5229 my $filenameLength = out_v " Filename Length"; 5230 $filenameLength *= 2; # Double to get number of bytes to read 5231 $remaining -= 2; 5232 5233 my $commentLength = 0; 5234 5235 if ($entry->inCentralDir) 5236 { 5237 # Comment length only in Central Directory 5238 # Again stored divided by 2. 5239 $commentLength = out_v " Comment Length"; 5240 $commentLength *= 2; # Double to get number of bytes to read 5241 $remaining -= 2; 5242 } 5243 5244 # next is a UTF16 encoded filename 5245 5246 if ($filenameLength) 5247 { 5248 if ($filenameLength > $remaining ) 5249 { 5250 myRead($data, $remaining); 5251 out redactData($data), " UTF16LE Filename", "'" . redactFilename(decode("UTF16LE", $data)) . "'"; 5252 5253 error $FH->tell() - $remaining, 5254 extraFieldIdentifier($extraID) . ": Truncated reading 'UTF16LE Filename'", 5255 expectedMessage($filenameLength, $remaining); 5256 return undef; 5257 } 5258 5259 myRead($data, $filenameLength); 5260 out redactData($data), " UTF16LE Filename", "'" . redactFilename(decode("UTF16LE", $data)) . "'"; 5261 $remaining -= $filenameLength; 5262 } 5263 5264 # next is a UTF16 encoded comment 5265 5266 if ($commentLength) 5267 { 5268 if ($commentLength > $remaining ) 5269 { 5270 myRead($data, $remaining); 5271 out redactData($data), " UTF16LE Comment", "'" . redactFilename(decode("UTF16LE", $data)) . "'"; 5272 5273 error $FH->tell() - $remaining, 5274 extraFieldIdentifier($extraID) . ": Truncated reading 'UTF16LE Comment'", 5275 expectedMessage($filenameLength, $remaining); 5276 return undef; 5277 } 5278 5279 myRead($data, $commentLength); 5280 out redactData($data), " UTF16LE Comment", "'" . redactFilename(decode("UTF16LE", $data)) . "'"; 5281 $remaining -= $commentLength; 5282 } 5283 5284 if ($remaining) 5285 { 5286 outHexdump($remaining, " Unexpected Data"); 5287 error $FH->tell() - $remaining, extraFieldIdentifier($extraID) . ": " . decimalHex0x($remaining) . " unexpected trailing bytes" ; 5288 } 5289} 5290 5291sub decode_Key_Value_Pair 5292{ 5293 # 0x564B 'KV' 5294 # https://github.com/sozip/keyvaluepairs-spec/blob/master/zip_keyvalue_extra_field_specification.md 5295 5296 my $extraID = shift ; 5297 my $len = shift; 5298 my $entry = shift; 5299 5300 my $remaining = $len; 5301 5302 myRead(my $signature, 13); 5303 $remaining -= 13; 5304 5305 if ($signature ne 'KeyValuePairs') 5306 { 5307 error $FH->tell() - 13, extraFieldIdentifier($extraID) . ": 'Signature' field not 'KeyValuePairs'" ; 5308 myRead(my $payload, $remaining); 5309 my $data = hexDump16($signature . $payload); 5310 5311 out2 $signature . $payload, "Extra Payload", $data; 5312 5313 return ; 5314 } 5315 5316 out $signature, ' Signature', "'KeyValuePairs'"; 5317 my $kvPairs = out_C " KV Count"; 5318 $remaining -= 1; 5319 5320 for my $index (1 .. $kvPairs) 5321 { 5322 my $key; 5323 my $klen = out_v " Key size #$index"; 5324 $remaining -= 4; 5325 5326 myRead($key, $klen); 5327 outputFilename $key, 1, " Key #$index"; 5328 $remaining -= $klen; 5329 5330 my $value; 5331 my $vlen = out_v " Value size #$index"; 5332 $remaining -= 4; 5333 5334 myRead($value, $vlen); 5335 outputFilename $value, 1, " Value #$index"; 5336 $remaining -= $vlen; 5337 } 5338 5339 # TODO check that 5340 # * count of kv pairs is accurate 5341 # * no truncation in middle of kv data 5342 # * no trailing data 5343} 5344 5345sub decode_NT_security 5346{ 5347 # IZ Appnote 5348 my $extraID = shift ; 5349 my $len = shift; 5350 my $entry = shift; 5351 5352 my $inCentralHdr = $entry->inCentralDir ; 5353 5354 out_V " Uncompressed Size" ; 5355 5356 if (! $inCentralHdr) { 5357 5358 out_C " Version" ; 5359 5360 out_v " CType", sub { "'" . ($ZIP_CompressionMethods{$_[0]} || "Unknown Method") . "'" }; 5361 5362 out_V " CRC" ; 5363 5364 my $plen = $len - 4 - 1 - 2 - 4; 5365 outHexdump $plen, " Extra Payload"; 5366 } 5367} 5368 5369sub decode_MVS 5370{ 5371 # APPNOTE 6.3.10, Appendix 5372 my $extraID = shift ; 5373 my $len = shift; 5374 my $entry = shift; 5375 5376 # data in Big-Endian 5377 myRead(my $data, $len); 5378 my $ID = unpack("N", $data); 5379 5380 if ($ID == 0xE9F3F9F0) # EBCDIC for "Z390" 5381 { 5382 my $d = substr($data, 0, 4, '') ; 5383 out($d, " ID", "'Z390'"); 5384 } 5385 5386 out($data, " Extra Payload", hexDump16($data)); 5387} 5388 5389sub decode_strong_encryption 5390{ 5391 # APPNOTE 6.3.10, sec 4.5.12 & 7.4.2 5392 5393 my $extraID = shift ; 5394 my $len = shift; 5395 my $entry = shift; 5396 5397 # TODO check for overflow is contents > $len 5398 out_v " Format"; 5399 out_v " AlgId", sub { $AlgIdLookup{ $_[0] } // "Unknown algorithm" } ; 5400 out_v " BitLen"; 5401 out_v " Flags", sub { $FlagsLookup{ $_[0] } // "reserved for certificate processing" } ; 5402 5403 # see APPNOTE 6.3.10, sec 7.4.2 for this part 5404 my $recipients = out_V " Recipients"; 5405 5406 my $available = $len - 12; 5407 5408 if ($recipients) 5409 { 5410 if ($available < 2) 5411 { 5412 outSomeData($available, " Badly formed extra data"); 5413 # TODO - need warning 5414 return; 5415 } 5416 5417 out_v " HashAlg", sub { $HashAlgLookup{ $_[0] } // "Unknown algorithm" } ; 5418 $available -= 2; 5419 5420 if ($available < 2) 5421 { 5422 outSomeData($available, " Badly formed extra data"); 5423 # TODO - need warning 5424 return; 5425 } 5426 5427 my $HSize = out_v " HSize" ; 5428 $available -= 2; 5429 5430 # should have $recipients * $HSize bytes available 5431 if ($recipients * $HSize != $available) 5432 { 5433 outSomeData($available, " Badly formed extra data"); 5434 # TODO - need warning 5435 return; 5436 } 5437 5438 my $ix = 1; 5439 for (0 .. $recipients-1) 5440 { 5441 myRead(my $payload, $HSize); 5442 my $data = hexDump16($payload); 5443 5444 out2 $payload, sprintf("Key #%X", $ix), $data; 5445 ++ $ix; 5446 } 5447 } 5448} 5449 5450 5451sub printAes 5452{ 5453 # ref https://www.winzip.com/en/support/aes-encryption/ 5454 5455 my $entry = shift; 5456 5457 return 0 5458 if ! $entry->aesValid; 5459 5460 my %saltSize = ( 5461 1 => 8, 5462 2 => 12, 5463 3 => 16, 5464 ); 5465 5466 myRead(my $salt, $saltSize{$entry->aesStrength } // 0); 5467 out $salt, "AES Salt", hexDump16($salt); 5468 myRead(my $pwv, 2); 5469 out $pwv, "AES Pwd Ver", hexDump16($pwv); 5470 5471 return $saltSize{$entry->aesStrength} + 2 + 10; 5472} 5473 5474sub printLzmaProperties 5475{ 5476 my $len = 0; 5477 5478 my $b1; 5479 my $b2; 5480 my $buffer; 5481 5482 myRead($b1, 2); 5483 my ($verHi, $verLow) = unpack ("CC", $b1); 5484 5485 out $b1, "LZMA Version", sprintf("%02X%02X", $verHi, $verLow) . " '$verHi.$verLow'"; 5486 my $LzmaPropertiesSize = out_v "LZMA Properties Size"; 5487 $len += 4; 5488 5489 my $LzmaInfo = out_C "LZMA Info", sub { $_[0] == 93 ? "(Default)" : ""}; 5490 5491 my $PosStateBits = 0; 5492 my $LiteralPosStateBits = 0; 5493 my $LiteralContextBits = 0; 5494 $PosStateBits = int($LzmaInfo / (9 * 5)); 5495 $LzmaInfo -= $PosStateBits * 9 * 5; 5496 $LiteralPosStateBits = int($LzmaInfo / 9); 5497 $LiteralContextBits = $LzmaInfo - $LiteralPosStateBits * 9; 5498 5499 out1 " PosStateBits", $PosStateBits; 5500 out1 " LiteralPosStateBits", $LiteralPosStateBits; 5501 out1 " LiteralContextBits", $LiteralContextBits; 5502 5503 out_V "LZMA Dictionary Size"; 5504 5505 # TODO - assumption that this is 5 5506 $len += $LzmaPropertiesSize; 5507 5508 skip($FH, $LzmaPropertiesSize - 5) 5509 if $LzmaPropertiesSize != 5 ; 5510 5511 return $len; 5512} 5513 5514sub peekAtOffset 5515{ 5516 # my $fh = shift; 5517 my $offset = shift; 5518 my $len = shift; 5519 5520 my $here = $FH->tell(); 5521 5522 seekTo($offset) ; 5523 5524 my $buffer; 5525 myRead($buffer, $len); 5526 seekTo($here); 5527 5528 length $buffer == $len 5529 or return ''; 5530 5531 return $buffer; 5532} 5533 5534sub readFromOffset 5535{ 5536 # my $fh = shift; 5537 my $offset = shift; 5538 my $len = shift; 5539 5540 seekTo($offset) ; 5541 5542 my $buffer; 5543 myRead($buffer, $len); 5544 5545 length $buffer == $len 5546 or return ''; 5547 5548 return $buffer; 5549} 5550 5551sub readSignatureFromOffset 5552{ 5553 my $offset = shift ; 5554 5555 # catch use case where attempting to read past EOF 5556 # sub is expecting to return a 32-bit value so return 54-bit out-of-bound value 5557 return MAX64 5558 if $offset + 4 > $FILELEN ; 5559 5560 my $here = $FH->tell(); 5561 my $buffer = readFromOffset($offset, 4); 5562 my $gotSig = unpack("V", $buffer) ; 5563 seekTo($here); 5564 5565 return $gotSig; 5566} 5567 5568 5569sub chckForAPKSigningBlock 5570{ 5571 my $fh = shift; 5572 my $cdOffset = shift; 5573 my $cdSize = shift; 5574 5575 # APK Signing Block comes directy before the Central directory 5576 # See https://source.android.com/security/apksigning/v2 5577 5578 # If offset available is less than 44, it isn't an APK signing block 5579 # 5580 # len1 8 5581 # id 4 5582 # kv with zero len 8 5583 # len1 8 5584 # magic 16 5585 # ---------- 5586 # 44 5587 5588 return (0, 0, '') 5589 if $cdOffset < 44 || $FILELEN - $cdSize < 44 ; 5590 5591 # Step 1 - 16 bytes before CD is literal string "APK Sig Block 42" 5592 my $magicOffset = $cdOffset - 16; 5593 my $buffer = readFromOffset($magicOffset, 16); 5594 5595 return (0, 0, '') 5596 if $buffer ne "APK Sig Block 42" ; 5597 5598 # Step 2 - read the second length field 5599 # and check that it looks ok 5600 $buffer = readFromOffset($cdOffset - 16 - 8, 8); 5601 my $len2 = unpack("Q<", $buffer); 5602 5603 return (0, 0, '') 5604 if $len2 == 0 || $len2 > $FILELEN; 5605 5606 # Step 3 - read the first length field. 5607 # It should be identical to the second one. 5608 5609 my $startApkOffset = $cdOffset - 8 - $len2 ; 5610 5611 $buffer = readFromOffset($startApkOffset, 8); 5612 my $len1 = unpack("Q<", $buffer); 5613 5614 return (0, 0, '') 5615 if $len1 != $len2; 5616 5617 return ($startApkOffset, $cdOffset - 16 - 8, $buffer); 5618} 5619 5620sub scanApkBlock 5621{ 5622 state $IDs = { 5623 0x7109871a => "APK Signature v2", 5624 0xf05368c0 => "APK Signature v3", 5625 0x42726577 => "Verity Padding Block", # from https://android.googlesource.com/platform/tools/apksig/+/master/src/main/java/com/android/apksig/internal/apk/ApkSigningBlockUtils.java 5626 0x6dff800d => "Source Stamp", 5627 0x504b4453 => "Dependency Info", 5628 0x71777777 => "APK Channel Block", 5629 0xff3b5998 => "Zero Block", 5630 0x2146444e => "Play Metadata", 5631 } ; 5632 5633 5634 seekTo($FH->tell() - 4) ; 5635 print "\n"; 5636 out "", "APK SIGNING BLOCK"; 5637 5638 scanApkPadding(); 5639 out_Q "Block Length Copy #1"; 5640 my $ix = 1; 5641 5642 while ($FH->tell() < $APK - 8) 5643 { 5644 my ($bytes, $id, $len); 5645 ($bytes, $len) = read_Q ; 5646 out $bytes, "ID/Value Length #" . sprintf("%X", $ix), Value_Q($len); 5647 5648 ($bytes, $id) = read_V; 5649 5650 out $bytes, " ID", Value_V($id) . " '" . ($IDs->{$id} // 'Unknown ID') . "'"; 5651 5652 outSomeData($len-4, " Value"); 5653 ++ $ix; 5654 } 5655 5656 out_Q "Block Length Copy #2"; 5657 5658 my $magic ; 5659 myRead($magic, 16); 5660 5661 out $magic, "Magic", qq['$magic']; 5662} 5663 5664sub scanApkPadding 5665{ 5666 my $here = $FH->tell(); 5667 5668 return 5669 if $here == $START_APK; 5670 5671 # found some padding 5672 5673 my $delta = $START_APK - $here; 5674 my $padding = peekAtOffset($here, $delta); 5675 5676 if ($padding =~ /^\x00+$/) 5677 { 5678 outSomeData($delta, "Null Padding"); 5679 } 5680 else 5681 { 5682 outHexdump($delta, "Unexpected Padding"); 5683 } 5684} 5685 5686sub scanCentralDirectory 5687{ 5688 my $fh = shift; 5689 5690 my $here = $fh->tell(); 5691 5692 # Use cases 5693 # 1 32-bit CD 5694 # 2 64-bit CD 5695 5696 my ($offset, $size) = findCentralDirectoryOffset($fh); 5697 $CentralDirectory->{CentralDirectoryOffset} = $offset; 5698 $CentralDirectory->{CentralDirectorySize} = $size; 5699 5700 return () 5701 if ! defined $offset; 5702 5703 $fh->seek($offset, SEEK_SET) ; 5704 5705 # Now walk the Central Directory Records 5706 my $buffer ; 5707 my $cdIndex = 0; 5708 my $cdEntryOffset = 0; 5709 5710 while ($fh->read($buffer, ZIP_CD_FILENAME_OFFSET) == ZIP_CD_FILENAME_OFFSET && 5711 unpack("V", $buffer) == ZIP_CENTRAL_HDR_SIG) { 5712 5713 my $startHeader = $fh->tell() - ZIP_CD_FILENAME_OFFSET; 5714 5715 my $cdEntryOffset = $fh->tell() - ZIP_CD_FILENAME_OFFSET; 5716 $HeaderOffsetIndex->addOffsetNoPrefix($cdEntryOffset, ZIP_CENTRAL_HDR_SIG) ; 5717 5718 ++ $cdIndex ; 5719 5720 my $extractVer = unpack("v", substr($buffer, 6, 1)); 5721 my $gpFlag = unpack("v", substr($buffer, 8, 2)); 5722 my $lastMod = unpack("V", substr($buffer, 10, 4)); 5723 my $crc = unpack("V", substr($buffer, 16, 4)); 5724 my $compressedSize = unpack("V", substr($buffer, 20, 4)); 5725 my $uncompressedSize = unpack("V", substr($buffer, 24, 4)); 5726 my $filename_length = unpack("v", substr($buffer, 28, 2)); 5727 my $extra_length = unpack("v", substr($buffer, 30, 2)); 5728 my $comment_length = unpack("v", substr($buffer, 32, 2)); 5729 my $diskNumber = unpack("v", substr($buffer, 34, 2)); 5730 my $locHeaderOffset = unpack("V", substr($buffer, 42, 4)); 5731 5732 my $cdZip64 = 0; 5733 my $zip64Sizes = 0; 5734 5735 if (! full32 $locHeaderOffset) 5736 { 5737 # Check for corrupt offset 5738 # 1. ponting paset EOF 5739 # 2. offset points forward in the file 5740 # 3. value at offset is not a CD record signature 5741 5742 my $commonMessage = "'Local Header Offset' field in '" . Signatures::name(ZIP_CENTRAL_HDR_SIG) . "' is invalid"; 5743 checkOffsetValue($locHeaderOffset, $startHeader, 0, $commonMessage, 5744 $startHeader + CentralDirectoryEntry::Offset_RelativeOffsetToLocal(), 5745 ZIP_LOCAL_HDR_SIG, 1) ; 5746 } 5747 5748 $fh->read(my $filename, $filename_length) ; 5749 5750 my $cdEntry = CentralDirectoryEntry->new(); 5751 5752 $cdEntry->centralHeaderOffset($startHeader) ; 5753 $cdEntry->localHeaderOffset($locHeaderOffset) ; 5754 $cdEntry->compressedSize($compressedSize) ; 5755 $cdEntry->uncompressedSize($uncompressedSize) ; 5756 $cdEntry->extractVersion($extractVer); 5757 $cdEntry->generalPurposeFlags($gpFlag); 5758 $cdEntry->filename($filename) ; 5759 $cdEntry->lastModDateTime($lastMod); 5760 $cdEntry->languageEncodingFlag($gpFlag & ZIP_GP_FLAG_LANGUAGE_ENCODING) ; 5761 $cdEntry->diskNumber($diskNumber) ; 5762 $cdEntry->crc32($crc) ; 5763 $cdEntry->zip64ExtraPresent($cdZip64) ; 5764 5765 $cdEntry->std_localHeaderOffset($locHeaderOffset) ; 5766 $cdEntry->std_compressedSize($compressedSize) ; 5767 $cdEntry->std_uncompressedSize($uncompressedSize) ; 5768 $cdEntry->std_diskNumber($diskNumber) ; 5769 5770 5771 if ($extra_length) 5772 { 5773 $fh->read(my $extraField, $extra_length) ; 5774 5775 # Check for Zip64 5776 my $zip64Extended = findID(0x0001, $extraField); 5777 5778 if ($zip64Extended) 5779 { 5780 $cdZip64 = 1; 5781 walk_Zip64_in_CD(1, $zip64Extended, $cdEntry, 0); 5782 } 5783 } 5784 5785 $cdEntry->offsetStart($startHeader) ; 5786 $cdEntry->offsetEnd($FH->tell() - 1); 5787 5788 # don't call addEntry until after the extra fields have been scanned 5789 # the localheader offset value may be updated in th ezip64 extra field. 5790 $CentralDirectory->addEntry($cdEntry); 5791 $HeaderOffsetIndex->addOffset($cdEntry->localHeaderOffset, ZIP_LOCAL_HDR_SIG) ; 5792 5793 skip($fh, $comment_length ) ; 5794 } 5795 5796 $FH->seek($fh->tell() - ZIP_CD_FILENAME_OFFSET, SEEK_SET); 5797 5798 # Check for Digital Signature 5799 $HeaderOffsetIndex->addOffset($fh->tell() - 4, ZIP_DIGITAL_SIGNATURE_SIG) 5800 if $fh->read($buffer, 4) == 4 && 5801 unpack("V", $buffer) == ZIP_DIGITAL_SIGNATURE_SIG ; 5802 5803 $CentralDirectory->sortByLocalOffset(); 5804 $HeaderOffsetIndex->sortOffsets(); 5805 5806 $fh->seek($here, SEEK_SET) ; 5807 5808} 5809 5810use constant ZIP64_END_CENTRAL_LOC_HDR_SIZE => 20; 5811use constant ZIP64_END_CENTRAL_REC_HDR_MIN_SIZE => 56; 5812 5813sub offsetFromZip64 5814{ 5815 my $fh = shift ; 5816 my $here = shift; 5817 my $eocdSize = shift; 5818 5819 #### Zip64 end of central directory locator 5820 5821 # check enough bytes available for zip64 locator record 5822 fatal_tryWalk undef, "Cannot find signature for " . Signatures::nameAndHex(ZIP64_END_CENTRAL_LOC_HDR_SIG), # 'Zip64 end of central directory locator': 0x07064b50" 5823 "Possible truncated or corrupt zip file" 5824 if $here < ZIP64_END_CENTRAL_LOC_HDR_SIZE ; 5825 5826 $fh->seek($here - ZIP64_END_CENTRAL_LOC_HDR_SIZE, SEEK_SET) ; 5827 $here = $FH->tell(); 5828 5829 my $buffer; 5830 my $got = 0; 5831 $fh->read($buffer, ZIP64_END_CENTRAL_LOC_HDR_SIZE); 5832 5833 my $gotSig = unpack("V", $buffer); 5834 fatal_tryWalk $here - 4, sprintf("Expected signature for " . Signatures::nameAndHex(ZIP64_END_CENTRAL_LOC_HDR_SIG) . " not found, got 0x%X", $gotSig) 5835 if $gotSig != ZIP64_END_CENTRAL_LOC_HDR_SIG ; 5836 5837 $HeaderOffsetIndex->addOffset($fh->tell() - ZIP64_END_CENTRAL_LOC_HDR_SIZE, ZIP64_END_CENTRAL_LOC_HDR_SIG) ; 5838 5839 5840 my $cd64 = unpack "Q<", substr($buffer, 8, 8); 5841 my $totalDisks = unpack "V", substr($buffer, 16, 4); 5842 5843 testPossiblePrefix($cd64, ZIP64_END_CENTRAL_REC_HDR_SIG); 5844 5845 if ($totalDisks > 0) 5846 { 5847 my $commonMessage = "'Offset to Zip64 End of Central Directory Record' field in '" . Signatures::name(ZIP64_END_CENTRAL_LOC_HDR_SIG) . "' is invalid"; 5848 $cd64 = checkOffsetValue($cd64, $here, 0, $commonMessage, $here + 8, ZIP64_END_CENTRAL_REC_HDR_SIG, 1) ; 5849 } 5850 5851 my $delta = $here - $cd64; 5852 5853 #### Zip64 end of central directory record 5854 5855 my $zip64eocd_name = "'" . Signatures::name(ZIP64_END_CENTRAL_REC_HDR_SIG) . "'"; 5856 my $zip64eocd_name_value = Signatures::nameAndHex(ZIP64_END_CENTRAL_REC_HDR_SIG); 5857 my $zip64eocd_value = Signatures::hexValue(ZIP64_END_CENTRAL_REC_HDR_SIG); 5858 5859 # check enough bytes available 5860 # fatal_tryWalk sprintf "Size of 'Zip64 End of Central Directory Record' 0x%X too small", $cd64 5861 fatal_tryWalk undef, sprintf "Size of $zip64eocd_name 0x%X too small", $cd64 5862 if $delta < ZIP64_END_CENTRAL_REC_HDR_MIN_SIZE; 5863 5864 # Seek to Zip64 End of Central Directory Record 5865 $fh->seek($cd64, SEEK_SET) ; 5866 $HeaderOffsetIndex->addOffsetNoPrefix($fh->tell(), ZIP64_END_CENTRAL_REC_HDR_SIG) ; 5867 5868 $fh->read($buffer, ZIP64_END_CENTRAL_REC_HDR_MIN_SIZE) ; 5869 5870 my $sig = unpack("V", substr($buffer, 0, 4)) ; 5871 fatal_tryWalk undef, sprintf "Cannot find $zip64eocd_name: expected $zip64eocd_value but got 0x%X", $sig 5872 if $sig != ZIP64_END_CENTRAL_REC_HDR_SIG ; 5873 5874 # pkzip sets the extract zip spec to 6.2 (0x3E) to signal a v2 record 5875 # See APPNOTE 6.3.10, section, 7.3.3 5876 5877 # Version 1 header is 44 bytes (assuming no extensible data sector) 5878 # Version 2 header (see APPNOTE 6.3.7, section) is > 44 bytes 5879 5880 my $extractSpec = unpack "C", substr($buffer, 14, 1); 5881 my $diskNumber = unpack "V", substr($buffer, 16, 4); 5882 my $cdDiskNumber = unpack "V", substr($buffer, 20, 4); 5883 my $entriesOnThisDisk = unpack "Q<", substr($buffer, 24, 8); 5884 my $totalEntries = unpack "Q<", substr($buffer, 32, 8); 5885 my $centralDirSize = unpack "Q<", substr($buffer, 40, 8); 5886 my $centralDirOffset = unpack "Q<", substr($buffer, 48, 8); 5887 5888 if ($extractSpec >= 0x3E) 5889 { 5890 $opt_walk = 1; 5891 $CentralDirectory->setPkEncryptedCD(); 5892 } 5893 5894 if (! emptyArchive($here, $diskNumber, $cdDiskNumber, $entriesOnThisDisk, $totalEntries, $centralDirSize, $centralDirOffset)) 5895 { 5896 my $commonMessage = "'Offset to Central Directory' field in $zip64eocd_name is invalid"; 5897 $centralDirOffset = checkOffsetValue($centralDirOffset, $here, 0, $commonMessage, $here + 48, ZIP_CENTRAL_HDR_SIG, 1, $extractSpec < 0x3E) ; 5898 } 5899 5900 # TODO - APPNOTE allows an extensible data sector here (see APPNOTE 6.3.10, section 4.3.14.2) -- need to take this into account 5901 5902 return ($centralDirOffset, $centralDirSize) ; 5903} 5904 5905use constant Pack_ZIP_END_CENTRAL_HDR_SIG => pack("V", ZIP_END_CENTRAL_HDR_SIG); 5906 5907sub findCentralDirectoryOffset 5908{ 5909 my $fh = shift ; 5910 5911 # Most common use-case is where there is no comment, so 5912 # know exactly where the end of central directory record 5913 # should be. 5914 5915 need ZIP_EOCD_MIN_SIZE, Signatures::name(ZIP_END_CENTRAL_HDR_SIG); 5916 5917 $fh->seek(-ZIP_EOCD_MIN_SIZE(), SEEK_END) ; 5918 my $here = $fh->tell(); 5919 5920 my $is64bit = $here > MAX32; 5921 my $over64bit = $here & (~ MAX32); 5922 5923 my $buffer; 5924 $fh->read($buffer, ZIP_EOCD_MIN_SIZE); 5925 5926 my $zip64 = 0; 5927 my $diskNumber ; 5928 my $cdDiskNumber ; 5929 my $entriesOnThisDisk ; 5930 my $totalEntries ; 5931 my $centralDirSize ; 5932 my $centralDirOffset ; 5933 my $commentLength = 0; 5934 my $trailingBytes = 0; 5935 5936 if ( unpack("V", $buffer) == ZIP_END_CENTRAL_HDR_SIG ) { 5937 5938 $HeaderOffsetIndex->addOffset($here + $PREFIX_DELTA, ZIP_END_CENTRAL_HDR_SIG) ; 5939 5940 $diskNumber = unpack("v", substr($buffer, 4, 2)); 5941 $cdDiskNumber = unpack("v", substr($buffer, 6, 2)); 5942 $entriesOnThisDisk= unpack("v", substr($buffer, 8, 2)); 5943 $totalEntries = unpack("v", substr($buffer, 10, 2)); 5944 $centralDirSize = unpack("V", substr($buffer, 12, 4)); 5945 $centralDirOffset = unpack("V", substr($buffer, 16, 4)); 5946 $commentLength = unpack("v", substr($buffer, 20, 2)); 5947 } 5948 else { 5949 $fh->seek(0, SEEK_END) ; 5950 5951 my $fileLen = $fh->tell(); 5952 my $want = 0 ; 5953 5954 while(1) { 5955 $want += 1024 * 32; 5956 my $seekTo = $fileLen - $want; 5957 if ($seekTo < 0 ) { 5958 $seekTo = 0; 5959 $want = $fileLen ; 5960 } 5961 $fh->seek( $seekTo, SEEK_SET); 5962 $fh->read($buffer, $want) ; 5963 my $pos = rindex( $buffer, Pack_ZIP_END_CENTRAL_HDR_SIG); 5964 5965 if ($pos >= 0 && $want - $pos > ZIP_EOCD_MIN_SIZE) { 5966 $here = $seekTo + $pos ; 5967 $HeaderOffsetIndex->addOffset($here + $PREFIX_DELTA, ZIP_END_CENTRAL_HDR_SIG) ; 5968 5969 $diskNumber = unpack("v", substr($buffer, $pos + 4, 2)); 5970 $cdDiskNumber = unpack("v", substr($buffer, $pos + 6, 2)); 5971 $entriesOnThisDisk= unpack("v", substr($buffer, $pos + 8, 2)); 5972 $totalEntries = unpack("v", substr($buffer, $pos + 10, 2)); 5973 $centralDirSize = unpack("V", substr($buffer, $pos + 12, 4)); 5974 $centralDirOffset = unpack("V", substr($buffer, $pos + 16, 4)); 5975 $commentLength = unpack("v", substr($buffer, $pos + 20, 2)) // 0; 5976 5977 my $expectedEof = $fileLen - $want + $pos + ZIP_EOCD_MIN_SIZE + $commentLength ; 5978 # check for trailing data after end of zip 5979 if ($expectedEof < $fileLen ) { 5980 $TRAILING = $expectedEof ; 5981 $trailingBytes = $FILELEN - $expectedEof ; 5982 } 5983 last ; 5984 } 5985 5986 return undef 5987 if $want == $fileLen; 5988 5989 } 5990 } 5991 5992 $EOCD_Present = 1; 5993 5994 # Empty zip file can just contain an EOCD record 5995 return (0, 0) 5996 if ZIP_EOCD_MIN_SIZE + $commentLength + $trailingBytes == $FILELEN ; 5997 5998 if (needZip64EOCDLocator($diskNumber, $cdDiskNumber, $entriesOnThisDisk, $totalEntries, $centralDirOffset, $centralDirSize) && 5999 ! emptyArchive($here, $diskNumber, $cdDiskNumber, $entriesOnThisDisk, $totalEntries, $centralDirOffset, $centralDirSize)) 6000 { 6001 ($centralDirOffset, $centralDirSize) = offsetFromZip64($fh, $here, ZIP_EOCD_MIN_SIZE + $commentLength + $trailingBytes) 6002 } 6003 elsif ($is64bit) 6004 { 6005 # use-case is where a 64-bit zip file doesn't use the 64-bit 6006 # extensions. 6007 # print "EOCD not 64-bit $centralDirOffset ($here)\n" ; 6008 6009 fatal_tryWalk $here, "Zip file > 4Gig. Expected 'Offset to Central Dir' to be 0xFFFFFFFF, got " . hexValue($centralDirOffset); 6010 6011 $centralDirOffset += $over64bit; 6012 $is64In32 = 1; 6013 } 6014 else 6015 { 6016 if ($centralDirSize) 6017 { 6018 my $commonMessage = "'Offset to Central Directory' field in '" . Signatures::name(ZIP_END_CENTRAL_HDR_SIG) . "' is invalid"; 6019 $centralDirOffset = checkOffsetValue($centralDirOffset, $here, $centralDirSize, $commonMessage, $here + 16, ZIP_CENTRAL_HDR_SIG, 1) ; 6020 } 6021 } 6022 6023 return (0, 0) 6024 if $totalEntries == 0 && $entriesOnThisDisk == 0; 6025 6026 # APK Signing Block is directly before the first CD entry 6027 # Check if it is present 6028 ($START_APK, $APK, $APK_LEN) = chckForAPKSigningBlock($fh, $centralDirOffset, ZIP_EOCD_MIN_SIZE + $commentLength); 6029 6030 return ($centralDirOffset, $centralDirSize) ; 6031} 6032 6033sub findID 6034{ 6035 my $id_want = shift ; 6036 my $data = shift; 6037 6038 my $XLEN = length $data ; 6039 6040 my $offset = 0 ; 6041 while ($offset < $XLEN) { 6042 6043 return undef 6044 if $offset + ZIP_EXTRA_SUBFIELD_HEADER_SIZE > $XLEN ; 6045 6046 my $id = substr($data, $offset, ZIP_EXTRA_SUBFIELD_ID_SIZE); 6047 $id = unpack("v", $id); 6048 $offset += ZIP_EXTRA_SUBFIELD_ID_SIZE; 6049 6050 my $subLen = unpack("v", substr($data, $offset, 6051 ZIP_EXTRA_SUBFIELD_LEN_SIZE)); 6052 $offset += ZIP_EXTRA_SUBFIELD_LEN_SIZE ; 6053 6054 return undef 6055 if $offset + $subLen > $XLEN ; 6056 6057 return substr($data, $offset, $subLen) 6058 if $id eq $id_want ; 6059 6060 $offset += $subLen ; 6061 } 6062 6063 return undef ; 6064} 6065 6066 6067sub nibbles 6068{ 6069 my @nibbles = ( 6070 [ 16 => 0x1000000000000000 ], 6071 [ 15 => 0x100000000000000 ], 6072 [ 14 => 0x10000000000000 ], 6073 [ 13 => 0x1000000000000 ], 6074 [ 12 => 0x100000000000 ], 6075 [ 11 => 0x10000000000 ], 6076 [ 10 => 0x1000000000 ], 6077 [ 9 => 0x100000000 ], 6078 [ 8 => 0x10000000 ], 6079 [ 7 => 0x1000000 ], 6080 [ 6 => 0x100000 ], 6081 [ 5 => 0x10000 ], 6082 [ 4 => 0x1000 ], 6083 [ 4 => 0x100 ], 6084 [ 4 => 0x10 ], 6085 [ 4 => 0x1 ], 6086 ); 6087 my $value = shift ; 6088 6089 for my $pair (@nibbles) 6090 { 6091 my ($count, $limit) = @{ $pair }; 6092 6093 return $count 6094 if $value >= $limit ; 6095 } 6096} 6097 6098{ 6099 package HeaderOffsetEntry; 6100 6101 sub new 6102 { 6103 my $class = shift ; 6104 my $offset = shift ; 6105 my $signature = shift; 6106 6107 bless [ $offset, $signature, Signatures::name($signature)] , $class; 6108 6109 } 6110 6111 sub offset 6112 { 6113 my $self = shift; 6114 return $self->[0]; 6115 } 6116 6117 sub signature 6118 { 6119 my $self = shift; 6120 return $self->[1]; 6121 } 6122 6123 sub name 6124 { 6125 my $self = shift; 6126 return $self->[2]; 6127 } 6128 6129} 6130 6131{ 6132 package HeaderOffsetIndex; 6133 6134 # Store a list of header offsets recorded when scannning the central directory 6135 6136 sub new 6137 { 6138 my $class = shift ; 6139 6140 my %object = ( 6141 'offsetIndex' => [], 6142 'offset2Index' => {}, 6143 'offset2Signature' => {}, 6144 'currentIndex' => -1, 6145 'currentSignature' => 0, 6146 # 'sigNames' => $sigNames, 6147 ) ; 6148 6149 bless \%object, $class; 6150 } 6151 6152 sub sortOffsets 6153 { 6154 my $self = shift ; 6155 6156 @{ $self->{offsetIndex} } = sort { $a->[0] <=> $b->[0] } 6157 @{ $self->{offsetIndex} }; 6158 my $ix = 0; 6159 $self->{offset2Index}{$_} = $ix++ 6160 for @{ $self->{offsetIndex} } ; 6161 } 6162 6163 sub addOffset 6164 { 6165 my $self = shift ; 6166 my $offset = shift ; 6167 my $signature = shift ; 6168 6169 $offset += $PREFIX_DELTA ; 6170 $self->addOffsetNoPrefix($offset, $signature); 6171 } 6172 6173 sub addOffsetNoPrefix 6174 { 6175 my $self = shift ; 6176 my $offset = shift ; 6177 my $signature = shift ; 6178 6179 my $name = Signatures::name($signature); 6180 6181 if (! defined $self->{offset2Signature}{$offset}) 6182 { 6183 push @{ $self->{offsetIndex} }, HeaderOffsetEntry->new($offset, $signature) ; 6184 $self->{offset2Signature}{$offset} = $signature; 6185 } 6186 } 6187 6188 sub getNextIndex 6189 { 6190 my $self = shift ; 6191 my $offset = shift ; 6192 6193 $self->{currentIndex} ++; 6194 6195 return ${ $self->{offsetIndex} }[$self->{currentIndex}] // undef 6196 } 6197 6198 sub rewindIndex 6199 { 6200 my $self = shift ; 6201 my $offset = shift ; 6202 6203 $self->{currentIndex} --; 6204 } 6205 6206 sub dump 6207 { 6208 my $self = shift; 6209 6210 say "### HeaderOffsetIndex"; 6211 say "### Offset\tSignature"; 6212 for my $x ( @{ $self->{offsetIndex} } ) 6213 { 6214 my ($offset, $sig) = @$x; 6215 printf "### %X %d\t\t" . $x->name() . "\n", $x->offset(), $x->offset(); 6216 } 6217 } 6218 6219 sub checkForOverlap 6220 { 6221 my $self = shift ; 6222 my $need = shift; 6223 6224 my $needOffset = $FH->tell() + $need; 6225 6226 for my $hdrOffset (@{ $self->{offsetIndex} }) 6227 { 6228 my $delta = $hdrOffset - $needOffset; 6229 return [$self->{offsetIndex}{$hdrOffset}, $needOffset - $hdrOffset] 6230 if $delta <= 0 ; 6231 } 6232 6233 return [undef, undef]; 6234 } 6235 6236} 6237 6238{ 6239 package FieldsAndAccessors; 6240 6241 sub Add 6242 { 6243 use Data::Dumper ; 6244 6245 my $classname = shift; 6246 my $object = shift; 6247 my $fields = shift ; 6248 my $no_handler = shift // {}; 6249 6250 state $done = {}; 6251 6252 6253 while (my ($name, $value) = each %$fields) 6254 { 6255 my $method = "${classname}::$name"; 6256 6257 $object->{$name} = $value; 6258 6259 # don't auto-create a handler 6260 next 6261 if $no_handler->{$name}; 6262 6263 no strict 'refs'; 6264 6265 # Don't use lvalue sub for now - vscode debugger breaks with it enabled. 6266 # https://github.com/richterger/Perl-LanguageServer/issues/194 6267 # *$method = sub : lvalue { 6268 # $_[0]->{$name} ; 6269 # } 6270 # unless defined $done->{$method}; 6271 6272 # Auto-generate getter/setter 6273 *$method = sub { 6274 $_[0]->{$name} = $_[1] 6275 if @_ == 2; 6276 return $_[0]->{$name} ; 6277 } 6278 unless defined $done->{$method}; 6279 6280 ++ $done->{$method}; 6281 6282 6283 } 6284 } 6285} 6286 6287{ 6288 package BaseEntry ; 6289 6290 sub new 6291 { 6292 my $class = shift ; 6293 6294 state $index = 0; 6295 6296 my %fields = ( 6297 'index' => $index ++, 6298 'zip64' => 0, 6299 'offsetStart' => 0, 6300 'offsetEnd' => 0, 6301 'inCentralDir' => 0, 6302 'encapsulated' => 0, # enclosed in outer zip 6303 'childrenCount' => 0, # this entry is a zip with enclosed children 6304 'streamed' => 0, 6305 'languageEncodingFlag' => 0, 6306 'entryType' => 0, 6307 ) ; 6308 6309 my $self = bless {}, $class; 6310 6311 FieldsAndAccessors::Add($class, $self, \%fields) ; 6312 6313 return $self; 6314 } 6315 6316 sub increment_childrenCount 6317 { 6318 my $self = shift; 6319 $self->{childrenCount} ++; 6320 } 6321} 6322 6323{ 6324 package LocalCentralEntryBase ; 6325 6326 use parent -norequire , 'BaseEntry' ; 6327 6328 sub new 6329 { 6330 my $class = shift ; 6331 6332 my $self = $class->SUPER::new(); 6333 6334 6335 my %fields = ( 6336 # fields from the header 6337 'centralHeaderOffset' => 0, 6338 'localHeaderOffset' => 0, 6339 6340 'extractVersion' => 0, 6341 'generalPurposeFlags' => 0, 6342 'compressedMethod' => 0, 6343 'lastModDateTime' => 0, 6344 'crc32' => 0, 6345 'compressedSize' => 0, 6346 'uncompressedSize' => 0, 6347 'filename' => '', 6348 'outputFilename' => '', 6349 # inferred data 6350 # 'InCentralDir' => 0, 6351 # 'zip64' => 0, 6352 6353 'zip64ExtraPresent' => 0, 6354 'zip64SizesPresent' => 0, 6355 'payloadOffset' => 0, 6356 6357 # zip64 extra 6358 'zip64_compressedSize' => undef, 6359 'zip64_uncompressedSize' => undef, 6360 'zip64_localHeaderOffset' => undef, 6361 'zip64_diskNumber' => undef, 6362 'zip64_diskNumberPresent' => 0, 6363 6364 # Values direct from the header before merging any Zip64 values 6365 'std_compressedSize' => undef, 6366 'std_uncompressedSize' => undef, 6367 'std_localHeaderOffset' => undef, 6368 'std_diskNumber' => undef, 6369 6370 # AES 6371 'aesStrength' => 0, 6372 'aesValid' => 0, 6373 6374 # Minizip CD encryption 6375 'minizip_secure' => 0, 6376 6377 ) ; 6378 6379 FieldsAndAccessors::Add($class, $self, \%fields) ; 6380 6381 return $self; 6382 } 6383} 6384 6385{ 6386 package Zip64EndCentralHeaderEntry ; 6387 6388 use parent -norequire , 'LocalCentralEntryBase' ; 6389 6390 sub new 6391 { 6392 my $class = shift ; 6393 6394 my $self = $class->SUPER::new(); 6395 6396 6397 my %fields = ( 6398 'inCentralDir' => 1, 6399 ) ; 6400 6401 FieldsAndAccessors::Add($class, $self, \%fields) ; 6402 6403 return $self; 6404 } 6405 6406} 6407 6408{ 6409 package CentralDirectoryEntry; 6410 6411 use parent -norequire , 'LocalCentralEntryBase' ; 6412 6413 use constant Offset_VersionMadeBy => 4; 6414 use constant Offset_VersionNeededToExtract => 6; 6415 use constant Offset_GeneralPurposeFlags => 8; 6416 use constant Offset_CompressionMethod => 10; 6417 use constant Offset_ModificationTime => 12; 6418 use constant Offset_ModificationDate => 14; 6419 use constant Offset_CRC32 => 16; 6420 use constant Offset_CompressedSize => 20; 6421 use constant Offset_UncompressedSize => 24; 6422 use constant Offset_FilenameLength => 28; 6423 use constant Offset_ExtraFieldLength => 30; 6424 use constant Offset_FileCommentLength => 32; 6425 use constant Offset_DiskNumber => 34; 6426 use constant Offset_InternalAttributes => 36; 6427 use constant Offset_ExternalAttributes => 38; 6428 use constant Offset_RelativeOffsetToLocal => 42; 6429 use constant Offset_Filename => 46; 6430 6431 sub new 6432 { 6433 my $class = shift ; 6434 my $offset = shift; 6435 6436 # check for existing entry 6437 return $CentralDirectory->{byCentralOffset}{$offset} 6438 if defined $offset && defined $CentralDirectory->{byCentralOffset}{$offset} ; 6439 6440 my $self = $class->SUPER::new(); 6441 6442 my %fields = ( 6443 'diskNumber' => 0, 6444 'comment' => "", 6445 'ldEntry' => undef, 6446 ) ; 6447 6448 FieldsAndAccessors::Add($class, $self, \%fields) ; 6449 6450 $self->inCentralDir(1) ; 6451 $self->entryType(::ZIP_CENTRAL_HDR_SIG) ; 6452 6453 return $self; 6454 } 6455} 6456 6457{ 6458 package CentralDirectory; 6459 6460 sub new 6461 { 6462 my $class = shift ; 6463 6464 my %object = ( 6465 'entries' => [], 6466 'count' => 0, 6467 'byLocalOffset' => {}, 6468 'byCentralOffset' => {}, 6469 'byName' => {}, 6470 'offset2Index' => {}, 6471 'normalized_filenames' => {}, 6472 'CentralDirectoryOffset' => 0, 6473 'CentralDirectorySize' => 0, 6474 'zip64' => 0, 6475 'encryptedCD' => 0, 6476 'minizip_secure' => 0, 6477 'alreadyScanned' => 0, 6478 ) ; 6479 6480 bless \%object, $class; 6481 } 6482 6483 sub addEntry 6484 { 6485 my $self = shift ; 6486 my $entry = shift ; 6487 6488 my $localHeaderOffset = $entry->localHeaderOffset ; 6489 my $CentralDirectoryOffset = $entry->centralHeaderOffset ; 6490 my $filename = $entry->filename ; 6491 6492 Nesting::add($entry); 6493 6494 # Create a reference from Central to Local header entries 6495 my $ldEntry = Nesting::getLdEntryByOffset($localHeaderOffset); 6496 if ($ldEntry) 6497 { 6498 $entry->ldEntry($ldEntry) ; 6499 6500 # LD -> CD 6501 # can have multiple LD entries point to same CD 6502 # so need to keep a list 6503 $ldEntry->addCdEntry($entry); 6504 } 6505 6506 # only check for duplicate in real CD scan 6507 if ($self->{alreadyScanned} && ! $entry->encapsulated ) 6508 { 6509 my $existing = $self->{byName}{$filename} ; 6510 if ($existing && $existing->centralHeaderOffset != $entry->centralHeaderOffset) 6511 { 6512 ::error $CentralDirectoryOffset, 6513 "Duplicate Central Directory entries for filename '$filename'", 6514 "Current Central Directory entry at offset " . ::decimalHex0x($CentralDirectoryOffset), 6515 "Duplicate Central Directory entry at offset " . ::decimalHex0x($self->{byName}{$filename}{centralHeaderOffset}); 6516 6517 # not strictly illegal to have duplicate filename, so save this one 6518 } 6519 else 6520 { 6521 my $existingNormalizedEntry = $self->normalize_filename($entry, $filename); 6522 if ($existingNormalizedEntry) 6523 { 6524 ::warning $CentralDirectoryOffset, 6525 "Portability Issue: Found case-insensitive duplicate for filename '$filename'", 6526 "Current Central Directory entry at offset " . ::decimalHex0x($CentralDirectoryOffset), 6527 "Duplicate Central Directory entry for filename '" . $existingNormalizedEntry->outputFilename . "' at offset " . ::decimalHex0x($existingNormalizedEntry->centralHeaderOffset); 6528 } 6529 } 6530 } 6531 6532 # CD can get processed twice, so return if already processed 6533 return 6534 if $self->{byCentralOffset}{$CentralDirectoryOffset} ; 6535 6536 if (! $entry->encapsulated ) 6537 { 6538 push @{ $self->{entries} }, $entry; 6539 6540 $self->{byLocalOffset}{$localHeaderOffset} = $entry; 6541 $self->{byCentralOffset}{$CentralDirectoryOffset} = $entry; 6542 $self->{byName}{ $filename } = $entry; 6543 $self->{offset2Index} = $self->{count} ++; 6544 } 6545 6546 } 6547 6548 sub exists 6549 { 6550 my $self = shift ; 6551 6552 return scalar @{ $self->{entries} }; 6553 } 6554 6555 sub sortByLocalOffset 6556 { 6557 my $self = shift ; 6558 6559 @{ $self->{entries} } = sort { $a->localHeaderOffset() <=> $b->localHeaderOffset() } 6560 @{ $self->{entries} }; 6561 } 6562 6563 sub getByLocalOffset 6564 { 6565 my $self = shift ; 6566 my $offset = shift ; 6567 6568 # TODO - what happens if none exists? 6569 my $entry = $self->{byLocalOffset}{$offset - $PREFIX_DELTA} ; 6570 return $entry ; 6571 } 6572 6573 sub localOffset 6574 { 6575 my $self = shift ; 6576 my $offset = shift ; 6577 6578 # TODO - what happens if none exists? 6579 return $self->{byLocalOffset}{$offset - $PREFIX_DELTA} ; 6580 } 6581 6582 sub getNextLocalOffset 6583 { 6584 my $self = shift ; 6585 my $offset = shift ; 6586 6587 my $index = $self->{offset2Index} ; 6588 6589 if ($index + 1 >= $self->{count}) 6590 { 6591 return 0; 6592 } 6593 6594 return ${ $self->{entries} }[$index+1]->localHeaderOffset() ; 6595 } 6596 6597 sub inCD 6598 { 6599 my $self = shift ; 6600 $FH->tell() >= $self->{CentralDirectoryOffset}; 6601 } 6602 6603 sub setPkEncryptedCD 6604 { 6605 my $self = shift ; 6606 6607 $self->{encryptedCD} = 1 ; 6608 6609 } 6610 6611 sub setMiniZipEncrypted 6612 { 6613 my $self = shift ; 6614 6615 $self->{minizip_secure} = 1 ; 6616 } 6617 6618 sub isMiniZipEncrypted 6619 { 6620 my $self = shift ; 6621 return $self->{minizip_secure}; 6622 } 6623 6624 sub isEncryptedCD 6625 { 6626 my $self = shift ; 6627 return $self->{encryptedCD} && ! $self->{minizip_secure}; 6628 } 6629 6630 sub normalize_filename 6631 { 6632 # check if there is a filename that already exists 6633 # with the same name when normalized to lower case 6634 6635 my $self = shift ; 6636 my $entry = shift; 6637 my $filename = shift; 6638 6639 my $nFilename = lc $filename; 6640 6641 my $lookup = $self->{normalized_filenames}{$nFilename}; 6642 # if ($lookup && $lookup ne $filename) 6643 if ($lookup) 6644 { 6645 return $lookup, 6646 } 6647 6648 $self->{normalized_filenames}{$nFilename} = $entry; 6649 6650 return undef; 6651 } 6652} 6653 6654{ 6655 package LocalDirectoryEntry; 6656 6657 use parent -norequire , 'LocalCentralEntryBase' ; 6658 6659 use constant Offset_VersionNeededToExtract => 4; 6660 use constant Offset_GeneralPurposeFlags => 6; 6661 use constant Offset_CompressionMethod => 8; 6662 use constant Offset_ModificationTime => 10; 6663 use constant Offset_ModificationDate => 12; 6664 use constant Offset_CRC32 => 14; 6665 use constant Offset_CompressedSize => 18; 6666 use constant Offset_UncompressedSize => 22; 6667 use constant Offset_FilenameLength => 26; 6668 use constant Offset_ExtraFieldLength => 27; 6669 use constant Offset_Filename => 30; 6670 6671 sub new 6672 { 6673 my $class = shift ; 6674 6675 my $self = $class->SUPER::new(); 6676 6677 my %fields = ( 6678 'streamedMatch' => 0, 6679 'readDataDescriptor' => 0, 6680 'cdEntryIndex' => {}, 6681 'cdEntryList' => [], 6682 ) ; 6683 6684 FieldsAndAccessors::Add($class, $self, \%fields) ; 6685 6686 $self->inCentralDir(0) ; 6687 $self->entryType(::ZIP_LOCAL_HDR_SIG) ; 6688 6689 return $self; 6690 } 6691 6692 sub addCdEntry 6693 { 6694 my $self = shift ; 6695 my $entry = shift; 6696 6697 # don't want encapsulated entries 6698 # and protect against duplicates 6699 return 6700 if $entry->encapsulated || 6701 $self->{cdEntryIndex}{$entry->index} ++ >= 1; 6702 6703 push @{ $self->{cdEntryList} }, $entry ; 6704 } 6705 6706 sub getCdEntry 6707 { 6708 my $self = shift ; 6709 6710 return [] 6711 if ! $self->{cdEntryList} ; 6712 6713 return $self->{cdEntryList}[0] ; 6714 } 6715 6716 sub getCdEntries 6717 { 6718 my $self = shift ; 6719 return $self->{cdEntryList} ; 6720 } 6721} 6722 6723{ 6724 package LocalDirectory; 6725 6726 sub new 6727 { 6728 my $class = shift ; 6729 6730 my %object = ( 6731 'entries' => [], 6732 'count' => 0, 6733 'byLocalOffset' => {}, 6734 'byName' => {}, 6735 'offset2Index' => {}, 6736 'normalized_filenames' => {}, 6737 'CentralDirectoryOffset' => 0, 6738 'CentralDirectorySize' => 0, 6739 'zip64' => 0, 6740 'encryptedCD' => 0, 6741 'streamedPresent' => 0, 6742 ) ; 6743 6744 bless \%object, $class; 6745 } 6746 6747 sub isLocalEntryNested 6748 { 6749 my $self = shift ; 6750 my $localEntry = shift; 6751 6752 return Nesting::getFirstEncapsulation($localEntry); 6753 6754 } 6755 6756 sub addEntry 6757 { 6758 my $self = shift ; 6759 my $localEntry = shift ; 6760 6761 my $filename = $localEntry->filename ; 6762 my $localHeaderOffset = $localEntry->localHeaderOffset; 6763 my $payloadOffset = $localEntry->payloadOffset ; 6764 6765 my $existingEntry = $self->{byName}{$filename} ; 6766 6767 my $endSurfaceArea = $payloadOffset + ($localEntry->compressedSize // 0) ; 6768 6769 if ($existingEntry) 6770 { 6771 ::error $localHeaderOffset, 6772 "Duplicate Local Directory entry for filename '$filename'", 6773 "Current Local Directory entry at offset " . ::decimalHex0x($localHeaderOffset), 6774 "Duplicate Local Directory entry at offset " . ::decimalHex0x($existingEntry->localHeaderOffset), 6775 } 6776 else 6777 { 6778 6779 my ($existing_filename, $offset) = $self->normalize_filename($filename); 6780 if ($existing_filename) 6781 { 6782 ::warning $localHeaderOffset, 6783 "Portability Issue: Found case-insensitive duplicate for filename '$filename'", 6784 "Current Local Directory entry at offset " . ::decimalHex0x($localHeaderOffset), 6785 "Duplicate Local Directory entry for filename '$existing_filename' at offset " . ::decimalHex0x($offset); 6786 } 6787 } 6788 6789 # keep nested local entries for zipbomb deteection 6790 push @{ $self->{entries} }, $localEntry; 6791 6792 $self->{byLocalOffset}{$localHeaderOffset} = $localEntry; 6793 $self->{byName}{ $filename } = $localEntry; 6794 6795 $self->{streamedPresent} ++ 6796 if $localEntry->streamed; 6797 6798 Nesting::add($localEntry); 6799 } 6800 6801 sub exists 6802 { 6803 my $self = shift ; 6804 6805 return scalar @{ $self->{entries} }; 6806 } 6807 6808 sub sortByLocalOffset 6809 { 6810 my $self = shift ; 6811 6812 @{ $self->{entries} } = sort { $a->localHeaderOffset() <=> $b->localHeaderOffset() } 6813 @{ $self->{entries} }; 6814 } 6815 6816 sub localOffset 6817 { 6818 my $self = shift ; 6819 my $offset = shift ; 6820 6821 return $self->{byLocalOffset}{$offset} ; 6822 } 6823 6824 sub getByLocalOffset 6825 { 6826 my $self = shift ; 6827 my $offset = shift ; 6828 6829 # TODO - what happens if none exists? 6830 my $entry = $self->{byLocalOffset}{$offset} ; 6831 return $entry ; 6832 } 6833 6834 sub getNextLocalOffset 6835 { 6836 my $self = shift ; 6837 my $offset = shift ; 6838 6839 my $index = $self->{offset2Index} ; 6840 6841 if ($index + 1 >= $self->{count}) 6842 { 6843 return 0; 6844 } 6845 6846 return ${ $self->{entries} }[$index+1]->localHeaderOffset ; 6847 } 6848 6849 sub lastStreamedEntryAdded 6850 { 6851 my $self = shift ; 6852 my $offset = shift ; 6853 6854 for my $entry ( reverse @{ $self->{entries} } ) 6855 { 6856 if ($entry->streamed)# && ! $entry->streamedMatch) 6857 { 6858 $entry->streamedMatch($entry->streamedMatch + 1) ; 6859 return $entry; 6860 } 6861 } 6862 6863 return undef; 6864 } 6865 6866 sub inCD 6867 { 6868 my $self = shift ; 6869 $FH->tell() >= $self->{CentralDirectoryOffset}; 6870 } 6871 6872 sub setPkEncryptedCD 6873 { 6874 my $self = shift ; 6875 6876 $self->{encryptedCD} = 1 ; 6877 6878 } 6879 6880 sub isEncryptedCD 6881 { 6882 my $self = shift ; 6883 return $self->{encryptedCD} ; 6884 } 6885 6886 sub anyStreamedEntries 6887 { 6888 my $self = shift ; 6889 return $self->{streamedPresent} ; 6890 } 6891 6892 sub normalize_filename 6893 { 6894 # check if there is a filename that already exists 6895 # with the same name when normalized to lower case 6896 6897 my $self = shift ; 6898 my $filename = shift; 6899 6900 my $nFilename = lc $filename; 6901 6902 my $lookup = $self->{normalized_filenames}{$nFilename}; 6903 if ($lookup && $lookup ne $filename) 6904 { 6905 return $self->{byName}{$lookup}{outputFilename}, 6906 $self->{byName}{$lookup}{localHeaderOffset} 6907 } 6908 6909 $self->{normalized_filenames}{$nFilename} = $filename; 6910 6911 return undef, undef; 6912 } 6913} 6914 6915{ 6916 package Eocd ; 6917 6918 sub new 6919 { 6920 my $class = shift ; 6921 6922 my %object = ( 6923 'zip64' => 0, 6924 ) ; 6925 6926 bless \%object, $class; 6927 } 6928} 6929 6930sub displayFileInfo 6931{ 6932 return; 6933 6934 my $filename = shift; 6935 6936 info undef, 6937 "Filename : '$filename'", 6938 "Size : " . (-s $filename) . " (" . decimalHex0x(-s $filename) . ")", 6939 # "Native Encoding: '" . TextEncoding::getNativeLocaleName() . "'", 6940} 6941 6942{ 6943 package TextEncoding; 6944 6945 my $nativeLocaleEncoding = getNativeLocale(); 6946 my $opt_EncodingFrom = $nativeLocaleEncoding; 6947 my $opt_EncodingTo = $nativeLocaleEncoding ; 6948 my $opt_Encoding_Enabled; 6949 my $opt_Debug_Encoding; 6950 my $opt_use_LanguageEncodingFlag; 6951 6952 sub setDefaults 6953 { 6954 $nativeLocaleEncoding = getNativeLocale(); 6955 $opt_EncodingFrom = $nativeLocaleEncoding; 6956 $opt_EncodingTo = $nativeLocaleEncoding ; 6957 $opt_Encoding_Enabled = 1; 6958 $opt_Debug_Encoding = 0; 6959 $opt_use_LanguageEncodingFlag = 1; 6960 } 6961 6962 sub getNativeLocale 6963 { 6964 state $enc; 6965 6966 if (! defined $enc) 6967 { 6968 eval 6969 { 6970 require encoding ; 6971 my $encoding = encoding::_get_locale_encoding() ; 6972 if (! $encoding) 6973 { 6974 # CP437 is the legacy default for zip files 6975 $encoding = 'cp437'; 6976 # ::warning undef, "Cannot determine system charset: defaulting to '$encoding'" 6977 } 6978 $enc = Encode::find_encoding($encoding) ; 6979 } ; 6980 } 6981 6982 return $enc; 6983 } 6984 6985 sub getNativeLocaleName 6986 { 6987 state $name; 6988 6989 return $name 6990 if defined $name ; 6991 6992 if (! defined $name) 6993 { 6994 my $enc = getNativeLocale(); 6995 if ($enc) 6996 { 6997 $name = $enc->name() 6998 } 6999 else 7000 { 7001 $name = 'unknown' 7002 } 7003 } 7004 7005 return $name ; 7006 } 7007 7008 sub parseEncodingOption 7009 { 7010 my $opt_name = shift; 7011 my $opt_value = shift; 7012 7013 my $enc = Encode::find_encoding($opt_value) ; 7014 die "Encoding '$opt_value' not found for option '$opt_name'\n" 7015 unless ref $enc; 7016 7017 if ($opt_name eq 'encoding') 7018 { 7019 $opt_EncodingFrom = $enc; 7020 } 7021 elsif ($opt_name eq 'output-encoding') 7022 { 7023 $opt_EncodingTo = $enc; 7024 } 7025 else 7026 { 7027 die "Unknown option $opt_name\n" 7028 } 7029 } 7030 7031 sub NoEncoding 7032 { 7033 my $opt_name = shift; 7034 my $opt_value = shift; 7035 7036 $opt_Encoding_Enabled = 0 ; 7037 } 7038 7039 sub LanguageEncodingFlag 7040 { 7041 my $opt_name = shift; 7042 my $opt_value = shift; 7043 7044 $opt_use_LanguageEncodingFlag = $opt_value ; 7045 } 7046 7047 sub debugEncoding 7048 { 7049 if (@_) 7050 { 7051 $opt_Debug_Encoding = 1 ; 7052 } 7053 7054 return $opt_Debug_Encoding ; 7055 } 7056 7057 sub encodingInfo 7058 { 7059 return 7060 unless $opt_Encoding_Enabled && $opt_Debug_Encoding ; 7061 7062 my $enc = TextEncoding::getNativeLocaleName(); 7063 my $from = $opt_EncodingFrom->name(); 7064 my $to = $opt_EncodingTo->name(); 7065 7066 ::debug undef, "Debug Encoding Enabled", 7067 "System Default Encoding: '$enc'", 7068 "Encoding used when reading from zip file: '$from'", 7069 "Encoding used for display output: '$to'"; 7070 } 7071 7072 sub cleanEval 7073 { 7074 chomp $_[0] ; 7075 $_[0] =~ s/ at .+ line \d+\.$// ; 7076 return $_[0]; 7077 } 7078 7079 sub decode 7080 { 7081 my $name = shift ; 7082 my $type = shift ; 7083 my $LanguageEncodingFlag = shift ; 7084 7085 return $name 7086 if ! $opt_Encoding_Enabled ; 7087 7088 # TODO - check for badly formed content 7089 if ($LanguageEncodingFlag && $opt_use_LanguageEncodingFlag) 7090 { 7091 # use "utf-8-strict" to catch invalid codepoints 7092 eval { $name = Encode::decode('utf-8-strict', $name, Encode::FB_CROAK ) } ; 7093 ::warning $FH->tell() - length $name, "Could not decode 'UTF-8' $type: " . cleanEval $@ 7094 if $@ ; 7095 } 7096 else 7097 { 7098 eval { $name = $opt_EncodingFrom->decode($name, Encode::FB_CROAK ) } ; 7099 ::warning $FH->tell() - length $name, "Could not decode '" . $opt_EncodingFrom->name() . "' $type: " . cleanEval $@ 7100 if $@; 7101 } 7102 7103 # remove any BOM 7104 $name =~ s/^\x{FEFF}//; 7105 7106 return $name ; 7107 } 7108 7109 sub encode 7110 { 7111 my $name = shift ; 7112 my $type = shift ; 7113 my $LanguageEncodingFlag = shift ; 7114 7115 return $name 7116 if ! $opt_Encoding_Enabled; 7117 7118 if ($LanguageEncodingFlag && $opt_use_LanguageEncodingFlag) 7119 { 7120 eval { $name = Encode::encode('utf8', $name, Encode::FB_CROAK ) } ; 7121 ::warning $FH->tell() - length $name, "Could not encode 'utf8' $type: " . cleanEval $@ 7122 if $@ ; 7123 } 7124 else 7125 { 7126 eval { $name = $opt_EncodingTo->encode($name, Encode::FB_CROAK ) } ; 7127 ::warning $FH->tell() - length $name, "Could not encode '" . $opt_EncodingTo->name() . "' $type: " . cleanEval $@ 7128 if $@; 7129 } 7130 7131 return $name; 7132 } 7133} 7134 7135{ 7136 package Nesting; 7137 7138 use Data::Dumper; 7139 7140 my @nestingStack = (); 7141 my %encapsulations; 7142 my %inner2outer; 7143 my $encapsulationCount = 0; 7144 my %index2entry ; 7145 my %offset2entry ; 7146 7147 # my %localOffset2cdEntry; 7148 7149 sub clearStack 7150 { 7151 @nestingStack = (); 7152 %encapsulations = (); 7153 %inner2outer = (); 7154 %index2entry = (); 7155 %offset2entry = (); 7156 $encapsulationCount = 0; 7157 } 7158 7159 sub dump 7160 { 7161 my $indent = shift // 0; 7162 7163 for my $offset (sort {$a <=> $b} keys %offset2entry) 7164 { 7165 my $leading = " " x $indent ; 7166 say $leading . "\nOffset $offset" ; 7167 say Dumper($offset2entry{$offset}) 7168 } 7169 } 7170 7171 sub add 7172 { 7173 my $entry = shift; 7174 7175 getEnclosingEntry($entry); 7176 push @nestingStack, $entry; 7177 $index2entry{ $entry->index } = $entry; 7178 $offset2entry{ $entry->offsetStart } = $entry; 7179 } 7180 7181 sub getEnclosingEntry 7182 { 7183 my $entry = shift; 7184 7185 my $filename = $entry->filename; 7186 7187 pop @nestingStack 7188 while @nestingStack && $entry->offsetStart > $nestingStack[-1]->offsetEnd ; 7189 7190 my $match = undef; 7191 7192 if (@nestingStack && 7193 $entry->offsetStart >= $nestingStack[-1]->offsetStart && 7194 $entry->offsetEnd <= $nestingStack[-1]->offsetEnd && 7195 $entry->index != $nestingStack[-1]->index) 7196 { 7197 # Nested entry found 7198 $match = $nestingStack[-1]; 7199 push @{ $encapsulations{ $match->index } }, $entry; 7200 $inner2outer{ $entry->index} = $match->index; 7201 ++ $encapsulationCount; 7202 7203 $entry->encapsulated(1) ; 7204 $match->increment_childrenCount(); 7205 7206 if ($NESTING_DEBUG) 7207 { 7208 say "#### nesting " . (caller(1))[3] . " index #" . $entry->index . ' "' . 7209 $entry->outputFilename . '" [' . $entry->offsetStart . "->" . $entry->offsetEnd . "]" . 7210 " in #" . $match->index . ' "' . 7211 $match->outputFilename . '" [' . $match->offsetStart . "->" . $match->offsetEnd . "]" ; 7212 } 7213 } 7214 7215 return $match; 7216 } 7217 7218 sub isNested 7219 { 7220 my $offsetStart = shift; 7221 my $offsetEnd = shift; 7222 7223 if ($NESTING_DEBUG) 7224 { 7225 say "### Want: offsetStart " . ::decimalHex0x($offsetStart) . " offsetEnd " . ::decimalHex0x($offsetEnd); 7226 for my $entry (@nestingStack) 7227 { 7228 say "### Have: offsetStart " . ::decimalHex0x($entry->offsetStart) . " offsetEnd " . ::decimalHex0x($entry->offsetEnd); 7229 } 7230 } 7231 7232 return 0 7233 unless @nestingStack ; 7234 7235 my @copy = @nestingStack ; 7236 7237 pop @copy 7238 while @copy && $offsetStart > $copy[-1]->offsetEnd ; 7239 7240 return @copy && 7241 $offsetStart >= $copy[-1]->offsetStart && 7242 $offsetEnd <= $copy[-1]->offsetEnd ; 7243 } 7244 7245 sub getOuterEncapsulation 7246 { 7247 my $entry = shift; 7248 7249 my $outerIndex = $inner2outer{ $entry->index } ; 7250 7251 return undef 7252 if ! defined $outerIndex ; 7253 7254 return $index2entry{$outerIndex} // undef; 7255 } 7256 7257 sub getEncapsulations 7258 { 7259 my $entry = shift; 7260 7261 return $encapsulations{ $entry->index } ; 7262 } 7263 7264 sub getFirstEncapsulation 7265 { 7266 my $entry = shift; 7267 7268 my $got = $encapsulations{ $entry->index } ; 7269 7270 return defined $got ? $$got[0] : undef; 7271 } 7272 7273 sub encapsulations 7274 { 7275 return \%encapsulations; 7276 } 7277 7278 sub encapsulationCount 7279 { 7280 return $encapsulationCount; 7281 } 7282 7283 sub childrenInCentralDir 7284 { 7285 # find local header entries that have children that are not referenced in the CD 7286 # tis means it is likely a benign nextd zip file 7287 my $entry = shift; 7288 7289 for my $child (@{ $encapsulations{$entry->index} } ) 7290 { 7291 next 7292 unless $child->entryType == ::ZIP_LOCAL_HDR_SIG ; 7293 7294 return 1 7295 if @{ $child->cdEntryList }; 7296 } 7297 7298 return 0; 7299 } 7300 7301 sub entryByIndex 7302 { 7303 my $index = shift; 7304 return $index2entry{$index}; 7305 } 7306 7307 sub getEntryByOffset 7308 { 7309 my $offset = shift; 7310 return $offset2entry{$offset}; 7311 } 7312 7313 sub getLdEntryByOffset 7314 { 7315 my $offset = shift; 7316 my $entry = $offset2entry{$offset}; 7317 7318 return $entry 7319 if $entry && $entry->entryType == ::ZIP_LOCAL_HDR_SIG; 7320 7321 return undef; 7322 } 7323 7324 sub getEntriesByOffset 7325 { 7326 return \%offset2entry ; 7327 } 7328} 7329 7330{ 7331 package SimpleTable ; 7332 7333 use List::Util qw(max sum); 7334 7335 sub new 7336 { 7337 my $class = shift; 7338 7339 my %object = ( 7340 header => [], 7341 data => [], 7342 columns => 0, 7343 prefix => '# ', 7344 ); 7345 bless \%object, $class; 7346 } 7347 7348 sub addHeaderRow 7349 { 7350 my $self = shift; 7351 push @{ $self->{header} }, [ @_ ] ; 7352 $self->{columns} = max($self->{columns}, scalar @_ ) ; 7353 } 7354 7355 sub addDataRow 7356 { 7357 my $self = shift; 7358 7359 push @{ $self->{data} }, [ @_ ] ; 7360 $self->{columns} = max($self->{columns}, scalar @_ ) ; 7361 } 7362 7363 sub hasData 7364 { 7365 my $self = shift; 7366 7367 return scalar @{ $self->{data} } ; 7368 } 7369 7370 sub display 7371 { 7372 my $self = shift; 7373 7374 # work out the column widths 7375 my @colW = (0) x $self->{columns} ; 7376 for my $row (@{ $self->{data} }, @{ $self->{header} }) 7377 { 7378 my @r = @$row; 7379 for my $ix (0 .. $self->{columns} -1) 7380 { 7381 $colW[$ix] = max($colW[$ix], 7382 3 + length( $r[$ix] ) 7383 ); 7384 } 7385 } 7386 7387 my $width = sum(@colW) ; #+ @colW ; 7388 my @template ; 7389 for my $w (@colW) 7390 { 7391 push @template, ' ' x ($w - 3); 7392 } 7393 7394 print $self->{prefix} . '-' x ($width + 1) . "\n"; 7395 7396 for my $row (@{ $self->{header} }) 7397 { 7398 my @outputRow = @template; 7399 7400 print $self->{prefix} . '| '; 7401 for my $ix (0 .. $self->{columns} -1) 7402 { 7403 my $field = $template[$ix] ; 7404 substr($field, 0, length($row->[$ix]), $row->[$ix]); 7405 print $field . ' | '; 7406 } 7407 print "\n"; 7408 7409 } 7410 7411 print $self->{prefix} . '-' x ($width + 1) . "\n"; 7412 7413 for my $row (@{ $self->{data} }) 7414 { 7415 my @outputRow = @template; 7416 7417 print $self->{prefix} . '| '; 7418 for my $ix (0 .. $self->{columns} -1) 7419 { 7420 my $field = $template[$ix] ; 7421 substr($field, 0, length($row->[$ix]), $row->[$ix]); 7422 print $field . ' | '; 7423 } 7424 print "\n"; 7425 } 7426 7427 print $self->{prefix} . '-' x ($width + 1) . "\n"; 7428 print "#\n"; 7429 } 7430} 7431 7432sub Usage 7433{ 7434 my $enc = TextEncoding::getNativeLocaleName(); 7435 7436 my $message = <<EOM; 7437zipdetails [OPTIONS] file 7438 7439Display details about the internal structure of a Zip file. 7440 7441OPTIONS 7442 7443 General Options 7444 -h, --help 7445 Display help 7446 --redact 7447 Hide filename and payload data in the output 7448 --scan 7449 Enable pessimistic scanning mode. 7450 Blindly scan the file looking for zip headers 7451 Expect false-positives. 7452 --utc 7453 Display date/time fields in UTC. Default is local time 7454 -v 7455 Enable verbose mode -- output more stuff 7456 --version 7457 Print zipdetails version number 7458 This is version $VERSION 7459 --walk 7460 Enable optimistic scanning mode. 7461 Blindly scan the file looking for zip headers 7462 Expect false-positives. 7463 7464 Filename/Comment Encoding 7465 --encoding e 7466 Use encoding "e" when reading filename/comments from the zip file 7467 Uses system encoding ('$enc') by default 7468 --no-encoding 7469 Disable filename & comment encoding. Default disabled. 7470 --output-encoding e 7471 Use encoding "e" when writing filename/comments to the display 7472 Uses system encoding ('$enc') by default 7473 --debug-encoding 7474 Display eatra info when a filename/comment encoding has changed 7475 --language-encoding, --no-language-encoding 7476 Enable/disable support for the zip file "Language Encoding" flag. 7477 When this flag is set in a zip file the filename/comment is assumed 7478 to be encoded in UTF8. 7479 Default is enabled 7480 7481 Message Control 7482 --messages, --no-messages 7483 Enable/disable all info/warning/error messages. Default enabled. 7484 --exit-bitmask, --no-exit-bitmask 7485 Enable/disable exit status bitmask for messages. Default disabled. 7486 Bitmask values are 7487 Info 1 7488 Warning 2 7489 Error 4 7490 7491Copyright (c) 2011-2024 Paul Marquess. All rights reserved. 7492 7493This program is free software; you can redistribute it and/or 7494modify it under the same terms as Perl itself. 7495EOM 7496 7497 if (@_) 7498 { 7499 warn "$_\n" 7500 for @_ ; 7501 warn "\n"; 7502 7503 die $message ; 7504 } 7505 7506 print $message ; 7507 exit 0; 7508 7509} 7510 75111; 7512 7513__END__ 7514 7515=head1 NAME 7516 7517zipdetails - display the internal structure of zip files 7518 7519=head1 SYNOPSIS 7520 7521 zipdetails [options] zipfile.zip 7522 7523=head1 DESCRIPTION 7524 7525This program creates a detailed report on the internal structure of zip 7526files. For each item of metadata within a zip file the program will output 7527 7528=over 5 7529 7530=item the offset into the zip file where the item is located. 7531 7532=item a textual representation for the item. 7533 7534=item an optional hex dump of the item. 7535 7536=back 7537 7538 7539The program assumes a prior understanding of the internal structure of Zip 7540files. You should have a copy of the zip file definition, 7541L<APPNOTE.TXT|https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT>, 7542at hand to help understand the output from this program. 7543 7544=head2 Default Behaviour 7545 7546By default the program expects to be given a well-formed zip file. It will 7547navigate the zip file by first parsing the zip C<Central Directory> at the end 7548of the file. If the C<Central Directory> is found, it will then walk 7549sequentally through the zip records starting at the beginning of the file. 7550See L<Advanced Analysis> for other processing options. 7551 7552If the program finds any structural or portability issues with the zip file 7553it will print a message at the point it finds the issue and/or in a summary 7554at the end of the output report. Whilst the set of issues that can be 7555detected it exhaustive, don't assume that this program can find I<all> the 7556possible issues in a zip file - there are likely edge conditions that need 7557to be addressed. 7558 7559If you have suggestions for use-cases where this could be enhanced please 7560consider creating an enhancement request (see L<"SUPPORT">). 7561 7562=head3 Date & Time fields 7563 7564Date/time fields found in zip files are displayed in local time. Use the 7565C<--utc> option to display these fields in Coordinated Universal Time (UTC). 7566 7567=head3 Filenames & Comments 7568 7569Filenames and comments are decoded/encoded using the default system 7570encoding of the host running C<zipdetails>. When the sytem encoding cannot 7571be determined C<cp437> will be used. 7572 7573The exceptions are 7574 7575=over 5 7576 7577=item * 7578 7579when the C<Language Encoding Flag> is set in the zip file, the 7580filename/comment fields are assumed to be encoded in UTF-8. 7581 7582=item * 7583 7584the definition for the metadata field implies UTF-8 charset encoding 7585 7586=back 7587 7588See L<"Filename Encoding Issues"> and L<Filename & Comment Encoding 7589Options> for ways to control the encoding of filename/comment fields. 7590 7591=head2 OPTIONS 7592 7593=head3 General Options 7594 7595=over 5 7596 7597=item C<-h>, C<--help> 7598 7599Display help 7600 7601=item C<--redact> 7602 7603Obscure filenames and payload data in the output. Handy for the use case 7604where the zip files contains sensitive data that cannot be shared. 7605 7606=item C<--scan> 7607 7608Pessimistically scan the zip file loking for possible zip records. Can be 7609error-prone. For very large zip files this option is slow. Consider using 7610the C<--walk> option first. See L<"Advanced Analysis Options"> 7611 7612=item C<--utc> 7613 7614By default, date/time fields are displayed in local time. Use this option to 7615display them in in Coordinated Universal Time (UTC). 7616 7617=item C<-v> 7618 7619Enable Verbose mode. See L<"Verbose Output">. 7620 7621=item C<--version> 7622 7623Display version number of the program and exit. 7624 7625=item C<--walk> 7626 7627Optimistically walk the zip file looking for possible zip records. 7628See L<"Advanced Analysis Options"> 7629 7630=back 7631 7632=head3 Filename & Comment Encoding Options 7633 7634See L<"Filename Encoding Issues"> 7635 7636=over 5 7637 7638=item C<--encoding name> 7639 7640Use encoding "name" when reading filenames/comments from the zip file. 7641 7642When this option is not specified the default the system encoding is used. 7643 7644=item C< --no-encoding> 7645 7646Disable all filename & comment encoding/decoding. Filenames/comments are 7647processed as byte streams. 7648 7649This option is not enabled by default. 7650 7651=item C<--output-encoding name> 7652 7653Use encoding "name" when writing filename/comments to the display. By 7654default the system encoding will be used. 7655 7656=item C<--language-encoding>, C<--no-language-encoding> 7657 7658Modern zip files set a metadata entry in zip files, called the "Language 7659encoding flag", when they write filenames/comments encoded in UTF-8. 7660 7661Occasionally some applications set the C<Language Encoding Flag> but write 7662data that is not UTF-8 in the filename/comment fields of the zip file. This 7663will usually result in garbled text being output for the 7664filenames/comments. 7665 7666To deal with this use-case, set the C<--no-language-encoding> option and, 7667if needed, set the C<--encoding name> option to encoding actually used. 7668 7669Default is C<--language-encoding>. 7670 7671=item C<--debug-encoding> 7672 7673Display extra debugging info when a filename/comment encoding has changed. 7674 7675=back 7676 7677=head3 Message Control Options 7678 7679=over 5 7680 7681=item C<--messages>, C<--no-messages> 7682 7683Enable/disable the output of all info/warning/error messages. 7684 7685Disabling messages means that no checks are carried out to check that the 7686zip file is well-formed. 7687 7688Default is enabled. 7689 7690=item C<--exit-bitmask>, C<--no-exit-bitmask> 7691 7692Enable/disable exit status bitmask for messages. Default disabled. 7693Bitmask values are: 1 for info, 2 for warning and 4 for error. 7694 7695=back 7696 7697 7698=head2 Default Output 7699 7700By default C<zipdetails> will output each metadata field from the zip file 7701in three columns. 7702 7703=over 5 7704 7705=item 1 7706 7707The offset, in hex, to the start of the field relative to the beginning of 7708the file. 7709 7710=item 2 7711 7712The name of the field. 7713 7714=item 3 7715 7716Detailed information about the contents of the field. The format depends on 7717the type of data: 7718 7719=over 5 7720 7721=item * Numeric Values 7722 7723If the field contains an 8-bit, 16-bit, 32-bit or 64-bit numeric value, it 7724will be displayed in both hex and decimal -- for example "C<002A (42)>". 7725 7726Note that Zip files store most numeric values in I<little-endian> encoding 7727(there area few rare instances where I<big-endian> is used). The value read 7728from the zip file will have the I<endian> encoding removed before being 7729displayed. 7730 7731Next, is an optional description of what the numeric value means. 7732 7733=item * String 7734 7735If the field corresponds to a printable string, it will be output enclosed 7736in single quotes. 7737 7738=item * Binary Data 7739 7740The term I<Binary Data> is just a catch-all for all other metadata in the 7741zip file. This data is displayed as a series of ascii-hex byte values in 7742the same order they are stored in the zip file. 7743 7744=back 7745 7746=back 7747 7748For example, assuming you have a zip file, C<test,zip>, with one entry 7749 7750 $ unzip -l test.zip 7751 Archive: test.zip 7752 Length Date Time Name 7753 --------- ---------- ----- ---- 7754 446 2023-03-22 20:03 lorem.txt 7755 --------- ------- 7756 446 1 file 7757 7758Running C<zipdetails> will gives this output 7759 7760 $ zipdetails test.zip 7761 7762 0000 LOCAL HEADER #1 04034B50 (67324752) 7763 0004 Extract Zip Spec 14 (20) '2.0' 7764 0005 Extract OS 00 (0) 'MS-DOS' 7765 0006 General Purpose Flag 0000 (0) 7766 [Bits 1-2] 0 'Normal Compression' 7767 0008 Compression Method 0008 (8) 'Deflated' 7768 000A Modification Time 5676A072 (1450614898) 'Wed Mar 22 20:03:36 2023' 7769 000E CRC F90EE7FF (4178503679) 7770 0012 Compressed Size 0000010E (270) 7771 0016 Uncompressed Size 000001BE (446) 7772 001A Filename Length 0009 (9) 7773 001C Extra Length 0000 (0) 7774 001E Filename 'lorem.txt' 7775 0027 PAYLOAD 7776 7777 0135 CENTRAL HEADER #1 02014B50 (33639248) 7778 0139 Created Zip Spec 1E (30) '3.0' 7779 013A Created OS 03 (3) 'Unix' 7780 013B Extract Zip Spec 14 (20) '2.0' 7781 013C Extract OS 00 (0) 'MS-DOS' 7782 013D General Purpose Flag 0000 (0) 7783 [Bits 1-2] 0 'Normal Compression' 7784 013F Compression Method 0008 (8) 'Deflated' 7785 0141 Modification Time 5676A072 (1450614898) 'Wed Mar 22 20:03:36 2023' 7786 0145 CRC F90EE7FF (4178503679) 7787 0149 Compressed Size 0000010E (270) 7788 014D Uncompressed Size 000001BE (446) 7789 0151 Filename Length 0009 (9) 7790 0153 Extra Length 0000 (0) 7791 0155 Comment Length 0000 (0) 7792 0157 Disk Start 0000 (0) 7793 0159 Int File Attributes 0001 (1) 7794 [Bit 0] 1 'Text Data' 7795 015B Ext File Attributes 81ED0000 (2179792896) 7796 [Bits 16-24] 01ED (493) 'Unix attrib: rwxr-xr-x' 7797 [Bits 28-31] 08 (8) 'Regular File' 7798 015F Local Header Offset 00000000 (0) 7799 0163 Filename 'lorem.txt' 7800 7801 016C END CENTRAL HEADER 06054B50 (101010256) 7802 0170 Number of this disk 0000 (0) 7803 0172 Central Dir Disk no 0000 (0) 7804 0174 Entries in this disk 0001 (1) 7805 0176 Total Entries 0001 (1) 7806 0178 Size of Central Dir 00000037 (55) 7807 017C Offset to Central Dir 00000135 (309) 7808 0180 Comment Length 0000 (0) 7809 # 7810 # Done 7811 7812 7813=head2 Verbose Output 7814 7815If the C<-v> option is present, the metadata output is split into the 7816following columns: 7817 7818=over 5 7819 7820=item 1 7821 7822The offset, in hex, to the start of the field relative to the beginning of 7823the file. 7824 7825=item 2 7826 7827The offset, in hex, to the end of the field relative to the beginning of 7828the file. 7829 7830=item 3 7831 7832The length, in hex, of the field. 7833 7834=item 4 7835 7836A hex dump of the bytes in field in the order they are stored in the zip file. 7837 7838=item 5 7839 7840A textual description of the field. 7841 7842=item 6 7843 7844Information about the contents of the field. See the description in the 7845L<Default Output> for more details. 7846 7847=back 7848 7849Here is the same zip file, C<test.zip>, dumped using the C<zipdetails> 7850C<-v> option: 7851 7852 $ zipdetails -v test.zip 7853 7854 0000 0003 0004 50 4B 03 04 LOCAL HEADER #1 04034B50 (67324752) 7855 0004 0004 0001 14 Extract Zip Spec 14 (20) '2.0' 7856 0005 0005 0001 00 Extract OS 00 (0) 'MS-DOS' 7857 0006 0007 0002 00 00 General Purpose Flag 0000 (0) 7858 [Bits 1-2] 0 'Normal Compression' 7859 0008 0009 0002 08 00 Compression Method 0008 (8) 'Deflated' 7860 000A 000D 0004 72 A0 76 56 Modification Time 5676A072 (1450614898) 'Wed Mar 22 20:03:36 2023' 7861 000E 0011 0004 FF E7 0E F9 CRC F90EE7FF (4178503679) 7862 0012 0015 0004 0E 01 00 00 Compressed Size 0000010E (270) 7863 0016 0019 0004 BE 01 00 00 Uncompressed Size 000001BE (446) 7864 001A 001B 0002 09 00 Filename Length 0009 (9) 7865 001C 001D 0002 00 00 Extra Length 0000 (0) 7866 001E 0026 0009 6C 6F 72 65 Filename 'lorem.txt' 7867 6D 2E 74 78 7868 74 7869 0027 0134 010E ... PAYLOAD 7870 7871 0135 0138 0004 50 4B 01 02 CENTRAL HEADER #1 02014B50 (33639248) 7872 0139 0139 0001 1E Created Zip Spec 1E (30) '3.0' 7873 013A 013A 0001 03 Created OS 03 (3) 'Unix' 7874 013B 013B 0001 14 Extract Zip Spec 14 (20) '2.0' 7875 013C 013C 0001 00 Extract OS 00 (0) 'MS-DOS' 7876 013D 013E 0002 00 00 General Purpose Flag 0000 (0) 7877 [Bits 1-2] 0 'Normal Compression' 7878 013F 0140 0002 08 00 Compression Method 0008 (8) 'Deflated' 7879 0141 0144 0004 72 A0 76 56 Modification Time 5676A072 (1450614898) 'Wed Mar 22 20:03:36 2023' 7880 0145 0148 0004 FF E7 0E F9 CRC F90EE7FF (4178503679) 7881 0149 014C 0004 0E 01 00 00 Compressed Size 0000010E (270) 7882 014D 0150 0004 BE 01 00 00 Uncompressed Size 000001BE (446) 7883 0151 0152 0002 09 00 Filename Length 0009 (9) 7884 0153 0154 0002 00 00 Extra Length 0000 (0) 7885 0155 0156 0002 00 00 Comment Length 0000 (0) 7886 0157 0158 0002 00 00 Disk Start 0000 (0) 7887 0159 015A 0002 01 00 Int File Attributes 0001 (1) 7888 [Bit 0] 1 'Text Data' 7889 015B 015E 0004 00 00 ED 81 Ext File Attributes 81ED0000 (2179792896) 7890 [Bits 16-24] 01ED (493) 'Unix attrib: rwxr-xr-x' 7891 [Bits 28-31] 08 (8) 'Regular File' 7892 015F 0162 0004 00 00 00 00 Local Header Offset 00000000 (0) 7893 0163 016B 0009 6C 6F 72 65 Filename 'lorem.txt' 7894 6D 2E 74 78 7895 74 7896 7897 016C 016F 0004 50 4B 05 06 END CENTRAL HEADER 06054B50 (101010256) 7898 0170 0171 0002 00 00 Number of this disk 0000 (0) 7899 0172 0173 0002 00 00 Central Dir Disk no 0000 (0) 7900 0174 0175 0002 01 00 Entries in this disk 0001 (1) 7901 0176 0177 0002 01 00 Total Entries 0001 (1) 7902 0178 017B 0004 37 00 00 00 Size of Central Dir 00000037 (55) 7903 017C 017F 0004 35 01 00 00 Offset to Central Dir 00000135 (309) 7904 0180 0181 0002 00 00 Comment Length 0000 (0) 7905 # 7906 # Done 7907 7908=head2 Advanced Analysis 7909 7910If you have a corrupt or non-standard zip file, particulatly one where the 7911C<Central Directory> metadata at the end of the file is absent/incomplete, you 7912can use either the C<--walk> option or the C<--scan> option to search for 7913any zip metadata that is still present in the file. 7914 7915When either of these options is enabled, this program will bypass the 7916initial step of reading the C<Central Directory> at the end of the file and 7917simply scan the zip file sequentially from the start of the file looking 7918for zip metedata records. Although this can be error prone, for the most 7919part it will find any zip file metadata that is still present in the file. 7920 7921The difference between the two options is how aggressive the sequential 7922scan is: C<--walk> is optimistic, while C<--scan> is pessimistic. 7923 7924To understand the difference in more detail you need to know a bit about 7925how zip file metadata is structured. Under the hood, a zip file uses a 7926series of 4-byte signatures to flag the start of a each of the metadata 7927records it uses. When the C<--walk> or the C<--scan> option is enabled both 7928work identically by scanning the file from the beginning looking for any 7929the of these valid 4-byte metadata signatures. When a 4-byte signature is 7930found both options will blindly assume that it has found a vald metadata 7931record and display it. 7932 7933=head3 C<--walk> 7934 7935The C<--walk> option optimistically assumes that it has found a real zip 7936metatada record and so starts the scan for the next record directly after 7937the record it has just output. 7938 7939=head3 C<--scan> 7940 7941The C<--scan> option is pessimistic and assumes the 4-byte signature 7942sequence may have been a false-positive, so before starting the scan for 7943the next resord, it will rewind to the location in the file directly after 7944the 4-byte sequecce it just processed. This means it will rescan data that 7945has already been processed. For very lage zip files the C<--scan> option 7946can be really realy slow, so trying the C<--walk> option first. 7947 7948B<Important Note>: If the zip file being processed contains one or more 7949nested zip files, and the outer zip file uses the C<STORE> compression 7950method, the C<--scan> option will display the zip metadata for both the 7951outer & inner zip files. 7952 7953=head2 Filename Encoding Issues 7954 7955Sometimes when displaying the contents of a zip file the filenames (or 7956comments) appear to be garbled. This section walks through the reasons and 7957mitigations that can be applied to work around these issues. 7958 7959=head3 Background 7960 7961When zip files were first created in the 1980's, there was no Unicode or 7962UTF-8. Issues around character set encoding interoperability were not a 7963major concern. 7964 7965Initially, the only official encoding supported in zip files was IBM Code 7966Page 437 (AKA C<CP437>). As time went on users in locales where C<CP437> 7967wasn't appropriate stored filenames in the encoding native to their locale. 7968If you were running a system that matched the locale of the zip file, all 7969was well. If not, you had to post-process the filenames after unzipping the 7970zip file. 7971 7972Fast forward to the introduction of Unicode and UTF-8 encoding. The 7973approach now used by all major zip implementations is to set the C<Language 7974encoding flag> (also known as C<EFS>) in the zip file metadata to signal 7975that a filename/comment is encoded in UTF-8. 7976 7977To ensure maximum interoperability when sharing zip files store 7-bit 7978filenames as-is in the zip file. For anything else the C<EFS> bit needs to 7979be set and the filename is encoded in UTF-8. Although this rule is kept to 7980for the most part, there are exceptions out in the wild. 7981 7982=head3 Dealing with Encoding Errors 7983 7984The most common filename encoding issue is where the C<EFS> bit is not set and 7985the filename is stored in a character set that doesnt't match the system 7986encoding. This mostly impacts legacy zip files that predate the 7987introduction of Unicode. 7988 7989To deal with this issue you first need to know what encoding was used in 7990the zip file. For example, if the filename is encoded in C<ISO-8859-1> you 7991can display the filenames using the C<--encoding> option 7992 7993 zipdetails --encoding ISO-8859-1 myfile.zip 7994 7995A less common variation of this is where the C<EFS> bit is set, signalling 7996that the filename will be encoded in UTF-8, but the filename is not encoded 7997in UTF-8. To deal with this scenarion, use the C<--no-language-encoding> 7998option along with the C<--encoding> option. 7999 8000 8001=head1 LIMITATIONS 8002 8003The following zip file features are not supported by this program: 8004 8005=over 5 8006 8007=item * 8008 8009Multi-part/Split/Spanned Zip Archives. 8010 8011This program cannot give an overall report on the combined parts of a 8012multi-part zip file. 8013 8014The best you can do is run with either the C<--scan> or C<--walk> options 8015against individual parts. Some will contains zipfile metadata which will be 8016detected and some will only contain compressed payload data. 8017 8018 8019=item * 8020 8021Encrypted Central Directory 8022 8023When pkzip I<Strong Encryption> is enabled in a zip file this program can 8024still parse most of the metadata in the zip file. The exception is when the 8025C<Central Directory> of a zip file is also encrypted. This program cannot 8026parse any metadata from an encrypted C<Central Directory>. 8027 8028=item * 8029 8030Corrupt Zip files 8031 8032When C<zipdetails> encounters a corrupt zip file, it will do one or more of 8033the following 8034 8035=over 5 8036 8037=item * 8038 8039Display details of the corruption and carry on 8040 8041=item * 8042 8043Display details of the corruption and terminate 8044 8045=item * 8046 8047Terminate with a generic message 8048 8049=back 8050 8051Which of the above is output is dependent in the severity of the 8052corruption. 8053 8054=back 8055 8056=head1 TODO 8057 8058=head2 JSON/YML Output 8059 8060Output some of the zip file metadata as a JSON or YML document. 8061 8062=head2 Corrupt Zip files 8063 8064Although the detection and reporting of most of the common corruption use-cases is 8065present in C<zipdetails>, there are likely to be other edge cases that need 8066to be supported. 8067 8068If you have a corrupt Zip file that isn't being processed properly, please 8069report it (see L<"SUPPORT">). 8070 8071=head1 SUPPORT 8072 8073General feedback/questions/bug reports should be sent to 8074L<https://github.com/pmqs/zipdetails/issues>. 8075 8076=head1 SEE ALSO 8077 8078 8079The primary reference for Zip files is 8080L<APPNOTE.TXT|https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT>. 8081 8082An alternative reference is the Info-Zip appnote. This is available from 8083L<ftp://ftp.info-zip.org/pub/infozip/doc/> 8084 8085For details of WinZip AES encryption see L<AES Encryption Information: 8086Encryption Specification AE-1 and 8087AE-2|https://www.winzip.com/en/support/aes-encryption/>. 8088 8089The C<zipinfo> program that comes with the info-zip distribution 8090(L<http://www.info-zip.org/>) can also display details of the structure of a zip 8091file. 8092 8093 8094=head1 AUTHOR 8095 8096Paul Marquess F<pmqs@cpan.org>. 8097 8098=head1 COPYRIGHT 8099 8100Copyright (c) 2011-2024 Paul Marquess. All rights reserved. 8101 8102This program is free software; you can redistribute it and/or modify it under 8103the same terms as Perl itself. 8104