1#!/usr/bin/perl
2
3# zipdetails
4#
5# Display info on the contents of a Zip file
6#
7
8use 5.010; # for unpack "Q<"
9
10my $NESTING_DEBUG = 0 ;
11
12BEGIN {
13    # Check for a 32-bit Perl
14    if (!eval { pack "Q", 1 }) {
15        warn "zipdetails requires 64 bit integers, ",
16                "this Perl has 32 bit integers.\n";
17        exit(1);
18    }
19}
20
21BEGIN { pop @INC if $INC[-1] eq '.' }
22use strict;
23use warnings ;
24no  warnings 'portable'; # for unpacking > 2^32
25use feature qw(state say);
26
27use IO::File;
28use Encode;
29use Getopt::Long;
30use List::Util qw(min max);
31
32my $VERSION = '4.004' ;
33
34sub fatal_tryWalk;
35sub fatal_truncated ;
36sub info ;
37sub warning ;
38sub error ;
39sub debug ;
40sub fatal ;
41sub topLevelFatal ;
42sub internalFatal;
43sub need ;
44sub decimalHex;
45
46use constant MAX64 => 0xFFFFFFFFFFFFFFFF ;
47use constant MAX32 => 0xFFFFFFFF ;
48use constant MAX16 => 0xFFFF ;
49
50# Compression types
51use constant ZIP_CM_STORE                      => 0 ;
52use constant ZIP_CM_IMPLODE                    => 6 ;
53use constant ZIP_CM_DEFLATE                    => 8 ;
54use constant ZIP_CM_BZIP2                      => 12 ;
55use constant ZIP_CM_LZMA                       => 14 ;
56use constant ZIP_CM_PPMD                       => 98 ;
57
58# General Purpose Flag
59use constant ZIP_GP_FLAG_ENCRYPTED_MASK        => (1 << 0) ;
60use constant ZIP_GP_FLAG_STREAMING_MASK        => (1 << 3) ;
61use constant ZIP_GP_FLAG_PATCHED_MASK          => (1 << 5) ;
62use constant ZIP_GP_FLAG_STRONG_ENCRYPTED_MASK => (1 << 6) ;
63use constant ZIP_GP_FLAG_LZMA_EOS_PRESENT      => (1 << 1) ;
64use constant ZIP_GP_FLAG_LANGUAGE_ENCODING     => (1 << 11) ;
65use constant ZIP_GP_FLAG_PKWARE_ENHANCED_COMP  => (1 << 12) ;
66use constant ZIP_GP_FLAG_ENCRYPTED_CD          => (1 << 13) ;
67
68# All the encryption flags
69use constant ZIP_GP_FLAG_ALL_ENCRYPT            => (ZIP_GP_FLAG_ENCRYPTED_MASK | ZIP_GP_FLAG_STRONG_ENCRYPTED_MASK | ZIP_GP_FLAG_ENCRYPTED_CD );
70
71# Internal File Attributes
72use constant ZIP_IFA_TEXT_MASK                 => 1;
73
74# Signatures for each of the headers
75use constant ZIP_LOCAL_HDR_SIG                 => 0x04034b50;
76use constant ZIP_DATA_HDR_SIG                  => 0x08074b50;
77use constant ZIP_CENTRAL_HDR_SIG               => 0x02014b50;
78use constant ZIP_END_CENTRAL_HDR_SIG           => 0x06054b50;
79use constant ZIP64_END_CENTRAL_REC_HDR_SIG     => 0x06064b50;
80use constant ZIP64_END_CENTRAL_LOC_HDR_SIG     => 0x07064b50;
81use constant ZIP_DIGITAL_SIGNATURE_SIG         => 0x05054b50;
82use constant ZIP_ARCHIVE_EXTRA_DATA_RECORD_SIG => 0x08064b50;
83use constant ZIP_SINGLE_SEGMENT_MARKER         => 0x30304b50; # APPNOTE 6.3.10, sec 8.5.4
84
85# Extra sizes
86use constant ZIP_EXTRA_HEADER_SIZE          => 2 ;
87use constant ZIP_EXTRA_MAX_SIZE             => 0xFFFF ;
88use constant ZIP_EXTRA_SUBFIELD_ID_SIZE     => 2 ;
89use constant ZIP_EXTRA_SUBFIELD_LEN_SIZE    => 2 ;
90use constant ZIP_EXTRA_SUBFIELD_HEADER_SIZE => ZIP_EXTRA_SUBFIELD_ID_SIZE +
91                                               ZIP_EXTRA_SUBFIELD_LEN_SIZE;
92use constant ZIP_EXTRA_SUBFIELD_MAX_SIZE    => ZIP_EXTRA_MAX_SIZE -
93                                               ZIP_EXTRA_SUBFIELD_HEADER_SIZE;
94
95use constant ZIP_EOCD_MIN_SIZE              => 22 ;
96
97
98use constant ZIP_LD_FILENAME_OFFSET         => 30;
99use constant ZIP_CD_FILENAME_OFFSET         => 46;
100
101my %ZIP_CompressionMethods =
102    (
103          0 => 'Stored',
104          1 => 'Shrunk',
105          2 => 'Reduced compression factor 1',
106          3 => 'Reduced compression factor 2',
107          4 => 'Reduced compression factor 3',
108          5 => 'Reduced compression factor 4',
109          6 => 'Imploded',
110          7 => 'Reserved for Tokenizing compression algorithm',
111          8 => 'Deflated',
112          9 => 'Deflate64',
113         10 => 'PKWARE Data Compression Library Imploding',
114         11 => 'Reserved by PKWARE',
115         12 => 'BZIP2',
116         13 => 'Reserved by PKWARE',
117         14 => 'LZMA',
118         15 => 'Reserved by PKWARE',
119         16 => 'IBM z/OS CMPSC Compression',
120         17 => 'Reserved by PKWARE',
121         18 => 'IBM/TERSE or Xceed BWT', # APPNOTE has IBM/TERSE. Xceed reuses it unofficially
122         19 => 'IBM LZ77 z Architecture (PFS)',
123         20 => 'Ipaq8', # see https://encode.su/threads/1048-info-zip-lpaq8
124         92 => 'Reference', # Winzip Only from version 25
125         93 => 'Zstandard',
126         94 => 'MP3',
127         95 => 'XZ',
128         96 => 'WinZip JPEG Compression',
129         97 => 'WavPack compressed data',
130         98 => 'PPMd version I, Rev 1',
131         99 => 'AES Encryption', # Apple also use this code for LZFSE compression in IPA files
132     );
133
134my %OS_Lookup = (
135    0   => "MS-DOS",
136    1   => "Amiga",
137    2   => "OpenVMS",
138    3   => "Unix",
139    4   => "VM/CMS",
140    5   => "Atari ST",
141    6   => "HPFS (OS/2, NT 3.x)",
142    7   => "Macintosh",
143    8   => "Z-System",
144    9   => "CP/M",
145    10  => "Windows NTFS or TOPS-20",
146    11  => "MVS or NTFS",
147    12  => "VSE or SMS/QDOS",
148    13  => "Acorn RISC OS",
149    14  => "VFAT",
150    15  => "alternate MVS",
151    16  => "BeOS",
152    17  => "Tandem",
153    18  => "OS/400",
154    19  => "OS/X (Darwin)",
155    30  => "AtheOS/Syllable",
156    );
157
158{
159    package Signatures ;
160
161    my %Lookup = (
162        # Map unpacked signature to
163        #   decoder
164        #   name
165        #   central flag
166
167        # Core Signatures
168        ::ZIP_LOCAL_HDR_SIG,             [ \&::LocalHeader, "Local File Header", 0 ],
169        ::ZIP_DATA_HDR_SIG,              [ \&::DataDescriptor,   "Data Descriptor", 0 ],
170        ::ZIP_CENTRAL_HDR_SIG,           [ \&::CentralHeader, "Central Directory Header", 1 ],
171        ::ZIP_END_CENTRAL_HDR_SIG,       [ \&::EndCentralHeader, "End Central Directory Record", 1 ],
172        ::ZIP_SINGLE_SEGMENT_MARKER,     [ \&::SingleSegmentMarker, "Split Archive Single Segment Marker", 0],
173
174        # Zip64
175        ::ZIP64_END_CENTRAL_REC_HDR_SIG, [ \&::Zip64EndCentralHeader, "Zip64 End of Central Directory Record", 1 ],
176        ::ZIP64_END_CENTRAL_LOC_HDR_SIG, [ \&::Zip64EndCentralLocator, "Zip64 End of Central Directory Locator", 1 ],
177
178        #  Digital signature (pkzip)
179        ::ZIP_DIGITAL_SIGNATURE_SIG,     [ \&::DigitalSignature, "Digital Signature", 1 ],
180
181        #  Archive Encryption Headers (pkzip) - never seen this one
182        ::ZIP_ARCHIVE_EXTRA_DATA_RECORD_SIG,  [ \&::ArchiveExtraDataRecord, "Archive Extra Record", 1 ],
183    );
184
185    sub decoder
186    {
187        my $signature = shift ;
188
189        return undef
190            unless exists $Lookup{$signature};
191
192        return $Lookup{$signature}[0];
193    }
194
195    sub name
196    {
197        my $signature = shift ;
198
199        return 'UNKNOWN'
200            unless exists $Lookup{$signature};
201
202        return $Lookup{$signature}[1];
203    }
204
205    sub titleName
206    {
207        my $signature = shift ;
208
209        uc name($signature);
210    }
211
212    sub hexValue
213    {
214        my $signature = shift ;
215        sprintf "0x%X", $signature ;
216    }
217
218    sub hexValue32
219    {
220        my $signature = shift ;
221        sprintf "0x%08X", $signature ;
222    }
223
224    sub hexValue16
225    {
226        my $signature = shift ;
227        sprintf "0x%04X", $signature ;
228    }
229
230    sub nameAndHex
231    {
232        my $signature = shift ;
233
234        return "'" . name($signature) . "' (" . hexValue32($signature) . ")"
235    }
236
237    sub isCentralHeader
238    {
239        my $signature = shift ;
240
241        return undef
242            unless exists $Lookup{$signature};
243
244        return $Lookup{$signature}[2];
245    }
246    #sub isValidSignature
247    #{
248    #    my $signature = shift ;
249    #    return exists $Lookup{$signature}}
250    #}
251
252    sub getSigsForScan
253    {
254        my %sigs =
255            # map { $_ => 1         }
256            # map { substr($_->[0], 2, 2) => $_->[1] } # don't want the initial "PK"
257            map { substr(pack("V", $_), 2, 2) => $_           }
258            keys %Lookup ;
259
260        return %sigs;
261    }
262
263}
264
265my %Extras = (
266
267      #                                                                                                 Local                   Central
268      # ID       Name                                                       Handler                     min size    max size    min size max size
269      0x0001,  ['ZIP64',                                                    \&decode_Zip64,             0,  28, 0,  28],
270      0x0007,  ['AV Info',                                                  undef], # TODO
271      0x0008,  ['Extended Language Encoding',                               undef], # TODO
272      0x0009,  ['OS/2 extended attributes',                                 undef], # TODO
273      0x000a,  ['NTFS FileTimes',                                           \&decode_NTFS_Filetimes,    32, 32, 32, 32],
274      0x000c,  ['OpenVMS',                                                  \&decode_OpenVMS,            4, undef,  4, undef],
275      0x000d,  ['Unix',                                                     undef],
276      0x000e,  ['Stream & Fork Descriptors',                                undef], # TODO
277      0x000f,  ['Patch Descriptor',                                         undef],
278      0x0014,  ['PKCS#7 Store for X.509 Certificates',                      undef],
279      0x0015,  ['X.509 Certificate ID and Signature for individual file',   undef],
280      0x0016,  ['X.509 Certificate ID for Central Directory',               undef],
281      0x0017,  ['Strong Encryption Header',                                 \&decode_strong_encryption,  12,    undef,  12,    undef],
282      0x0018,  ['Record Management Controls',                               undef],
283      0x0019,  ['PKCS#7 Encryption Recipient Certificate List',             undef],
284      0x0020,  ['Reserved for Timestamp record',                            undef],
285      0x0021,  ['Policy Decryption Key Record',                             undef],
286      0x0022,  ['Smartcrypt Key Provider Record',                           undef],
287      0x0023,  ['Smartcrypt Policy Key Data Record',                        undef],
288
289      # The Header ID mappings defined by Info-ZIP and third parties are:
290
291      0x0065,  ['IBM S/390 attributes - uncompressed',                      \&decode_MVS,                    4,  undef,  4,  undef],
292      0x0066,  ['IBM S/390 attributes - compressed',                        undef],
293      0x07c8,  ['Info-ZIP Macintosh (old, J. Lee)',                         undef],
294      0x10c5,  ['Minizip CMS Signature',                                    \&decode_Minizip_Signature,     undef, undef, undef, undef], # https://github.com/zlib-ng/minizip-ng/blob/master/doc/mz_extrafield.md
295      0x1986,  ['Pixar USD',                                                undef], # TODO
296      0x1a51,  ['Minizip Hash',                                             \&decode_Minizip_Hash,          4, undef, 4, undef], # https://github.com/zlib-ng/minizip-ng/blob/master/doc/mz_extrafield.md
297      0x2605,  ['ZipIt Macintosh (first version)',                          undef],
298      0x2705,  ['ZipIt Macintosh v 1.3.5 and newer (w/o full filename)',    undef],
299      0x2805,  ['ZipIt Macintosh v 1.3.5 and newer',                        undef],
300      0x334d,  ["Info-ZIP Macintosh (new, D. Haase's 'Mac3' field)",        undef], # TODO
301      0x4154,  ['Tandem NSK [TA]',                                          undef], # TODO
302      0x4341,  ['Acorn/SparkFS [AC]',                                       undef], # TODO
303      0x4453,  ['Windows NT security descriptor [SD]',                      \&decode_NT_security,           11, undef,  4, 4], # TODO
304      0x4690,  ['POSZIP 4690',                                              undef],
305      0x4704,  ['VM/CMS',                                                   undef],
306      0x470f,  ['MVS',                                                      undef],
307      0x4854,  ['Theos [TH]',                                               undef],
308      0x4b46,  ['FWKCS MD5 [FK]',                                           undef],
309      0x4c41,  ['OS/2 access control list [AL]',                            undef],
310      0x4d49,  ['Info-ZIP OpenVMS (obsolete) [IM]',                         undef],
311      0x4d63,  ['Macintosh SmartZIP [cM]',                                  undef], # TODO
312      0x4f4c,  ['Xceed original location [LO]',                             undef],
313      0x5356,  ['AOS/VS (binary ACL) [VS]',                                 undef],
314      0x5455,  ['Extended Timestamp [UT]',                                  \&decode_UT,                    1, 13,  1, 13],
315      0x554e,  ['Xceed unicode extra field [UN]',                           \&decode_Xceed_unicode,         6,  undef,  8,  undef],
316      0x564B,  ['Key-Value Pairs [KV]',                                     \&decode_Key_Value_Pair,        13, undef, 13, undef],# TODO -- https://github.com/sozip/keyvaluepairs-spec/blob/master/zip_keyvalue_extra_field_specification.md
317      0x5855,  ['Unix Extra type 1 [UX]',                                   \&decode_UX,                    12, 12,     8, 8],
318      0x5a4c,  ['ZipArchive Unicode Filename [LZ]',                         undef],  # https://www.artpol-software.com/ZipArchive
319      0x5a4d,  ['ZipArchive Offsets Array [MZ]',                            undef],  # https://www.artpol-software.com/ZipArchive
320      0x6375,  ['Unicode Comment [uc]',                                     \&decode_uc,                    5, undef,  5, undef],
321      0x6542,  ['BeOS/Haiku [Be]',                                          undef], # TODO
322      0x6854,  ['Theos [Th]',                                               undef],
323      0x7075,  ['Unicode Path [up]',                                        \&decode_up,                    5, undef,   5, undef],
324      0x756e,  ['ASi Unix [un]',                                            \&decode_ASi_Unix], # TODO
325      0x7441,  ['AtheOS [At]',                                              undef],
326      0x7855,  ['Unix Extra type 2 [Ux]',                                   \&decode_Ux,                    4,4,   0, 0 ],
327      0x7875,  ['Unix Extra type 3 [ux]',                                   \&decode_ux,                    3, undef,   3, undef],
328      0x9901,  ['AES Encryption',                                           \&decode_AES,                   7, 7,       7, 7],
329      0x9903,  ['Reference',                                                \&decode_Reference,             20, 20,     20, 20], # Added in WinZip ver 25
330      0xa11e,  ['Data Stream Alignment',                                    \&decode_DataStreamAlignment,   2, undef,   2, undef ],
331      0xA220,  ['Open Packaging Growth Hint',                               \&decode_GrowthHint,            4, undef,   4, undef ],
332      0xCAFE,  ['Java Executable',                                          \&decode_Java_exe,              0, 0,       0, 0],
333      0xCDCD,  ['Minizip Central Directory',                                \&decode_Minizip_CD,            8, 8, 8, 8], # https://github.com/zlib-ng/minizip-ng/blob/master/doc/mz_extrafield.md
334      0xd935,  ['Android APK Alignment',                                    undef], # TODO
335      0xE57a,  ['ALZip Codepage',                                           undef], # TODO
336      0xfb4a,  ['SMS/QDOS',                                                 undef], # TODO
337       );
338
339      # Dummy entry only used in test harness, so only enable when ZIPDETAILS_TESTHARNESS is set
340      $Extras{0xFFFF} =
341               ['DUMMY',                                                    \&decode_DUMMY,                 undef, undef, undef, undef]
342            if $ENV{ZIPDETAILS_TESTHARNESS} ;
343
344sub extraFieldIdentifier
345{
346    my $id = shift ;
347
348    my $name = $Extras{$id}[0] // "Unknown";
349
350    return "Extra Field '$name' (ID " .  hexValue16($id) .")";
351}
352
353# Zip64EndCentralHeader version 2
354my %HashIDLookup  = (
355        0x0000 => 'none',
356        0x0001 => 'CRC32',
357        0x8003 => 'MD5',
358        0x8004 => 'SHA1',
359        0x8007 => 'RIPEMD160',
360        0x800C => 'SHA256',
361        0x800D => 'SHA384',
362        0x800E => 'SHA512',
363    );
364
365
366# Zip64EndCentralHeader version 2, Strong Encryption Header & DecryptionHeader
367my %AlgIdLookup = (
368        0x6601 => "DES",
369        0x6602 => "RC2 (version needed to extract < 5.2)",
370        0x6603 => "3DES 168",
371        0x6609 => "3DES 112",
372        0x660E => "AES 128",
373        0x660F => "AES 192",
374        0x6610 => "AES 256",
375        0x6702 => "RC2 (version needed to extract >= 5.2)",
376        0x6720 => "Blowfish",
377        0x6721 => "Twofish",
378        0x6801 => "RC4",
379        0xFFFF => "Unknown algorithm",
380    );
381
382# Zip64EndCentralHeader version 2, Strong Encryption Header & DecryptionHeader
383my %FlagsLookup = (
384        0x0001 => "Password required to decrypt",
385        0x0002 => "Certificates only",
386        0x0003 => "Password or certificate required to decrypt",
387
388        # Values > 0x0003 reserved for certificate processing
389    );
390
391# Strong Encryption Header & DecryptionHeader
392my %HashAlgLookup = (
393        0x8004  => 'SHA1',
394    );
395
396my $FH;
397
398my $ZIP64 = 0 ;
399my $NIBBLES = 8;
400
401my $LocalHeaderCount = 0;
402my $CentralHeaderCount = 0;
403my $InfoCount = 0;
404my $WarningCount = 0;
405my $ErrorCount = 0;
406my $lastWasMessage = 0;
407
408my $fatalDisabled = 0;
409
410my $OFFSET = 0 ;
411
412# Prefix data
413my $POSSIBLE_PREFIX_DELTA = 0;
414my $PREFIX_DELTA = 0;
415
416my $TRAILING = 0 ;
417my $PAYLOADLIMIT = 256;
418my $ZERO = 0 ;
419my $APK = 0 ;
420my $START_APK = 0;
421my $APK_LEN = 0;
422
423my $CentralDirectory = CentralDirectory->new();
424my $LocalDirectory = LocalDirectory->new();
425my $HeaderOffsetIndex = HeaderOffsetIndex->new();
426my $EOCD_Present = 0;
427
428sub prOff
429{
430    my $offset = shift;
431    my $s = offset($OFFSET);
432    $OFFSET += $offset;
433    return $s;
434}
435
436sub offset
437{
438    my $v = shift ;
439
440    sprintf("%0${NIBBLES}X", $v);
441}
442
443# Format variables
444my ($OFF,  $ENDS_AT, $LENGTH,  $CONTENT, $TEXT, $VALUE) ;
445
446my $FMT1 = 'STDOUT1';
447my $FMT2 = 'STDOUT2';
448
449sub setupFormat
450{
451    my $wantVerbose = shift ;
452    my $nibbles = shift;
453
454    my $width = '@' . ('>' x ($nibbles -1));
455    my $space = " " x length($width);
456
457    # See https://github.com/Perl/perl5/issues/14255 for issue with "^*" in perl < 5.22
458    # my $rightColumn = "^*" ;
459    my $rightColumn = "^" . ("<" x 132);
460
461    # Fill mode can split on space or newline chars
462    # Spliting on hyphen works differently from Perl 5.20 onwards
463    $: = " \n";
464
465    my $fmt ;
466
467    if ($wantVerbose) {
468
469        eval "format $FMT1 =
470$width $width $width ^<<<<<<<<<<<^<<<<<<<<<<<<<<<<<<<< $rightColumn
471\$OFF,     \$ENDS_AT, \$LENGTH,  \$CONTENT, \$TEXT,    \$VALUE
472$space $space $space ^<<<<<<<<<<<^<<<<<<<<<<<<<<<<<<<< $rightColumn~~
473                    \$CONTENT, \$TEXT,                 \$VALUE
474.
475";
476
477        eval "format $FMT2 =
478$width $width $width ^<<<<<<<<<<<  ^<<<<<<<<<<<<<<<<<< $rightColumn
479\$OFF,     \$ENDS_AT, \$LENGTH,  \$CONTENT, \$TEXT,               \$VALUE
480$space $space $space ^<<<<<<<<<<<  ^<<<<<<<<<<<<<<<<<< $rightColumn~~
481              \$CONTENT, \$TEXT,               \$VALUE
482.
483";
484
485    }
486    else {
487        eval "format $FMT1 =
488$width ^<<<<<<<<<<<<<<<<<<<< $rightColumn
489\$OFF,      \$TEXT,               \$VALUE
490$space ^<<<<<<<<<<<<<<<<<<<< $rightColumn~~
491                    \$TEXT,               \$VALUE
492.
493";
494
495        eval "format $FMT2 =
496$width   ^<<<<<<<<<<<<<<<<<< $rightColumn
497\$OFF,     \$TEXT,               \$VALUE
498$space   ^<<<<<<<<<<<<<<<<<< $rightColumn~~
499                    \$TEXT,               \$VALUE
500.
501"
502    }
503
504    no strict 'refs';
505    open($FMT1, ">&", \*STDOUT); select $FMT1; $| = 1 ;
506    open($FMT2, ">&", \*STDOUT); select $FMT2; $| = 1 ;
507
508    select 'STDOUT';
509    $| = 1;
510
511}
512
513sub mySpr
514{
515    my $format = shift ;
516
517    return "" if ! defined $format;
518    return $format unless @_ ;
519    return sprintf $format, @_ ;
520}
521
522sub xDump
523{
524    my $input = shift;
525
526    $input =~ tr/\0-\37\177-\377/./;
527    return $input;
528}
529
530sub hexDump
531{
532    return uc join ' ', unpack('(H2)*', $_[0]);
533}
534
535sub hexDump16
536{
537    return uc
538           join "\r",
539           map { join ' ', unpack('(H2)*', $_ ) }
540           unpack('(a16)*', $_[0]) ;
541}
542
543sub charDump2
544{
545    sprintf "%v02X", $_[0];
546}
547
548sub charDump
549{
550    sprintf "%vX", $_[0];
551}
552
553sub hexValue
554{
555    return sprintf("0x%X", $_[0]);
556}
557
558sub hexValue32
559{
560    return sprintf("0x%08X", $_[0]);
561}
562
563sub hexValue16
564{
565    return sprintf("0x%04X", $_[0]);
566}
567
568sub outHexdump
569{
570    my $size = shift;
571    my $text = shift;
572    my $limit = shift ;
573
574    return 0
575        if $size == 0;
576
577    # TODO - add a limit to data output
578    # if ($limit)
579    # {
580    #     outSomeData($size, $text);
581    # }
582    # else
583    {
584        myRead(my $payload, $size);
585        out($payload, $text, hexDump16($payload));
586    }
587
588    return $size;
589}
590
591sub decimalHex
592{
593    sprintf("%0*X (%u)", $_[1] // 0, $_[0], $_[0])
594}
595
596sub decimalHex0x
597{
598    sprintf("0x%0*X (%u)", $_[1] // 0, $_[0], $_[0])
599}
600
601sub decimalHex0xUndef
602{
603    return 'Unknown'
604        if ! defined $_[0];
605
606    return decimalHex0x @_;
607}
608
609sub out
610{
611    my $data = shift;
612    my $text = shift;
613    my $format = shift;
614
615    my $size = length($data) ;
616
617    $ENDS_AT = offset($OFFSET + ($size ? $size - 1 : 0)) ;
618    $OFF     = prOff($size);
619    $LENGTH  = offset($size) ;
620    $CONTENT = hexDump($data);
621    $TEXT    = $text;
622    $VALUE   = mySpr $format,  @_;
623
624    no warnings;
625
626    write $FMT1 ;
627
628    $lastWasMessage = 0;
629}
630
631sub out0
632{
633    my $size = shift;
634    my $text = shift;
635    my $format = shift;
636
637    $ENDS_AT = offset($OFFSET + ($size ? $size - 1 : 0)) ;
638    $OFF     = prOff($size);
639    $LENGTH  = offset($size) ;
640    $CONTENT = '...';
641    $TEXT    = $text;
642    $VALUE   = mySpr $format,  @_;
643
644    write $FMT1;
645
646    skip($FH, $size);
647
648    $lastWasMessage = 0;
649}
650
651sub out1
652{
653    my $text = shift;
654    my $format = shift;
655
656    $ENDS_AT = '' ;
657    $OFF     = '';
658    $LENGTH  = '' ;
659    $CONTENT = '';
660    $TEXT    = $text;
661    $VALUE   = mySpr $format,  @_;
662
663    write $FMT1;
664
665    $lastWasMessage = 0;
666}
667
668sub out2
669{
670    my $data = shift ;
671    my $text = shift ;
672    my $format = shift;
673
674    my $size = length($data) ;
675    $ENDS_AT = offset($OFFSET + ($size ? $size - 1 : 0)) ;
676    $OFF     = prOff($size);
677    $LENGTH  = offset($size);
678    $CONTENT = hexDump($data);
679    $TEXT    = $text;
680    $VALUE   = mySpr $format,  @_;
681
682    no warnings;
683    write $FMT2;
684
685    $lastWasMessage = 0;
686}
687
688
689sub Value
690{
691    my $letter = shift;
692
693    if ($letter eq 'C')
694      { return decimalHex($_[0], 2) }
695    elsif ($letter eq 'v')
696      { return decimalHex($_[0], 4) }
697    elsif ($letter eq 'V')
698      { return decimalHex($_[0], 8) }
699    elsif ($letter eq 'Q<')
700      { return decimalHex($_[0], 16) }
701    else
702      { internalFatal undef, "here letter $letter"}
703}
704
705sub outer
706{
707    my $name = shift ;
708    my $unpack = shift ;
709    my $size = shift ;
710    my $cb1  = shift ;
711    my $cb2  = shift ;
712
713
714    myRead(my $buff, $size);
715    my (@value) = unpack $unpack, $buff;
716    my $hex = Value($unpack,  @value);
717
718    if (defined $cb1) {
719        my $v ;
720        if (ref $cb1 eq 'CODE') {
721            $v = $cb1->(@value) ;
722        }
723        else {
724            $v = $cb1 ;
725        }
726
727        $v = "'" . $v unless $v =~ /^'/;
728        $v .= "'"     unless $v =~ /'$/;
729        $hex .= " $v" ;
730    }
731
732    out $buff, $name, $hex ;
733
734    $cb2->(@value)
735        if defined $cb2 ;
736
737    return $value[0];
738}
739
740sub out_C
741{
742    my $name = shift ;
743    my $cb1  = shift ;
744    my $cb2  = shift ;
745
746    outer($name, 'C', 1, $cb1, $cb2);
747}
748
749sub out_v
750{
751    my $name = shift ;
752    my $cb1  = shift ;
753    my $cb2  = shift ;
754
755    outer($name, 'v', 2, $cb1, $cb2);
756}
757
758sub out_V
759{
760    my $name = shift ;
761    my $cb1  = shift ;
762    my $cb2  = shift ;
763
764    outer($name, 'V', 4, $cb1, $cb2);
765}
766
767sub out_Q
768{
769    my $name = shift ;
770    my $cb1  = shift ;
771    my $cb2  = shift ;
772
773    outer($name, 'Q<', 8, $cb1, $cb2);
774}
775
776sub outSomeData
777{
778    my $size = shift;
779    my $message = shift;
780    my $redact = shift ;
781
782    # return if $size == 0;
783
784    if ($size > 0) {
785        if ($size > $PAYLOADLIMIT) {
786            my $before = $FH->tell();
787            out0 $size, $message;
788        } else {
789            myRead(my $buffer, $size );
790            $buffer = "X" x $size
791                if $redact;
792            out $buffer, $message, xDump $buffer ;
793        }
794    }
795}
796
797sub outSomeDataParagraph
798{
799    my $size = shift;
800    my $message = shift;
801    my $redact = shift ;
802
803    return if $size == 0;
804
805    print "\n";
806    outSomeData($size, $message, $redact);
807
808}
809
810sub unpackValue_C
811{
812    Value_v(unpack "C", $_[0]);
813}
814
815sub Value_C
816{
817    return decimalHex($_[0], 2);
818}
819
820
821sub unpackValue_v
822{
823    Value_v(unpack "v", $_[0]);
824}
825
826sub Value_v
827{
828    return decimalHex($_[0], 4);
829}
830
831sub unpackValue_V
832{
833    Value_V(unpack "V", $_[0]);
834}
835
836sub Value_V
837{
838    return decimalHex($_[0] // 0, 8);
839}
840
841sub unpackValue_Q
842{
843    my $v = unpack ("Q<", $_[0]);
844    Value_Q($v);
845}
846
847sub Value_Q
848{
849    return decimalHex($_[0], 16);
850}
851
852sub read_Q
853{
854    my $b ;
855    myRead($b, 8);
856    return ($b, unpack ("Q<" , $b));
857}
858
859sub read_V
860{
861    my $b ;
862    myRead($b, 4);
863    return ($b, unpack ("V", $b));
864}
865
866sub read_v
867{
868    my $b ;
869    myRead($b, 2);
870    return ($b, unpack "v", $b);
871}
872
873
874sub read_C
875{
876    my $b ;
877    myRead($b, 1);
878    return ($b, unpack "C", $b);
879}
880
881sub seekTo
882{
883    my $offset = shift ;
884    my $loc = shift ;
885
886    $loc = SEEK_SET
887        if ! defined $loc ;
888
889    $FH->seek($offset, $loc);
890    $OFFSET = $FH->tell();
891}
892
893sub rewindRelative
894{
895    my $offset = shift ;
896
897    $FH->seek(-$offset, SEEK_CUR);
898    # $OFFSET -= $offset;
899    $OFFSET = $FH->tell();
900}
901
902sub deltaToNextSignature
903{
904    my $start = $FH->tell();
905
906    my $got = scanForSignature(1);
907
908    my $delta = $FH->tell() - $start ;
909    seekTo($start);
910
911    if ($got)
912    {
913        return $delta ;
914    }
915
916    return 0 ;
917}
918
919sub scanForSignature
920{
921    my $walk = shift // 0;
922
923    # $count is only used to when 'walk' is enabled.
924    # Want to scan for a PK header at the start of the file.
925    # All other PK headers are should be directly after the previous PK record.
926    state $count = 0;
927    $count += $walk;
928
929    my %sigs = Signatures::getSigsForScan();
930
931    my $start = $FH->tell();
932
933    # TODO -- Fix this?
934    if (1 || $count <= 1) {
935
936        my $last = '';
937        my $offset = 0;
938        my $buffer ;
939
940        BUFFER:
941        while ($FH->read($buffer, 1024 * 1000))
942        {
943            my $combine = $last . $buffer ;
944
945            my $ix = 0;
946            while (1)
947            {
948                $ix = index($combine, "PK", $ix) ;
949
950                if ($ix == -1)
951                {
952                    $last = '';
953                    next BUFFER;
954                }
955
956                my $rest = substr($combine, $ix + 2, 2);
957
958                if (! $sigs{$rest})
959                {
960                    $ix += 2;
961                    next;
962                }
963
964                # possible match
965                my $here = $FH->tell();
966                seekTo($here - length($combine) + $ix);
967
968                my $name = Signatures::name($sigs{$rest});
969                return $sigs{$rest};
970            }
971
972            $last = substr($combine, $ix+4);
973        }
974    }
975    else {
976        die "FIX THIS";
977        return ! $FH->eof();
978    }
979
980    # printf("scanForSignature %X\t%X (%X)\t%s\n", $start, $FH->tell(), $FH->tell() - $start, 'NO MATCH') ;
981
982    return 0;
983}
984
985my $is64In32 = 0;
986
987my $opt_verbose = 0;
988my $opt_scan = 0;
989my $opt_walk = 0;
990my $opt_Redact = 0;
991my $opt_utc = 0;
992my $opt_want_info_mesages = 1;
993my $opt_want_warning_mesages = 1;
994my $opt_want_error_mesages = 1;
995my $opt_want_message_exit_status = 0;
996my $exit_status_code = 0;
997my $opt_help =0;
998
999$Getopt::Long::bundling = 1 ;
1000
1001TextEncoding::setDefaults();
1002
1003GetOptions("h|help"     => \$opt_help,
1004           "v"          => \$opt_verbose,
1005           "scan"       => \$opt_scan,
1006           "walk"       => \$opt_walk,
1007           "redact"     => \$opt_Redact,
1008           "utc"        => \$opt_utc,
1009           "version"    => sub { print "$VERSION\n"; exit },
1010
1011           # Filename/comment encoding
1012           "encoding=s"          => \&TextEncoding::parseEncodingOption,
1013           "no-encoding"         => \&TextEncoding::NoEncoding,
1014           "debug-encoding"      => \&TextEncoding::debugEncoding,
1015           "output-encoding=s"   => \&TextEncoding::parseEncodingOption,
1016           "language-encoding!"  => \&TextEncoding::LanguageEncodingFlag,
1017
1018           # Message control
1019           "exit-bitmask!"      => \$opt_want_message_exit_status,
1020           "messages!"          => sub {
1021                                            my ($opt_name, $opt_value) = @_;
1022                                            $opt_want_info_mesages =
1023                                            $opt_want_warning_mesages =
1024                                            $opt_want_error_mesages = $opt_value;
1025                                       },
1026    )
1027  or exit 255 ;
1028
1029Usage()
1030    if $opt_help;
1031
1032die("No zipfile\n")
1033    unless @ARGV == 1;
1034
1035die("Cannot specify both '--walk' and '--scan'\n")
1036    if $opt_walk && $opt_scan ;
1037
1038my $filename = shift @ARGV;
1039
1040topLevelFatal "No such file"
1041    unless -e $filename ;
1042
1043topLevelFatal "'$filename' is a directory"
1044    if -d $filename ;
1045
1046topLevelFatal "'$filename' is not a standard file"
1047    unless -f $filename ;
1048
1049$FH = IO::File->new( "<$filename" )
1050    or topLevelFatal "Cannot open '$filename': $!";
1051binmode($FH);
1052
1053displayFileInfo($filename);
1054TextEncoding::encodingInfo();
1055
1056my $FILELEN = -s $filename ;
1057$TRAILING = -s $filename ;
1058$NIBBLES = nibbles(-s $filename) ;
1059
1060topLevelFatal "'$filename' is empty"
1061    if $FILELEN == 0 ;
1062
1063topLevelFatal "file is too short to be a zip file"
1064    if $FILELEN <  ZIP_EOCD_MIN_SIZE ;
1065
1066setupFormat($opt_verbose, $NIBBLES);
1067
1068my @Messages = ();
1069
1070if ($opt_scan || $opt_walk)
1071{
1072    # Main loop for walk/scan processing
1073
1074    my $foundZipRecords = 0;
1075    my $foundCentralHeader = 0;
1076    my $lastEndsAt = 0;
1077    my $lastSignature = 0;
1078    my $lastHeader = {};
1079
1080    $CentralDirectory->{alreadyScanned} = 1 ;
1081
1082    my $output_encryptedCD = 0;
1083
1084    reportPrefixData();
1085    while(my $s = scanForSignature($opt_walk))
1086    {
1087        my $here = $FH->tell();
1088        my $delta = $here - $lastEndsAt ;
1089
1090        # delta can only be negative when '--scan' is used
1091        if ($delta < 0 )
1092        {
1093            # nested or overlap
1094            # check if nested
1095            # remember & check if matching entry in CD
1096            # printf("### WARNING: OVERLAP/NESTED Record found 0x%X 0x%X $delta\n", $here, $lastEndsAt) ;
1097        }
1098        elsif ($here != $lastEndsAt)
1099        {
1100            # scanForSignature had to skip bytes to find the next signature
1101
1102            # some special cases that don't have signatures need to be checked first
1103
1104            seekTo($lastEndsAt);
1105
1106            if (! $output_encryptedCD && $CentralDirectory->isEncryptedCD())
1107            {
1108                displayEncryptedCD();
1109                $output_encryptedCD = 1;
1110                $lastEndsAt = $FH->tell();
1111                next;
1112            }
1113            elsif ($lastSignature == ZIP_LOCAL_HDR_SIG && $lastHeader->{'streamed'} )
1114            {
1115                # Check for size of possibe malformed Data Descriptor before outputting payload
1116                if (! $lastHeader->{'gotDataDescriptorSize'})
1117                {
1118                    my $hdrSize = checkForBadlyFormedDataDescriptor($lastHeader, $delta) ;
1119
1120                    if ($hdrSize)
1121                    {
1122                        # remove size of Data Descriptor from payload
1123                        $delta -= $hdrSize;
1124                        $lastHeader->{'gotDataDescriptorSize'} = $hdrSize;
1125                    }
1126                }
1127
1128                if(defined($lastHeader->{'payloadOutput'}) && ($lastEndsAt = BadlyFormedDataDescriptor($lastHeader, $delta)))
1129                {
1130                    $HeaderOffsetIndex->rewindIndex();
1131                    $lastHeader->{entry}->readDataDescriptor(1) ;
1132                    next;
1133                }
1134
1135                # Assume we have the payload when streaming is enabled
1136                outSomeData($delta, "PAYLOAD", $opt_Redact) ;
1137                $lastHeader->{'payloadOutput'} = 1;
1138                $lastEndsAt = $FH->tell();
1139
1140                next;
1141            }
1142            elsif (Signatures::isCentralHeader($s) && $foundCentralHeader == 0)
1143            {
1144                # check for an APK header directly before the first central header
1145                $foundCentralHeader = 1;
1146
1147                ($START_APK, $APK, $APK_LEN) = chckForAPKSigningBlock($FH, $here, 0) ;
1148
1149                if ($START_APK)
1150                {
1151                    seekTo($lastEndsAt+4);
1152
1153                    scanApkBlock();
1154                    $lastEndsAt = $FH->tell();
1155                    next;
1156                }
1157
1158                seekTo($lastEndsAt);
1159            }
1160
1161            # Not a special case, so output generic padding message
1162            if ($delta > 0)
1163            {
1164                reportPrefixData($delta)
1165                    if $lastEndsAt == 0 ;
1166                outSomeDataParagraph($delta, "UNEXPECTED PADDING");
1167                info  $FH->tell() - $delta, decimalHex0x($delta) . " Unexpected Padding bytes"
1168                    if $FH->tell() - $delta ;
1169                $POSSIBLE_PREFIX_DELTA = $delta
1170                    if $lastEndsAt ==  0;
1171                $lastEndsAt = $FH->tell();
1172                next;
1173            }
1174            else
1175            {
1176                seekTo($here);
1177            }
1178
1179        }
1180
1181        my ($buffer, $signature) = read_V();
1182
1183        $lastSignature = $signature;
1184
1185        my $handler = Signatures::decoder($signature);
1186        if (!defined $handler) {
1187            internalFatal undef, "xxx";
1188        }
1189
1190        $foundZipRecords = 1;
1191        $lastHeader = $handler->($signature, $buffer, $FH->tell() - 4) // {'streamed' => 0};
1192
1193        $lastEndsAt = $FH->tell();
1194
1195        seekTo($here + 4)
1196            if $opt_scan;
1197    }
1198
1199    topLevelFatal "'$filename' is not a zip file"
1200        unless $foundZipRecords ;
1201
1202}
1203else
1204{
1205    # Main loop for non-walk/scan processing
1206
1207    # check for prefix data
1208    my $s = scanForSignature();
1209    if ($s && $FH->tell() != 0)
1210    {
1211        $POSSIBLE_PREFIX_DELTA = $FH->tell();
1212    }
1213
1214    seekTo(0);
1215
1216    scanCentralDirectory($FH);
1217
1218    fatal_tryWalk undef, "No Zip metadata found at end of file"
1219        if ! $CentralDirectory->exists() && ! $EOCD_Present ;
1220
1221    $CentralDirectory->{alreadyScanned} = 1 ;
1222
1223    Nesting::clearStack();
1224
1225    # $HeaderOffsetIndex->dump();
1226
1227    $OFFSET = 0 ;
1228    $FH->seek(0, SEEK_SET) ;
1229
1230    my $expectedOffset = 0;
1231    my $expectedSignature = 0;
1232    my $expectedBuffer = 0;
1233    my $foundCentralHeader = 0;
1234    my $processedAPK = 0;
1235    my $processedECD = 0;
1236    my $lastHeader ;
1237
1238    # my $lastWasLocalHeader = 0;
1239    # my $inCentralHeader = 0;
1240
1241    while (1)
1242    {
1243        last if $FH->eof();
1244
1245        my $here = $FH->tell();
1246
1247        if ($here >= $TRAILING) {
1248            my $delta = $FILELEN - $TRAILING;
1249            outSomeDataParagraph($delta, "TRAILING DATA");
1250            info  $FH->tell(), "Unexpected Trailing Data: " . decimalHex0x($delta) . " bytes";
1251
1252            last;
1253        }
1254
1255        my ($buffer, $signature) = read_V();
1256
1257        $expectedOffset = undef;
1258        $expectedSignature = undef;
1259
1260        # Check for split archive marker at start of file
1261        if ($here == 0 && $signature == ZIP_SINGLE_SEGMENT_MARKER)
1262        {
1263            #  let it drop through
1264            $expectedSignature = ZIP_SINGLE_SEGMENT_MARKER;
1265            $expectedOffset = 0;
1266        }
1267        else
1268        {
1269            my $expectedEntry = $HeaderOffsetIndex->getNextIndex() ;
1270            if ($expectedEntry)
1271            {
1272                $expectedOffset = $expectedEntry->offset();
1273                $expectedSignature = $expectedEntry->signature();
1274                $expectedBuffer = pack "V", $expectedSignature ;
1275            }
1276        }
1277
1278        my $delta = $expectedOffset - $here ;
1279
1280        # if ($here != $expectedOffset && $signature != ZIP_DATA_HDR_SIG)
1281        # {
1282        #     rewindRelative(4);
1283        #     my $delta = $expectedOffset - $here ;
1284        #     outSomeDataParagraph($delta, "UNEXPECTED PADDING");
1285        #     $HeaderOffsetIndex->rewindIndex();
1286        #     next;
1287        # }
1288
1289        # Need to check for use-case where
1290        # * there is a ZIP_DATA_HDR_SIG directly after a ZIP_LOCAL_HDR_SIG.
1291        #   The HeaderOffsetIndex object doesn't have visibility of it.
1292        # * APK header directly before the CD
1293        # * zipbomb
1294
1295        if (defined $expectedOffset && $here != $expectedOffset && ( $CentralDirectory->exists() || $EOCD_Present) )
1296        {
1297            if ($here > $expectedOffset)
1298            {
1299                # Probable zipbomb
1300
1301                # Cursor $OFFSET need to rewind
1302                $OFFSET = $expectedOffset;
1303                $FH->seek($OFFSET + 4, SEEK_SET) ;
1304
1305                $signature = $expectedSignature;
1306                $buffer = $expectedBuffer ;
1307            }
1308
1309            # If get here then $here is less than $expectedOffset
1310
1311
1312            # check for an APK header directly before the first central header
1313            # Make sure not to miss a streaming data descriptor
1314            if ($signature != ZIP_DATA_HDR_SIG && Signatures::isCentralHeader($expectedSignature) && $START_APK && ! $processedAPK )
1315            {
1316                seekTo($here+4);
1317                # rewindRelative(4);
1318                scanApkBlock();
1319                $HeaderOffsetIndex->rewindIndex();
1320                $processedAPK = 1;
1321                next;
1322            }
1323
1324            # Check Encrypted Central Directory
1325            # if ($CentralHeaderSignatures{$expectedSignature} && $CentralDirectory->isEncryptedCD() && ! $processedECD)
1326            # {
1327            #     # rewind the invalid signature
1328            #     seekTo($here);
1329            #     # rewindRelative(4);
1330            #     displayEncryptedCD();
1331            #     $processedECD = 1;
1332            #     next;
1333            # }
1334
1335            if ($signature != ZIP_DATA_HDR_SIG && $delta >= 0)
1336            {
1337                rewindRelative(4);
1338                if($lastHeader->{'streamed'} && BadlyFormedDataDescriptor($lastHeader, $delta))
1339                {
1340                    $lastHeader->{entry}->readDataDescriptor(1) ;
1341                    $HeaderOffsetIndex->rewindIndex();
1342                    next;
1343                }
1344
1345                reportPrefixData($delta)
1346                    if $here == 0;
1347                outSomeDataParagraph($delta, "UNEXPECTED PADDING");
1348                info  $FH->tell() - $delta, decimalHex0x($delta) . " Unexpected Padding bytes"
1349                    if $FH->tell() - $delta ;
1350                $HeaderOffsetIndex->rewindIndex();
1351                next;
1352            }
1353
1354            # ZIP_DATA_HDR_SIG drops through
1355        }
1356
1357        my $handler = Signatures::decoder($signature);
1358
1359        if (!defined $handler)
1360        {
1361            # if ($CentralDirectory->exists()) {
1362
1363            #     # Should be at offset that central directory says
1364            #     my $locOffset = $CentralDirectory->getNextLocalOffset();
1365            #     my $delta = $locOffset - $here ;
1366
1367            #     if ($here + 4 == $locOffset ) {
1368            #         for (0 .. 3) {
1369            #             $FH->ungetc(ord(substr($buffer, $_, 1)))
1370            #         }
1371            #         outSomeData($delta, "UNEXPECTED PADDING");
1372            #         next;
1373            #     }
1374            # }
1375
1376
1377            # if ($here == $CentralDirectory->{CentralDirectoryOffset} && $EOCD_Present && $CentralDirectory->isEncryptedCD())
1378            # {
1379            #     # rewind the invalid signature
1380            #     rewindRelative(4);
1381            #     displayEncryptedCD();
1382            #     next;
1383            # }
1384            # elsif ($here < $CentralDirectory->{CentralDirectoryOffset})
1385            # {
1386            #     # next
1387            #     #     if scanForSignature() ;
1388
1389            #     my $skippedFrom = $FH->tell() ;
1390            #     my $skippedContent = $CentralDirectory->{CentralDirectoryOffset} - $skippedFrom ;
1391
1392            #     printf "\nWARNING!\nExpected Zip header not found at offset 0x%X\n", $here;
1393            #     printf "Skipping 0x%X bytes to Central Directory...\n", $skippedContent;
1394
1395            #     push @Messages,
1396            #         sprintf("Expected Zip header not found at offset 0x%X, ", $skippedFrom) .
1397            #         sprintf("skipped 0x%X bytes\n", $skippedContent);
1398
1399            #     seekTo($CentralDirectory->{CentralDirectoryOffset});
1400
1401            #     next;
1402            # }
1403            # else
1404            {
1405                fatal $here, sprintf "Unexpected Zip Signature '%s' at offset %s", Value_V($signature), decimalHex0x($here) ;
1406                last;
1407            }
1408        }
1409
1410        $ZIP64 = 0 if $signature != ZIP_DATA_HDR_SIG ;
1411        $lastHeader = $handler->($signature, $buffer, $FH->tell() - 4);
1412        # $lastWasLocalHeader = $signature == ZIP_LOCAL_HDR_SIG ;
1413        $HeaderOffsetIndex->rewindIndex()
1414            if $signature == ZIP_DATA_HDR_SIG ;
1415    }
1416}
1417
1418
1419dislayMessages()
1420    if $opt_want_error_mesages ;
1421
1422exit $exit_status_code ;
1423
1424sub dislayMessages
1425{
1426
1427    # Compare Central & Local for discrepencies
1428
1429    if ($CentralDirectory->isMiniZipEncrypted)
1430    {
1431        # don't compare local & central entries when minizip-ng encryption is in play
1432        info undef, "Zip file uses minizip-ng central directory encryption"
1433    }
1434
1435    elsif ($CentralDirectory->exists() && $LocalDirectory->exists())
1436    {
1437        # TODO check number of entries matches eocd
1438        # TODO check header length matches reality
1439
1440        # Nesting::dump();
1441
1442        $LocalDirectory->sortByLocalOffset();
1443        my %cleanCentralEntries = %{ $CentralDirectory->{byCentralOffset} };
1444
1445        if ($NESTING_DEBUG)
1446        {
1447            if (Nesting::encapsulationCount())
1448            {
1449                say "# ENCAPSULATIONS";
1450
1451                for my $index (sort { $a <=> $b } keys %{ Nesting::encapsulations() })
1452                {
1453                    my $outer = Nesting::entryByIndex($index) ;
1454
1455                    say "# Nesting " . $outer->outputFilename . " " . $outer->offsetStart . " " . $outer->offsetEnd ;
1456
1457                    for my $inner (sort { $a <=> $b } @{  Nesting::encapsulations()->{$index} } )
1458                    {
1459                        say "#  " . $inner->outputFilename . " " . $inner->offsetStart . " " . $inner->offsetEnd ;;
1460                    }
1461                }
1462            }
1463        }
1464
1465        {
1466            # check for Local Directory orphans
1467
1468           my %orphans = map  {   $_->localHeaderOffset => $_->outputFilename }
1469                         grep {   $_->entryType == ZIP_LOCAL_HDR_SIG && # Want Local Headers
1470                                ! $_->encapsulated   &&
1471                                  @{ $_->getCdEntries } == 0
1472                           }
1473                         values %{ Nesting::getEntriesByOffset() };
1474
1475
1476            if (keys %orphans)
1477            {
1478                error undef, "Orphan Local Headers found: " . scalar(keys %orphans) ;
1479
1480                my $table = new SimpleTable;
1481                $table->addHeaderRow('Offset', 'Filename');
1482                $table->addDataRow(decimalHex0x($_), $orphans{$_})
1483                    for sort { $a <=> $b } keys %orphans ;
1484
1485                $table->display();
1486            }
1487        }
1488
1489        {
1490            # check for Central Directory orphans
1491            # probably only an issue with --walk & a zipbomb
1492
1493           my %orphans = map  {      $_->centralHeaderOffset => $_         }
1494                         grep {      $_->entryType == ZIP_CENTRAL_HDR_SIG # Want Central Headers
1495                                && ! $_->ldEntry                     # Filter out orphans
1496                                && ! $_->encapsulated                # Not encapsulated
1497                         }
1498                         values %{ Nesting::getEntriesByOffset() };
1499
1500            if (keys %orphans)
1501            {
1502                error undef, "Possible zipbomb -- Orphan Central Headers found: " . scalar(keys %orphans) ;
1503
1504                my $table = new SimpleTable;
1505                $table->addHeaderRow('Offset', 'Filename');
1506                for (sort { $a <=> $b } keys %orphans )
1507                {
1508                    $table->addDataRow(decimalHex0x($_), $orphans{$_}{filename});
1509                    delete $cleanCentralEntries{ $_ };
1510                }
1511
1512                $table->display();
1513            }
1514        }
1515
1516        if (Nesting::encapsulationCount())
1517        {
1518            # Benign Nested zips
1519            # This is the use-case where a zip file is "stored" in another zip file.
1520            # NOT a zipbomb -- want the benign nested entries
1521
1522            # Note: this is only active when scan is used
1523
1524           my %outerEntries = map  { $_->localHeaderOffset => $_->outputFilename }
1525                              grep {
1526                                      $_->entryType == ZIP_CENTRAL_HDR_SIG &&
1527                                    ! $_->encapsulated && # not encapsulated
1528                                      $_->ldEntry && # central header has a local sibling
1529                                      $_->ldEntry->childrenCount && # local entry has embedded entries
1530                                    ! Nesting::childrenInCentralDir($_->ldEntry)
1531                                   }
1532                              values %{ Nesting::getEntriesByOffset() };
1533
1534            if (keys %outerEntries)
1535            {
1536                my $count = scalar keys %outerEntries;
1537                info  undef, "Nested Zip files found: $count";
1538
1539                my $table = new SimpleTable;
1540                $table->addHeaderRow('Offset', 'Filename');
1541                $table->addDataRow(decimalHex0x($_), $outerEntries{$_})
1542                    for sort { $a <=> $b } keys %outerEntries ;
1543
1544                $table->display();
1545            }
1546        }
1547
1548        if ($LocalDirectory->anyStreamedEntries)
1549        {
1550            # Check for a missing Data Descriptors
1551
1552           my %missingDataDescriptor = map  {   $_->localHeaderOffset => $_->outputFilename }
1553                                       grep {   $_->entryType == ZIP_LOCAL_HDR_SIG &&
1554                                                $_->streamed &&
1555                                              ! $_->readDataDescriptor
1556                                            }
1557                              values %{ Nesting::getEntriesByOffset() };
1558
1559
1560            for my $offset (sort keys %missingDataDescriptor)
1561            {
1562                my $filename = $missingDataDescriptor{$offset};
1563                error  $offset, "Filename '$filename': Missing 'Data Descriptor'" ;
1564            }
1565        }
1566
1567        {
1568            # compare local & central for duplicate entries (CD entries point to same local header)
1569
1570           my %ByLocalOffset = map  {      $_->localHeaderOffset => $_ }
1571                               grep {
1572                                           $_->entryType == ZIP_LOCAL_HDR_SIG # Want Local Headers
1573                                      && ! $_->encapsulated                   # Not encapsulated
1574                                      && @{ $_->getCdEntries } > 1
1575                                    }
1576                               values %{ Nesting::getEntriesByOffset() };
1577
1578            for my $offset (sort keys %ByLocalOffset)
1579            {
1580                my @entries =  @{ $ByLocalOffset{$offset}->getCdEntries };
1581                if (@entries > 1)
1582                {
1583                    # found duplicates
1584                    my $localEntry =  $LocalDirectory->getByLocalOffset($offset) ;
1585                    if ($localEntry)
1586                    {
1587                        error undef, "Possible zipbomb -- Duplicate Central Headers referring to one Local header for '" . $localEntry->outputFilename . "' at offset " . decimalHex0x($offset);
1588                    }
1589                    else
1590                    {
1591                        error undef, "Possible zipbomb -- Duplicate Central Headers referring to one Local header at offset " . decimalHex0x($offset);
1592                    }
1593
1594                    my $table = new SimpleTable;
1595                    $table->addHeaderRow('Offset', 'Filename');
1596                    for (sort { $a->centralHeaderOffset <=> $b->centralHeaderOffset } @entries)
1597                    {
1598                        $table->addDataRow(decimalHex0x($_->centralHeaderOffset), $_->outputFilename);
1599                        delete $cleanCentralEntries{ $_->centralHeaderOffset };
1600                    }
1601
1602                    $table->display();
1603                }
1604            }
1605        }
1606
1607        if (Nesting::encapsulationCount())
1608        {
1609            # compare local & central for nested entries
1610
1611            # get the local offsets referenced in the CD
1612            # this deliberately ignores any valid nested local entries
1613            my @localOffsets = sort { $a <=> $b } keys %{ $CentralDirectory->{byLocalOffset} };
1614
1615            # now check for nesting
1616
1617            my %nested ;
1618            my %bomb;
1619
1620            for my $offset (@localOffsets)
1621            {
1622                my $innerEntry = $LocalDirectory->{byLocalOffset}{$offset};
1623                if ($innerEntry)
1624                {
1625                    my $outerLocalEntry = Nesting::getOuterEncapsulation($innerEntry);
1626                    if (defined $outerLocalEntry)
1627                    {
1628                        my $outerOffset = $outerLocalEntry->localHeaderOffset();
1629                        if ($CentralDirectory->{byLocalOffset}{ $offset })
1630                        {
1631                            push @{ $bomb{ $outerOffset } }, $offset ;
1632                        }
1633                        else
1634                        {
1635                            push @{ $nested{ $outerOffset } }, $offset ;
1636                        }
1637                    }
1638                }
1639            }
1640
1641            if (keys %nested)
1642            {
1643                # The real central directory at eof does not know about these.
1644                # likely to be a zip file stored in another zip file
1645                warning  undef, "Nested Local Entries found";
1646                for my $loc (sort keys %nested)
1647                {
1648                    my $count = scalar @{ $nested{$loc} };
1649                    my $outerEntry = $LocalDirectory->getByLocalOffset($loc);
1650                    say "Local Header for '" . $outerEntry->outputFilename . "' at offset " . decimalHex0x($loc) .  " has $count nested Local Headers";
1651                    for my $n ( @{ $nested{$loc} } )
1652                    {
1653                        my $innerEntry = $LocalDirectory->getByLocalOffset($n);
1654
1655                        say "#  Nested Local Header for filename '" . $innerEntry->outputFilename . "' is at Offset " . decimalHex0x($n)  ;
1656                    }
1657                }
1658            }
1659
1660            if (keys %bomb)
1661            {
1662                # Central Directory knows about these, so this is a zipbomb
1663
1664                error undef, "Possible zipbomb -- Nested Local Entries found";
1665                for my $loc (sort keys %bomb)
1666                {
1667                    my $count = scalar @{ $bomb{$loc} };
1668                    my $outerEntry = $LocalDirectory->getByLocalOffset($loc);
1669                    say "# Local Header for '" . $outerEntry->outputFilename . "' at offset " . decimalHex0x($loc) .  " has $count nested Local Headers";
1670
1671                    my $table = new SimpleTable;
1672                    $table->addHeaderRow('Offset', 'Filename');
1673                    $table->addDataRow(decimalHex0x($_), $LocalDirectory->getByLocalOffset($_)->outputFilename)
1674                        for sort @{ $bomb{$loc} } ;
1675
1676                    $table->display();
1677
1678                    delete $cleanCentralEntries{ $_ }
1679                        for grep { defined $_ }
1680                            map  { $CentralDirectory->{byLocalOffset}{$_}{centralHeaderOffset} }
1681                            @{ $bomb{$loc} } ;
1682                }
1683            }
1684        }
1685
1686        # Check if contents of local headers match with central headers
1687        #
1688        # When central header encryption is used the local header values are masked (see APPNOTE 6.3.10, sec 4)
1689        # In this usecase the central header will appear to be absent
1690        #
1691        # key fields
1692        #    filename, compressed/uncompessed lengths, crc, compression method
1693        {
1694            for my $centralEntry ( sort { $a->centralHeaderOffset() <=> $b->centralHeaderOffset() } values %cleanCentralEntries )
1695            {
1696                my $localOffset = $centralEntry->localHeaderOffset;
1697                my $localEntry = $LocalDirectory->getByLocalOffset($localOffset);
1698
1699                next
1700                    unless $localEntry;
1701
1702                state $fields = [
1703                    # field name         offset    display name         stringify
1704                    ['filename',            ZIP_CD_FILENAME_OFFSET,
1705                                                'Filename',             undef, ],
1706                    ['extractVersion',       7, 'Extract Zip Spec',     sub { decimalHex0xUndef($_[0]) . " " . decodeZipVer($_[0]) }, ],
1707                    ['generalPurposeFlags',  8, 'General Purpose Flag', \&decimalHex0xUndef, ],
1708                    ['compressedMethod',    10, 'Compression Method',   sub { decimalHex0xUndef($_[0]) . " " . getcompressionMethodName($_[0]) }, ],
1709                    ['lastModDateTime',     12, 'Modification Time',    sub { decimalHex0xUndef($_[0]) . " " . LastModTime($_[0]) }, ],
1710                    ['crc32',               16, 'CRC32',                \&decimalHex0xUndef, ],
1711                    ['compressedSize',      20, 'Compressed Size',      \&decimalHex0xUndef, ],
1712                    ['uncompressedSize',    24, 'Uncompressed Size',    \&decimalHex0xUndef, ],
1713
1714                ] ;
1715
1716                my $table = new SimpleTable;
1717                $table->addHeaderRow('Field Name', 'Central Offset', 'Central Value', 'Local Offset', 'Local Value');
1718
1719                for my $data (@$fields)
1720                {
1721                    my ($field, $offset, $name, $stringify) = @$data;
1722                    # if the local header uses streaming and we are running a scan/walk, the compressed/uncompressed sizes will not be known
1723                    my $localValue = $localEntry->{$field} ;
1724                    my $centralValue = $centralEntry->{$field};
1725
1726                    if (($localValue // '-1') ne ($centralValue // '-2'))
1727                    {
1728                        if ($stringify)
1729                        {
1730                            $localValue = $stringify->($localValue);
1731                            $centralValue = $stringify->($centralValue);
1732                        }
1733
1734                        $table->addDataRow($name,
1735                                            decimalHex0xUndef($centralEntry->centralHeaderOffset() + $offset),
1736                                            $centralValue,
1737                                            decimalHex0xUndef($localOffset+$offset),
1738                                            $localValue);
1739                    }
1740                }
1741
1742                my $badFields = $table->hasData;
1743                if ($badFields)
1744                {
1745                    error undef, "Found $badFields Field Mismatch for Filename '". $centralEntry->outputFilename . "'";
1746                    $table->display();
1747                }
1748            }
1749        }
1750
1751    }
1752    elsif ($CentralDirectory->exists())
1753    {
1754        my @messages = "Central Directory exists, but Local Directory not found" ;
1755        push @messages , "Try running with --walk' or '--scan' options"
1756            unless $opt_scan || $opt_walk ;
1757        error undef, @messages;
1758    }
1759    elsif ($LocalDirectory->exists())
1760    {
1761        if ($CentralDirectory->isEncryptedCD())
1762        {
1763            warning undef, "Local Directory exists, but Central Directory is encrypted"
1764        }
1765        else
1766        {
1767            error undef, "Local Directory exists, but Central Directory not found"
1768        }
1769
1770    }
1771
1772    if ($ErrorCount ||$WarningCount || $InfoCount )
1773    {
1774        say "#"
1775            unless $lastWasMessage ;
1776
1777        say "# Error Count: $ErrorCount"
1778            if $ErrorCount;
1779        say "# Warning Count: $WarningCount"
1780            if $WarningCount;
1781        say "# Info Count: $InfoCount"
1782            if $InfoCount;
1783    }
1784
1785    if (@Messages)
1786    {
1787        my $count = scalar @Messages ;
1788        say "#\nWARNINGS";
1789        say "# * $_\n" for @Messages ;
1790    }
1791
1792    say "#\n# Done";
1793}
1794
1795sub checkForBadlyFormedDataDescriptor
1796{
1797    my $lastHeader = shift;
1798    my $delta = shift // 0;
1799
1800    # check size of delta - a DATA HDR without a signature can only be
1801    #     12 bytes for 32-bit
1802    #     20 bytes for 64-bit
1803
1804    my $here = $FH->tell();
1805
1806    my $localEntry = $lastHeader->{entry};
1807
1808    return 0
1809        unless $opt_scan || $opt_walk ;
1810
1811    # delta can be the actual payload + a data descriptor without a sig
1812
1813    my $signature = unpack "V",  peekAtOffset($here + $delta, 4);
1814
1815    if ($signature == ZIP_DATA_HDR_SIG)
1816    {
1817        return 0;
1818    }
1819
1820    my $cl32 = unpack "V",  peekAtOffset($here + $delta - 8,  4);
1821    my $cl64 = unpack "Q<", peekAtOffset($here + $delta - 16, 8);
1822
1823    if ($cl32 == $delta - 12)
1824    {
1825        return 12;
1826    }
1827
1828    if ($cl64 == $delta - 20)
1829    {
1830        return 20 ;
1831    }
1832
1833    return 0;
1834}
1835
1836
1837sub BadlyFormedDataDescriptor
1838{
1839    my $lastHeader= shift;
1840    my $delta = shift;
1841
1842    # check size of delta - a DATA HDR without a signature can only be
1843    #     12 bytes for 32-bit
1844    #     20 bytes for 64-bit
1845
1846    my $here = $FH->tell();
1847
1848    my $localEntry = $lastHeader->{entry};
1849    my $compressedSize = $lastHeader->{payloadLength} ;
1850
1851    my $sigName = Signatures::titleName(ZIP_DATA_HDR_SIG);
1852
1853    if ($opt_scan || $opt_walk)
1854    {
1855        # delta can be the actual payload + a data descriptor without a sig
1856
1857        if ($lastHeader->{'gotDataDescriptorSize'} == 12)
1858        {
1859            # seekTo($FH->tell() + $delta - 12) ;
1860
1861            # outSomeData($delta - 12, "PAYLOAD", $opt_Redact) ;
1862
1863            print "\n";
1864            out1 "Missing $sigName Signature", Value_V(ZIP_DATA_HDR_SIG);
1865
1866            error $FH->tell(), "Missimg $sigName Signature";
1867            $localEntry->crc32(              out_V "CRC");
1868            $localEntry->compressedSize(   out_V "Compressed Size");
1869            $localEntry->uncompressedSize( out_V "Uncompressed Size");
1870
1871            if ($localEntry->zip64)
1872            {
1873                error $here, "'$sigName': expected 64-bit values, got 32-bit";
1874            }
1875
1876            return $FH->tell();
1877        }
1878
1879        if ($lastHeader->{'gotDataDescriptorSize'} == 20)
1880        {
1881            # seekTo($FH->tell() + $delta - 20) ;
1882
1883            # outSomeData($delta - 20, "PAYLOAD", $opt_Redact) ;
1884
1885            print "\n";
1886            out1 "Missing $sigName Signature", Value_V(ZIP_DATA_HDR_SIG);
1887
1888            error $FH->tell(), "Missimg $sigName Signature";
1889            $localEntry->crc32(              out_V "CRC");
1890            $localEntry->compressedSize(   out_Q "Compressed Size");
1891            $localEntry->uncompressedSize( out_Q "Uncompressed Size");
1892
1893            if (! $localEntry->zip64)
1894            {
1895                error $here, "'$sigName': expected 32-bit values, got 64-bit";
1896            }
1897
1898            return $FH->tell();
1899        }
1900
1901        error 0, "MISSING $sigName";
1902
1903        seekTo($here);
1904        return 0;
1905    }
1906
1907    my $cdEntry = $localEntry->getCdEntry;
1908
1909    if ($delta == 12)
1910    {
1911        $FH->seek($lastHeader->{payloadOffset} + $lastHeader->{payloadLength}, SEEK_SET) ;
1912
1913        my $cl = unpack "V", peekAtOffset($FH->tell() + 4, 4);
1914        if ($cl == $compressedSize)
1915        {
1916            print "\n";
1917            out1 "Missing $sigName Signature", Value_V(ZIP_DATA_HDR_SIG);
1918
1919            error $FH->tell(), "Missimg $sigName Signature";
1920            $localEntry->crc32(              out_V "CRC");
1921            $localEntry->compressedSize(   out_V "Compressed Size");
1922            $localEntry->uncompressedSize( out_V "Uncompressed Size");
1923
1924            if ($localEntry->zip64)
1925            {
1926                error $here, "'$sigName': expected 64-bit values, got 32-bit";
1927            }
1928
1929            return $FH->tell();
1930        }
1931    }
1932
1933    if ($delta == 20)
1934    {
1935        $FH->seek($lastHeader->{payloadOffset} + $lastHeader->{payloadLength}, SEEK_SET) ;
1936
1937        my $cl = unpack "Q<", peekAtOffset($FH->tell() + 4, 8);
1938
1939        if ($cl == $compressedSize)
1940        {
1941            print "\n";
1942            out1 "Missing $sigName Signature", Value_V(ZIP_DATA_HDR_SIG);
1943
1944            error $FH->tell(), "Missimg $sigName Signature";
1945            $localEntry->crc32(              out_V "CRC");
1946            $localEntry->compressedSize(   out_Q "Compressed Size");
1947            $localEntry->uncompressedSize( out_Q "Uncompressed Size");
1948
1949            if (! $localEntry->zip64 && ( $cdEntry && ! $cdEntry->zip64))
1950            {
1951                error $here, "'$sigName': expected 32-bit values, got 64-bit";
1952            }
1953
1954            return $FH->tell();
1955        }
1956    }
1957
1958    seekTo($here);
1959
1960    error $here, "Missing $sigName";
1961    return 0;
1962}
1963
1964sub getcompressionMethodName
1965{
1966    my $id = shift ;
1967    " '" . ($ZIP_CompressionMethods{$id} || "Unknown Method") . "'" ;
1968}
1969
1970sub compressionMethod
1971{
1972    my $id = shift ;
1973    Value_v($id) . getcompressionMethodName($id);
1974}
1975
1976sub LocalHeader
1977{
1978    my $signature = shift ;
1979    my $data = shift ;
1980    my $startRecordOffset = shift ;
1981
1982    my $locHeaderOffset = $FH->tell() -4 ;
1983
1984    ++ $LocalHeaderCount;
1985    print "\n";
1986    out $data, "LOCAL HEADER #$LocalHeaderCount" , Value_V($signature);
1987
1988    need 26, Signatures::name($signature);
1989
1990    my $buffer;
1991    my $orphan = 0;
1992
1993    my ($loc, $CDcompressedSize, $cdZip64, $zip64Sizes, $cdIndex, $cdEntryOffset) ;
1994    my $CentralEntryExists = $CentralDirectory->localOffset($startRecordOffset);
1995    my $localEntry = LocalDirectoryEntry->new();
1996
1997    my $cdEntry;
1998
1999    if (! $opt_scan && ! $opt_walk && $CentralEntryExists)
2000    {
2001        $cdEntry = $CentralDirectory->getByLocalOffset($startRecordOffset);
2002
2003        if (! $cdEntry)
2004        {
2005            out1 "Orphan Entry: No matching central directory" ;
2006            $orphan = 1 ;
2007        }
2008
2009        $cdZip64 = $cdEntry->zip64ExtraPresent;
2010        $zip64Sizes = $cdEntry->zip64SizesPresent;
2011        $cdEntryOffset = $cdEntry->centralHeaderOffset ;
2012        $localEntry->addCdEntry($cdEntry) ;
2013
2014        if ($cdIndex && $cdIndex != $LocalHeaderCount)
2015        {
2016            # fatal undef, "$cdIndex != $LocalHeaderCount"
2017        }
2018    }
2019
2020    my $extractVer = out_C  "Extract Zip Spec", \&decodeZipVer;
2021    out_C  "Extract OS", \&decodeOS;
2022
2023    my ($bgp, $gpFlag) = read_v();
2024    my ($bcm, $compressedMethod) = read_v();
2025
2026    out $bgp, "General Purpose Flag", Value_v($gpFlag) ;
2027    GeneralPurposeBits($compressedMethod, $gpFlag);
2028    my $LanguageEncodingFlag = $gpFlag & ZIP_GP_FLAG_LANGUAGE_ENCODING ;
2029    my $streaming = $gpFlag & ZIP_GP_FLAG_STREAMING_MASK ;
2030    $localEntry->languageEncodingFlag($LanguageEncodingFlag) ;
2031
2032    out $bcm, "Compression Method",   compressionMethod($compressedMethod) ;
2033    info $FH->tell() - 2, "Unknown 'Compression Method' ID " . decimalHex0x($compressedMethod, 2)
2034        if ! defined $ZIP_CompressionMethods{$compressedMethod} ;
2035
2036    my $lastMod = out_V "Modification Time", sub { LastModTime($_[0]) };
2037
2038    my $crc              = out_V "CRC";
2039    warning $FH->tell() - 4, "CRC field should be zero when streaming is enabled"
2040        if $streaming && $crc != 0 ;
2041
2042    my $compressedSize   = out_V "Compressed Size";
2043    # warning $FH->tell(), "Compressed Size should be zero when streaming is enabled";
2044
2045    my $uncompressedSize = out_V "Uncompressed Size";
2046    # warning $FH->tell(), "Uncompressed Size should be zero when streaming is enabled";
2047
2048    my $filenameLength   = out_v "Filename Length";
2049
2050    if ($filenameLength == 0)
2051    {
2052        info $FH->tell()- 2, "Zero Length filename";
2053    }
2054
2055    my $extraLength        = out_v "Extra Length";
2056
2057    my $filename = '';
2058    if ($filenameLength)
2059    {
2060        need $filenameLength, Signatures::name($signature), 'Filename';
2061
2062        myRead(my $raw_filename, $filenameLength);
2063        $localEntry->filename($raw_filename) ;
2064        $filename = outputFilename($raw_filename, $LanguageEncodingFlag);
2065        $localEntry->outputFilename($filename);
2066    }
2067
2068    $localEntry->localHeaderOffset($locHeaderOffset) ;
2069    $localEntry->offsetStart($locHeaderOffset) ;
2070    $localEntry->compressedSize($compressedSize) ;
2071    $localEntry->uncompressedSize($uncompressedSize) ;
2072    $localEntry->extractVersion($extractVer);
2073    $localEntry->generalPurposeFlags($gpFlag);
2074    $localEntry->lastModDateTime($lastMod);
2075    $localEntry->crc32($crc) ;
2076    $localEntry->zip64ExtraPresent($cdZip64) ;
2077    $localEntry->zip64SizesPresent($zip64Sizes) ;
2078
2079    $localEntry->compressedMethod($compressedMethod) ;
2080    $localEntry->streamed($gpFlag & ZIP_GP_FLAG_STREAMING_MASK) ;
2081
2082    $localEntry->std_localHeaderOffset($locHeaderOffset + $PREFIX_DELTA) ;
2083    $localEntry->std_compressedSize($compressedSize) ;
2084    $localEntry->std_uncompressedSize($uncompressedSize) ;
2085    $localEntry->std_diskNumber(0) ;
2086
2087    if ($extraLength)
2088    {
2089        need $extraLength, Signatures::name($signature), 'Extra';
2090        walkExtra($extraLength, $localEntry);
2091    }
2092
2093    # APPNOTE 6.3.10, sec 4.3.8
2094    warning $FH->tell - $filenameLength, "Directory '$filename' must not have a payload"
2095        if ! $streaming && $filename =~ m#/$# && $localEntry->uncompressedSize ;
2096
2097    my @msg ;
2098    # if ($cdZip64 && ! $ZIP64)
2099    # {
2100    #     # Central directory said this was Zip64
2101    #     # some zip files don't have the Zip64 field in the local header
2102    #     # seems to be a streaming issue.
2103    #     push @msg, "Missing Zip64 extra field in Local Header #$hexHdrCount\n";
2104
2105    #     if (! $zip64Sizes)
2106    #     {
2107    #         # Central has a ZIP64 entry that doesn't have sizes
2108    #         # Local doesn't have a Zip 64 at all
2109    #         push @msg, "Unzip may complain about 'overlapped components' #$hexHdrCount\n";
2110    #     }
2111    #     else
2112    #     {
2113    #         $ZIP64 = 1
2114    #     }
2115    # }
2116
2117
2118    my $minizip_encrypted = $localEntry->minizip_secure;
2119    my $pk_encrypted      = ($gpFlag & ZIP_GP_FLAG_STRONG_ENCRYPTED_MASK) && $compressedMethod != 99 && ! $minizip_encrypted;
2120
2121    # Detecting PK strong encryption from a local header is a bit convoluted.
2122    # Cannot just use ZIP_GP_FLAG_ENCRYPTED_CD because minizip also uses this bit.
2123    # so jump through some hoops
2124    #     extract ver is >= 5.0'
2125    #     all the encryption flags are set in gpflags
2126    #     TODO - add zero lengths for crc, compresssed & uncompressed
2127
2128    if (($gpFlag & ZIP_GP_FLAG_ALL_ENCRYPT) == ZIP_GP_FLAG_ALL_ENCRYPT  && $extractVer >= 0x32  )
2129    {
2130        $CentralDirectory->setPkEncryptedCD()
2131    }
2132
2133    my $size = 0;
2134
2135    # If no CD scanned, get compressed Size from local header.
2136    # Zip64 extra field takes priority
2137    my $cdl = defined $cdEntry
2138                ? $cdEntry->compressedSize()
2139                : undef;
2140
2141    $CDcompressedSize = $localEntry->compressedSize ;
2142    $CDcompressedSize = $cdl
2143        if defined $cdl && $gpFlag & ZIP_GP_FLAG_STREAMING_MASK;
2144
2145    my $cdu = defined $CentralDirectory->{byLocalOffset}{$locHeaderOffset}
2146                ? $CentralDirectory->{byLocalOffset}{$locHeaderOffset}{uncompressedSize}
2147                : undef;
2148    my $CDuncompressedSize = $localEntry->uncompressedSize ;
2149
2150    $CDuncompressedSize = $cdu
2151        if defined $cdu && $gpFlag & ZIP_GP_FLAG_STREAMING_MASK;
2152
2153    my $fullCompressedSize = $CDcompressedSize;
2154
2155    my $payloadOffset = $FH->tell();
2156    $localEntry->payloadOffset($payloadOffset) ;
2157    $localEntry->offsetEnd($payloadOffset + $fullCompressedSize -1) ;
2158
2159    if ($CDcompressedSize)
2160    {
2161        # check if enough left in file for the payload
2162        my $available = $FILELEN - $FH->tell;
2163        if ($available < $CDcompressedSize )
2164        {
2165            error $FH->tell,
2166                  "file truncated while reading 'PAYLOAD'",
2167                  expectedMessage($CDcompressedSize, $available);
2168
2169            $CDcompressedSize = $available;
2170        }
2171    }
2172
2173    # Next block can decrement the CDcompressedSize
2174    # possiblty to zero. Need to remember if it started out
2175    # as a non-zero value
2176    my $haveCDcompressedSize = $CDcompressedSize;
2177
2178    if ($compressedMethod == 99 && $localEntry->aesValid) # AES Encryption
2179    {
2180        $CDcompressedSize -= printAes($localEntry)
2181    }
2182    elsif (($gpFlag & ZIP_GP_FLAG_ALL_ENCRYPT) == 0)
2183    {
2184        if ($compressedMethod == ZIP_CM_LZMA)
2185        {
2186
2187            $size = printLzmaProperties()
2188        }
2189
2190        $CDcompressedSize -= $size;
2191    }
2192    elsif ($pk_encrypted)
2193    {
2194        $CDcompressedSize -= DecryptionHeader();
2195    }
2196
2197    if ($haveCDcompressedSize) {
2198
2199        if ($compressedMethod == 92 && $CDcompressedSize == 20) {
2200            # Payload for a Reference is the SHA-1 hash of the uncompressed content
2201            myRead(my $sha1, 20);
2202            out $sha1, "PAYLOAD",  "SHA-1 Hash: " . hexDump($sha1);
2203        }
2204        elsif ($compressedMethod == 99 && $localEntry->aesValid ) {
2205            outSomeData($CDcompressedSize, "PAYLOAD", $opt_Redact) ;
2206            my $auth ;
2207            myRead($auth, 10);
2208            out $auth, "AES Auth",  hexDump16($auth);
2209        }
2210        else {
2211            outSomeData($CDcompressedSize, "PAYLOAD", $opt_Redact) ;
2212        }
2213    }
2214
2215    print "WARNING: $_"
2216        for @msg;
2217
2218    push @Messages, @msg ;
2219
2220    $LocalDirectory->addEntry($localEntry);
2221
2222    return {
2223                'localHeader'   => 1,
2224                'streamed'      => $gpFlag & ZIP_GP_FLAG_STREAMING_MASK,
2225                'offset'        => $startRecordOffset,
2226                'length'        => $FH->tell() - $startRecordOffset,
2227                'payloadLength' => $fullCompressedSize,
2228                'payloadOffset' => $payloadOffset,
2229                'entry'         => $localEntry,
2230        } ;
2231}
2232
2233use constant Pack_ZIP_DIGITAL_SIGNATURE_SIG => pack("V", ZIP_DIGITAL_SIGNATURE_SIG);
2234
2235sub findDigitalSignature
2236{
2237    my $cdSize = shift;
2238
2239    my $here = $FH->tell();
2240
2241    my $data ;
2242    myRead($data, $cdSize);
2243
2244    seekTo($here);
2245
2246    # find SIG
2247    my $ix = index($data, Pack_ZIP_DIGITAL_SIGNATURE_SIG);
2248    if ($ix > -1)
2249    {
2250        # check size of signature meaans it is directly after the encrypted CD
2251        my $sigSize = unpack "v", substr($data, $ix+4, 2);
2252        if ($ix + 4 + 2 + $sigSize == $cdSize)
2253        {
2254            # return size of digital signature record
2255            return 4 + 2 + $sigSize ;
2256        }
2257    }
2258
2259    return 0;
2260}
2261
2262sub displayEncryptedCD
2263{
2264    # First thing in the encrypted CD is the Decryption Header
2265    my $decryptHeaderSize = DecryptionHeader(1);
2266
2267    # Check for digital signature record in the CD
2268    # It needs to be the very last thing in the CD
2269
2270    my $delta = deltaToNextSignature();
2271    print "\n";
2272    outSomeData($delta, "ENCRYPTED CENTRAL DIRECTORY")
2273        if $delta;
2274}
2275
2276sub DecryptionHeader
2277{
2278    # APPNOTE 6.3.10, sec 7.2.4
2279
2280    # -Decryption Header:
2281    # Value     Size     Description
2282    # -----     ----     -----------
2283    # IVSize    2 bytes  Size of initialization vector (IV)
2284    # IVData    IVSize   Initialization vector for this file
2285    # Size      4 bytes  Size of remaining decryption header data
2286    # Format    2 bytes  Format definition for this record
2287    # AlgID     2 bytes  Encryption algorithm identifier
2288    # Bitlen    2 bytes  Bit length of encryption key
2289    # Flags     2 bytes  Processing flags
2290    # ErdSize   2 bytes  Size of Encrypted Random Data
2291    # ErdData   ErdSize  Encrypted Random Data
2292    # Reserved1 4 bytes  Reserved certificate processing data
2293    # Reserved2 (var)    Reserved for certificate processing data
2294    # VSize     2 bytes  Size of password validation data
2295    # VData     VSize-4  Password validation data
2296    # VCRC32    4 bytes  Standard ZIP CRC32 of password validation data
2297
2298    my $central = shift ;
2299
2300    if ($central)
2301    {
2302        print "\n";
2303        out "", "CENTRAL HEADER DECRYPTION RECORD";
2304
2305    }
2306    else
2307    {
2308        print "\n";
2309        out "", "DECRYPTION HEADER RECORD";
2310    }
2311
2312    my $bytecount = 2;
2313
2314    my $IVSize = out_v "IVSize";
2315    outHexdump($IVSize, "IVData");
2316    $bytecount += $IVSize;
2317
2318    my $Size = out_V "Size";
2319    $bytecount += $Size + 4;
2320
2321    out_v "Format";
2322    out_v "AlgId", sub { $AlgIdLookup{ $_[0] } // "Unknown algorithm" } ;
2323    out_v "BitLen";
2324    out_v "Flags", sub { $FlagsLookup{ $_[0] } // "Reserved for certificate processing" } ;
2325
2326    my $ErdSize = out_v "ErdSize";
2327    outHexdump($ErdSize, "ErdData");
2328
2329    my $Reserved1_RCount = out_V "RCount";
2330    Reserved2($Reserved1_RCount);
2331
2332    my $VSize = out_v "VSize";
2333    outHexdump($VSize-4, "VData");
2334
2335    out_V "VCRC32";
2336
2337    return $bytecount ;
2338}
2339
2340sub Reserved2
2341{
2342    # APPNOTE 6.3.10, sec 7.4.3 & 7.4.4
2343
2344    my $recipients = shift;
2345
2346    return 0
2347        if $recipients == 0;
2348
2349    out_v "HashAlg", sub { $HashAlgLookup{ $_[0] } // "Unknown algorithm" } ;
2350    my $HSize = out_v "HSize" ;
2351
2352    my $ix = 1;
2353    for (0 .. $recipients-1)
2354    {
2355        my $hex = sprintf("Key #%X", $ix) ;
2356        my $RESize = out_v "RESize $hex";
2357
2358        outHexdump($HSize, "REHData $hex");
2359        outHexdump($RESize - $HSize, "REKData $hex");
2360
2361        ++ $ix;
2362    }
2363}
2364
2365sub redactData
2366{
2367    my $data = shift;
2368
2369    # Redact everything apart from directory seperators
2370    $data =~ s(.)(X)g
2371        if $opt_Redact;
2372
2373    return $data;
2374}
2375
2376sub redactFilename
2377{
2378    my $filename = shift;
2379
2380    # Redact everything apart from directory seperators
2381    $filename =~ s(.)(X)g
2382        if $opt_Redact;
2383
2384    return $filename;
2385}
2386
2387sub validateDirectory
2388{
2389    # Check that Directries are stored correctly
2390    #
2391    # 1. Filename MUST end with a "/"
2392    #    see APPNOTE 6.3.10, sec 4.3.8
2393    # 2. Uncompressed size == 0
2394    #    see APPNOTE 6.3.10, sec 4.3.8
2395    # 3. warn if compressed size > 0 and Uncompressed size == 0
2396    # 4. check for presence of DOS directory attrib in External Attributes
2397    # 5. Check for Unix  extrnal attribute S_IFDIR
2398
2399    my $offset = shift ;
2400    my $filename = shift ;
2401    my $extractVersion = shift;
2402    my $versionMadeBy = shift;
2403    my $compressedSize = shift;
2404    my $uncompressedSize = shift;
2405    my $externalAttributes = shift;
2406
2407    my $dosAttributes = $externalAttributes & 0xFFFF;
2408    my $otherAttributes = ($externalAttributes >> 16 ) &  0xFFFF;
2409
2410    my $probablyDirectory = 0;
2411    my $filenameOK = 0;
2412    my $attributesSet = 0;
2413    my $dosAttributeSet = 0;
2414    my $unixAttributeSet = 0;
2415
2416    if ($filename =~ m#/$#)
2417    {
2418        # filename claims it is a directory.
2419        $probablyDirectory = 1;
2420        $filenameOK = 1;
2421    }
2422
2423    if ($dosAttributes & 0x0010) # ATTR_DIRECTORY
2424    {
2425        $probablyDirectory = 1;
2426        $attributesSet = 1 ;
2427        $dosAttributeSet = 1 ;
2428    }
2429
2430    if ($versionMadeBy == 3 && $otherAttributes & 0x4000) # Unix & S_IFDIR
2431    {
2432        $probablyDirectory = 1;
2433        $attributesSet = 1;
2434        $unixAttributeSet = 1;
2435    }
2436
2437    return
2438        unless $probablyDirectory ;
2439
2440    error $offset + CentralDirectoryEntry::Offset_Filename(),
2441            "Directory '$filename' must end in a '/'",
2442            "'External Attributes' flag this as a directory"
2443        if ! $filenameOK && $uncompressedSize == 0;
2444
2445    info $offset + CentralDirectoryEntry::Offset_ExternalAttributes(),
2446            "DOS Directory flag not set in 'External Attributes' for Directory '$filename'"
2447        if $filenameOK && ! $dosAttributeSet;
2448
2449    info $offset + CentralDirectoryEntry::Offset_ExternalAttributes(),
2450            "Unix Directory flag not set in 'External Attributes' for Directory '$filename'"
2451        if $filenameOK && $versionMadeBy == 3 && ! $unixAttributeSet;
2452
2453    if ($uncompressedSize != 0)
2454    {
2455        # APPNOTE 6.3.10, sec 4.3.8
2456        error $offset + CentralDirectoryEntry::Offset_UncompressedSize(),
2457                "Directory '$filename' must not have a payload"
2458    }
2459    elsif ($compressedSize != 0)
2460    {
2461
2462        info $offset + CentralDirectoryEntry::Offset_CompressedSize(),
2463                "Directory '$filename' has compressed payload that uncompresses to nothing"
2464    }
2465
2466    if ($extractVersion < 20)
2467    {
2468        # APPNOTE 6.3.10, sec 4.4.3.2
2469        my $got = decodeZipVer($extractVersion);
2470        warning $offset + CentralDirectoryEntry::Offset_VersionNeededToExtract(),
2471                "'Extract Zip Spec' is '$got'. Need value >= '2.0' for Directory '$filename'"
2472    }
2473}
2474
2475sub validateFilename
2476{
2477    my $filename = shift ;
2478
2479    return "Zero length filename"
2480        if $filename eq '' ;
2481
2482    # TODO
2483    # - check length of filename
2484    #   getconf NAME_MAX . and getconf PATH_MAX . on Linux
2485
2486    # Start with APPNOTE restrictions
2487
2488    # APPNOTE 6.3.10, sec 4.4.17.1
2489    #
2490    # No absolute path
2491    # No backslash delimeters
2492    # No drive letters
2493
2494    return "Filename must not be an absolute path"
2495        if $filename =~ m#^/#;
2496
2497    return ["Backslash detected in filename", "Possible Windows path."]
2498        if $filename =~ m#\\#;
2499
2500    return "Windows Drive Letter '$1' not allowed in filename"
2501        if $filename =~ /^([a-z]:)/i ;
2502
2503    # Slip Vulnerability with use of ".." in a relative path
2504    # https://security.snyk.io/research/zip-slip-vulnerability
2505    return ["Use of '..' in filename is a Zip Slip Vulnerability",
2506            "See https://security.snyk.io/research/zip-slip-vulnerability" ]
2507        if $filename =~ m#^\.\./# || $filename =~ m#/\.\./# || $filename =~ m#/\.\.# ;
2508
2509    # Cannot have "." or ".." as the full filename
2510    return "Use of current-directory filename '.' may not unzip correctly"
2511        if $filename eq '.' ;
2512
2513    return "Use of parent-directory filename '..' may not unzip correctly"
2514        if $filename eq '..' ;
2515
2516    # Portability (mostly with Windows)
2517
2518    {
2519        # see https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file
2520        state $badDosFilename = join '|', map { quotemeta }
2521                                qw(CON  PRN  AUX  NUL
2522                                COM1 COM2 COM3 COM4 COM5 COM6 COM7 COM8 COM9
2523                                LPT1 LPT2 LPT3 LPT4 LPT5 LPT6 LPT7 LPT8 LPT9
2524                                ) ;
2525
2526        # if $filename contains any invalid codepoints, we will get a warning like this
2527        #
2528        #   Operation "pattern match (m//)" returns its argument for non-Unicode code point
2529        #
2530        # so silence it for now.
2531
2532        no warnings;
2533
2534        return "Portability Issue: '$1' is a reserved Windows device name"
2535            if $filename =~ /^($badDosFilename)$/io ;
2536
2537        # Can't have the device name with an extension either
2538        return "Portability Issue: '$1' is a reserved Windows device name"
2539            if $filename =~ /^($badDosFilename)\./io ;
2540    }
2541
2542    state $illegal_windows_chars = join '|', map { quotemeta } qw( < > : " | ? * );
2543    return "Portability Issue: Windows filename cannot contain '$1'"
2544        if  $filename =~ /($illegal_windows_chars)/o ;
2545
2546    return "Portability Issue: Null character '\\x00' is not allowed in a Windows or Linux filename"
2547        if  $filename =~ /\x00/ ;
2548
2549    return sprintf "Portability Issue: Control character '\\x%02X' is not allowed in a Windows filename", ord($1)
2550        if  $filename =~ /([\x00-\x1F])/ ;
2551
2552    return undef;
2553}
2554
2555sub getOutputFilename
2556{
2557    my $raw_filename = shift;
2558    my $LanguageEncodingFlag = shift;
2559    my $message = shift // "Filename";
2560
2561    my $filename ;
2562    my $decoded_filename;
2563
2564    if ($raw_filename eq '')
2565    {
2566        if ($message eq 'Filename')
2567        {
2568            warning $FH->tell() ,
2569                "Filename ''",
2570                "Zero Length Filename" ;
2571        }
2572
2573        return '', '', 0;
2574    }
2575    elsif ($opt_Redact)
2576    {
2577        return redactFilename($raw_filename), '', 0 ;
2578    }
2579    else
2580    {
2581        $decoded_filename = TextEncoding::decode($raw_filename, $message, $LanguageEncodingFlag) ;
2582        $filename = TextEncoding::encode($decoded_filename, $message, $LanguageEncodingFlag) ;
2583    }
2584
2585    return $filename, $decoded_filename, $filename ne $raw_filename ;
2586}
2587
2588sub outputFilename
2589{
2590    my $raw_filename = shift;
2591    my $LanguageEncodingFlag = shift;
2592    my $message = shift // "Filename";
2593
2594    my ($filename, $decoded_filename, $modified) = getOutputFilename($raw_filename, $LanguageEncodingFlag);
2595
2596    out $raw_filename, $message,  "'". $filename . "'";
2597
2598    if (! $opt_Redact && TextEncoding::debugEncoding())
2599    {
2600        # use Devel::Peek;
2601        # print "READ     " ; Dump($raw_filename);
2602        # print "INTERNAL " ; Dump($decoded_filename);
2603        # print "OUTPUT   " ; Dump($filename);
2604
2605        debug $FH->tell() - length($raw_filename),
2606                    "$message Encoding Change"
2607            if $modified ;
2608
2609        # use Unicode::Normalize;
2610        # my $NormaizedForm ;
2611        # if (defined $decoded_filename)
2612        # {
2613        #     $NormaizedForm .= Unicode::Normalize::checkNFD  $decoded_filename ? 'NFD ' : '';
2614        #     $NormaizedForm .= Unicode::Normalize::checkNFC  $decoded_filename ? 'NFC ' : '';
2615        #     $NormaizedForm .= Unicode::Normalize::checkNFKD $decoded_filename ? 'NFKD ' : '';
2616        #     $NormaizedForm .= Unicode::Normalize::checkNFKC $decoded_filename ? 'NFKC ' : '';
2617        #     $NormaizedForm .= Unicode::Normalize::checkFCD  $decoded_filename ? 'FCD ' : '';
2618        #     $NormaizedForm .= Unicode::Normalize::checkFCC  $decoded_filename ? 'FCC ' : '';
2619        # }
2620
2621        debug $FH->tell() - length($raw_filename),
2622                    "Encoding Debug for $message",
2623                    "Octets Read from File  [$raw_filename][" . length($raw_filename). "] [" . charDump2($raw_filename) . "]",
2624                    "Via Unicode Codepoints [$decoded_filename][" . length($decoded_filename) . "] [" . charDump($decoded_filename) . "]",
2625                    # "Unicode Normalization  $NormaizedForm",
2626                    "Octets Written         [$filename][" . length($filename). "] [" . charDump2($filename) . "]";
2627    }
2628
2629    if ($message eq 'Filename' && $opt_want_warning_mesages)
2630    {
2631        # Check for bad, unsafe & not portable filenames
2632        my $v = validateFilename($decoded_filename);
2633
2634        if ($v)
2635        {
2636            my @v = ref $v eq 'ARRAY'
2637                        ? @$v
2638                        : $v;
2639
2640            warning $FH->tell() - length($raw_filename),
2641                "Filename '$filename'",
2642                @v
2643        }
2644    }
2645
2646    return $filename;
2647}
2648
2649sub CentralHeader
2650{
2651    my $signature = shift ;
2652    my $data = shift ;
2653    my $startRecordOffset = shift ;
2654
2655    my $cdEntryOffset = $FH->tell() - 4 ;
2656
2657    ++ $CentralHeaderCount;
2658
2659    print "\n";
2660    out $data, "CENTRAL HEADER #$CentralHeaderCount", Value_V($signature);
2661    my $buffer;
2662
2663    need 42, Signatures::name($signature);
2664
2665    out_C "Created Zip Spec", \&decodeZipVer;
2666    my $made_by = out_C "Created OS", \&decodeOS;
2667    my $extractVer = out_C "Extract Zip Spec", \&decodeZipVer;
2668    out_C "Extract OS", \&decodeOS;
2669
2670    my ($bgp, $gpFlag) = read_v();
2671    my ($bcm, $compressedMethod) = read_v();
2672
2673    my $cdEntry = CentralDirectoryEntry->new($cdEntryOffset);
2674
2675    out $bgp, "General Purpose Flag", Value_v($gpFlag) ;
2676    GeneralPurposeBits($compressedMethod, $gpFlag);
2677    my $LanguageEncodingFlag = $gpFlag & ZIP_GP_FLAG_LANGUAGE_ENCODING ;
2678    $cdEntry->languageEncodingFlag($LanguageEncodingFlag) ;
2679
2680    out $bcm, "Compression Method", compressionMethod($compressedMethod) ;
2681    info $FH->tell() - 2, "Unknown 'Compression Method' ID " . decimalHex0x($compressedMethod, 2)
2682        if ! defined $ZIP_CompressionMethods{$compressedMethod} ;
2683
2684    my $lastMod = out_V "Modification Time", sub { LastModTime($_[0]) };
2685
2686    my $crc                = out_V "CRC";
2687    my $compressedSize   = out_V "Compressed Size";
2688    my $std_compressedSize   = $compressedSize;
2689    my $uncompressedSize = out_V "Uncompressed Size";
2690    my $std_uncompressedSize = $uncompressedSize;
2691    my $filenameLength     = out_v "Filename Length";
2692    if ($filenameLength == 0)
2693    {
2694        info $FH->tell()- 2, "Zero Length filename";
2695    }
2696    my $extraLength        = out_v "Extra Length";
2697    my $comment_length     = out_v "Comment Length";
2698    my $disk_start         = out_v "Disk Start";
2699    my $std_disk_start     = $disk_start;
2700
2701    my $int_file_attrib    = out_v "Int File Attributes";
2702    out1 "[Bit 0]",      $int_file_attrib & 1 ? "1 'Text Data'" : "0 'Binary Data'";
2703    out1 "[Bits 1-15]",  Value_v($int_file_attrib & 0xFE) . " 'Unknown'"
2704        if  $int_file_attrib & 0xFE ;
2705
2706    my $ext_file_attrib    = out_V "Ext File Attributes";
2707
2708    {
2709        # MS-DOS Attributes are bottom two bytes
2710        my $dos_attrib = $ext_file_attrib & 0xFFFF;
2711
2712        # See https://learn.microsoft.com/en-us/windows/win32/fileio/file-attribute-constants
2713        # and https://learn.microsoft.com/en-us/openspecs/windows_protocols/ms-smb/65e0c225-5925-44b0-8104-6b91339c709f
2714
2715        out1 "[Bit 0]",  "Read-Only"     if $dos_attrib & 0x0001 ;
2716        out1 "[Bit 1]",  "Hidden"        if $dos_attrib & 0x0002 ;
2717        out1 "[Bit 2]",  "System"        if $dos_attrib & 0x0004 ;
2718        out1 "[Bit 3]",  "Label"         if $dos_attrib & 0x0008 ;
2719        out1 "[Bit 4]",  "Directory"     if $dos_attrib & 0x0010 ;
2720        out1 "[Bit 5]",  "Archive"       if $dos_attrib & 0x0020 ;
2721        out1 "[Bit 6]",  "Device"        if $dos_attrib & 0x0040 ;
2722        out1 "[Bit 7]",  "Normal"        if $dos_attrib & 0x0080 ;
2723        out1 "[Bit 8]",  "Temporary"     if $dos_attrib & 0x0100 ;
2724        out1 "[Bit 9]",  "Sparse"        if $dos_attrib & 0x0200 ;
2725        out1 "[Bit 10]", "Reparse Point" if $dos_attrib & 0x0400 ;
2726        out1 "[Bit 11]", "Compressed"    if $dos_attrib & 0x0800 ;
2727
2728        out1 "[Bit 12]", "Offline"       if $dos_attrib & 0x1000 ;
2729        out1 "[Bit 13]", "Not Indexed"   if $dos_attrib & 0x2000 ;
2730
2731        # Zip files created on Mac seem to set this bit. Not clear why.
2732        out1 "[Bit 14]", "Possible Mac Flag"   if $dos_attrib & 0x4000 ;
2733
2734        # p7Zip & 7z set this bit to flag that the high 16-bits are Unix attributes
2735        out1 "[Bit 15]", "Possible p7zip/7z Unix Flag"   if $dos_attrib & 0x8000 ;
2736
2737    }
2738
2739    my $native_attrib = ($ext_file_attrib >> 16 ) &  0xFFFF;
2740
2741    if ($made_by == 3) # Unix
2742    {
2743
2744        state $mask = {
2745                0   => '---',
2746                1   => '--x',
2747                2   => '-w-',
2748                3   => '-wx',
2749                4   => 'r--',
2750                5   => 'r-x',
2751                6   => 'rw-',
2752                7   => 'rwx',
2753            } ;
2754
2755        my $rwx = ($native_attrib  &  0777);
2756
2757        if ($rwx)
2758        {
2759            my $output  = '';
2760            $output .= $mask->{ ($rwx >> 6) & 07 } ;
2761            $output .= $mask->{ ($rwx >> 3) & 07 } ;
2762            $output .= $mask->{ ($rwx >> 0) & 07 } ;
2763
2764            out1 "[Bits 16-24]",  Value_v($rwx)  . " 'Unix attrib: $output'" ;
2765            out1 "[Bit 25]",  "1 'Sticky'"
2766                if $rwx & 0x200 ;
2767            out1 "[Bit 26]",  "1 'Set GID'"
2768                if $rwx & 0x400 ;
2769            out1 "[Bit 27]",  "1 'Set UID'"
2770                if $rwx & 0x800 ;
2771
2772            my $not_rwx = (($native_attrib  >> 12) & 0xF);
2773            if ($not_rwx)
2774            {
2775                state $masks = {
2776                    0x0C =>  'Socket',           # 0x0C  0b1100
2777                    0x0A =>  'Symbolic Link',    # 0x0A  0b1010
2778                    0x08 =>  'Regular File',     # 0x08  0b1000
2779                    0x06 =>  'Block Device',     # 0x06  0b0110
2780                    0x04 =>  'Directory',        # 0x04  0b0100
2781                    0x02 =>  'Character Device', # 0x02  0b0010
2782                    0x01 =>  'FIFO',             # 0x01  0b0001
2783                };
2784
2785                my $got = $masks->{$not_rwx} // 'Unknown Unix attrib' ;
2786                out1 "[Bits 28-31]",  Value_C($not_rwx) . " '$got'"
2787            }
2788        }
2789    }
2790    elsif ($native_attrib)
2791    {
2792        out1 "[Bits 24-31]",  Value_v($native_attrib) . " 'Unknown attributes for OS ID $made_by'"
2793    }
2794
2795    my ($d, $locHeaderOffset) = read_V();
2796    my $out = Value_V($locHeaderOffset);
2797    my $std_localHeaderOffset = $locHeaderOffset;
2798
2799    if ($locHeaderOffset != MAX32)
2800    {
2801        testPossiblePrefix($locHeaderOffset, ZIP_LOCAL_HDR_SIG);
2802        if ($PREFIX_DELTA)
2803        {
2804            $out .= " [Actual Offset is " . Value_V($locHeaderOffset + $PREFIX_DELTA) . "]"
2805        }
2806    }
2807
2808    out $d, "Local Header Offset", $out;
2809
2810    if ($locHeaderOffset != MAX32)
2811    {
2812        my $commonMessage = "'Local Header Offset' field in '" . Signatures::name($signature) .  "' is invalid";
2813        $locHeaderOffset = checkOffsetValue($locHeaderOffset, $startRecordOffset, 0, $commonMessage, $startRecordOffset + CentralDirectoryEntry::Offset_RelativeOffsetToLocal(), ZIP_LOCAL_HDR_SIG) ;
2814    }
2815
2816    my $filename = '';
2817    if ($filenameLength)
2818    {
2819        need $filenameLength, Signatures::name($signature), 'Filename';
2820
2821        myRead(my $raw_filename, $filenameLength);
2822        $cdEntry->filename($raw_filename) ;
2823        $filename = outputFilename($raw_filename, $LanguageEncodingFlag);
2824        $cdEntry->outputFilename($filename);
2825    }
2826
2827    $cdEntry->centralHeaderOffset($cdEntryOffset) ;
2828    $cdEntry->localHeaderOffset($locHeaderOffset) ;
2829    $cdEntry->compressedSize($compressedSize) ;
2830    $cdEntry->uncompressedSize($uncompressedSize) ;
2831    $cdEntry->zip64ExtraPresent(undef) ; #$cdZip64; ### FIX ME
2832    $cdEntry->zip64SizesPresent(undef) ; # $zip64Sizes;   ### FIX ME
2833    $cdEntry->extractVersion($extractVer);
2834    $cdEntry->generalPurposeFlags($gpFlag);
2835    $cdEntry->compressedMethod($compressedMethod) ;
2836    $cdEntry->lastModDateTime($lastMod);
2837    $cdEntry->crc32($crc) ;
2838    $cdEntry->inCentralDir(1) ;
2839
2840    $cdEntry->std_localHeaderOffset($std_localHeaderOffset) ;
2841    $cdEntry->std_compressedSize($std_compressedSize) ;
2842    $cdEntry->std_uncompressedSize($std_uncompressedSize) ;
2843    $cdEntry->std_diskNumber($std_disk_start) ;
2844
2845    if ($extraLength)
2846    {
2847        need $extraLength, Signatures::name($signature), 'Extra';
2848
2849        walkExtra($extraLength, $cdEntry);
2850    }
2851
2852    # $cdEntry->endCentralHeaderOffset($FH->tell() - 1);
2853
2854    # Can only validate for directory after zip64 data is read
2855    validateDirectory($cdEntryOffset, $filename, $extractVer, $made_by,
2856        $cdEntry->compressedSize, $cdEntry->uncompressedSize, $ext_file_attrib);
2857
2858    if ($comment_length)
2859    {
2860        need $comment_length, Signatures::name($signature), 'Comment';
2861
2862        my $comment ;
2863        myRead($comment, $comment_length);
2864        outputFilename $comment, $LanguageEncodingFlag, "Comment";
2865        $cdEntry->comment($comment);
2866    }
2867
2868    $cdEntry->offsetStart($cdEntryOffset) ;
2869    $cdEntry->offsetEnd($FH->tell() - 1) ;
2870
2871    $CentralDirectory->addEntry($cdEntry);
2872
2873    return { 'encapsulated' => $cdEntry ? $cdEntry->encapsulated() : 0};
2874}
2875
2876sub decodeZipVer
2877{
2878    my $ver = shift ;
2879
2880    return ""
2881        if ! defined $ver;
2882
2883    my $sHi = int($ver /10) ;
2884    my $sLo = $ver % 10 ;
2885
2886    "$sHi.$sLo";
2887}
2888
2889sub decodeOS
2890{
2891    my $ver = shift ;
2892
2893    $OS_Lookup{$ver} || "Unknown" ;
2894}
2895
2896sub Zip64EndCentralHeader
2897{
2898    # Extra ID is 0x0001
2899
2900    # APPNOTE 6.3.10, section 4.3.14, 7.3.3, 7.3.4 & APPENDIX C
2901
2902    # TODO - APPNOTE allows an extensible data sector at end of this record (see APPNOTE 6.3.10, section 4.3.14.4)
2903    # The code below does NOT take this into account.
2904
2905    my $signature = shift ;
2906    my $data = shift ;
2907    my $startRecordOffset = shift ;
2908
2909    print "\n";
2910    out $data, "ZIP64 END CENTRAL DIR RECORD", Value_V($signature);
2911
2912    need 8, Signatures::name($signature);
2913
2914    my $size = out_Q "Size of record";
2915
2916    need $size, Signatures::name($signature);
2917
2918                              out_C  "Created Zip Spec", \&decodeZipVer;
2919                              out_C  "Created OS", \&decodeOS;
2920    my $extractSpec         = out_C  "Extract Zip Spec", \&decodeZipVer;
2921                              out_C  "Extract OS", \&decodeOS;
2922    my $diskNumber          = out_V  "Number of this disk";
2923    my $cdDiskNumber        = out_V  "Central Dir Disk no";
2924    my $entriesOnThisDisk   = out_Q  "Entries in this disk";
2925    my $totalEntries        = out_Q  "Total Entries";
2926    my $centralDirSize      = out_Q  "Size of Central Dir";
2927
2928    my ($d, $centralDirOffset) = read_Q();
2929    my $out = Value_Q($centralDirOffset);
2930    testPossiblePrefix($centralDirOffset, ZIP_CENTRAL_HDR_SIG);
2931
2932    $out .= " [Actual Offset is " . Value_Q($centralDirOffset + $PREFIX_DELTA) . "]"
2933        if $PREFIX_DELTA ;
2934    out $d, "Offset to Central dir", $out;
2935
2936    if (! emptyArchive($startRecordOffset, $diskNumber, $cdDiskNumber, $entriesOnThisDisk, $totalEntries,  $centralDirSize, $centralDirOffset))
2937    {
2938        my $commonMessage = "'Offset to Central Directory' field in '" . Signatures::name($signature) . "' is invalid";
2939        $centralDirOffset = checkOffsetValue($centralDirOffset, $startRecordOffset, $centralDirSize, $commonMessage, $startRecordOffset + 48, ZIP_CENTRAL_HDR_SIG, 0, $extractSpec < 0x3E) ;
2940    }
2941
2942    # Length of 44 means typical version 1 header
2943    return
2944        if $size == 44 ;
2945
2946    my $remaining = $size - 44;
2947
2948    # pkzip sets the extract zip spec to 6.2 (0x3E) to signal a v2 record
2949    # See APPNOTE 6.3.10, section, 7.3.3
2950
2951    if ($extractSpec >= 0x3E)
2952    {
2953        # Version 2 header (see APPNOTE 6.3.7, section  7.3.4, )
2954        # Can use version 2 header to infer presence of encrypted CD
2955        $CentralDirectory->setPkEncryptedCD();
2956
2957
2958        # Compression Method    2 bytes    Method used to compress the
2959        #                                  Central Directory
2960        # Compressed Size       8 bytes    Size of the compressed data
2961        # Original   Size       8 bytes    Original uncompressed size
2962        # AlgId                 2 bytes    Encryption algorithm ID
2963        # BitLen                2 bytes    Encryption key length
2964        # Flags                 2 bytes    Encryption flags
2965        # HashID                2 bytes    Hash algorithm identifier
2966        # Hash Length           2 bytes    Length of hash data
2967        # Hash Data             (variable) Hash data
2968
2969        my ($bcm, $compressedMethod) = read_v();
2970        out $bcm, "Compression Method", compressionMethod($compressedMethod) ;
2971        info $FH->tell() - 2, "Unknown 'Compression Method' ID " . decimalHex0x($compressedMethod, 2)
2972            if ! defined $ZIP_CompressionMethods{$compressedMethod} ;
2973        out_Q "Compressed Size";
2974        out_Q "Uncompressed Size";
2975        out_v "AlgId", sub { $AlgIdLookup{ $_[0] } // "Unknown algorithm" } ;
2976        out_v "BitLen";
2977        out_v "Flags", sub { $FlagsLookup{ $_[0] } // "reserved for certificate processing" } ;
2978        out_v "HashID", sub { $HashIDLookup{ $_[0] } // "Unknown ID" } ;
2979
2980        my $hashLen = out_v "Hash Length ";
2981        outHexdump($hashLen, "Hash Data");
2982
2983        $remaining -= $hashLen + 28;
2984    }
2985
2986    my $entry = Zip64EndCentralHeaderEntry->new();
2987
2988    if ($remaining)
2989    {
2990        # Handle 'zip64 extensible data sector' here
2991        # See APPNOTE 6.3.10, section 4.3.14.3, 4.3.14.4 & APPENDIX C
2992        # Not seen a real example of this. Tested with hand crafted files.
2993        walkExtra($remaining, $entry);
2994    }
2995
2996    return {};
2997}
2998
2999
3000sub Zip64EndCentralLocator
3001{
3002    # APPNOTE 6.3.10, sec 4.3.15
3003
3004    my $signature = shift ;
3005    my $data = shift ;
3006    my $startRecordOffset = shift ;
3007
3008    print "\n";
3009    out $data, "ZIP64 END CENTRAL DIR LOCATOR", Value_V($signature);
3010
3011    need 16, Signatures::name($signature);
3012
3013    # my ($nextRecord, $deltaActuallyAvailable) = $HeaderOffsetIndex->checkForOverlap(16);
3014
3015    # if ($deltaActuallyAvailable)
3016    # {
3017    #     fatal_truncated_record(
3018    #         sprintf("ZIP64 END CENTRAL DIR LOCATOR \@%X truncated", $FH->tell() - 4),
3019    #         sprintf("Need 0x%X bytes, have 0x%X available", 16, $deltaActuallyAvailable),
3020    #         sprintf("Next Record is %s \@0x%X", $nextRecord->name(), $nextRecord->offset())
3021    #         )
3022    # }
3023
3024    # TODO - check values for traces of multi-part + crazy offsets
3025    out_V  "Central Dir Disk no";
3026
3027    my ($d, $zip64EndCentralDirOffset) = read_Q();
3028    my $out = Value_Q($zip64EndCentralDirOffset);
3029    testPossiblePrefix($zip64EndCentralDirOffset, ZIP64_END_CENTRAL_REC_HDR_SIG);
3030
3031    $out .= " [Actual Offset is " . Value_Q($zip64EndCentralDirOffset + $PREFIX_DELTA) . "]"
3032        if $PREFIX_DELTA ;
3033    out $d, "Offset to Zip64 EOCD", $out;
3034
3035    my $totalDisks = out_V  "Total no of Disks";
3036
3037    if ($totalDisks > 0)
3038    {
3039        my $commonMessage = "'Offset to Zip64 End of Central Directory Record' field in '" . Signatures::name($signature) . "' is invalid";
3040        $zip64EndCentralDirOffset = checkOffsetValue($zip64EndCentralDirOffset, $startRecordOffset, 0, $commonMessage, $FH->tell() - 12, ZIP64_END_CENTRAL_REC_HDR_SIG) ;
3041    }
3042
3043    return {};
3044}
3045
3046sub needZip64EOCDLocator
3047{
3048    # zip64 end of central directory field needed if any of the fields
3049    # in the End Central Header record are maxed out
3050
3051    my $diskNumber          = shift ;
3052    my $cdDiskNumber        = shift ;
3053    my $entriesOnThisDisk   = shift ;
3054    my $totalEntries        = shift ;
3055    my $centralDirSize      = shift ;
3056    my $centralDirOffset    = shift ;
3057
3058    return  (full16($diskNumber)        || # 4.4.19
3059             full16($cdDiskNumber)      || # 4.4.20
3060             full16($entriesOnThisDisk) || # 4.4.21
3061             full16($totalEntries)      || # 4.4.22
3062             full32($centralDirSize)    || # 4.4.23
3063             full32($centralDirOffset)     # 4.4.24
3064             ) ;
3065}
3066
3067sub emptyArchive
3068{
3069    my $offset              = shift;
3070    my $diskNumber          = shift ;
3071    my $cdDiskNumber        = shift ;
3072    my $entriesOnThisDisk   = shift ;
3073    my $totalEntries        = shift ;
3074    my $centralDirSize      = shift ;
3075    my $centralDirOffset    = shift ;
3076
3077    return  (#$offset == 0           &&
3078             $diskNumber == 0        &&
3079             $cdDiskNumber == 0      &&
3080             $entriesOnThisDisk == 0 &&
3081             $totalEntries == 0      &&
3082             $centralDirSize == 0    &&
3083             $centralDirOffset== 0
3084             ) ;
3085}
3086
3087sub EndCentralHeader
3088{
3089    # APPNOTE 6.3.10, sec 4.3.16
3090
3091    my $signature = shift ;
3092    my $data = shift ;
3093    my $startRecordOffset = shift ;
3094
3095    print "\n";
3096    out $data, "END CENTRAL HEADER", Value_V($signature);
3097
3098    need 18, Signatures::name($signature);
3099
3100    # TODO - check values for traces of multi-part + crazy values
3101    my $diskNumber          = out_v "Number of this disk";
3102    my $cdDiskNumber        = out_v "Central Dir Disk no";
3103    my $entriesOnThisDisk   = out_v "Entries in this disk";
3104    my $totalEntries        = out_v "Total Entries";
3105    my $centralDirSize      = out_V "Size of Central Dir";
3106
3107    my ($d, $centralDirOffset) = read_V();
3108    my $out = Value_V($centralDirOffset);
3109    testPossiblePrefix($centralDirOffset, ZIP_CENTRAL_HDR_SIG);
3110
3111    $out .= " [Actual Offset is " . Value_V($centralDirOffset + $PREFIX_DELTA) . "]"
3112        if $PREFIX_DELTA  && $centralDirOffset != MAX32 ;
3113    out $d, "Offset to Central Dir", $out;
3114
3115    my $comment_length = out_v "Comment Length";
3116
3117    if ($comment_length)
3118    {
3119        my $here = $FH->tell() ;
3120        my $available = $FILELEN - $here ;
3121        if ($available < $comment_length)
3122        {
3123            error $here,
3124                  "file truncated while reading 'Comment' field in '" . Signatures::name($signature) . "'",
3125                  expectedMessage($comment_length, $available);
3126            $comment_length = $available;
3127        }
3128
3129        if ($comment_length)
3130        {
3131            my $comment ;
3132            myRead($comment, $comment_length);
3133            outputFilename $comment, 0, "Comment";
3134        }
3135    }
3136
3137    if ( ! Nesting::isNested($startRecordOffset, $FH->tell()  -1))
3138    {
3139        # Not nested
3140        if (! needZip64EOCDLocator($diskNumber, $cdDiskNumber, $entriesOnThisDisk, $totalEntries,  $centralDirSize, $centralDirOffset) &&
3141            ! emptyArchive($startRecordOffset, $diskNumber, $cdDiskNumber, $entriesOnThisDisk, $totalEntries,  $centralDirSize, $centralDirOffset))
3142        {
3143            my $commonMessage = "'Offset to Central Directory' field in '"  . Signatures::name($signature) .  "' is invalid";
3144            $centralDirOffset = checkOffsetValue($centralDirOffset, $startRecordOffset, $centralDirSize, $commonMessage, $startRecordOffset + 16, ZIP_CENTRAL_HDR_SIG) ;
3145        }
3146    }
3147    # else do nothing
3148
3149    return {};
3150}
3151
3152sub DataDescriptor
3153{
3154
3155    # Data header record or Spanned archive marker.
3156    #
3157
3158    # ZIP_DATA_HDR_SIG at start of file flags a spanned zip file.
3159    # If it is a true marker, the next four bytes MUST be a ZIP_LOCAL_HDR_SIG
3160    # See APPNOTE 6.3.10, sec 8.5.3, 8.5.4 & 8.5.5
3161
3162    # If not at start of file, assume a Data Header Record
3163    # See APPNOTE 6.3.10, sec 4.3.9 & 4.3.9.3
3164
3165    my $signature = shift ;
3166    my $data = shift ;
3167    my $startRecordOffset = shift ;
3168
3169    my $here = $FH->tell();
3170
3171    if ($here == 4)
3172    {
3173        # Spanned Archive Marker
3174        out $data, "SPLIT ARCHIVE MULTI-SEGMENT MARKER", Value_V($signature);
3175        return;
3176
3177        # my (undef, $next_sig) = read_V();
3178        # seekTo(0);
3179
3180        # if ($next_sig == ZIP_LOCAL_HDR_SIG)
3181        # {
3182        #     print "\n";
3183        #     out $data, "SPLIT ARCHIVE MULTI-SEGMENT MARKER", Value_V($signature);
3184        #     seekTo($here);
3185        #     return;
3186        # }
3187    }
3188
3189    my $sigName = Signatures::titleName(ZIP_DATA_HDR_SIG);
3190
3191    print "\n";
3192    out $data, $sigName, Value_V($signature);
3193
3194    need  24, Signatures::name($signature);
3195
3196    # Ignore header payload if nested (assume 64-bit descriptor)
3197    if (Nesting::isNested( $here - 4, $here - 4 + 24 - 1))
3198    {
3199        out "",  "Skipping Nested Payload";
3200        return {};
3201    }
3202
3203    my $compressedSize;
3204    my $uncompressedSize;
3205
3206    my $localEntry = $LocalDirectory->lastStreamedEntryAdded();
3207    my $centralEntry =  $localEntry && $localEntry->getCdEntry ;
3208
3209    if (!$localEntry)
3210    {
3211        # found a Data Descriptor without a local header
3212        out "",  "Skipping Data Descriptor", "No matching Local header with streaming bit set";
3213        error $here - 4, "Orphan '$sigName' found", "No matching Local header with streaming bit set";
3214        return {};
3215    }
3216
3217    my $crc = out_V "CRC";
3218    my $payloadLength = $here - 4 - $localEntry->payloadOffset;
3219
3220    my $deltaToNext = deltaToNextSignature();
3221    my $cl32 = unpack "V",  peekAtOffset($here + 4, 4);
3222    my $cl64 = unpack "Q<", peekAtOffset($here + 4, 8);
3223
3224    # use delta to next header & payload length
3225    # deals with use case where the payload length < 32 bit
3226    # will use a 32-bit value rather than the 64-bit value
3227
3228    # see if delta & payload size match
3229    if ($deltaToNext == 16 && $cl64 == $payloadLength)
3230    {
3231        if (! $localEntry->zip64 && ($centralEntry && ! $centralEntry->zip64))
3232        {
3233            error $here, "'$sigName': expected 32-bit values, got 64-bit";
3234        }
3235
3236        $compressedSize   = out_Q "Compressed Size" ;
3237        $uncompressedSize = out_Q "Uncompressed Size" ;
3238    }
3239    elsif ($deltaToNext == 8 && $cl32 == $payloadLength)
3240    {
3241        if ($localEntry->zip64)
3242        {
3243            error $here, "'$sigName': expected 64-bit values, got 32-bit";
3244        }
3245
3246        $compressedSize   = out_V "Compressed Size" ;
3247        $uncompressedSize = out_V "Uncompressed Size" ;
3248    }
3249
3250    # Try matching juast payload lengths
3251    elsif ($cl32 == $payloadLength)
3252    {
3253        if ($localEntry->zip64)
3254        {
3255            error $here, "'$sigName': expected 64-bit values, got 32-bit";
3256        }
3257
3258        $compressedSize   = out_V "Compressed Size" ;
3259        $uncompressedSize = out_V "Uncompressed Size" ;
3260
3261        warning $here, "'$sigName': Zip Header not directly after Data Descriptor";
3262    }
3263    elsif ($cl64 == $payloadLength)
3264    {
3265        if (! $localEntry->zip64 && ($centralEntry && ! $centralEntry->zip64))
3266        {
3267            error $here, "'$sigName': expected 32-bit values, got 64-bit";
3268        }
3269
3270        $compressedSize   = out_Q "Compressed Size" ;
3271        $uncompressedSize = out_Q "Uncompressed Size" ;
3272
3273        warning $here, "'$sigName': Zip Header not directly after Data Descriptor";
3274    }
3275
3276    # payloads don't match, so try delta
3277    elsif ($deltaToNext == 16)
3278    {
3279        if (! $localEntry->zip64 && ($centralEntry && ! $centralEntry->zip64))
3280        {
3281            error $here, "'$sigName': expected 32-bit values, got 64-bit";
3282        }
3283
3284        $compressedSize   = out_Q "Compressed Size" ;
3285        # compressed size is wrong
3286        error $here, "'$sigName': Compressed size" . decimalHex0x($compressedSize) . " doesn't match with payload size " . decimalHex0x($payloadLength);
3287
3288        $uncompressedSize = out_Q "Uncompressed Size" ;
3289    }
3290    elsif ($deltaToNext == 8 )
3291    {
3292        if ($localEntry->zip64)
3293        {
3294            error $here, "'$sigName': expected 64-bit values, got 32-bit";
3295        }
3296
3297        $compressedSize   = out_V "Compressed Size" ;
3298        # compressed size is wrong
3299        error $here, "'$sigName': Compressed Size " . decimalHex0x($compressedSize) . " doesn't match with payload size " . decimalHex0x($payloadLength);
3300
3301        $uncompressedSize = out_V "Uncompressed Size" ;
3302    }
3303
3304    # no payoad or delta match at all, so likely a false positive or data corruption
3305    else
3306    {
3307        warning $here, "Cannot determine size of Data Descriptor record";
3308    }
3309
3310    # TODO - neither payload size or delta to next signature match
3311
3312    if ($localEntry)
3313    {
3314        $localEntry->readDataDescriptor(1) ;
3315        $localEntry->crc32($crc) ;
3316        $localEntry->compressedSize($compressedSize) ;
3317        $localEntry->uncompressedSize($uncompressedSize) ;
3318    }
3319
3320    # APPNOTE 6.3.10, sec 4.3.8
3321    my $filename = $localEntry->filename;
3322    warning undef, "Directory '$filename' must not have a payload"
3323        if  $filename =~ m#/$# && $uncompressedSize ;
3324
3325    return {
3326        crc => $crc,
3327        compressedSize => $compressedSize,
3328        uncompressedSize => $uncompressedSize,
3329    };
3330}
3331
3332sub SingleSegmentMarker
3333{
3334    # ZIP_SINGLE_SEGMENT_MARKER at start of file flags a spanned zip file.
3335    # If this ia a true marker, the next four bytes MUST be a ZIP_LOCAL_HDR_SIG
3336    # See APPNOTE 6.3.10, sec 8.5.3, 8.5.4 & 8.5.5
3337
3338    my $signature = shift ;
3339    my $data = shift ;
3340    my $startRecordOffset = shift ;
3341
3342    my $here = $FH->tell();
3343
3344    if ($here == 4)
3345    {
3346        my (undef, $next_sig) = read_V();
3347        if ($next_sig == ZIP_LOCAL_HDR_SIG)
3348        {
3349            print "\n";
3350            out $data, "SPLIT ARCHIVE SINGLE-SEGMENT MARKER", Value_V($signature);
3351        }
3352        seekTo($here);
3353    }
3354
3355    return {};
3356}
3357
3358sub ArchiveExtraDataRecord
3359{
3360    # TODO - not seen an example of this record
3361
3362    # APPNOTE 6.3.10, sec 4.3.11
3363
3364    my $signature = shift ;
3365    my $data = shift ;
3366    my $startRecordOffset = shift ;
3367
3368    out $data, "ARCHIVE EXTRA DATA RECORD", Value_V($signature);
3369
3370    need 2, Signatures::name($signature);
3371
3372    my $size = out_v "Size of record";
3373
3374    need $size, Signatures::name($signature);
3375
3376    outHexdump($size, "Field data", 1);
3377
3378    return {};
3379}
3380
3381sub DigitalSignature
3382{
3383    my $signature = shift ;
3384    my $data = shift ;
3385    my $startRecordOffset = shift ;
3386
3387    print "\n";
3388    out $data, "DIGITAL SIGNATURE RECORD", Value_V($signature);
3389
3390    need 2, Signatures::name($signature);
3391    my $Size = out_v "Size of record";
3392
3393    need $Size, Signatures::name($signature);
3394
3395
3396    myRead(my $payload, $Size);
3397    out $payload, "Signature", hexDump16($payload);
3398
3399    return {};
3400}
3401
3402sub GeneralPurposeBits
3403{
3404    my $method = shift;
3405    my $gp = shift;
3406
3407    out1 "[Bit  0]", "1 'Encryption'" if $gp & ZIP_GP_FLAG_ENCRYPTED_MASK;
3408
3409    my %lookup = (
3410        0 =>    "Normal Compression",
3411        1 =>    "Maximum Compression",
3412        2 =>    "Fast Compression",
3413        3 =>    "Super Fast Compression");
3414
3415
3416    if ($method == ZIP_CM_DEFLATE)
3417    {
3418        my $mid = ($gp >> 1) & 0x03 ;
3419
3420        out1 "[Bits 1-2]", "$mid '$lookup{$mid}'";
3421    }
3422
3423    if ($method == ZIP_CM_LZMA)
3424    {
3425        if ($gp & ZIP_GP_FLAG_LZMA_EOS_PRESENT) {
3426            out1 "[Bit 1]", "1 'LZMA EOS Marker Present'" ;
3427        }
3428        else {
3429            out1 "[Bit 1]", "0 'LZMA EOS Marker Not Present'" ;
3430        }
3431    }
3432
3433    if ($method == ZIP_CM_IMPLODE) # Imploding
3434    {
3435        out1 "[Bit 1]", ($gp & (1 << 1) ? "1 '8k" : "0 '4k") . " Sliding Dictionary'" ;
3436        out1 "[Bit 2]", ($gp & (2 << 1) ? "1 '3" : "0 '2"  ) . " Shannon-Fano Trees'" ;
3437    }
3438
3439    out1 "[Bit  3]", "1 'Streamed'"           if $gp & ZIP_GP_FLAG_STREAMING_MASK;
3440    out1 "[Bit  4]", "1 'Enhanced Deflating'" if $gp & 1 << 4;
3441    out1 "[Bit  5]", "1 'Compressed Patched'" if $gp & ZIP_GP_FLAG_PATCHED_MASK ;
3442    out1 "[Bit  6]", "1 'Strong Encryption'"  if $gp & ZIP_GP_FLAG_STRONG_ENCRYPTED_MASK;
3443    out1 "[Bit 11]", "1 'Language Encoding'"  if $gp & ZIP_GP_FLAG_LANGUAGE_ENCODING;
3444    out1 "[Bit 12]", "1 'Pkware Enhanced Compression'"  if $gp & ZIP_GP_FLAG_PKWARE_ENHANCED_COMP ;
3445    out1 "[Bit 13]", "1 'Encrypted Central Dir'"  if $gp & ZIP_GP_FLAG_ENCRYPTED_CD ;
3446
3447    return ();
3448}
3449
3450
3451sub seekSet
3452{
3453    my $fh = $_[0] ;
3454    my $size = $_[1];
3455
3456    use Fcntl qw(SEEK_SET);
3457    seek($fh, $size, SEEK_SET);
3458
3459}
3460
3461sub skip
3462{
3463    my $fh = $_[0] ;
3464    my $size = $_[1];
3465
3466    use Fcntl qw(SEEK_CUR);
3467    seek($fh, $size, SEEK_CUR);
3468
3469}
3470
3471
3472sub myRead
3473{
3474    my $got = \$_[0] ;
3475    my $size = $_[1];
3476
3477    my $wantSize = $size;
3478    $$got = '';
3479
3480    if ($size == 0)
3481    {
3482        return ;
3483    }
3484
3485    if ($size > 0)
3486    {
3487        my $buff ;
3488        my $status = $FH->read($buff, $size);
3489        return $status
3490            if $status < 0;
3491        $$got .= $buff ;
3492    }
3493
3494    my $len = length $$got;
3495    # fatal undef, "Truncated file (got $len, wanted $wantSize): $!"
3496    fatal undef, "Unexpected zip file truncation",
3497                expectedMessage($wantSize, $len)
3498        if length $$got != $wantSize;
3499}
3500
3501sub expectedMessage
3502{
3503    my $expected = shift;
3504    my $got = shift;
3505    return "Expected " . decimalHex0x($expected) . " bytes, but only " . decimalHex0x($got) . " available"
3506}
3507
3508sub need
3509{
3510    my $byteCount = shift ;
3511    my $message = shift ;
3512    my $field = shift // '';
3513
3514    # return $FILELEN - $FH->tell() >= $byteCount;
3515    my $here = $FH->tell() ;
3516    my $available = $FILELEN - $here ;
3517    if ($available < $byteCount)
3518    {
3519        my @message ;
3520
3521        if ($field)
3522        {
3523            push @message, "Unexpected zip file truncation while reading '$field' field in '$message'";
3524        }
3525        else
3526        {
3527            push @message, "Unexpected zip file truncation while reading '$message'";
3528        }
3529
3530
3531        push @message, expectedMessage($byteCount, $available);
3532        # push @message, sprintf("Expected 0x%X bytes, but only 0x%X available", $byteCount, $available);
3533        push @message, "Try running with --walk' or '--scan' options"
3534            if ! $opt_scan && ! $opt_walk ;
3535
3536        fatal $here, @message;
3537    }
3538}
3539
3540sub testPossiblePrefix
3541{
3542    my $offset = shift;
3543    my $expectedSignature = shift ;
3544
3545    if (testPossiblePrefixNoPREFIX_DELTA($offset, $expectedSignature))
3546    {
3547        $PREFIX_DELTA = $POSSIBLE_PREFIX_DELTA;
3548        $POSSIBLE_PREFIX_DELTA = 0;
3549
3550        reportPrefixData();
3551
3552        return 1
3553    }
3554
3555    return 0
3556}
3557
3558sub testPossiblePrefixNoPREFIX_DELTA
3559{
3560    my $offset = shift;
3561    my $expectedSignature = shift ;
3562
3563    return 0
3564        if $offset + 4 > $FILELEN || ! $POSSIBLE_PREFIX_DELTA || $PREFIX_DELTA;
3565
3566    my $currentOFFSET = $OFFSET;
3567    my $gotSig = readSignatureFromOffset($offset);
3568
3569    if ($gotSig == $expectedSignature)
3570    {
3571        # do have possible prefix data, but the offset is correct
3572        $POSSIBLE_PREFIX_DELTA = $PREFIX_DELTA = 0;
3573        $OFFSET = $currentOFFSET;
3574
3575        return 0;
3576    }
3577
3578    $gotSig = readSignatureFromOffset($offset + $POSSIBLE_PREFIX_DELTA);
3579
3580    $OFFSET = $currentOFFSET;
3581
3582    return  ($gotSig == $expectedSignature) ;
3583}
3584
3585sub offsetIsValid
3586{
3587    my $offset = shift;
3588    my $headerStart = shift;
3589    my $centralDirSize = shift;
3590    my $commonMessage = shift ;
3591    my $expectedSignature = shift ;
3592    my $dereferencePointer = shift;
3593
3594    my $must_point_back = 1;
3595
3596    my $delta = $offset - $FILELEN + 1 ;
3597
3598    $offset += $PREFIX_DELTA
3599        if $PREFIX_DELTA ;
3600
3601    return sprintf("value %s is %s bytes past EOF", decimalHex0x($offset), decimalHex0x($delta))
3602        if $delta > 0 ;
3603
3604    return sprintf "value %s must be less that %s", decimalHex0x($offset), decimalHex0x($headerStart)
3605        if $must_point_back && $offset >= $headerStart;
3606
3607    if ($dereferencePointer)
3608    {
3609        my $actual = $headerStart - $centralDirSize;
3610        my $cdSizeOK = ($actual == $offset);
3611        my $possibleDelta = $actual - $offset;
3612
3613        if ($centralDirSize && ! $cdSizeOK && $possibleDelta > 0 && readSignatureFromOffset($possibleDelta) == ZIP_LOCAL_HDR_SIG)
3614        {
3615            # If testing end of central dir, check if the location of the first CD header
3616            # is consistent with the central dir size.
3617            # Common use case is a SFX zip file
3618
3619            my $gotSig = readSignatureFromOffset($actual);
3620            my $v = hexValue32($gotSig);
3621            return 'value @ ' .  hexValue($actual) . " should decode to signature for " . Signatures::nameAndHex($expectedSignature) . ". Got $v" # . hexValue32($gotSig)
3622                if $gotSig != $expectedSignature ;
3623
3624            $PREFIX_DELTA = $possibleDelta;
3625            reportPrefixData();
3626
3627            return undef;
3628        }
3629        else
3630        {
3631            my $gotSig = readSignatureFromOffset($offset);
3632            my $v = hexValue32($gotSig);
3633            return 'value @ ' .  hexValue($offset) . " should decode to signature for " . Signatures::nameAndHex($expectedSignature) . ". Got $v" # . hexValue32($gotSig)
3634                if $gotSig != $expectedSignature ;
3635        }
3636    }
3637
3638    return undef ;
3639}
3640
3641sub checkOffsetValue
3642{
3643    my $offset = shift;
3644    my $headerStart = shift;
3645    my $centralDirSize = shift;
3646    my $commonMessage = shift ;
3647    my $messageOffset = shift;
3648    my $expectedSignature = shift ;
3649    my $fatal = shift // 0;
3650    my $dereferencePointer = shift // 1;
3651
3652    my $keepOFFSET = $OFFSET ;
3653
3654    my $message = offsetIsValid($offset, $headerStart, $centralDirSize, $commonMessage, $expectedSignature, $dereferencePointer);
3655    if ($message)
3656    {
3657        fatal_tryWalk($messageOffset, $commonMessage, $message)
3658            if $fatal;
3659
3660        error $messageOffset, $commonMessage, $message
3661            if ! $fatal;
3662    }
3663
3664    $OFFSET = $keepOFFSET;
3665
3666    return $offset + $PREFIX_DELTA;
3667
3668}
3669
3670sub fatal_tryWalk
3671{
3672    my $offset   = shift ;
3673    my $message = shift;
3674
3675    fatal($offset, $message, @_, "Try running with --walk' or '--scan' options");
3676}
3677
3678sub fatal
3679{
3680    my $offset   = shift ;
3681    my $message = shift;
3682
3683    return if $fatalDisabled;
3684
3685    if (defined $offset)
3686    {
3687        warn "#\n# FATAL: Offset " . hexValue($offset) . ": $message\n";
3688    }
3689    else
3690    {
3691        warn "#\n# FATAL: $message\n";
3692    }
3693
3694    warn  "#        $_ . \n"
3695        for @_;
3696    warn "#\n" ;
3697
3698    exit 1;
3699}
3700
3701sub disableFatal
3702{
3703    $fatalDisabled = 1 ;
3704}
3705
3706sub enableFatal
3707{
3708    $fatalDisabled = 0 ;
3709}
3710
3711sub topLevelFatal
3712{
3713    my $message = shift ;
3714
3715    no warnings 'utf8';
3716
3717    warn "FATAL: $message\n";
3718
3719    warn  "$_ . \n"
3720        for @_;
3721
3722    exit 1;
3723}
3724
3725sub internalFatal
3726{
3727    my $offset   = shift ;
3728    my $message = shift;
3729
3730    no warnings 'utf8';
3731
3732    if (defined $offset)
3733    {
3734        warn "# FATAL: Offset " . hexValue($offset) . ": Internal Error: $message\n";
3735    }
3736    else
3737    {
3738        warn "# FATAL: Internal Error: $message\n";
3739    }
3740
3741    warn "#        $_ \n"
3742        for @_;
3743
3744    warn "#        Please report error at https://github.com/pmqs/zipdetails/issues\n";
3745    exit 1;
3746}
3747
3748sub warning
3749{
3750    my $offset   = shift ;
3751    my $message  = shift;
3752
3753    no warnings 'utf8';
3754
3755    return
3756        unless $opt_want_warning_mesages ;
3757
3758    say "#"
3759        unless $lastWasMessage ++ ;
3760
3761    if (defined $offset)
3762    {
3763        say "# WARNING: Offset " . hexValue($offset) . ": $message";
3764    }
3765    else
3766    {
3767        say "# WARNING: $message";
3768    }
3769
3770
3771    say "#          $_" for @_ ;
3772    say "#";
3773    ++ $WarningCount ;
3774
3775    $exit_status_code |= 2
3776        if $opt_want_message_exit_status ;
3777}
3778
3779sub error
3780{
3781    my $offset   = shift ;
3782    my $message  = shift;
3783
3784    no warnings 'utf8';
3785
3786    return
3787        unless $opt_want_error_mesages ;
3788
3789    say "#"
3790        unless $lastWasMessage ++ ;
3791
3792    if (defined $offset)
3793    {
3794        say "# ERROR: Offset " . hexValue($offset) . ": $message";
3795    }
3796    else
3797    {
3798        say "# ERROR: $message";
3799    }
3800
3801
3802    say "#        $_" for @_ ;
3803    say "#";
3804
3805    ++ $ErrorCount ;
3806
3807    $exit_status_code |= 4
3808        if $opt_want_message_exit_status ;
3809}
3810
3811sub debug
3812{
3813    my $offset   = shift ;
3814    my $message  = shift;
3815
3816    no warnings 'utf8';
3817
3818    say "#"
3819        unless $lastWasMessage ++ ;
3820
3821    if (defined $offset)
3822    {
3823        say "# DEBUG: Offset " . hexValue($offset) . ": $message";
3824    }
3825    else
3826    {
3827        say "# DEBUG: $message";
3828    }
3829
3830
3831    say "#        $_" for @_ ;
3832    say "#";
3833}
3834
3835sub internalError
3836{
3837    my $message  = shift;
3838
3839    no warnings 'utf8';
3840
3841    say "#";
3842    say "# ERROR: $message";
3843    say "#        $_" for @_ ;
3844    say "#        Please report error at https://github.com/pmqs/zipdetails/issues";
3845    say "#";
3846
3847    ++ $ErrorCount ;
3848}
3849
3850sub reportPrefixData
3851{
3852    my $delta = shift // $PREFIX_DELTA ;
3853    state $reported = 0;
3854    return if $reported || $delta == 0;
3855
3856    info 0, "found " . decimalHex0x($delta) . " bytes before beginning of zipfile" ;
3857    $reported = 1;
3858}
3859
3860sub info
3861{
3862    my $offset   = shift;
3863    my $message  = shift;
3864
3865    no warnings 'utf8';
3866
3867    return
3868        unless $opt_want_info_mesages ;
3869
3870    say "#"
3871        unless $lastWasMessage ++ ;
3872
3873    if (defined $offset)
3874    {
3875        say "# INFO: Offset " . hexValue($offset) . ": $message";
3876    }
3877    else
3878    {
3879        say "# INFO: $message";
3880    }
3881
3882    say "#       $_" for @_ ;
3883    say "#";
3884
3885    ++ $InfoCount ;
3886
3887    $exit_status_code |= 1
3888        if $opt_want_message_exit_status ;
3889}
3890
3891sub walkExtra
3892{
3893    # APPNOTE 6.3.10, sec 4.4.11, 4.4.28, 4.5
3894    my $XLEN = shift;
3895    my $entry = shift;
3896
3897    # Caller has determined that there are $XLEN bytes available to read
3898
3899    my $buff ;
3900    my $offset = 0 ;
3901
3902    my $id;
3903    my $subLen;
3904    my $payload ;
3905
3906    my $count = 0 ;
3907    my $endExtraOffset = $FH->tell() + $XLEN ;
3908
3909    while ($offset < $XLEN) {
3910
3911        ++ $count;
3912
3913        # Detect if there is not enough data for an extra ID and length.
3914        # Android zipalign and zipflinger are prime candidates for these
3915        # non-standard extra sub-fields.
3916        my $remaining = $XLEN - $offset;
3917        if ($remaining < ZIP_EXTRA_SUBFIELD_HEADER_SIZE) {
3918            # There is not enough left.
3919            # Consume whatever is there and return so parsing
3920            # can continue.
3921
3922            myRead($payload, $remaining);
3923            my $data = hexDump($payload);
3924
3925            if ($payload =~ /^\x00+$/)
3926            {
3927                # All nulls
3928                out $payload, "Null Padding in Extra";
3929                info $FH->tell() - length($payload), decimalHex0x(length $payload) . " Null Padding Bytes in Extra Field" ;
3930            }
3931            else
3932            {
3933                out $payload, "Extra Data", $data;
3934                error $FH->tell() - length($payload), "'Extra Data' Malformed";
3935            }
3936
3937            return undef;
3938        }
3939
3940        myRead($id, ZIP_EXTRA_SUBFIELD_ID_SIZE);
3941        $offset += ZIP_EXTRA_SUBFIELD_ID_SIZE;
3942        my $lookID = unpack "v", $id ;
3943        if ($lookID == 0)
3944        {
3945            # check for null padding at end of extra
3946            my $here = $FH->tell();
3947            my $rest;
3948            myRead($rest, $XLEN - $offset);
3949            if ($rest =~ /^\x00+$/)
3950            {
3951                my $len = length ($id . $rest) ;
3952                out $id . $rest, "Null Padding in Extra";
3953                info $FH->tell() - $len, decimalHex0x($len) . " Null Padding Bytes in Extra Field";
3954                return undef;
3955            }
3956
3957            seekTo($here);
3958        }
3959
3960        my ($who, $decoder, $local_min, $local_max, $central_min, $central_max) =  @{ $Extras{$lookID} // ['', undef, undef,  undef,  undef, undef ] };
3961
3962        my $idString =  Value_v($lookID) ;
3963        $idString .=  " '$who'"
3964            if $who;
3965
3966        out $id, "Extra ID #$count", $idString ;
3967        info $FH->tell() - 2, "Unknown Extra ID $idString"
3968            if ! exists $Extras{$lookID} ;
3969
3970        myRead($buff, ZIP_EXTRA_SUBFIELD_LEN_SIZE);
3971        $offset += ZIP_EXTRA_SUBFIELD_LEN_SIZE;
3972
3973        $subLen =  unpack("v", $buff);
3974        out2 $buff, "Length", Value_v($subLen) ;
3975
3976        $remaining = $XLEN - $offset;
3977        if ($subLen > $remaining )
3978        {
3979            error $FH->tell() -2,
3980                  extraFieldIdentifier($lookID) . ": 'Length' field invalid",
3981                  sprintf("value %s > %s bytes remaining", decimalHex0x($subLen), decimalHex0x($remaining));
3982            outSomeData $remaining, "  Extra Payload";
3983            return undef;
3984        }
3985
3986        if (! defined $decoder)
3987        {
3988            if ($subLen)
3989            {
3990                myRead($payload, $subLen);
3991                my $data = hexDump16($payload);
3992
3993                out2 $payload, "Extra Payload", $data;
3994            }
3995        }
3996        else
3997        {
3998            if (testExtraLimits($lookID, $subLen, $entry->inCentralDir))
3999            {
4000                my $endExtraOffset = $FH->tell() + $subLen;
4001                $decoder->($lookID, $subLen, $entry) ;
4002
4003                # Belt & Braces - should now be at $endExtraOffset
4004                # error here means issue in an extra handler
4005                # should noy happen, but just in case
4006                # TODO -- need tests for this
4007                my $here = $FH->tell() ;
4008                if ($here > $endExtraOffset)
4009                {
4010                    # gone too far, so need to bomb out now
4011                    internalFatal $here, "Overflow processing " . extraFieldIdentifier($lookID) . ".",
4012                                  sprintf("Should be at offset %s, actually at %s", decimalHex0x($endExtraOffset),  decimalHex0x($here));
4013                }
4014                elsif ($here < $endExtraOffset)
4015                {
4016                    # not gone far enough, can recover
4017                    error $here,
4018                            sprintf("Expected to be at offset %s after processing %s, actually at %s", decimalHex0x($endExtraOffset),  extraFieldIdentifier($lookID), decimalHex0x($here)),
4019                            "Skipping " . decimalHex0x($endExtraOffset - $here) . " bytes";
4020                    outSomeData $endExtraOffset - $here, "  Extra Data";
4021                }
4022            }
4023        }
4024
4025        $offset += $subLen ;
4026    }
4027
4028    return undef ;
4029}
4030
4031sub testExtraLimits
4032{
4033    my $lookID = shift;
4034    my $size = shift;
4035    my $inCentralDir = shift;
4036
4037    my ($who, undef, $local_min, $local_max, $central_min, $central_max) =  @{ $Extras{$lookID} // ['', undef, undef,  undef,  undef, undef ] };
4038
4039    my ($min, $max) = $inCentralDir
4040                        ? ($central_min, $central_max)
4041                        : ($local_min, $local_max) ;
4042
4043    return 1
4044        if ! defined $min && ! defined $max ;
4045
4046    if (defined $min && defined $max)
4047    {
4048        # both the same
4049        if ($min == $max)
4050        {
4051            if ($size != $min)
4052            {
4053                error $FH->tell() -2, sprintf "%s: 'Length' field invalid: expected %s, got %s", extraFieldIdentifier($lookID), decimalHex0x($min),  decimalHex0x($size);
4054                outSomeData $size, "  Extra Payload" if $size;
4055                return 0;
4056            }
4057        }
4058        else # min != max
4059        {
4060            if ($size < $min || $size > $max)
4061            {
4062                error $FH->tell() -2, sprintf "%s: 'Length' field invalid: value must be betweem %s and %s, got %s", extraFieldIdentifier($lookID), decimalHex0x($min), decimalHex0x($max), decimalHex0x($size);
4063                outSomeData $size, "  Extra Payload" if $size ;
4064                return 0;
4065            }
4066        }
4067
4068    }
4069    else # must be defined $min & undefined max
4070    {
4071        if ($size < $min)
4072        {
4073            error $FH->tell() -2, sprintf "%s: 'Length' field invalid: value must be at least %s, got %s", extraFieldIdentifier($lookID), decimalHex0x($min),  decimalHex0x($size);
4074            outSomeData $size, "  Extra Payload" if $size;
4075            return 0;
4076        }
4077    }
4078
4079    return 1;
4080
4081}
4082
4083sub full32
4084{
4085    return ($_[0] // 0) == MAX32 ;
4086}
4087
4088sub full16
4089{
4090    return ($_[0] // 0) == MAX16 ;
4091}
4092
4093sub decode_Zip64
4094{
4095    my $extraID = shift ;
4096    my $len = shift;
4097    my $entry = shift;
4098
4099    myRead(my $payload, $len);
4100    if ($entry->inCentralDir() )
4101    {
4102        walk_Zip64_in_CD($extraID, $payload, $entry, 1) ;
4103    }
4104    else
4105    {
4106        walk_Zip64_in_LD($extraID, $payload, $entry, 1) ;
4107
4108    }
4109}
4110
4111sub walk_Zip64_in_LD
4112{
4113    my $extraID = shift ;
4114    my $zip64Extended = shift;
4115    my $entry = shift;
4116    my $display = shift // 1 ;
4117
4118    my $fieldStart = $FH->tell() - length $zip64Extended;
4119    my $fieldOffset = $fieldStart ;
4120
4121    $ZIP64 = 1;
4122    $entry->zip64(1);
4123
4124    if (length $zip64Extended == 0)
4125    {
4126        info $fieldOffset, extraFieldIdentifier($extraID) .  ": Length is Zero";
4127        return;
4128    }
4129
4130    my $assumeLengthsPresent   = (length($zip64Extended) == 16) ;
4131    my $assumeAllFieldsPresent = (length($zip64Extended) == 28) ;
4132
4133    if ($assumeLengthsPresent || $assumeAllFieldsPresent || full32 $entry->std_uncompressedSize )
4134    {
4135        # TODO defer a warning if in local header & central/local don't have std_uncompressedSizeset to 0xffffffff
4136        if (length $zip64Extended < 8)
4137        {
4138            my $message = extraFieldIdentifier($extraID) .  ": Expected " . decimalHex0x(8) . " bytes for 'Uncompressed Size': only " . decimalHex0x(length $zip64Extended)  . " bytes present";
4139            error $fieldOffset, $message;
4140            out2 $zip64Extended, $message;
4141            return;
4142        }
4143
4144        $fieldOffset += 8;
4145        my $data = substr($zip64Extended, 0, 8, "") ;
4146        $entry->uncompressedSize(unpack "Q<", $data);
4147        out2 $data, "Uncompressed Size", Value_Q($entry->uncompressedSize)
4148            if $display;
4149    }
4150
4151    if ($assumeLengthsPresent || $assumeAllFieldsPresent || full32 $entry->std_compressedSize)
4152    {
4153        if (length $zip64Extended < 8)
4154        {
4155            my $message = extraFieldIdentifier($extraID) .  ": Expected " . decimalHex0x(8) . " bytes for 'Compressed Size': only " . decimalHex0x(length $zip64Extended)  . " bytes present";
4156            error $fieldOffset, $message;
4157            out2 $zip64Extended, $message;
4158            return;
4159        }
4160
4161        $fieldOffset += 8;
4162
4163        my $data = substr($zip64Extended, 0, 8, "") ;
4164        $entry->compressedSize( unpack "Q<", $data);
4165        out2 $data, "Compressed Size", Value_Q($entry->compressedSize)
4166            if $display;
4167    }
4168
4169    # Zip64 in local header should not have localHeaderOffset or disk number
4170    # but some zip files do
4171
4172    if ($assumeAllFieldsPresent)
4173    {
4174        $fieldOffset += 8;
4175
4176        my $data = substr($zip64Extended, 0, 8, "") ;
4177        my $localHeaderOffset = unpack "Q<", $data;
4178        out2 $data, "Offset to Local Dir", Value_Q($localHeaderOffset)
4179            if $display;
4180    }
4181
4182    if ($assumeAllFieldsPresent)
4183    {
4184        $fieldOffset += 4;
4185
4186        my $data = substr($zip64Extended, 0, 4, "") ;
4187        my $diskNumber = unpack "v", $data;
4188        out2 $data, "Disk Number", Value_V($diskNumber)
4189            if $display;
4190    }
4191
4192    if (length $zip64Extended)
4193    {
4194        if ($display)
4195        {
4196            out2 $zip64Extended, "Unexpected Data", hexDump16 $zip64Extended ;
4197            info $fieldOffset, extraFieldIdentifier($extraID) .  ": Unexpected Data: " . decimalHex0x(length $zip64Extended) . " bytes";
4198        }
4199    }
4200
4201}
4202
4203sub walk_Zip64_in_CD
4204{
4205    my $extraID = shift ;
4206    my $zip64Extended = shift;
4207    my $entry = shift;
4208    my $display = shift // 1 ;
4209
4210    my $fieldStart = $FH->tell() - length $zip64Extended;
4211    my $fieldOffset = $fieldStart ;
4212
4213    $ZIP64 = 1;
4214    $entry->zip64(1);
4215
4216    if (length $zip64Extended == 0)
4217    {
4218        info $fieldOffset, extraFieldIdentifier($extraID) .  ": Length is Zero";
4219        return;
4220    }
4221
4222    my $assumeAllFieldsPresent = (length($zip64Extended) == 28) ;
4223
4224    if ($assumeAllFieldsPresent || full32 $entry->std_uncompressedSize )
4225    {
4226        if (length $zip64Extended < 8)
4227        {
4228            my $message = extraFieldIdentifier($extraID) .  ": Expected " . decimalHex0x(8) . " bytes for 'Uncompressed Size': only " . decimalHex0x(length $zip64Extended)  . " bytes present";
4229            error $fieldOffset, $message;
4230            out2 $zip64Extended, $message;
4231            return;
4232        }
4233
4234        $fieldOffset += 8;
4235        my $data = substr($zip64Extended, 0, 8, "") ;
4236        $entry->uncompressedSize(unpack "Q<", $data);
4237        out2 $data, "Uncompressed Size", Value_Q($entry->uncompressedSize)
4238            if $display;
4239    }
4240
4241    if ($assumeAllFieldsPresent || full32 $entry->std_compressedSize)
4242    {
4243        if (length $zip64Extended < 8)
4244        {
4245            my $message = extraFieldIdentifier($extraID) .  ": Expected " . decimalHex0x(8) . " bytes for 'Compressed Size': only " . decimalHex0x(length $zip64Extended)  . " bytes present";
4246            error $fieldOffset, $message;
4247            out2 $zip64Extended, $message;
4248            return;
4249        }
4250
4251        $fieldOffset += 8;
4252
4253        my $data = substr($zip64Extended, 0, 8, "") ;
4254        $entry->compressedSize(unpack "Q<", $data);
4255        out2 $data, "Compressed Size", Value_Q($entry->compressedSize)
4256            if $display;
4257    }
4258
4259    if ($assumeAllFieldsPresent || full32 $entry->std_localHeaderOffset)
4260    {
4261        if (length $zip64Extended < 8)
4262        {
4263            my $message = extraFieldIdentifier($extraID) .  ": Expected " . decimalHex0x(8) . " bytes for 'Offset to Local Dir': only " . decimalHex0x(length $zip64Extended)  . " bytes present";
4264            error $fieldOffset, $message;
4265            out2 $zip64Extended, $message;
4266            return;
4267        }
4268
4269        $fieldOffset += 8;
4270
4271        my $here = $FH->tell();
4272        my $data = substr($zip64Extended, 0, 8, "") ;
4273        $entry->localHeaderOffset(unpack "Q<", $data);
4274        out2 $data, "Offset to Local Dir", Value_Q($entry->localHeaderOffset)
4275            if $display;
4276
4277        my $commonMessage = "'Offset to Local Dir' field in 'Zip64 Extra Field' is invalid";
4278        $entry->localHeaderOffset(checkOffsetValue($entry->localHeaderOffset, $fieldStart, 0, $commonMessage, $fieldStart, ZIP_LOCAL_HDR_SIG, 0) );
4279    }
4280
4281    if ($assumeAllFieldsPresent || full16 $entry->std_diskNumber)
4282    {
4283        if (length $zip64Extended < 4)
4284        {
4285            my $message = extraFieldIdentifier($extraID) .  ": Expected " . decimalHex0x(4) . " bytes for 'Disk Number': only " . decimalHex0x(length $zip64Extended)  . " bytes present";
4286            error $fieldOffset, $message;
4287            out2 $zip64Extended, $message;
4288            return;
4289        }
4290
4291        $fieldOffset += 4;
4292
4293        my $here = $FH->tell();
4294        my $data = substr($zip64Extended, 0, 4, "") ;
4295        $entry->diskNumber(unpack "v", $data);
4296        out2 $data, "Disk Number", Value_V($entry->diskNumber)
4297            if $display;
4298        $entry->zip64_diskNumberPresent(1);
4299    }
4300
4301    if (length $zip64Extended)
4302    {
4303        if ($display)
4304        {
4305            out2 $zip64Extended, "Unexpected Data", hexDump16 $zip64Extended ;
4306            info $fieldOffset, extraFieldIdentifier($extraID) .  ": Unexpected Data: " . decimalHex0x(length $zip64Extended) . " bytes";
4307        }
4308    }
4309}
4310
4311sub Ntfs2Unix
4312{
4313    my $m = shift;
4314    my $v = shift;
4315
4316    # NTFS offset is 19DB1DED53E8000
4317
4318    my $hex = Value_Q($v) ;
4319
4320    # Treat empty value as special case
4321    # Could decode to 1 Jan 1601
4322    return "$hex 'No Date/Time'"
4323        if $v == 0;
4324
4325    $v -= 0x19DB1DED53E8000 ;
4326    my $ns = ($v % 10000000) * 100;
4327    my $elapse = int ($v/10000000);
4328    return "$hex '" . getT($elapse) .
4329           " " . sprintf("%0dns'", $ns);
4330}
4331
4332sub decode_NTFS_Filetimes
4333{
4334    my $extraID = shift ;
4335    my $len = shift;
4336    my $entry = shift;
4337
4338    out_V "  Reserved";
4339    out_v "  Tag1";
4340    out_v "  Size1" ;
4341
4342    my ($m, $s1) = read_Q;
4343    out $m, "  Mtime", Ntfs2Unix($m, $s1);
4344
4345    my ($a, $s3) = read_Q;
4346    out $a, "  Atime", Ntfs2Unix($a, $s3);
4347
4348    my ($c, $s2) = read_Q;
4349    out $c, "  Ctime", Ntfs2Unix($c, $s2);
4350}
4351
4352sub OpenVMS_DateTime
4353{
4354    my $ix = shift;
4355    my $tag = shift;
4356    my $size = shift;
4357
4358    # VMS epoch is 17 Nov 1858
4359    # Offset to Unix Epoch is -0x7C95674C3DA5C0 (-35067168005400000)
4360
4361    my ($data, $value) = read_Q();
4362
4363    my $datetime = "No Date Time'";
4364    if ($value != 0)
4365    {
4366        my $v =  $value - 0x007C95674C3DA5C0 ;
4367        my $ns = ($v % 10000000) * 100 ;
4368        my $seconds = int($v / 10000000) ;
4369        $datetime = getT($seconds) .
4370           " " . sprintf("%0dns'", $ns);
4371    }
4372
4373    out2 $data, "  Attribute", Value_Q($value) . " '$datetime";
4374}
4375
4376sub OpenVMS_DumpBytes
4377{
4378    my $ix = shift;
4379    my $tag = shift;
4380    my $size = shift;
4381
4382    myRead(my $data, $size);
4383
4384    out($data, "    Attribute", hexDump16($data));
4385
4386}
4387
4388sub OpenVMS_4ByteValue
4389{
4390    my $ix = shift;
4391    my $tag = shift;
4392    my $size = shift;
4393
4394    my ($data, $value) = read_V();
4395
4396    out2 $data, "  Attribute", Value_V($value);
4397}
4398
4399sub OpenVMS_UCHAR
4400{
4401    my $ix = shift;
4402    my $tag = shift;
4403    my $size = shift;
4404
4405    state $FCH = {
4406        0     => 'FCH$M_WASCONTIG',
4407        1     => 'FCH$M_NOBACKUP',
4408        2     => 'FCH$M_WRITEBACK',
4409        3     => 'FCH$M_READCHECK',
4410        4     => 'FCH$M_WRITCHECK',
4411        5     => 'FCH$M_CONTIGB',
4412        6     => 'FCH$M_LOCKED',
4413        6     => 'FCH$M_CONTIG',
4414        11    => 'FCH$M_BADACL',
4415        12    => 'FCH$M_SPOOL',
4416        13    => 'FCH$M_DIRECTORY',
4417        14    => 'FCH$M_BADBLOCK',
4418        15    => 'FCH$M_MARKDEL',
4419        16    => 'FCH$M_NOCHARGE',
4420        17    => 'FCH$M_ERASE',
4421        18    => 'FCH$M_SHELVED',
4422        20    => 'FCH$M_SCRATCH',
4423        21    => 'FCH$M_NOMOVE',
4424        22    => 'FCH$M_NOSHELVABLE',
4425    } ;
4426
4427    my ($data, $value) = read_V();
4428
4429    out2 $data, "  Attribute", Value_V($value);
4430
4431    for my $bit ( sort { $a <=> $b } keys %{ $FCH } )
4432    {
4433        # print "$bit\n";
4434        if ($value & (1 << $bit) )
4435        {
4436            out1 "      [Bit $bit]", $FCH->{$bit} ;
4437        }
4438    }
4439}
4440
4441sub OpenVMS_2ByteValue
4442{
4443    my $ix = shift;
4444    my $tag = shift;
4445    my $size = shift;
4446
4447    my ($data, $value) = read_v();
4448
4449    out2 $data, "  Attribute", Value_v($value);
4450}
4451
4452sub OpenVMS_revision
4453{
4454    my $ix = shift;
4455    my $tag = shift;
4456    my $size = shift;
4457
4458    my ($data, $value) = read_v();
4459
4460    out2 $data, "  Attribute", Value_v($value) . "'Revision Count " . Value_v($value) . "'";
4461}
4462
4463sub decode_OpenVMS
4464{
4465    my $extraID = shift ;
4466    my $len = shift;
4467    my $entry = shift;
4468
4469    state $openVMS_tags = {
4470        0x04    => [ 'ATR$C_RECATTR',   \&OpenVMS_DumpBytes  ],
4471        0x03    => [ 'ATR$C_UCHAR',     \&OpenVMS_UCHAR      ],
4472        0x11    => [ 'ATR$C_CREDATE',   \&OpenVMS_DateTime   ],
4473        0x12    => [ 'ATR$C_REVDATE',   \&OpenVMS_DateTime   ],
4474        0x13    => [ 'ATR$C_EXPDATE',   \&OpenVMS_DateTime   ],
4475        0x14    => [ 'ATR$C_BAKDATE',   \&OpenVMS_DateTime   ],
4476        0x0D    => [ 'ATR$C_ASCDATES',  \&OpenVMS_revision   ],
4477        0x15    => [ 'ATR$C_UIC',       \&OpenVMS_4ByteValue ],
4478        0x16    => [ 'ATR$C_FPRO',      \&OpenVMS_DumpBytes  ],
4479        0x17    => [ 'ATR$C_RPRO',      \&OpenVMS_2ByteValue ],
4480        0x1D    => [ 'ATR$C_JOURNAL',   \&OpenVMS_DumpBytes  ],
4481        0x1F    => [ 'ATR$C_ADDACLENT', \&OpenVMS_DumpBytes  ],
4482    } ;
4483
4484    out_V "  CRC";
4485    $len -= 4;
4486
4487    my $ix = 1;
4488    while ($len)
4489    {
4490        my ($data, $tag) = read_v();
4491        my $tagname = 'Unknown Tag';
4492        my $decoder = undef;
4493
4494        if ($openVMS_tags->{$tag})
4495        {
4496            ($tagname, $decoder) = @{ $openVMS_tags->{$tag} } ;
4497        }
4498
4499        out2 $data,  "Tag #$ix", Value_v($tag) . " '" . $tagname . "'" ;
4500        my $size = out_v "    Size";
4501
4502        if (defined $decoder)
4503        {
4504            $decoder->($ix, $tag, $size) ;
4505        }
4506        else
4507        {
4508            outSomeData($size, "    Attribute");
4509        }
4510
4511        ++ $ix;
4512        $len -= $size + 2 + 2;
4513    }
4514
4515}
4516
4517sub getT
4518{
4519    my $time = shift ;
4520
4521    if ($opt_utc)
4522     { return scalar gmtime($time) // 'Unknown'}
4523    else
4524     { return scalar localtime($time) // 'Unknown' }
4525}
4526
4527sub getTime
4528{
4529    my $time = shift ;
4530
4531    return "'Invalid Date or Time'"
4532        if ! defined $time;
4533
4534    return "'" . getT($time) . "'";
4535}
4536
4537sub LastModTime
4538{
4539    my $value = shift ;
4540
4541    return "'No Date/Time'"
4542        if $value == 0;
4543
4544    return getTime(_dosToUnixTime($value))
4545}
4546
4547sub _dosToUnixTime
4548{
4549    my $dt = shift;
4550
4551    # Mozilla xpi files have empty datetime
4552    # This is not a valid Dos datetime value
4553    return 0 if $dt == 0 ;
4554
4555    my $year = ( ( $dt >> 25 ) & 0x7f ) + 80;
4556    my $mon  = ( ( $dt >> 21 ) & 0x0f ) - 1;
4557    my $mday = ( ( $dt >> 16 ) & 0x1f );
4558
4559    my $hour = ( ( $dt >> 11 ) & 0x1f );
4560    my $min  = ( ( $dt >> 5  ) & 0x3f );
4561    my $sec  = ( ( $dt << 1  ) & 0x3e );
4562
4563    use Time::Local ;
4564    my $time_t;
4565    eval
4566    {
4567        # Use eval to catch crazy dates
4568        $time_t = Time::Local::timegm( $sec, $min, $hour, $mday, $mon, $year);
4569    }
4570    or do
4571    {
4572        my $dosDecode = $year+1900 . sprintf "-%02u-%02u %02u:%02u:%02u", $mon, $mday, $hour, $min, $sec;
4573        warning $FH->tell(), "'Modification Time' value " . decimalHex0x($dt, 4) .  "  decodes to '$dosDecode': not a valid DOS date/time" ;
4574        return undef
4575    };
4576
4577    return $time_t;
4578
4579}
4580
4581sub decode_UT
4582{
4583    # 0x5455 'UT: Extended Timestamp'
4584
4585    my $extraID = shift ;
4586    my $len = shift;
4587    my $entry = shift;
4588
4589    # Definition in IZ APPNOTE
4590
4591    # NOTE: Although the IZ appnote says that the central directory
4592    #       doesn't store the Acces & Creation times, there are
4593    #       some implementations that do poopulate the CD incorrectly.
4594
4595    # Caller has determined that at least one byte is available
4596
4597    # When $full is true assume all timestamps are present
4598    my $full = ($len == 13) ;
4599
4600    my $remaining = $len;
4601
4602    my ($data, $flags) = read_C();
4603
4604    my $v = Value_C $flags;
4605    my @f ;
4606    push @f, "Modification"    if $flags & 1;
4607    push @f, "Access" if $flags & 2;
4608    push @f, "Creation" if $flags & 4;
4609    $v .= " '" . join(' ', @f) . "'"
4610        if @f;
4611
4612    out $data, "  Flags", $v;
4613
4614    info $FH->tell() - 1, extraFieldIdentifier($extraID) . ": Reserved bits set in 'Flags' field"
4615        if $flags & ~0x7;
4616
4617    -- $remaining;
4618
4619    if ($flags & 1 || $full)
4620    {
4621        if ($remaining == 0 )
4622        {
4623            # Central Dir only has Modification Time
4624            error $FH->tell(), extraFieldIdentifier($extraID) . ": Missing field 'Modification Time'" ;
4625            return;
4626        }
4627        else
4628        {
4629            info $FH->tell(), extraFieldIdentifier($extraID) .  ": Unexpected 'Modification Time' present"
4630                if ! ($flags & 1)  ;
4631
4632            if ($remaining < 4)
4633            {
4634                outSomeData $remaining, "  Extra Data";
4635                error $FH->tell() - $remaining,
4636                    extraFieldIdentifier($extraID) .  ": Truncated reading 'Modification Time'",
4637                    expectedMessage(4, $remaining);
4638                return;
4639            }
4640
4641            my ($data, $time) = read_V();
4642
4643            out2 $data, "Modification Time",    Value_V($time) . " " . getTime($time) ;
4644
4645            $remaining -= 4 ;
4646        }
4647    }
4648
4649    # The remaining sub-fields are only present in the Local Header
4650
4651    if ($flags & 2 || $full)
4652    {
4653        if ($remaining == 0 && $entry->inCentralDir)
4654        {
4655            # Central Dir doesn't have access time
4656        }
4657        else
4658        {
4659            info $FH->tell(), extraFieldIdentifier($extraID) . ": Unexpected 'Access Time' present"
4660                if ! ($flags & 2) || $entry->inCentralDir ;
4661
4662            if ($remaining < 4)
4663            {
4664                outSomeData $remaining, "  Extra Data";
4665                error $FH->tell() - $remaining,
4666                    extraFieldIdentifier($extraID) . ": Truncated reading 'Access Time'" ,
4667                    expectedMessage(4, $remaining);
4668
4669                return;
4670            }
4671
4672            my ($data, $time) = read_V();
4673
4674            out2 $data, "Access Time",    Value_V($time) . " " . getTime($time) ;
4675            $remaining -= 4 ;
4676        }
4677    }
4678
4679    if ($flags & 4  || $full)
4680    {
4681        if ($remaining == 0 && $entry->inCentralDir)
4682        {
4683            # Central Dir doesn't have creation time
4684        }
4685        else
4686        {
4687            info $FH->tell(), extraFieldIdentifier($extraID) . ": Unexpected 'Creation Time' present"
4688                if ! ($flags & 4) || $entry->inCentralDir ;
4689
4690            if ($remaining < 4)
4691            {
4692                outSomeData $remaining, "  Extra Data";
4693
4694                error  $FH->tell() - $remaining,
4695                    extraFieldIdentifier($extraID) . ": Truncated reading 'Creation Time'" ,
4696                    expectedMessage(4, $remaining);
4697
4698                return;
4699            }
4700
4701            my ($data, $time) = read_V();
4702
4703            out2 $data, "Creation Time",    Value_V($time) . " " . getTime($time) ;
4704        }
4705    }
4706}
4707
4708
4709sub decode_Minizip_Signature
4710{
4711    # 0x10c5 Minizip CMS Signature
4712
4713    my $extraID = shift ;
4714    my $len = shift;
4715    my $entry = shift;
4716
4717    # Definition in https://github.com/zlib-ng/minizip-ng/blob/master/doc/mz_extrafield.md#cms-signature-0x10c5
4718
4719    $CentralDirectory->setMiniZipEncrypted();
4720
4721    if ($len == 0)
4722    {
4723        info $FH->tell() - 2, extraFieldIdentifier($extraID) . ": Zero length Signature";
4724        return;
4725    }
4726
4727    outHexdump($len, "  Signature");
4728
4729}
4730
4731sub decode_Minizip_Hash
4732{
4733    # 0x1a51 Minizip Hash
4734    # Definition in https://github.com/zlib-ng/minizip-ng/blob/master/doc/mz_extrafield.md#hash-0x1a51
4735
4736    # caller ckecks there are at least 4 bytes available
4737    my $extraID = shift ;
4738    my $len = shift;
4739    my $entry = shift;
4740
4741    state $Algorithm = {
4742            10 => 'MD5',
4743            20 => 'SHA1',
4744            23 => 'SHA256',
4745    };
4746
4747    my $remaining = $len;
4748
4749    $CentralDirectory->setMiniZipEncrypted();
4750
4751    my ($data, $alg) = read_v();
4752    my $algorithm = $Algorithm->{$alg} // "Unknown";
4753
4754    out $data, "  Algorithm", Value_v($alg) . " '$algorithm'";
4755    if (! exists $Algorithm->{$alg})
4756    {
4757        info $FH->tell() - 2, extraFieldIdentifier($extraID) . ": Unknown algorithm ID " .Value_v($alg);
4758    }
4759
4760    my ($d, $digestSize) = read_v();
4761    out $d, "  Digest Size", Value_v($digestSize);
4762
4763    $remaining -= 4;
4764
4765    if ($digestSize == 0)
4766    {
4767        info $FH->tell() - 2, extraFieldIdentifier($extraID) . ": Zero length Digest";
4768    }
4769    elsif ($digestSize > $remaining)
4770    {
4771        error $FH->tell() - 2, extraFieldIdentifier($extraID) . ": Digest Size " . decimalHex0x($digestSize) . " >  " . decimalHex0x($remaining) . " bytes remaining in extra field" ;
4772        $digestSize = $remaining ;
4773    }
4774
4775    outHexdump($digestSize, "  Digest");
4776
4777    $remaining -= $digestSize;
4778
4779    if ($remaining)
4780    {
4781        outHexdump($remaining, "  Unexpected Data");
4782        error $FH->tell() - $remaining, extraFieldIdentifier($extraID) . ": " . decimalHex0x($remaining) . " unexpected trailing bytes" ;
4783    }
4784}
4785
4786sub decode_Minizip_CD
4787{
4788    # 0xcdcd Minizip Central Directory
4789    # Definition in https://github.com/zlib-ng/minizip-ng/blob/master/doc/mz_extrafield.md#central-directory-0xcdcd
4790
4791    my $extraID = shift ;
4792    my $len = shift;
4793    my $entry = shift;
4794
4795    $entry->minizip_secure(1);
4796    $CentralDirectory->setMiniZipEncrypted();
4797
4798    my $size = out_Q "  Entries";
4799
4800 }
4801
4802sub decode_AES
4803{
4804    # ref https://www.winzip.com/en/support/aes-encryption/
4805    # Document version: 1.04
4806    # Last modified: January 30, 2009
4807
4808    my $extraID = shift ;
4809    my $len = shift;
4810    my $entry = shift;
4811
4812    return if $len == 0 ;
4813
4814    my $validAES = 1;
4815
4816    state $lookup = { 1 => "AE-1", 2 => "AE-2" };
4817    my $vendorVersion = out_v "  Vendor Version", sub {  $lookup->{$_[0]} || "Unknown"  } ;
4818    if (! $lookup->{$vendorVersion})
4819    {
4820        $validAES = 0;
4821        warning $FH->tell() - 2, extraFieldIdentifier($extraID) . ": Unknown 'Vendor Version' $vendorVersion. Valid values are 1,2"
4822    }
4823
4824    my $id ;
4825    myRead($id, 2);
4826    my $idValue = out $id, "  Vendor ID", unpackValue_v($id) . " '$id'";
4827
4828    if ($id ne 'AE')
4829    {
4830        $validAES = 0;
4831        warning $FH->tell() - 2, extraFieldIdentifier($extraID) . ": Unknown 'Vendor ID' '$idValue'. Valid value is 'AE'"
4832    }
4833
4834    state $strengths = {1 => "128-bit encryption key",
4835                        2 => "192-bit encryption key",
4836                        3 => "256-bit encryption key",
4837                       };
4838
4839    my $strength = out_C "  Encryption Strength", sub {$strengths->{$_[0]} || "Unknown" } ;
4840
4841    if (! $strengths->{$strength})
4842    {
4843        $validAES = 0;
4844        warning $FH->tell() - 1, extraFieldIdentifier($extraID) . ": Unknown 'Encryption Strength' $strength. Valid values are 1,2,3"
4845    }
4846
4847    my ($bmethod, $method) = read_v();
4848    out $bmethod, "  Compression Method", compressionMethod($method) ;
4849    if (! defined $ZIP_CompressionMethods{$method})
4850    {
4851        $validAES = 0;
4852        warning $FH->tell() - 2, extraFieldIdentifier($extraID) . ": Unknown 'Compression Method' ID " . decimalHex0x($method, 2)
4853    }
4854
4855    $entry->aesStrength($strength) ;
4856    $entry->aesValid($validAES) ;
4857}
4858
4859sub decode_Reference
4860{
4861    # ref https://www.winzip.com/en/support/compression-methods/
4862
4863    my $len = shift;
4864    my $entry = shift;
4865
4866    out_V "  CRC";
4867    myRead(my $uuid, 16);
4868    # UUID is big endian
4869    out2 $uuid, "UUID",
4870        unpack('H*', substr($uuid, 0, 4)) . '-' .
4871        unpack('H*', substr($uuid, 4, 2)) . '-' .
4872        unpack('H*', substr($uuid, 6, 2)) . '-' .
4873        unpack('H*', substr($uuid, 8, 2)) . '-' .
4874        unpack('H*', substr($uuid, 10, 6)) ;
4875}
4876
4877sub decode_DUMMY
4878{
4879    my $extraID = shift ;
4880    my $len = shift;
4881    my $entry = shift;
4882
4883    out_V "  Data";
4884}
4885
4886sub decode_GrowthHint
4887{
4888    # APPNOTE 6.3.10, sec 4.6.10
4889
4890    my $extraID = shift ;
4891    my $len = shift;
4892    my $entry = shift;
4893
4894    # caller has checked that 4 bytes are available,
4895    # so can output values without checking available space
4896    out_v "  Signature" ;
4897    out_v "  Initial Value";
4898
4899    my $padding;
4900    myRead($padding, $len - 4);
4901
4902    out2 $padding, "Padding", hexDump16($padding);
4903
4904    if ($padding !~ /^\x00+$/)
4905    {
4906        info $FH->tell(), extraFieldIdentifier($extraID) . ": 'Padding' is not all NULL bytes";
4907    }
4908}
4909
4910sub decode_DataStreamAlignment
4911{
4912    # APPNOTE 6.3.10, sec 4.6.11
4913
4914    my $extraID = shift ;
4915    my $len = shift;
4916    my $entry = shift;
4917
4918    my $inCentralHdr = $entry->inCentralDir ;
4919
4920    return if $len == 0 ;
4921
4922    my ($data, $alignment) = read_v();
4923
4924    out $data, "  Alignment", Value_v($alignment) ;
4925
4926    my $recompress_value = $alignment & 0x8000 ? 1 : 0;
4927
4928    my $recompressing = $recompress_value ? "True" : "False";
4929    $alignment &= 0x7FFF ;
4930    my $hexAl =  sprintf("%X", $alignment);
4931
4932    out1 "  [Bit   15]",  "$recompress_value    'Recompress $recompressing'";
4933    out1 "  [Bits 0-14]", "$hexAl 'Minimal Alignment $alignment'";
4934
4935    if (! $inCentralHdr && $len - 2 > 0)
4936    {
4937        my $padding;
4938        myRead($padding, $len - 2);
4939
4940        out2 $padding, "Padding", hexDump16($padding);
4941    }
4942}
4943
4944
4945sub decode_UX
4946{
4947    my $extraID = shift ;
4948    my $len = shift;
4949    my $entry = shift;
4950
4951    my $inCentralHdr = $entry->inCentralDir ;
4952
4953    return if $len == 0 ;
4954
4955    my ($data, $time) = read_V();
4956    out2 $data, "Access Time", Value_V($time) . " " . getTime($time) ;
4957
4958    ($data, $time) = read_V();
4959    out2 $data, "Modification Time", Value_V($time) . " " . getTime($time) ;
4960
4961    if (! $inCentralHdr ) {
4962        out_v "  UID" ;
4963        out_v "  GID";
4964    }
4965}
4966
4967sub decode_Ux
4968{
4969    my $extraID = shift ;
4970    my $len = shift;
4971    my $entry = shift;
4972
4973    return if $len == 0 ;
4974    out_v "  UID" ;
4975    out_v "  GID";
4976}
4977
4978sub decodeLitteEndian
4979{
4980    my $value = shift ;
4981
4982    if (length $value == 8)
4983    {
4984        return unpackValueQ ($value)
4985    }
4986    elsif (length $value == 4)
4987    {
4988        return unpackValue_V ($value)
4989    }
4990    elsif (length $value == 2)
4991    {
4992        return unpackValue_v ($value)
4993    }
4994    elsif (length $value == 1)
4995    {
4996        return unpackValue_C ($value)
4997    }
4998    else {
4999        # TODO - fix this
5000        internalFatal undef, "unsupported decodeLitteEndian length '" . length ($value) . "'";
5001    }
5002}
5003
5004sub decode_ux
5005{
5006    my $extraID = shift ;
5007    my $len = shift;
5008    my $entry = shift;
5009
5010    # caller has checked that 3 bytes are available
5011
5012    return if $len == 0 ;
5013
5014    my $version = out_C "  Version" ;
5015    info  $FH->tell() - 1, extraFieldIdentifier($extraID) . ": 'Version' should be " . decimalHex0x(1) . ", got " . decimalHex0x($version, 1)
5016        if $version != 1 ;
5017
5018    my $available = $len - 1 ;
5019
5020    my $uidSize = out_C "  UID Size";
5021    $available -= 1;
5022
5023    if ($uidSize)
5024    {
5025        if ($available < $uidSize)
5026        {
5027            outSomeData($available, "  Bad Extra Data");
5028            error $FH->tell() - $available,
5029                extraFieldIdentifier($extraID) . ": truncated reading 'UID'",
5030                expectedMessage($uidSize, $available);
5031            return;
5032        }
5033
5034        myRead(my $data, $uidSize);
5035        out2 $data, "UID", decodeLitteEndian($data);
5036        $available -= $uidSize ;
5037    }
5038
5039    if ($available < 1)
5040    {
5041        error $FH->tell(),
5042                    extraFieldIdentifier($extraID) . ": truncated reading 'GID Size'",
5043                    expectedMessage($uidSize, $available);
5044        return ;
5045    }
5046
5047    my $gidSize = out_C "  GID Size";
5048    $available -= 1 ;
5049    if ($gidSize)
5050    {
5051        if ($available < $gidSize)
5052        {
5053            outSomeData($available, "  Bad Extra Data");
5054            error $FH->tell() - $available,
5055                        extraFieldIdentifier($extraID) . ": truncated reading 'GID'",
5056                        expectedMessage($gidSize, $available);
5057            return;
5058        }
5059
5060        myRead(my $data, $gidSize);
5061        out2 $data, "GID", decodeLitteEndian($data);
5062        $available -= $gidSize ;
5063    }
5064
5065}
5066
5067sub decode_Java_exe
5068{
5069    my $extraID = shift ;
5070    my $len = shift;
5071    my $entry = shift;
5072
5073}
5074
5075sub decode_up
5076{
5077    # APPNOTE 6.3.10, sec 4.6.9
5078
5079    my $extraID = shift ;
5080    my $len = shift;
5081    my $entry = shift;
5082
5083    out_C "  Version";
5084    out_V "  NameCRC32";
5085
5086    if ($len - 5 > 0)
5087    {
5088        myRead(my $data, $len - 5);
5089
5090        outputFilename($data, 1,  "  UnicodeName");
5091    }
5092}
5093
5094sub decode_ASi_Unix
5095{
5096    my $extraID = shift ;
5097    my $len = shift;
5098    my $entry = shift;
5099
5100    # https://stackoverflow.com/questions/76581811/why-does-unzip-ignore-my-zip64-end-of-central-directory-record
5101
5102    out_V "  CRC";
5103    my $native_attrib = out_v "  Mode";
5104
5105    # TODO - move to separate sub & tidy
5106    if (1) # Unix
5107    {
5108
5109        state $mask = {
5110                0   => '---',
5111                1   => '--x',
5112                2   => '-w-',
5113                3   => '-wx',
5114                4   => 'r--',
5115                5   => 'r-x',
5116                6   => 'rw-',
5117                7   => 'rwx',
5118            } ;
5119
5120        my $rwx = ($native_attrib  &  0777);
5121
5122        if ($rwx)
5123        {
5124            my $output  = '';
5125            $output .= $mask->{ ($rwx >> 6) & 07 } ;
5126            $output .= $mask->{ ($rwx >> 3) & 07 } ;
5127            $output .= $mask->{ ($rwx >> 0) & 07 } ;
5128
5129            out1 "  [Bits 0-8]",  Value_v($rwx)  . " 'Unix attrib: $output'" ;
5130            out1 "  [Bit 9]",  "1 'Sticky'"
5131                if $rwx & 0x200 ;
5132            out1 "  [Bit 10]",  "1 'Set GID'"
5133                if $rwx & 0x400 ;
5134            out1 "  [Bit 11]",  "1 'Set UID'"
5135                if $rwx & 0x800 ;
5136
5137            my $not_rwx = (($native_attrib  >> 12) & 0xF);
5138            if ($not_rwx)
5139            {
5140                state $masks = {
5141                    0x0C =>  'Socket',           # 0x0C  0b1100
5142                    0x0A =>  'Symbolic Link',    # 0x0A  0b1010
5143                    0x08 =>  'Regular File',     # 0x08  0b1000
5144                    0x06 =>  'Block Device',     # 0x06  0b0110
5145                    0x04 =>  'Directory',        # 0x04  0b0100
5146                    0x02 =>  'Character Device', # 0x02  0b0010
5147                    0x01 =>  'FIFO',             # 0x01  0b0001
5148                };
5149
5150                my $got = $masks->{$not_rwx} // 'Unknown Unix attrib' ;
5151                out1 "  [Bits 12-15]",  Value_C($not_rwx) . " '$got'"
5152            }
5153        }
5154    }
5155
5156
5157    my $s = out_V "  SizDev";
5158    out_v "  UID";
5159    out_v "  GID";
5160
5161}
5162
5163sub decode_uc
5164{
5165    # APPNOTE 6.3.10, sec 4.6.8
5166
5167    my $extraID = shift ;
5168    my $len = shift;
5169    my $entry = shift;
5170
5171    out_C "  Version";
5172    out_V "  ComCRC32";
5173
5174    if ($len - 5 > 0)
5175    {
5176        myRead(my $data, $len - 5);
5177
5178        outputFilename($data, 1, "  UnicodeCom");
5179    }
5180}
5181
5182sub decode_Xceed_unicode
5183{
5184    # 0x554e
5185
5186    my $extraID = shift ;
5187    my $len = shift;
5188    my $entry = shift;
5189
5190    my $data ;
5191    my $remaining = $len;
5192
5193    # No public definition available, so reverse engineer the content.
5194
5195    # See https://github.com/pmqs/zipdetails/issues/13 for C# source that populates
5196    # this field.
5197
5198    # Fiddler https://www.telerik.com/fiddler) creates this field.
5199
5200    # Local Header only has UTF16LE filename
5201    #
5202    # Field definition
5203    #    4 bytes Signature                      always XCUN
5204    #    2 bytes Filename Length (divided by 2)
5205    #      Filename
5206
5207    # Central has UTF16LE filename & comment
5208    #
5209    # Field definition
5210    #    4 bytes Signature                      always XCUN
5211    #    2 bytes Filename Length (divided by 2)
5212    #    2 bytes Comment Length (divided by 2)
5213    #      Filename
5214    #      Comment
5215
5216    # First 4 bytes appear to be little-endian "XCUN" all the time
5217    # Just double check
5218    my ($idb, $id) = read_V();
5219    $remaining -= 4;
5220
5221    my $outid = decimalHex0x($id);
5222    $outid .= " 'XCUN'"
5223        if $idb eq 'NUCX';
5224
5225    out $idb, "  ID", $outid;
5226
5227    # Next 2 bytes contains a count of the filename length divided by 2
5228    # Dividing by 2 gives the number of UTF-16 characters.
5229    my $filenameLength = out_v "  Filename Length";
5230    $filenameLength *= 2; # Double to get number of bytes to read
5231    $remaining -= 2;
5232
5233    my $commentLength = 0;
5234
5235    if ($entry->inCentralDir)
5236    {
5237        # Comment length only in Central Directory
5238        # Again stored divided by 2.
5239        $commentLength = out_v "  Comment Length";
5240        $commentLength *= 2; # Double to get number of bytes to read
5241        $remaining -= 2;
5242    }
5243
5244    # next is a UTF16 encoded filename
5245
5246    if ($filenameLength)
5247    {
5248        if ($filenameLength > $remaining )
5249        {
5250            myRead($data, $remaining);
5251            out redactData($data), "  UTF16LE Filename", "'" . redactFilename(decode("UTF16LE", $data)) . "'";
5252
5253            error $FH->tell() - $remaining,
5254                extraFieldIdentifier($extraID) .  ": Truncated reading 'UTF16LE Filename'",
5255                expectedMessage($filenameLength, $remaining);
5256            return undef;
5257        }
5258
5259        myRead($data, $filenameLength);
5260        out redactData($data), "  UTF16LE Filename", "'" . redactFilename(decode("UTF16LE", $data)) . "'";
5261        $remaining -= $filenameLength;
5262    }
5263
5264    # next is a UTF16 encoded comment
5265
5266    if ($commentLength)
5267    {
5268        if ($commentLength > $remaining )
5269        {
5270            myRead($data, $remaining);
5271            out redactData($data), "  UTF16LE Comment", "'" . redactFilename(decode("UTF16LE", $data)) . "'";
5272
5273            error $FH->tell() - $remaining,
5274                extraFieldIdentifier($extraID) .  ": Truncated reading 'UTF16LE Comment'",
5275                expectedMessage($filenameLength, $remaining);
5276            return undef;
5277        }
5278
5279        myRead($data, $commentLength);
5280        out redactData($data), "  UTF16LE Comment", "'" . redactFilename(decode("UTF16LE", $data)) . "'";
5281        $remaining -= $commentLength;
5282    }
5283
5284    if ($remaining)
5285    {
5286        outHexdump($remaining, "  Unexpected Data");
5287        error $FH->tell() - $remaining, extraFieldIdentifier($extraID) . ": " . decimalHex0x($remaining) . " unexpected trailing bytes" ;
5288    }
5289}
5290
5291sub decode_Key_Value_Pair
5292{
5293    # 0x564B 'KV'
5294    # https://github.com/sozip/keyvaluepairs-spec/blob/master/zip_keyvalue_extra_field_specification.md
5295
5296    my $extraID = shift ;
5297    my $len = shift;
5298    my $entry = shift;
5299
5300    my $remaining = $len;
5301
5302    myRead(my $signature, 13);
5303    $remaining -= 13;
5304
5305    if ($signature ne 'KeyValuePairs')
5306    {
5307        error $FH->tell() - 13, extraFieldIdentifier($extraID) . ": 'Signature' field not 'KeyValuePairs'" ;
5308        myRead(my $payload, $remaining);
5309        my $data = hexDump16($signature . $payload);
5310
5311        out2 $signature . $payload, "Extra Payload", $data;
5312
5313        return ;
5314    }
5315
5316    out $signature, '  Signature', "'KeyValuePairs'";
5317    my $kvPairs = out_C "  KV Count";
5318    $remaining -= 1;
5319
5320    for my $index (1 .. $kvPairs)
5321    {
5322        my $key;
5323        my $klen = out_v "  Key size #$index";
5324        $remaining -= 4;
5325
5326        myRead($key, $klen);
5327        outputFilename $key, 1, "  Key #$index";
5328        $remaining -= $klen;
5329
5330        my $value;
5331        my $vlen = out_v "  Value size #$index";
5332        $remaining -= 4;
5333
5334        myRead($value, $vlen);
5335        outputFilename $value, 1, "  Value #$index";
5336        $remaining -= $vlen;
5337    }
5338
5339    # TODO check that
5340    # * count of kv pairs is accurate
5341    # * no truncation in middle of kv data
5342    # * no trailing data
5343}
5344
5345sub decode_NT_security
5346{
5347    # IZ Appnote
5348    my $extraID = shift ;
5349    my $len = shift;
5350    my $entry = shift;
5351
5352    my $inCentralHdr = $entry->inCentralDir ;
5353
5354    out_V "  Uncompressed Size" ;
5355
5356    if (! $inCentralHdr) {
5357
5358        out_C "  Version" ;
5359
5360        out_v "  CType", sub { "'" . ($ZIP_CompressionMethods{$_[0]} || "Unknown Method") . "'" };
5361
5362        out_V "  CRC" ;
5363
5364        my $plen = $len - 4 - 1 - 2 - 4;
5365        outHexdump $plen, "  Extra Payload";
5366    }
5367}
5368
5369sub decode_MVS
5370{
5371    # APPNOTE 6.3.10, Appendix
5372    my $extraID = shift ;
5373    my $len = shift;
5374    my $entry = shift;
5375
5376    # data in Big-Endian
5377    myRead(my $data, $len);
5378    my $ID = unpack("N", $data);
5379
5380    if ($ID == 0xE9F3F9F0) # EBCDIC for "Z390"
5381    {
5382        my $d = substr($data, 0, 4, '') ;
5383        out($d, "  ID", "'Z390'");
5384    }
5385
5386    out($data, "  Extra Payload", hexDump16($data));
5387}
5388
5389sub decode_strong_encryption
5390{
5391    # APPNOTE 6.3.10, sec 4.5.12 & 7.4.2
5392
5393    my $extraID = shift ;
5394    my $len = shift;
5395    my $entry = shift;
5396
5397    # TODO check for overflow is contents > $len
5398    out_v "  Format";
5399    out_v "  AlgId", sub { $AlgIdLookup{ $_[0] } // "Unknown algorithm" } ;
5400    out_v "  BitLen";
5401    out_v "  Flags", sub { $FlagsLookup{ $_[0] } // "reserved for certificate processing" } ;
5402
5403    # see APPNOTE 6.3.10, sec 7.4.2 for this part
5404    my $recipients = out_V "  Recipients";
5405
5406    my $available = $len - 12;
5407
5408    if ($recipients)
5409    {
5410        if ($available < 2)
5411        {
5412            outSomeData($available, "  Badly formed extra data");
5413            # TODO - need warning
5414            return;
5415        }
5416
5417        out_v "  HashAlg", sub { $HashAlgLookup{ $_[0] } // "Unknown algorithm" } ;
5418        $available -= 2;
5419
5420        if ($available < 2)
5421        {
5422            outSomeData($available, "  Badly formed extra data");
5423            # TODO - need warning
5424            return;
5425        }
5426
5427        my $HSize = out_v "  HSize" ;
5428        $available -= 2;
5429
5430        # should have $recipients * $HSize bytes available
5431        if ($recipients * $HSize != $available)
5432        {
5433            outSomeData($available, "  Badly formed extra data");
5434            # TODO - need warning
5435            return;
5436        }
5437
5438        my $ix = 1;
5439        for (0 .. $recipients-1)
5440        {
5441            myRead(my $payload, $HSize);
5442            my $data = hexDump16($payload);
5443
5444            out2 $payload, sprintf("Key #%X", $ix), $data;
5445            ++ $ix;
5446        }
5447    }
5448}
5449
5450
5451sub printAes
5452{
5453    # ref https://www.winzip.com/en/support/aes-encryption/
5454
5455    my $entry = shift;
5456
5457    return 0
5458        if ! $entry->aesValid;
5459
5460    my %saltSize = (
5461                        1 => 8,
5462                        2 => 12,
5463                        3 => 16,
5464                    );
5465
5466    myRead(my $salt, $saltSize{$entry->aesStrength } // 0);
5467    out $salt, "AES Salt", hexDump16($salt);
5468    myRead(my $pwv, 2);
5469    out $pwv, "AES Pwd Ver", hexDump16($pwv);
5470
5471    return  $saltSize{$entry->aesStrength} + 2 + 10;
5472}
5473
5474sub printLzmaProperties
5475{
5476    my $len = 0;
5477
5478    my $b1;
5479    my $b2;
5480    my $buffer;
5481
5482    myRead($b1, 2);
5483    my ($verHi, $verLow) = unpack ("CC", $b1);
5484
5485    out $b1, "LZMA Version", sprintf("%02X%02X", $verHi, $verLow) . " '$verHi.$verLow'";
5486    my $LzmaPropertiesSize = out_v "LZMA Properties Size";
5487    $len += 4;
5488
5489    my $LzmaInfo = out_C "LZMA Info",  sub { $_[0] == 93 ? "(Default)" : ""};
5490
5491    my $PosStateBits = 0;
5492    my $LiteralPosStateBits = 0;
5493    my $LiteralContextBits = 0;
5494    $PosStateBits = int($LzmaInfo / (9 * 5));
5495	$LzmaInfo -= $PosStateBits * 9 * 5;
5496	$LiteralPosStateBits = int($LzmaInfo / 9);
5497	$LiteralContextBits = $LzmaInfo - $LiteralPosStateBits * 9;
5498
5499    out1 "  PosStateBits",        $PosStateBits;
5500    out1 "  LiteralPosStateBits", $LiteralPosStateBits;
5501    out1 "  LiteralContextBits",  $LiteralContextBits;
5502
5503    out_V "LZMA Dictionary Size";
5504
5505    # TODO - assumption that this is 5
5506    $len += $LzmaPropertiesSize;
5507
5508    skip($FH, $LzmaPropertiesSize - 5)
5509        if  $LzmaPropertiesSize != 5 ;
5510
5511    return $len;
5512}
5513
5514sub peekAtOffset
5515{
5516    # my $fh = shift;
5517    my $offset = shift;
5518    my $len = shift;
5519
5520    my $here = $FH->tell();
5521
5522    seekTo($offset) ;
5523
5524    my $buffer;
5525    myRead($buffer, $len);
5526    seekTo($here);
5527
5528    length $buffer == $len
5529        or return '';
5530
5531    return $buffer;
5532}
5533
5534sub readFromOffset
5535{
5536    # my $fh = shift;
5537    my $offset = shift;
5538    my $len = shift;
5539
5540    seekTo($offset) ;
5541
5542    my $buffer;
5543    myRead($buffer, $len);
5544
5545    length $buffer == $len
5546        or return '';
5547
5548    return $buffer;
5549}
5550
5551sub readSignatureFromOffset
5552{
5553    my $offset = shift ;
5554
5555    # catch use case where attempting to read past EOF
5556    # sub is expecting to return a 32-bit value so return 54-bit out-of-bound value
5557    return MAX64
5558        if $offset + 4 > $FILELEN ;
5559
5560    my $here = $FH->tell();
5561    my $buffer = readFromOffset($offset, 4);
5562    my $gotSig = unpack("V", $buffer) ;
5563    seekTo($here);
5564
5565    return $gotSig;
5566}
5567
5568
5569sub chckForAPKSigningBlock
5570{
5571    my $fh = shift;
5572    my $cdOffset = shift;
5573    my $cdSize = shift;
5574
5575    # APK Signing Block comes directy before the Central directory
5576    # See https://source.android.com/security/apksigning/v2
5577
5578    # If offset available is less than 44, it isn't an APK signing block
5579    #
5580    #   len1     8
5581    #   id       4
5582    #   kv with zero len 8
5583    #   len1     8
5584    #   magic   16
5585    #   ----------
5586    #           44
5587
5588    return (0, 0, '')
5589        if $cdOffset < 44 || $FILELEN - $cdSize < 44 ;
5590
5591    # Step 1 - 16 bytes before CD is literal string "APK Sig Block 42"
5592    my $magicOffset = $cdOffset - 16;
5593    my $buffer = readFromOffset($magicOffset, 16);
5594
5595    return (0, 0, '')
5596        if $buffer ne "APK Sig Block 42" ;
5597
5598    # Step 2 - read the second length field
5599    #          and check that it looks ok
5600    $buffer = readFromOffset($cdOffset - 16 - 8, 8);
5601    my $len2 = unpack("Q<", $buffer);
5602
5603    return (0, 0, '')
5604        if $len2 == 0 || $len2 > $FILELEN;
5605
5606    # Step 3 - read the first length field.
5607    #          It should be identical to the second one.
5608
5609    my $startApkOffset = $cdOffset -  8 - $len2 ;
5610
5611    $buffer = readFromOffset($startApkOffset, 8);
5612    my $len1 = unpack("Q<", $buffer);
5613
5614    return (0, 0, '')
5615        if $len1 != $len2;
5616
5617    return ($startApkOffset, $cdOffset - 16 - 8, $buffer);
5618}
5619
5620sub scanApkBlock
5621{
5622    state $IDs = {
5623            0x7109871a  => "APK Signature v2",
5624            0xf05368c0  => "APK Signature v3",
5625            0x42726577  => "Verity Padding Block", # from https://android.googlesource.com/platform/tools/apksig/+/master/src/main/java/com/android/apksig/internal/apk/ApkSigningBlockUtils.java
5626            0x6dff800d  => "Source Stamp",
5627            0x504b4453  => "Dependency Info",
5628            0x71777777  => "APK Channel Block",
5629            0xff3b5998  => "Zero Block",
5630            0x2146444e  => "Play Metadata",
5631    } ;
5632
5633
5634    seekTo($FH->tell() - 4) ;
5635    print "\n";
5636    out "", "APK SIGNING BLOCK";
5637
5638    scanApkPadding();
5639    out_Q "Block Length Copy #1";
5640    my $ix = 1;
5641
5642    while ($FH->tell() < $APK - 8)
5643    {
5644         my ($bytes, $id, $len);
5645        ($bytes, $len) = read_Q ;
5646        out $bytes, "ID/Value Length #" . sprintf("%X", $ix), Value_Q($len);
5647
5648        ($bytes, $id) = read_V;
5649
5650        out $bytes, "  ID", Value_V($id) . " '" . ($IDs->{$id} // 'Unknown ID') . "'";
5651
5652        outSomeData($len-4, "  Value");
5653        ++ $ix;
5654    }
5655
5656    out_Q "Block Length Copy #2";
5657
5658    my $magic ;
5659    myRead($magic, 16);
5660
5661    out $magic, "Magic", qq['$magic'];
5662}
5663
5664sub scanApkPadding
5665{
5666    my $here = $FH->tell();
5667
5668    return
5669        if $here == $START_APK;
5670
5671    # found some padding
5672
5673    my $delta = $START_APK - $here;
5674    my $padding = peekAtOffset($here, $delta);
5675
5676    if ($padding =~ /^\x00+$/)
5677    {
5678        outSomeData($delta, "Null Padding");
5679    }
5680    else
5681    {
5682        outHexdump($delta, "Unexpected Padding");
5683    }
5684}
5685
5686sub scanCentralDirectory
5687{
5688    my $fh = shift;
5689
5690    my $here = $fh->tell();
5691
5692    # Use cases
5693    # 1 32-bit CD
5694    # 2 64-bit CD
5695
5696    my ($offset, $size) = findCentralDirectoryOffset($fh);
5697    $CentralDirectory->{CentralDirectoryOffset} = $offset;
5698    $CentralDirectory->{CentralDirectorySize} = $size;
5699
5700    return ()
5701        if ! defined $offset;
5702
5703    $fh->seek($offset, SEEK_SET) ;
5704
5705    # Now walk the Central Directory Records
5706    my $buffer ;
5707    my $cdIndex = 0;
5708    my $cdEntryOffset = 0;
5709
5710    while ($fh->read($buffer, ZIP_CD_FILENAME_OFFSET) == ZIP_CD_FILENAME_OFFSET  &&
5711           unpack("V", $buffer) == ZIP_CENTRAL_HDR_SIG) {
5712
5713        my $startHeader = $fh->tell() - ZIP_CD_FILENAME_OFFSET;
5714
5715        my $cdEntryOffset = $fh->tell() - ZIP_CD_FILENAME_OFFSET;
5716        $HeaderOffsetIndex->addOffsetNoPrefix($cdEntryOffset, ZIP_CENTRAL_HDR_SIG) ;
5717
5718        ++ $cdIndex ;
5719
5720        my $extractVer         = unpack("v", substr($buffer,  6, 1));
5721        my $gpFlag             = unpack("v", substr($buffer,  8, 2));
5722        my $lastMod            = unpack("V", substr($buffer, 10, 4));
5723        my $crc                = unpack("V", substr($buffer, 16, 4));
5724        my $compressedSize   = unpack("V", substr($buffer, 20, 4));
5725        my $uncompressedSize = unpack("V", substr($buffer, 24, 4));
5726        my $filename_length    = unpack("v", substr($buffer, 28, 2));
5727        my $extra_length       = unpack("v", substr($buffer, 30, 2));
5728        my $comment_length     = unpack("v", substr($buffer, 32, 2));
5729        my $diskNumber         = unpack("v", substr($buffer, 34, 2));
5730        my $locHeaderOffset    = unpack("V", substr($buffer, 42, 4));
5731
5732        my $cdZip64 = 0;
5733        my $zip64Sizes = 0;
5734
5735        if (! full32 $locHeaderOffset)
5736        {
5737            # Check for corrupt offset
5738            # 1. ponting paset EOF
5739            # 2. offset points forward in the file
5740            # 3. value at offset is not a CD record signature
5741
5742            my $commonMessage = "'Local Header Offset' field in '" . Signatures::name(ZIP_CENTRAL_HDR_SIG) . "' is invalid";
5743            checkOffsetValue($locHeaderOffset, $startHeader, 0, $commonMessage,
5744                $startHeader + CentralDirectoryEntry::Offset_RelativeOffsetToLocal(),
5745                ZIP_LOCAL_HDR_SIG, 1) ;
5746        }
5747
5748        $fh->read(my $filename, $filename_length) ;
5749
5750        my $cdEntry = CentralDirectoryEntry->new();
5751
5752        $cdEntry->centralHeaderOffset($startHeader) ;
5753        $cdEntry->localHeaderOffset($locHeaderOffset) ;
5754        $cdEntry->compressedSize($compressedSize) ;
5755        $cdEntry->uncompressedSize($uncompressedSize) ;
5756        $cdEntry->extractVersion($extractVer);
5757        $cdEntry->generalPurposeFlags($gpFlag);
5758        $cdEntry->filename($filename) ;
5759        $cdEntry->lastModDateTime($lastMod);
5760        $cdEntry->languageEncodingFlag($gpFlag & ZIP_GP_FLAG_LANGUAGE_ENCODING) ;
5761        $cdEntry->diskNumber($diskNumber) ;
5762        $cdEntry->crc32($crc) ;
5763        $cdEntry->zip64ExtraPresent($cdZip64) ;
5764
5765        $cdEntry->std_localHeaderOffset($locHeaderOffset) ;
5766        $cdEntry->std_compressedSize($compressedSize) ;
5767        $cdEntry->std_uncompressedSize($uncompressedSize) ;
5768        $cdEntry->std_diskNumber($diskNumber) ;
5769
5770
5771        if ($extra_length)
5772        {
5773            $fh->read(my $extraField, $extra_length) ;
5774
5775            # Check for Zip64
5776            my $zip64Extended = findID(0x0001, $extraField);
5777
5778            if ($zip64Extended)
5779            {
5780                $cdZip64 = 1;
5781                walk_Zip64_in_CD(1, $zip64Extended, $cdEntry, 0);
5782            }
5783        }
5784
5785        $cdEntry->offsetStart($startHeader) ;
5786        $cdEntry->offsetEnd($FH->tell() - 1);
5787
5788        # don't call addEntry until after the extra fields have been scanned
5789        # the localheader offset value may be updated in th ezip64 extra field.
5790        $CentralDirectory->addEntry($cdEntry);
5791        $HeaderOffsetIndex->addOffset($cdEntry->localHeaderOffset, ZIP_LOCAL_HDR_SIG) ;
5792
5793        skip($fh, $comment_length ) ;
5794    }
5795
5796    $FH->seek($fh->tell() - ZIP_CD_FILENAME_OFFSET, SEEK_SET);
5797
5798    # Check for Digital Signature
5799    $HeaderOffsetIndex->addOffset($fh->tell() - 4, ZIP_DIGITAL_SIGNATURE_SIG)
5800        if $fh->read($buffer, 4) == 4  &&
5801            unpack("V", $buffer) == ZIP_DIGITAL_SIGNATURE_SIG ;
5802
5803    $CentralDirectory->sortByLocalOffset();
5804    $HeaderOffsetIndex->sortOffsets();
5805
5806    $fh->seek($here, SEEK_SET) ;
5807
5808}
5809
5810use constant ZIP64_END_CENTRAL_LOC_HDR_SIZE     => 20;
5811use constant ZIP64_END_CENTRAL_REC_HDR_MIN_SIZE => 56;
5812
5813sub offsetFromZip64
5814{
5815    my $fh = shift ;
5816    my $here = shift;
5817    my $eocdSize = shift;
5818
5819    #### Zip64 end of central directory locator
5820
5821    # check enough bytes available for zip64 locator record
5822    fatal_tryWalk undef, "Cannot find signature for " .  Signatures::nameAndHex(ZIP64_END_CENTRAL_LOC_HDR_SIG), # 'Zip64 end of central directory locator': 0x07064b50"
5823                         "Possible truncated or corrupt zip file"
5824        if $here < ZIP64_END_CENTRAL_LOC_HDR_SIZE ;
5825
5826    $fh->seek($here - ZIP64_END_CENTRAL_LOC_HDR_SIZE, SEEK_SET) ;
5827    $here = $FH->tell();
5828
5829    my $buffer;
5830    my $got = 0;
5831    $fh->read($buffer, ZIP64_END_CENTRAL_LOC_HDR_SIZE);
5832
5833    my $gotSig = unpack("V", $buffer);
5834    fatal_tryWalk $here - 4, sprintf("Expected signature for " . Signatures::nameAndHex(ZIP64_END_CENTRAL_LOC_HDR_SIG) . " not found, got 0x%X", $gotSig)
5835        if $gotSig != ZIP64_END_CENTRAL_LOC_HDR_SIG ;
5836
5837    $HeaderOffsetIndex->addOffset($fh->tell() - ZIP64_END_CENTRAL_LOC_HDR_SIZE, ZIP64_END_CENTRAL_LOC_HDR_SIG) ;
5838
5839
5840    my $cd64 = unpack "Q<", substr($buffer,  8, 8);
5841    my $totalDisks = unpack "V", substr($buffer,  16, 4);
5842
5843    testPossiblePrefix($cd64, ZIP64_END_CENTRAL_REC_HDR_SIG);
5844
5845    if ($totalDisks > 0)
5846    {
5847        my $commonMessage = "'Offset to Zip64 End of Central Directory Record' field in '" . Signatures::name(ZIP64_END_CENTRAL_LOC_HDR_SIG) . "' is invalid";
5848        $cd64 = checkOffsetValue($cd64, $here, 0, $commonMessage, $here + 8, ZIP64_END_CENTRAL_REC_HDR_SIG, 1) ;
5849    }
5850
5851    my $delta = $here - $cd64;
5852
5853    #### Zip64 end of central directory record
5854
5855    my $zip64eocd_name = "'" . Signatures::name(ZIP64_END_CENTRAL_REC_HDR_SIG) . "'";
5856    my $zip64eocd_name_value = Signatures::nameAndHex(ZIP64_END_CENTRAL_REC_HDR_SIG);
5857    my $zip64eocd_value = Signatures::hexValue(ZIP64_END_CENTRAL_REC_HDR_SIG);
5858
5859    # check enough bytes available
5860    # fatal_tryWalk sprintf "Size of 'Zip64 End of Central Directory Record' 0x%X too small", $cd64
5861    fatal_tryWalk undef, sprintf "Size of $zip64eocd_name 0x%X too small", $cd64
5862        if $delta < ZIP64_END_CENTRAL_REC_HDR_MIN_SIZE;
5863
5864    # Seek to Zip64 End of Central Directory Record
5865    $fh->seek($cd64, SEEK_SET) ;
5866    $HeaderOffsetIndex->addOffsetNoPrefix($fh->tell(), ZIP64_END_CENTRAL_REC_HDR_SIG) ;
5867
5868    $fh->read($buffer, ZIP64_END_CENTRAL_REC_HDR_MIN_SIZE) ;
5869
5870    my $sig = unpack("V", substr($buffer, 0, 4)) ;
5871    fatal_tryWalk undef, sprintf "Cannot find $zip64eocd_name: expected $zip64eocd_value but got 0x%X", $sig
5872        if $sig != ZIP64_END_CENTRAL_REC_HDR_SIG ;
5873
5874    # pkzip sets the extract zip spec to 6.2 (0x3E) to signal a v2 record
5875    # See APPNOTE 6.3.10, section, 7.3.3
5876
5877    # Version 1 header is 44 bytes (assuming no extensible data sector)
5878    # Version 2 header (see APPNOTE 6.3.7, section) is > 44 bytes
5879
5880    my $extractSpec         = unpack "C",  substr($buffer, 14, 1);
5881    my $diskNumber          = unpack "V",  substr($buffer, 16, 4);
5882    my $cdDiskNumber        = unpack "V",  substr($buffer, 20, 4);
5883    my $entriesOnThisDisk   = unpack "Q<", substr($buffer, 24, 8);
5884    my $totalEntries        = unpack "Q<", substr($buffer, 32, 8);
5885    my $centralDirSize      = unpack "Q<", substr($buffer, 40, 8);
5886    my $centralDirOffset    = unpack "Q<", substr($buffer, 48, 8);
5887
5888    if ($extractSpec >= 0x3E)
5889    {
5890        $opt_walk = 1;
5891        $CentralDirectory->setPkEncryptedCD();
5892    }
5893
5894    if (! emptyArchive($here, $diskNumber, $cdDiskNumber, $entriesOnThisDisk, $totalEntries,  $centralDirSize, $centralDirOffset))
5895    {
5896        my $commonMessage = "'Offset to Central Directory' field in $zip64eocd_name is invalid";
5897        $centralDirOffset = checkOffsetValue($centralDirOffset, $here, 0, $commonMessage, $here + 48, ZIP_CENTRAL_HDR_SIG, 1, $extractSpec < 0x3E) ;
5898    }
5899
5900    # TODO - APPNOTE allows an extensible data sector here (see APPNOTE 6.3.10, section 4.3.14.2) -- need to take this into account
5901
5902    return ($centralDirOffset, $centralDirSize) ;
5903}
5904
5905use constant Pack_ZIP_END_CENTRAL_HDR_SIG => pack("V", ZIP_END_CENTRAL_HDR_SIG);
5906
5907sub findCentralDirectoryOffset
5908{
5909    my $fh = shift ;
5910
5911    # Most common use-case is where there is no comment, so
5912    # know exactly where the end of central directory record
5913    # should be.
5914
5915    need ZIP_EOCD_MIN_SIZE, Signatures::name(ZIP_END_CENTRAL_HDR_SIG);
5916
5917    $fh->seek(-ZIP_EOCD_MIN_SIZE(), SEEK_END) ;
5918    my $here = $fh->tell();
5919
5920    my $is64bit = $here > MAX32;
5921    my $over64bit = $here  & (~ MAX32);
5922
5923    my $buffer;
5924    $fh->read($buffer, ZIP_EOCD_MIN_SIZE);
5925
5926    my $zip64 = 0;
5927    my $diskNumber ;
5928    my $cdDiskNumber ;
5929    my $entriesOnThisDisk ;
5930    my $totalEntries ;
5931    my $centralDirSize ;
5932    my $centralDirOffset ;
5933    my $commentLength = 0;
5934    my $trailingBytes = 0;
5935
5936    if ( unpack("V", $buffer) == ZIP_END_CENTRAL_HDR_SIG ) {
5937
5938        $HeaderOffsetIndex->addOffset($here + $PREFIX_DELTA, ZIP_END_CENTRAL_HDR_SIG) ;
5939
5940        $diskNumber       = unpack("v", substr($buffer, 4,   2));
5941        $cdDiskNumber     = unpack("v", substr($buffer, 6,   2));
5942        $entriesOnThisDisk= unpack("v", substr($buffer, 8,   2));
5943        $totalEntries     = unpack("v", substr($buffer, 10,  2));
5944        $centralDirSize   = unpack("V", substr($buffer, 12,  4));
5945        $centralDirOffset = unpack("V", substr($buffer, 16,  4));
5946        $commentLength    = unpack("v", substr($buffer, 20,  2));
5947    }
5948    else {
5949        $fh->seek(0, SEEK_END) ;
5950
5951        my $fileLen = $fh->tell();
5952        my $want = 0 ;
5953
5954        while(1) {
5955            $want += 1024 * 32;
5956            my $seekTo = $fileLen - $want;
5957            if ($seekTo < 0 ) {
5958                $seekTo = 0;
5959                $want = $fileLen ;
5960            }
5961            $fh->seek( $seekTo, SEEK_SET);
5962            $fh->read($buffer, $want) ;
5963            my $pos = rindex( $buffer, Pack_ZIP_END_CENTRAL_HDR_SIG);
5964
5965            if ($pos >= 0 && $want - $pos > ZIP_EOCD_MIN_SIZE) {
5966                $here = $seekTo + $pos ;
5967                $HeaderOffsetIndex->addOffset($here + $PREFIX_DELTA, ZIP_END_CENTRAL_HDR_SIG) ;
5968
5969                $diskNumber       = unpack("v", substr($buffer, $pos + 4,   2));
5970                $cdDiskNumber     = unpack("v", substr($buffer, $pos + 6,   2));
5971                $entriesOnThisDisk= unpack("v", substr($buffer, $pos + 8,   2));
5972                $totalEntries     = unpack("v", substr($buffer, $pos + 10,  2));
5973                $centralDirSize   = unpack("V", substr($buffer, $pos + 12,  4));
5974                $centralDirOffset = unpack("V", substr($buffer, $pos + 16,  4));
5975                $commentLength    = unpack("v", substr($buffer, $pos + 20,  2)) // 0;
5976
5977                my $expectedEof = $fileLen - $want + $pos + ZIP_EOCD_MIN_SIZE + $commentLength  ;
5978                # check for trailing data after end of zip
5979                if ($expectedEof < $fileLen ) {
5980                    $TRAILING = $expectedEof ;
5981                    $trailingBytes = $FILELEN - $expectedEof ;
5982                }
5983                last ;
5984            }
5985
5986            return undef
5987                if $want == $fileLen;
5988
5989        }
5990    }
5991
5992    $EOCD_Present = 1;
5993
5994    # Empty zip file can just contain an EOCD record
5995    return (0, 0)
5996        if ZIP_EOCD_MIN_SIZE + $commentLength + $trailingBytes  == $FILELEN ;
5997
5998    if (needZip64EOCDLocator($diskNumber, $cdDiskNumber, $entriesOnThisDisk, $totalEntries, $centralDirOffset, $centralDirSize) &&
5999        ! emptyArchive($here, $diskNumber, $cdDiskNumber, $entriesOnThisDisk, $totalEntries, $centralDirOffset, $centralDirSize))
6000    {
6001        ($centralDirOffset, $centralDirSize) = offsetFromZip64($fh, $here, ZIP_EOCD_MIN_SIZE + $commentLength + $trailingBytes)
6002    }
6003    elsif ($is64bit)
6004    {
6005        # use-case is where a 64-bit zip file doesn't use the 64-bit
6006        # extensions.
6007        # print "EOCD not 64-bit $centralDirOffset ($here)\n" ;
6008
6009        fatal_tryWalk $here, "Zip file > 4Gig. Expected 'Offset to Central Dir' to be 0xFFFFFFFF, got " . hexValue($centralDirOffset);
6010
6011        $centralDirOffset += $over64bit;
6012        $is64In32 = 1;
6013    }
6014    else
6015    {
6016        if ($centralDirSize)
6017        {
6018            my $commonMessage = "'Offset to Central Directory' field in '" . Signatures::name(ZIP_END_CENTRAL_HDR_SIG) . "' is invalid";
6019            $centralDirOffset = checkOffsetValue($centralDirOffset, $here, $centralDirSize, $commonMessage, $here + 16, ZIP_CENTRAL_HDR_SIG, 1) ;
6020        }
6021    }
6022
6023    return (0, 0)
6024        if  $totalEntries == 0 && $entriesOnThisDisk == 0;
6025
6026    # APK Signing Block is directly before the first CD entry
6027    # Check if it is present
6028    ($START_APK, $APK, $APK_LEN) = chckForAPKSigningBlock($fh, $centralDirOffset, ZIP_EOCD_MIN_SIZE + $commentLength);
6029
6030    return ($centralDirOffset, $centralDirSize) ;
6031}
6032
6033sub findID
6034{
6035    my $id_want = shift ;
6036    my $data    = shift;
6037
6038    my $XLEN = length $data ;
6039
6040    my $offset = 0 ;
6041    while ($offset < $XLEN) {
6042
6043        return undef
6044            if $offset + ZIP_EXTRA_SUBFIELD_HEADER_SIZE  > $XLEN ;
6045
6046        my $id = substr($data, $offset, ZIP_EXTRA_SUBFIELD_ID_SIZE);
6047        $id = unpack("v", $id);
6048        $offset += ZIP_EXTRA_SUBFIELD_ID_SIZE;
6049
6050        my $subLen =  unpack("v", substr($data, $offset,
6051                                            ZIP_EXTRA_SUBFIELD_LEN_SIZE));
6052        $offset += ZIP_EXTRA_SUBFIELD_LEN_SIZE ;
6053
6054        return undef
6055            if $offset + $subLen > $XLEN ;
6056
6057        return substr($data, $offset, $subLen)
6058            if $id eq $id_want ;
6059
6060        $offset += $subLen ;
6061    }
6062
6063    return undef ;
6064}
6065
6066
6067sub nibbles
6068{
6069    my @nibbles = (
6070        [ 16 => 0x1000000000000000 ],
6071        [ 15 => 0x100000000000000 ],
6072        [ 14 => 0x10000000000000 ],
6073        [ 13 => 0x1000000000000 ],
6074        [ 12 => 0x100000000000 ],
6075        [ 11 => 0x10000000000 ],
6076        [ 10 => 0x1000000000 ],
6077        [  9 => 0x100000000 ],
6078        [  8 => 0x10000000 ],
6079        [  7 => 0x1000000 ],
6080        [  6 => 0x100000 ],
6081        [  5 => 0x10000 ],
6082        [  4 => 0x1000 ],
6083        [  4 => 0x100 ],
6084        [  4 => 0x10 ],
6085        [  4 => 0x1 ],
6086    );
6087    my $value = shift ;
6088
6089    for my $pair (@nibbles)
6090    {
6091        my ($count, $limit) = @{ $pair };
6092
6093        return $count
6094            if $value >= $limit ;
6095    }
6096}
6097
6098{
6099    package HeaderOffsetEntry;
6100
6101    sub new
6102    {
6103        my $class = shift ;
6104        my $offset = shift ;
6105        my $signature = shift;
6106
6107        bless [ $offset, $signature, Signatures::name($signature)] , $class;
6108
6109    }
6110
6111    sub offset
6112    {
6113        my $self = shift;
6114        return $self->[0];
6115    }
6116
6117    sub signature
6118    {
6119        my $self = shift;
6120        return $self->[1];
6121    }
6122
6123    sub name
6124    {
6125        my $self = shift;
6126        return $self->[2];
6127    }
6128
6129}
6130
6131{
6132    package HeaderOffsetIndex;
6133
6134    # Store a list of header offsets recorded when scannning the central directory
6135
6136    sub new
6137    {
6138        my $class = shift ;
6139
6140        my %object = (
6141                        'offsetIndex'       => [],
6142                        'offset2Index'      => {},
6143                        'offset2Signature'  => {},
6144                        'currentIndex'      => -1,
6145                        'currentSignature'  => 0,
6146                        # 'sigNames'          => $sigNames,
6147                     ) ;
6148
6149        bless \%object, $class;
6150    }
6151
6152    sub sortOffsets
6153    {
6154        my $self = shift ;
6155
6156        @{ $self->{offsetIndex} } = sort { $a->[0] <=> $b->[0] }
6157                                    @{ $self->{offsetIndex} };
6158        my $ix = 0;
6159        $self->{offset2Index}{$_} = $ix++
6160            for @{ $self->{offsetIndex} } ;
6161    }
6162
6163    sub addOffset
6164    {
6165        my $self = shift ;
6166        my $offset = shift ;
6167        my $signature = shift ;
6168
6169        $offset += $PREFIX_DELTA ;
6170        $self->addOffsetNoPrefix($offset, $signature);
6171    }
6172
6173    sub addOffsetNoPrefix
6174    {
6175        my $self = shift ;
6176        my $offset = shift ;
6177        my $signature = shift ;
6178
6179        my $name = Signatures::name($signature);
6180
6181        if (! defined $self->{offset2Signature}{$offset})
6182        {
6183            push @{ $self->{offsetIndex} }, HeaderOffsetEntry->new($offset, $signature) ;
6184            $self->{offset2Signature}{$offset} = $signature;
6185        }
6186    }
6187
6188    sub getNextIndex
6189    {
6190        my $self = shift ;
6191        my $offset = shift ;
6192
6193        $self->{currentIndex} ++;
6194
6195        return ${ $self->{offsetIndex} }[$self->{currentIndex}] // undef
6196    }
6197
6198    sub rewindIndex
6199    {
6200        my $self = shift ;
6201        my $offset = shift ;
6202
6203        $self->{currentIndex} --;
6204    }
6205
6206    sub dump
6207    {
6208        my $self = shift;
6209
6210        say "### HeaderOffsetIndex";
6211        say "###   Offset\tSignature";
6212        for my $x ( @{ $self->{offsetIndex} } )
6213        {
6214            my ($offset, $sig) = @$x;
6215            printf "###   %X %d\t\t" . $x->name() . "\n", $x->offset(), $x->offset();
6216        }
6217    }
6218
6219    sub checkForOverlap
6220    {
6221        my $self = shift ;
6222        my $need = shift;
6223
6224        my $needOffset = $FH->tell() + $need;
6225
6226        for my $hdrOffset (@{ $self->{offsetIndex} })
6227        {
6228            my $delta = $hdrOffset - $needOffset;
6229            return [$self->{offsetIndex}{$hdrOffset}, $needOffset - $hdrOffset]
6230                if $delta <= 0 ;
6231        }
6232
6233        return [undef, undef];
6234    }
6235
6236}
6237
6238{
6239    package FieldsAndAccessors;
6240
6241    sub Add
6242    {
6243        use Data::Dumper ;
6244
6245        my $classname = shift;
6246        my $object = shift;
6247        my $fields = shift ;
6248        my $no_handler = shift // {};
6249
6250        state $done = {};
6251
6252
6253        while (my ($name, $value) =  each %$fields)
6254        {
6255            my $method = "${classname}::$name";
6256
6257            $object->{$name} = $value;
6258
6259            # don't auto-create a handler
6260            next
6261                if $no_handler->{$name};
6262
6263            no strict 'refs';
6264
6265            # Don't use lvalue sub for now - vscode debugger breaks with it enabled.
6266            # https://github.com/richterger/Perl-LanguageServer/issues/194
6267            # *$method = sub : lvalue {
6268            #     $_[0]->{$name} ;
6269            # }
6270            # unless defined $done->{$method};
6271
6272            # Auto-generate getter/setter
6273            *$method = sub  {
6274                $_[0]->{$name} = $_[1]
6275                    if @_ == 2;
6276                return $_[0]->{$name} ;
6277            }
6278            unless defined $done->{$method};
6279
6280            ++ $done->{$method};
6281
6282
6283        }
6284    }
6285}
6286
6287{
6288    package BaseEntry ;
6289
6290    sub new
6291    {
6292        my $class = shift ;
6293
6294        state $index = 0;
6295
6296        my %fields = (
6297                        'index'                 => $index ++,
6298                        'zip64'                 => 0,
6299                        'offsetStart'           => 0,
6300                        'offsetEnd'             => 0,
6301                        'inCentralDir'          => 0,
6302                        'encapsulated'          => 0, # enclosed in outer zip
6303                        'childrenCount'         => 0, # this entry is a zip with enclosed children
6304                        'streamed'              => 0,
6305                        'languageEncodingFlag'  => 0,
6306                        'entryType'             => 0,
6307                     ) ;
6308
6309        my $self = bless {}, $class;
6310
6311        FieldsAndAccessors::Add($class, $self, \%fields) ;
6312
6313        return $self;
6314    }
6315
6316    sub increment_childrenCount
6317    {
6318        my $self = shift;
6319        $self->{childrenCount} ++;
6320    }
6321}
6322
6323{
6324    package LocalCentralEntryBase ;
6325
6326    use parent -norequire , 'BaseEntry' ;
6327
6328    sub new
6329    {
6330        my $class = shift ;
6331
6332        my $self = $class->SUPER::new();
6333
6334
6335        my %fields = (
6336                        # fields from the header
6337                        'centralHeaderOffset'   => 0,
6338                        'localHeaderOffset'     => 0,
6339
6340                        'extractVersion'        => 0,
6341                        'generalPurposeFlags'   => 0,
6342                        'compressedMethod'      => 0,
6343                        'lastModDateTime'       => 0,
6344                        'crc32'                 => 0,
6345                        'compressedSize'        => 0,
6346                        'uncompressedSize'      => 0,
6347                        'filename'              => '',
6348                        'outputFilename'        => '',
6349                        # inferred data
6350                        # 'InCentralDir'          => 0,
6351                        # 'zip64'                 => 0,
6352
6353                        'zip64ExtraPresent'     => 0,
6354                        'zip64SizesPresent'     => 0,
6355                        'payloadOffset'         => 0,
6356
6357                        # zip64 extra
6358                        'zip64_compressedSize'    => undef,
6359                        'zip64_uncompressedSize'  => undef,
6360                        'zip64_localHeaderOffset' => undef,
6361                        'zip64_diskNumber'        => undef,
6362                        'zip64_diskNumberPresent' => 0,
6363
6364                        # Values direct from the header before merging any Zip64 values
6365                        'std_compressedSize'    => undef,
6366                        'std_uncompressedSize'  => undef,
6367                        'std_localHeaderOffset' => undef,
6368                        'std_diskNumber'        => undef,
6369
6370                        # AES
6371                        'aesStrength'             => 0,
6372                        'aesValid'                => 0,
6373
6374                        # Minizip CD encryption
6375                        'minizip_secure'          => 0,
6376
6377                     ) ;
6378
6379        FieldsAndAccessors::Add($class, $self, \%fields) ;
6380
6381        return $self;
6382    }
6383}
6384
6385{
6386    package Zip64EndCentralHeaderEntry ;
6387
6388    use parent -norequire , 'LocalCentralEntryBase' ;
6389
6390    sub new
6391    {
6392        my $class = shift ;
6393
6394        my $self = $class->SUPER::new();
6395
6396
6397        my %fields = (
6398                        'inCentralDir'          => 1,
6399                     ) ;
6400
6401        FieldsAndAccessors::Add($class, $self, \%fields) ;
6402
6403        return $self;
6404    }
6405
6406}
6407
6408{
6409    package CentralDirectoryEntry;
6410
6411    use parent -norequire , 'LocalCentralEntryBase' ;
6412
6413    use constant Offset_VersionMadeBy           => 4;
6414    use constant Offset_VersionNeededToExtract  => 6;
6415    use constant Offset_GeneralPurposeFlags     => 8;
6416    use constant Offset_CompressionMethod       => 10;
6417    use constant Offset_ModificationTime        => 12;
6418    use constant Offset_ModificationDate        => 14;
6419    use constant Offset_CRC32                   => 16;
6420    use constant Offset_CompressedSize          => 20;
6421    use constant Offset_UncompressedSize        => 24;
6422    use constant Offset_FilenameLength          => 28;
6423    use constant Offset_ExtraFieldLength        => 30;
6424    use constant Offset_FileCommentLength       => 32;
6425    use constant Offset_DiskNumber              => 34;
6426    use constant Offset_InternalAttributes      => 36;
6427    use constant Offset_ExternalAttributes      => 38;
6428    use constant Offset_RelativeOffsetToLocal   => 42;
6429    use constant Offset_Filename                => 46;
6430
6431    sub new
6432    {
6433        my $class = shift ;
6434        my $offset = shift;
6435
6436        # check for existing entry
6437        return $CentralDirectory->{byCentralOffset}{$offset}
6438            if defined $offset && defined $CentralDirectory->{byCentralOffset}{$offset} ;
6439
6440        my $self = $class->SUPER::new();
6441
6442        my %fields = (
6443                        'diskNumber'                => 0,
6444                        'comment'                   => "",
6445                        'ldEntry'                   => undef,
6446                     ) ;
6447
6448        FieldsAndAccessors::Add($class, $self, \%fields) ;
6449
6450        $self->inCentralDir(1) ;
6451        $self->entryType(::ZIP_CENTRAL_HDR_SIG) ;
6452
6453        return $self;
6454    }
6455}
6456
6457{
6458    package CentralDirectory;
6459
6460    sub new
6461    {
6462        my $class = shift ;
6463
6464        my %object = (
6465                        'entries'       => [],
6466                        'count'         => 0,
6467                        'byLocalOffset' => {},
6468                        'byCentralOffset' => {},
6469                        'byName'        => {},
6470                        'offset2Index' => {},
6471                        'normalized_filenames' => {},
6472                        'CentralDirectoryOffset'      => 0,
6473                        'CentralDirectorySize'      => 0,
6474                        'zip64'         => 0,
6475                        'encryptedCD'   => 0,
6476                        'minizip_secure' => 0,
6477                        'alreadyScanned' => 0,
6478                     ) ;
6479
6480        bless \%object, $class;
6481    }
6482
6483    sub addEntry
6484    {
6485        my $self = shift ;
6486        my $entry = shift ;
6487
6488        my $localHeaderOffset = $entry->localHeaderOffset  ;
6489        my $CentralDirectoryOffset = $entry->centralHeaderOffset ;
6490        my $filename = $entry->filename ;
6491
6492        Nesting::add($entry);
6493
6494        # Create a reference from Central to Local header entries
6495        my $ldEntry = Nesting::getLdEntryByOffset($localHeaderOffset);
6496        if ($ldEntry)
6497        {
6498            $entry->ldEntry($ldEntry) ;
6499
6500            # LD -> CD
6501            # can have multiple LD entries point to same CD
6502            # so need to keep a list
6503            $ldEntry->addCdEntry($entry);
6504        }
6505
6506        # only check for duplicate in real CD scan
6507        if ($self->{alreadyScanned} && ! $entry->encapsulated )
6508        {
6509            my $existing = $self->{byName}{$filename} ;
6510            if ($existing && $existing->centralHeaderOffset != $entry->centralHeaderOffset)
6511            {
6512                ::error $CentralDirectoryOffset,
6513                        "Duplicate Central Directory entries for filename '$filename'",
6514                        "Current Central Directory entry at offset " . ::decimalHex0x($CentralDirectoryOffset),
6515                        "Duplicate Central Directory entry at offset " . ::decimalHex0x($self->{byName}{$filename}{centralHeaderOffset});
6516
6517                # not strictly illegal to have duplicate filename, so save this one
6518            }
6519            else
6520            {
6521                my $existingNormalizedEntry = $self->normalize_filename($entry, $filename);
6522                if ($existingNormalizedEntry)
6523                {
6524                    ::warning $CentralDirectoryOffset,
6525                            "Portability Issue: Found case-insensitive duplicate for filename '$filename'",
6526                            "Current Central Directory entry at offset " . ::decimalHex0x($CentralDirectoryOffset),
6527                            "Duplicate Central Directory entry for filename '" . $existingNormalizedEntry->outputFilename . "' at offset " . ::decimalHex0x($existingNormalizedEntry->centralHeaderOffset);
6528                }
6529            }
6530        }
6531
6532        # CD can get processed twice, so return if already processed
6533        return
6534            if $self->{byCentralOffset}{$CentralDirectoryOffset} ;
6535
6536        if (! $entry->encapsulated )
6537        {
6538            push @{ $self->{entries} }, $entry;
6539
6540            $self->{byLocalOffset}{$localHeaderOffset} = $entry;
6541            $self->{byCentralOffset}{$CentralDirectoryOffset} = $entry;
6542            $self->{byName}{ $filename } = $entry;
6543            $self->{offset2Index} = $self->{count} ++;
6544        }
6545
6546    }
6547
6548    sub exists
6549    {
6550        my $self = shift ;
6551
6552        return scalar @{ $self->{entries} };
6553    }
6554
6555    sub sortByLocalOffset
6556    {
6557        my $self = shift ;
6558
6559        @{ $self->{entries} } = sort { $a->localHeaderOffset() <=> $b->localHeaderOffset() }
6560                                @{ $self->{entries} };
6561    }
6562
6563    sub getByLocalOffset
6564    {
6565        my $self = shift ;
6566        my $offset = shift ;
6567
6568        # TODO - what happens if none exists?
6569        my $entry = $self->{byLocalOffset}{$offset - $PREFIX_DELTA} ;
6570        return $entry ;
6571    }
6572
6573    sub localOffset
6574    {
6575        my $self = shift ;
6576        my $offset = shift ;
6577
6578        # TODO - what happens if none exists?
6579        return $self->{byLocalOffset}{$offset - $PREFIX_DELTA} ;
6580    }
6581
6582    sub getNextLocalOffset
6583    {
6584        my $self = shift ;
6585        my $offset = shift ;
6586
6587        my $index = $self->{offset2Index} ;
6588
6589        if ($index + 1 >= $self->{count})
6590        {
6591            return 0;
6592        }
6593
6594        return ${ $self->{entries} }[$index+1]->localHeaderOffset() ;
6595    }
6596
6597    sub inCD
6598    {
6599        my $self = shift ;
6600        $FH->tell() >= $self->{CentralDirectoryOffset};
6601    }
6602
6603    sub setPkEncryptedCD
6604    {
6605        my $self = shift ;
6606
6607        $self->{encryptedCD} = 1 ;
6608
6609    }
6610
6611    sub setMiniZipEncrypted
6612    {
6613        my $self = shift ;
6614
6615        $self->{minizip_secure} = 1 ;
6616    }
6617
6618    sub isMiniZipEncrypted
6619    {
6620        my $self = shift ;
6621        return $self->{minizip_secure};
6622    }
6623
6624    sub isEncryptedCD
6625    {
6626        my $self = shift ;
6627        return $self->{encryptedCD} && ! $self->{minizip_secure};
6628    }
6629
6630    sub normalize_filename
6631    {
6632        # check if there is a filename that already exists
6633        # with the same name when normalized to lower case
6634
6635        my $self = shift ;
6636        my $entry = shift;
6637        my $filename = shift;
6638
6639        my $nFilename = lc $filename;
6640
6641        my $lookup = $self->{normalized_filenames}{$nFilename};
6642        # if ($lookup && $lookup ne $filename)
6643        if ($lookup)
6644        {
6645            return $lookup,
6646        }
6647
6648        $self->{normalized_filenames}{$nFilename} = $entry;
6649
6650        return undef;
6651    }
6652}
6653
6654{
6655    package LocalDirectoryEntry;
6656
6657    use parent -norequire , 'LocalCentralEntryBase' ;
6658
6659    use constant Offset_VersionNeededToExtract  => 4;
6660    use constant Offset_GeneralPurposeFlags     => 6;
6661    use constant Offset_CompressionMethod       => 8;
6662    use constant Offset_ModificationTime        => 10;
6663    use constant Offset_ModificationDate        => 12;
6664    use constant Offset_CRC32                   => 14;
6665    use constant Offset_CompressedSize          => 18;
6666    use constant Offset_UncompressedSize        => 22;
6667    use constant Offset_FilenameLength          => 26;
6668    use constant Offset_ExtraFieldLength        => 27;
6669    use constant Offset_Filename                => 30;
6670
6671    sub new
6672    {
6673        my $class = shift ;
6674
6675        my $self = $class->SUPER::new();
6676
6677        my %fields = (
6678                        'streamedMatch'         => 0,
6679                        'readDataDescriptor'    => 0,
6680                        'cdEntryIndex'          => {},
6681                        'cdEntryList'           => [],
6682                     ) ;
6683
6684        FieldsAndAccessors::Add($class, $self, \%fields) ;
6685
6686        $self->inCentralDir(0) ;
6687        $self->entryType(::ZIP_LOCAL_HDR_SIG) ;
6688
6689        return $self;
6690    }
6691
6692    sub addCdEntry
6693    {
6694        my $self = shift ;
6695        my $entry = shift;
6696
6697        # don't want encapsulated entries
6698        # and protect against duplicates
6699        return
6700            if $entry->encapsulated ||
6701               $self->{cdEntryIndex}{$entry->index} ++ >= 1;
6702
6703        push @{ $self->{cdEntryList} }, $entry ;
6704    }
6705
6706    sub getCdEntry
6707    {
6708        my $self = shift ;
6709
6710        return []
6711            if ! $self->{cdEntryList} ;
6712
6713        return $self->{cdEntryList}[0] ;
6714    }
6715
6716    sub getCdEntries
6717    {
6718        my $self = shift ;
6719        return $self->{cdEntryList} ;
6720    }
6721}
6722
6723{
6724    package LocalDirectory;
6725
6726    sub new
6727    {
6728        my $class = shift ;
6729
6730        my %object = (
6731                        'entries'       => [],
6732                        'count'         => 0,
6733                        'byLocalOffset' => {},
6734                        'byName'        => {},
6735                        'offset2Index' => {},
6736                        'normalized_filenames' => {},
6737                        'CentralDirectoryOffset'      => 0,
6738                        'CentralDirectorySize'      => 0,
6739                        'zip64'         => 0,
6740                        'encryptedCD'   => 0,
6741                        'streamedPresent' => 0,
6742                     ) ;
6743
6744        bless \%object, $class;
6745    }
6746
6747    sub isLocalEntryNested
6748    {
6749        my $self = shift ;
6750        my $localEntry = shift;
6751
6752        return Nesting::getFirstEncapsulation($localEntry);
6753
6754    }
6755
6756    sub addEntry
6757    {
6758        my $self = shift ;
6759        my $localEntry = shift ;
6760
6761        my $filename = $localEntry->filename ;
6762        my $localHeaderOffset = $localEntry->localHeaderOffset;
6763        my $payloadOffset = $localEntry->payloadOffset ;
6764
6765        my $existingEntry = $self->{byName}{$filename} ;
6766
6767        my $endSurfaceArea = $payloadOffset + ($localEntry->compressedSize // 0)  ;
6768
6769        if ($existingEntry)
6770        {
6771            ::error $localHeaderOffset,
6772                    "Duplicate Local Directory entry for filename '$filename'",
6773                    "Current Local Directory entry at offset " . ::decimalHex0x($localHeaderOffset),
6774                    "Duplicate Local Directory entry at offset " . ::decimalHex0x($existingEntry->localHeaderOffset),
6775        }
6776        else
6777        {
6778
6779            my ($existing_filename, $offset) = $self->normalize_filename($filename);
6780            if ($existing_filename)
6781            {
6782                ::warning $localHeaderOffset,
6783                        "Portability Issue: Found case-insensitive duplicate for filename '$filename'",
6784                        "Current Local Directory entry at offset " . ::decimalHex0x($localHeaderOffset),
6785                        "Duplicate Local Directory entry for filename '$existing_filename' at offset " . ::decimalHex0x($offset);
6786            }
6787        }
6788
6789        # keep nested local entries for zipbomb deteection
6790        push @{ $self->{entries} }, $localEntry;
6791
6792        $self->{byLocalOffset}{$localHeaderOffset} = $localEntry;
6793        $self->{byName}{ $filename } = $localEntry;
6794
6795        $self->{streamedPresent} ++
6796            if $localEntry->streamed;
6797
6798        Nesting::add($localEntry);
6799    }
6800
6801    sub exists
6802    {
6803        my $self = shift ;
6804
6805        return scalar @{ $self->{entries} };
6806    }
6807
6808    sub sortByLocalOffset
6809    {
6810        my $self = shift ;
6811
6812        @{ $self->{entries} } = sort { $a->localHeaderOffset() <=> $b->localHeaderOffset() }
6813                                @{ $self->{entries} };
6814    }
6815
6816    sub localOffset
6817    {
6818        my $self = shift ;
6819        my $offset = shift ;
6820
6821        return $self->{byLocalOffset}{$offset} ;
6822    }
6823
6824    sub getByLocalOffset
6825    {
6826        my $self = shift ;
6827        my $offset = shift ;
6828
6829        # TODO - what happens if none exists?
6830        my $entry = $self->{byLocalOffset}{$offset} ;
6831        return $entry ;
6832    }
6833
6834    sub getNextLocalOffset
6835    {
6836        my $self = shift ;
6837        my $offset = shift ;
6838
6839        my $index = $self->{offset2Index} ;
6840
6841        if ($index + 1 >= $self->{count})
6842        {
6843            return 0;
6844        }
6845
6846        return ${ $self->{entries} }[$index+1]->localHeaderOffset ;
6847    }
6848
6849    sub lastStreamedEntryAdded
6850    {
6851        my $self = shift ;
6852        my $offset = shift ;
6853
6854        for my $entry ( reverse @{ $self->{entries} } )
6855        {
6856            if ($entry->streamed)# && ! $entry->streamedMatch)
6857            {
6858                $entry->streamedMatch($entry->streamedMatch + 1) ;
6859                return $entry;
6860            }
6861        }
6862
6863        return undef;
6864    }
6865
6866    sub inCD
6867    {
6868        my $self = shift ;
6869        $FH->tell() >= $self->{CentralDirectoryOffset};
6870    }
6871
6872    sub setPkEncryptedCD
6873    {
6874        my $self = shift ;
6875
6876        $self->{encryptedCD} = 1 ;
6877
6878    }
6879
6880    sub isEncryptedCD
6881    {
6882        my $self = shift ;
6883        return $self->{encryptedCD} ;
6884    }
6885
6886    sub anyStreamedEntries
6887    {
6888        my $self = shift ;
6889        return $self->{streamedPresent} ;
6890    }
6891
6892    sub normalize_filename
6893    {
6894        # check if there is a filename that already exists
6895        # with the same name when normalized to lower case
6896
6897        my $self = shift ;
6898        my $filename = shift;
6899
6900        my $nFilename = lc $filename;
6901
6902        my $lookup = $self->{normalized_filenames}{$nFilename};
6903        if ($lookup && $lookup ne $filename)
6904        {
6905            return $self->{byName}{$lookup}{outputFilename},
6906                   $self->{byName}{$lookup}{localHeaderOffset}
6907        }
6908
6909        $self->{normalized_filenames}{$nFilename} = $filename;
6910
6911        return undef, undef;
6912    }
6913}
6914
6915{
6916    package Eocd ;
6917
6918    sub new
6919    {
6920        my $class = shift ;
6921
6922        my %object = (
6923                        'zip64'       => 0,
6924                     ) ;
6925
6926        bless \%object, $class;
6927    }
6928}
6929
6930sub displayFileInfo
6931{
6932    return;
6933
6934    my $filename = shift;
6935
6936    info undef,
6937        "Filename       : '$filename'",
6938        "Size           : " . (-s $filename) . " (" . decimalHex0x(-s $filename) . ")",
6939        # "Native Encoding: '" . TextEncoding::getNativeLocaleName() . "'",
6940}
6941
6942{
6943    package TextEncoding;
6944
6945    my $nativeLocaleEncoding = getNativeLocale();
6946    my $opt_EncodingFrom = $nativeLocaleEncoding;
6947    my $opt_EncodingTo = $nativeLocaleEncoding ;
6948    my $opt_Encoding_Enabled;
6949    my $opt_Debug_Encoding;
6950    my $opt_use_LanguageEncodingFlag;
6951
6952    sub setDefaults
6953    {
6954        $nativeLocaleEncoding = getNativeLocale();
6955        $opt_EncodingFrom = $nativeLocaleEncoding;
6956        $opt_EncodingTo = $nativeLocaleEncoding ;
6957        $opt_Encoding_Enabled = 1;
6958        $opt_Debug_Encoding = 0;
6959        $opt_use_LanguageEncodingFlag = 1;
6960    }
6961
6962    sub getNativeLocale
6963    {
6964        state $enc;
6965
6966        if (! defined $enc)
6967        {
6968            eval
6969            {
6970                require encoding ;
6971                my $encoding = encoding::_get_locale_encoding() ;
6972                if (! $encoding)
6973                {
6974                    # CP437 is the legacy default for zip files
6975                    $encoding = 'cp437';
6976                    # ::warning undef, "Cannot determine system charset: defaulting to '$encoding'"
6977                }
6978                $enc = Encode::find_encoding($encoding) ;
6979            } ;
6980        }
6981
6982        return $enc;
6983    }
6984
6985    sub getNativeLocaleName
6986    {
6987        state $name;
6988
6989        return $name
6990            if defined $name ;
6991
6992        if (! defined $name)
6993        {
6994            my $enc = getNativeLocale();
6995            if ($enc)
6996            {
6997                $name = $enc->name()
6998            }
6999            else
7000            {
7001                $name = 'unknown'
7002            }
7003        }
7004
7005        return $name ;
7006    }
7007
7008    sub parseEncodingOption
7009    {
7010        my $opt_name = shift;
7011        my $opt_value = shift;
7012
7013        my $enc = Encode::find_encoding($opt_value) ;
7014        die "Encoding '$opt_value' not found for option '$opt_name'\n"
7015            unless ref $enc;
7016
7017        if ($opt_name eq 'encoding')
7018        {
7019            $opt_EncodingFrom = $enc;
7020        }
7021        elsif ($opt_name eq 'output-encoding')
7022        {
7023            $opt_EncodingTo = $enc;
7024        }
7025        else
7026        {
7027            die "Unknown option $opt_name\n"
7028        }
7029    }
7030
7031    sub NoEncoding
7032    {
7033        my $opt_name = shift;
7034        my $opt_value = shift;
7035
7036        $opt_Encoding_Enabled = 0 ;
7037    }
7038
7039    sub LanguageEncodingFlag
7040    {
7041        my $opt_name = shift;
7042        my $opt_value = shift;
7043
7044        $opt_use_LanguageEncodingFlag = $opt_value ;
7045    }
7046
7047    sub debugEncoding
7048    {
7049        if (@_)
7050        {
7051            $opt_Debug_Encoding = 1 ;
7052        }
7053
7054        return $opt_Debug_Encoding ;
7055    }
7056
7057    sub encodingInfo
7058    {
7059        return
7060            unless $opt_Encoding_Enabled && $opt_Debug_Encoding ;
7061
7062        my $enc  = TextEncoding::getNativeLocaleName();
7063        my $from = $opt_EncodingFrom->name();
7064        my $to   = $opt_EncodingTo->name();
7065
7066        ::debug undef, "Debug Encoding Enabled",
7067                      "System Default Encoding:                  '$enc'",
7068                      "Encoding used when reading from zip file: '$from'",
7069                      "Encoding used for display output:         '$to'";
7070    }
7071
7072    sub cleanEval
7073    {
7074        chomp $_[0] ;
7075        $_[0] =~ s/ at .+ line \d+\.$// ;
7076        return $_[0];
7077    }
7078
7079    sub decode
7080    {
7081        my $name = shift ;
7082        my $type = shift ;
7083        my $LanguageEncodingFlag = shift ;
7084
7085        return $name
7086            if ! $opt_Encoding_Enabled ;
7087
7088        # TODO - check for badly formed content
7089        if ($LanguageEncodingFlag && $opt_use_LanguageEncodingFlag)
7090        {
7091            # use "utf-8-strict" to catch invalid codepoints
7092            eval { $name = Encode::decode('utf-8-strict', $name, Encode::FB_CROAK ) } ;
7093            ::warning $FH->tell() - length $name, "Could not decode 'UTF-8' $type: " . cleanEval $@
7094                if $@ ;
7095        }
7096        else
7097        {
7098            eval { $name = $opt_EncodingFrom->decode($name, Encode::FB_CROAK ) } ;
7099            ::warning $FH->tell() - length $name, "Could not decode '" . $opt_EncodingFrom->name() . "' $type: " . cleanEval $@
7100                if $@;
7101        }
7102
7103        # remove any BOM
7104        $name =~ s/^\x{FEFF}//;
7105
7106        return $name ;
7107    }
7108
7109    sub encode
7110    {
7111        my $name = shift ;
7112        my $type = shift ;
7113        my $LanguageEncodingFlag = shift ;
7114
7115        return $name
7116            if ! $opt_Encoding_Enabled;
7117
7118        if ($LanguageEncodingFlag && $opt_use_LanguageEncodingFlag)
7119        {
7120            eval { $name = Encode::encode('utf8', $name, Encode::FB_CROAK ) } ;
7121            ::warning $FH->tell() - length $name, "Could not encode 'utf8' $type: " . cleanEval $@
7122                if $@ ;
7123        }
7124        else
7125        {
7126            eval { $name = $opt_EncodingTo->encode($name, Encode::FB_CROAK ) } ;
7127            ::warning $FH->tell() - length $name, "Could not encode '" . $opt_EncodingTo->name() . "' $type: " . cleanEval $@
7128                if $@;
7129        }
7130
7131        return $name;
7132    }
7133}
7134
7135{
7136    package Nesting;
7137
7138    use Data::Dumper;
7139
7140    my @nestingStack = ();
7141    my %encapsulations;
7142    my %inner2outer;
7143    my $encapsulationCount  = 0;
7144    my %index2entry ;
7145    my %offset2entry ;
7146
7147    # my %localOffset2cdEntry;
7148
7149    sub clearStack
7150    {
7151        @nestingStack = ();
7152        %encapsulations = ();
7153        %inner2outer = ();
7154        %index2entry = ();
7155        %offset2entry = ();
7156        $encapsulationCount = 0;
7157    }
7158
7159    sub dump
7160    {
7161        my $indent = shift // 0;
7162
7163        for my $offset (sort {$a <=> $b} keys %offset2entry)
7164        {
7165            my $leading = " " x $indent ;
7166            say $leading . "\nOffset $offset" ;
7167            say Dumper($offset2entry{$offset})
7168        }
7169    }
7170
7171    sub add
7172    {
7173        my $entry = shift;
7174
7175        getEnclosingEntry($entry);
7176        push @nestingStack, $entry;
7177        $index2entry{ $entry->index } = $entry;
7178        $offset2entry{ $entry->offsetStart } = $entry;
7179    }
7180
7181    sub getEnclosingEntry
7182    {
7183        my $entry = shift;
7184
7185        my $filename = $entry->filename;
7186
7187        pop @nestingStack
7188            while @nestingStack && $entry->offsetStart > $nestingStack[-1]->offsetEnd ;
7189
7190        my $match = undef;
7191
7192        if (@nestingStack &&
7193            $entry->offsetStart >= $nestingStack[-1]->offsetStart &&
7194            $entry->offsetEnd   <= $nestingStack[-1]->offsetEnd &&
7195            $entry->index       != $nestingStack[-1]->index)
7196        {
7197            # Nested entry found
7198            $match = $nestingStack[-1];
7199            push @{ $encapsulations{ $match->index } }, $entry;
7200            $inner2outer{ $entry->index} = $match->index;
7201            ++ $encapsulationCount;
7202
7203            $entry->encapsulated(1) ;
7204            $match->increment_childrenCount();
7205
7206            if ($NESTING_DEBUG)
7207            {
7208                say "#### nesting " . (caller(1))[3] . " index #" . $entry->index . ' "' .
7209                    $entry->outputFilename . '" [' . $entry->offsetStart . "->" . $entry->offsetEnd . "]" .
7210                    " in #" . $match->index . ' "' .
7211                    $match->outputFilename . '" [' . $match->offsetStart . "->" . $match->offsetEnd . "]" ;
7212            }
7213        }
7214
7215        return $match;
7216    }
7217
7218    sub isNested
7219    {
7220        my $offsetStart = shift;
7221        my $offsetEnd = shift;
7222
7223        if ($NESTING_DEBUG)
7224        {
7225            say "### Want: offsetStart " . ::decimalHex0x($offsetStart) . " offsetEnd " . ::decimalHex0x($offsetEnd);
7226            for my $entry (@nestingStack)
7227            {
7228                say "### Have: offsetStart " . ::decimalHex0x($entry->offsetStart) . " offsetEnd " . ::decimalHex0x($entry->offsetEnd);
7229            }
7230        }
7231
7232        return 0
7233            unless @nestingStack ;
7234
7235        my @copy = @nestingStack ;
7236
7237        pop @copy
7238            while @copy && $offsetStart > $copy[-1]->offsetEnd ;
7239
7240        return @copy &&
7241               $offsetStart >= $copy[-1]->offsetStart &&
7242               $offsetEnd   <= $copy[-1]->offsetEnd ;
7243    }
7244
7245    sub getOuterEncapsulation
7246    {
7247        my $entry = shift;
7248
7249        my $outerIndex =  $inner2outer{ $entry->index } ;
7250
7251        return undef
7252            if ! defined $outerIndex ;
7253
7254        return $index2entry{$outerIndex} // undef;
7255    }
7256
7257    sub getEncapsulations
7258    {
7259        my $entry = shift;
7260
7261        return $encapsulations{ $entry->index } ;
7262    }
7263
7264    sub getFirstEncapsulation
7265    {
7266        my $entry = shift;
7267
7268        my $got = $encapsulations{ $entry->index } ;
7269
7270        return defined $got ? $$got[0] : undef;
7271    }
7272
7273    sub encapsulations
7274    {
7275        return \%encapsulations;
7276    }
7277
7278    sub encapsulationCount
7279    {
7280        return $encapsulationCount;
7281    }
7282
7283    sub childrenInCentralDir
7284    {
7285        # find local header entries that have children that are not referenced in the CD
7286        # tis means it is likely a benign nextd zip file
7287        my $entry = shift;
7288
7289        for my $child (@{ $encapsulations{$entry->index} } )
7290        {
7291            next
7292                unless $child->entryType == ::ZIP_LOCAL_HDR_SIG ;
7293
7294            return 1
7295                if @{ $child->cdEntryList };
7296        }
7297
7298        return 0;
7299    }
7300
7301    sub entryByIndex
7302    {
7303        my $index = shift;
7304        return $index2entry{$index};
7305    }
7306
7307    sub getEntryByOffset
7308    {
7309        my $offset  = shift;
7310        return $offset2entry{$offset};
7311    }
7312
7313    sub getLdEntryByOffset
7314    {
7315        my $offset  = shift;
7316        my $entry = $offset2entry{$offset};
7317
7318        return $entry
7319            if $entry && $entry->entryType == ::ZIP_LOCAL_HDR_SIG;
7320
7321        return undef;
7322    }
7323
7324    sub getEntriesByOffset
7325    {
7326        return \%offset2entry ;
7327    }
7328}
7329
7330{
7331    package SimpleTable ;
7332
7333    use List::Util qw(max sum);
7334
7335    sub new
7336    {
7337        my $class = shift;
7338
7339        my %object = (
7340            header => [],
7341            data   => [],
7342            columns   => 0,
7343            prefix => '#  ',
7344        );
7345        bless \%object, $class;
7346    }
7347
7348    sub addHeaderRow
7349    {
7350        my $self = shift;
7351        push @{ $self->{header} }, [ @_ ] ;
7352        $self->{columns} = max($self->{columns}, scalar @_ ) ;
7353    }
7354
7355    sub addDataRow
7356    {
7357        my $self = shift;
7358
7359        push @{ $self->{data} }, [ @_ ] ;
7360        $self->{columns} = max($self->{columns}, scalar @_ ) ;
7361    }
7362
7363    sub hasData
7364    {
7365        my $self = shift;
7366
7367        return scalar @{ $self->{data} } ;
7368    }
7369
7370    sub display
7371    {
7372        my $self = shift;
7373
7374        # work out the column widths
7375        my @colW = (0) x $self->{columns} ;
7376        for my $row (@{ $self->{data} }, @{ $self->{header} })
7377        {
7378            my @r = @$row;
7379            for my $ix (0 .. $self->{columns} -1)
7380            {
7381                $colW[$ix] = max($colW[$ix],
7382                                3 + length( $r[$ix] )
7383                                );
7384            }
7385        }
7386
7387        my $width = sum(@colW) ; #+ @colW ;
7388        my @template ;
7389        for my $w (@colW)
7390        {
7391            push @template, ' ' x ($w - 3);
7392        }
7393
7394        print $self->{prefix} . '-' x ($width + 1) . "\n";
7395
7396        for my $row (@{ $self->{header} })
7397        {
7398            my @outputRow = @template;
7399
7400            print $self->{prefix} . '| ';
7401            for my $ix (0 .. $self->{columns} -1)
7402            {
7403                my $field = $template[$ix] ;
7404                substr($field, 0, length($row->[$ix]), $row->[$ix]);
7405                print $field . ' | ';
7406            }
7407            print "\n";
7408
7409        }
7410
7411        print $self->{prefix} . '-' x ($width + 1) . "\n";
7412
7413        for my $row (@{ $self->{data} })
7414        {
7415            my @outputRow = @template;
7416
7417            print $self->{prefix} . '| ';
7418            for my $ix (0 .. $self->{columns} -1)
7419            {
7420                my $field = $template[$ix] ;
7421                substr($field, 0, length($row->[$ix]), $row->[$ix]);
7422                print $field . ' | ';
7423            }
7424            print "\n";
7425        }
7426
7427        print $self->{prefix} . '-' x ($width + 1) . "\n";
7428        print "#\n";
7429    }
7430}
7431
7432sub Usage
7433{
7434    my $enc = TextEncoding::getNativeLocaleName();
7435
7436    my $message = <<EOM;
7437zipdetails [OPTIONS] file
7438
7439Display details about the internal structure of a Zip file.
7440
7441OPTIONS
7442
7443  General Options
7444     -h, --help
7445            Display help
7446     --redact
7447            Hide filename and payload data in the output
7448     --scan
7449            Enable pessimistic scanning mode.
7450            Blindly scan the file looking for zip headers
7451            Expect false-positives.
7452     --utc
7453            Display date/time fields in UTC. Default is local time
7454     -v
7455            Enable verbose mode -- output more stuff
7456     --version
7457            Print zipdetails version number
7458            This is version $VERSION
7459     --walk
7460            Enable optimistic scanning mode.
7461            Blindly scan the file looking for zip headers
7462            Expect false-positives.
7463
7464  Filename/Comment Encoding
7465    --encoding e
7466            Use encoding "e" when reading filename/comments from the zip file
7467            Uses system encoding ('$enc') by default
7468    --no-encoding
7469            Disable filename & comment encoding. Default disabled.
7470    --output-encoding e
7471            Use encoding "e" when writing filename/comments to the display
7472            Uses system encoding ('$enc') by default
7473    --debug-encoding
7474            Display eatra info when a filename/comment encoding has changed
7475    --language-encoding, --no-language-encoding
7476            Enable/disable support for the zip file "Language Encoding" flag.
7477            When this flag is set in a zip file the filename/comment is assumed
7478            to be encoded in UTF8.
7479            Default is enabled
7480
7481  Message Control
7482     --messages, --no-messages
7483            Enable/disable all info/warning/error messages. Default enabled.
7484     --exit-bitmask, --no-exit-bitmask
7485            Enable/disable exit status bitmask for messages. Default disabled.
7486            Bitmask values are
7487                Info    1
7488                Warning 2
7489                Error   4
7490
7491Copyright (c) 2011-2024 Paul Marquess. All rights reserved.
7492
7493This program is free software; you can redistribute it and/or
7494modify it under the same terms as Perl itself.
7495EOM
7496
7497    if (@_)
7498    {
7499        warn "$_\n"
7500            for @_  ;
7501        warn "\n";
7502
7503        die $message ;
7504    }
7505
7506    print $message ;
7507    exit 0;
7508
7509}
7510
75111;
7512
7513__END__
7514
7515=head1 NAME
7516
7517zipdetails - display the internal structure of zip files
7518
7519=head1 SYNOPSIS
7520
7521    zipdetails [options] zipfile.zip
7522
7523=head1 DESCRIPTION
7524
7525This program creates a detailed report on the internal structure of zip
7526files. For each item of metadata within a zip file the program will output
7527
7528=over 5
7529
7530=item the offset into the zip file where the item is located.
7531
7532=item a textual representation for the item.
7533
7534=item an optional hex dump of the item.
7535
7536=back
7537
7538
7539The program assumes a prior understanding of the internal structure of Zip
7540files. You should have a copy of the zip file definition,
7541L<APPNOTE.TXT|https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT>,
7542at hand to help understand the output from this program.
7543
7544=head2 Default Behaviour
7545
7546By default the program expects to be given a well-formed zip file.  It will
7547navigate the zip file by first parsing the zip C<Central Directory> at the end
7548of the file.  If the C<Central Directory> is found, it will then walk
7549sequentally through the zip records starting at the beginning of the file.
7550See L<Advanced Analysis> for other processing options.
7551
7552If the program finds any structural or portability issues with the zip file
7553it will print a message at the point it finds the issue and/or in a summary
7554at the end of the output report. Whilst the set of issues that can be
7555detected it exhaustive, don't assume that this program can find I<all> the
7556possible issues in a zip file - there are likely edge conditions that need
7557to be addressed.
7558
7559If you have suggestions for use-cases where this could be enhanced please
7560consider creating an enhancement request (see L<"SUPPORT">).
7561
7562=head3 Date & Time fields
7563
7564Date/time fields found in zip files are displayed in local time. Use the
7565C<--utc> option to display these fields in Coordinated Universal Time (UTC).
7566
7567=head3 Filenames & Comments
7568
7569Filenames and comments are decoded/encoded using the default system
7570encoding of the host running C<zipdetails>. When the sytem encoding cannot
7571be determined C<cp437> will be used.
7572
7573The exceptions are
7574
7575=over 5
7576
7577=item *
7578
7579when the C<Language Encoding Flag> is set in the zip file, the
7580filename/comment fields are assumed to be encoded in UTF-8.
7581
7582=item *
7583
7584the definition for the metadata field implies UTF-8 charset encoding
7585
7586=back
7587
7588See L<"Filename Encoding Issues"> and L<Filename & Comment Encoding
7589Options> for ways to control the encoding of filename/comment fields.
7590
7591=head2 OPTIONS
7592
7593=head3 General Options
7594
7595=over 5
7596
7597=item C<-h>, C<--help>
7598
7599Display help
7600
7601=item C<--redact>
7602
7603Obscure filenames and payload data in the output. Handy for the use case
7604where the zip files contains sensitive data that cannot be shared.
7605
7606=item C<--scan>
7607
7608Pessimistically scan the zip file loking for possible zip records. Can be
7609error-prone. For very large zip files this option is slow. Consider using
7610the C<--walk> option first. See L<"Advanced Analysis Options">
7611
7612=item C<--utc>
7613
7614By default, date/time fields are displayed in local time. Use this option to
7615display them in in Coordinated Universal Time (UTC).
7616
7617=item C<-v>
7618
7619Enable Verbose mode. See L<"Verbose Output">.
7620
7621=item C<--version>
7622
7623Display version number of the program and exit.
7624
7625=item C<--walk>
7626
7627Optimistically walk the zip file looking for possible zip records.
7628See L<"Advanced Analysis Options">
7629
7630=back
7631
7632=head3 Filename & Comment Encoding Options
7633
7634See L<"Filename Encoding Issues">
7635
7636=over 5
7637
7638=item C<--encoding name>
7639
7640Use encoding "name" when reading filenames/comments from the zip file.
7641
7642When this option is not specified the default the system encoding is used.
7643
7644=item C< --no-encoding>
7645
7646Disable all filename & comment encoding/decoding. Filenames/comments are
7647processed as byte streams.
7648
7649This option is not enabled by default.
7650
7651=item C<--output-encoding name>
7652
7653Use encoding "name" when writing filename/comments to the display.  By
7654default the system encoding will be used.
7655
7656=item C<--language-encoding>, C<--no-language-encoding>
7657
7658Modern zip files set a metadata entry in zip files, called the "Language
7659encoding flag", when they write filenames/comments encoded in UTF-8.
7660
7661Occasionally some applications set the C<Language Encoding Flag> but write
7662data that is not UTF-8 in the filename/comment fields of the zip file. This
7663will usually result in garbled text being output for the
7664filenames/comments.
7665
7666To deal with this use-case, set the C<--no-language-encoding> option and,
7667if needed, set the C<--encoding name> option to encoding actually used.
7668
7669Default is C<--language-encoding>.
7670
7671=item C<--debug-encoding>
7672
7673Display extra debugging info when a filename/comment encoding has changed.
7674
7675=back
7676
7677=head3 Message Control Options
7678
7679=over 5
7680
7681=item C<--messages>, C<--no-messages>
7682
7683Enable/disable the output of all info/warning/error messages.
7684
7685Disabling messages means that no checks are carried out to check that the
7686zip file is well-formed.
7687
7688Default is enabled.
7689
7690=item C<--exit-bitmask>, C<--no-exit-bitmask>
7691
7692Enable/disable exit status bitmask for messages. Default disabled.
7693Bitmask values are: 1 for info, 2 for warning and 4 for error.
7694
7695=back
7696
7697
7698=head2 Default Output
7699
7700By default C<zipdetails> will output each metadata field from the zip file
7701in three columns.
7702
7703=over 5
7704
7705=item 1
7706
7707The offset, in hex, to the start of the field relative to the beginning of
7708the file.
7709
7710=item 2
7711
7712The name of the field.
7713
7714=item 3
7715
7716Detailed information about the contents of the field. The format depends on
7717the type of data:
7718
7719=over 5
7720
7721=item * Numeric Values
7722
7723If the field contains an 8-bit, 16-bit, 32-bit or 64-bit numeric value, it
7724will be displayed in both hex and decimal -- for example "C<002A (42)>".
7725
7726Note that Zip files store most numeric values in I<little-endian> encoding
7727(there area few rare instances where I<big-endian> is used). The value read
7728from the zip file will have the I<endian> encoding removed before being
7729displayed.
7730
7731Next, is an optional description of what the numeric value means.
7732
7733=item * String
7734
7735If the field corresponds to a printable string, it will be output enclosed
7736in single quotes.
7737
7738=item * Binary Data
7739
7740The term I<Binary Data> is just a catch-all for all other metadata in the
7741zip file. This data is displayed as a series of ascii-hex byte values in
7742the same order they are stored in the zip file.
7743
7744=back
7745
7746=back
7747
7748For example, assuming you have a zip file, C<test,zip>, with one entry
7749
7750    $ unzip -l  test.zip
7751    Archive:  test.zip
7752    Length      Date    Time    Name
7753    ---------  ---------- -----   ----
7754        446  2023-03-22 20:03   lorem.txt
7755    ---------                     -------
7756        446                     1 file
7757
7758Running C<zipdetails> will gives this output
7759
7760    $ zipdetails test.zip
7761
7762    0000 LOCAL HEADER #1       04034B50 (67324752)
7763    0004 Extract Zip Spec      14 (20) '2.0'
7764    0005 Extract OS            00 (0) 'MS-DOS'
7765    0006 General Purpose Flag  0000 (0)
7766         [Bits 1-2]            0 'Normal Compression'
7767    0008 Compression Method    0008 (8) 'Deflated'
7768    000A Modification Time     5676A072 (1450614898) 'Wed Mar 22 20:03:36 2023'
7769    000E CRC                   F90EE7FF (4178503679)
7770    0012 Compressed Size       0000010E (270)
7771    0016 Uncompressed Size     000001BE (446)
7772    001A Filename Length       0009 (9)
7773    001C Extra Length          0000 (0)
7774    001E Filename              'lorem.txt'
7775    0027 PAYLOAD
7776
7777    0135 CENTRAL HEADER #1     02014B50 (33639248)
7778    0139 Created Zip Spec      1E (30) '3.0'
7779    013A Created OS            03 (3) 'Unix'
7780    013B Extract Zip Spec      14 (20) '2.0'
7781    013C Extract OS            00 (0) 'MS-DOS'
7782    013D General Purpose Flag  0000 (0)
7783         [Bits 1-2]            0 'Normal Compression'
7784    013F Compression Method    0008 (8) 'Deflated'
7785    0141 Modification Time     5676A072 (1450614898) 'Wed Mar 22 20:03:36 2023'
7786    0145 CRC                   F90EE7FF (4178503679)
7787    0149 Compressed Size       0000010E (270)
7788    014D Uncompressed Size     000001BE (446)
7789    0151 Filename Length       0009 (9)
7790    0153 Extra Length          0000 (0)
7791    0155 Comment Length        0000 (0)
7792    0157 Disk Start            0000 (0)
7793    0159 Int File Attributes   0001 (1)
7794         [Bit 0]               1 'Text Data'
7795    015B Ext File Attributes   81ED0000 (2179792896)
7796         [Bits 16-24]          01ED (493) 'Unix attrib: rwxr-xr-x'
7797         [Bits 28-31]          08 (8) 'Regular File'
7798    015F Local Header Offset   00000000 (0)
7799    0163 Filename              'lorem.txt'
7800
7801    016C END CENTRAL HEADER    06054B50 (101010256)
7802    0170 Number of this disk   0000 (0)
7803    0172 Central Dir Disk no   0000 (0)
7804    0174 Entries in this disk  0001 (1)
7805    0176 Total Entries         0001 (1)
7806    0178 Size of Central Dir   00000037 (55)
7807    017C Offset to Central Dir 00000135 (309)
7808    0180 Comment Length        0000 (0)
7809    #
7810    # Done
7811
7812
7813=head2 Verbose Output
7814
7815If the C<-v> option is present, the metadata output is split into the
7816following columns:
7817
7818=over 5
7819
7820=item 1
7821
7822The offset, in hex, to the start of the field relative to the beginning of
7823the file.
7824
7825=item 2
7826
7827The offset, in hex, to the end of the field relative to the beginning of
7828the file.
7829
7830=item 3
7831
7832The length, in hex, of the field.
7833
7834=item 4
7835
7836A hex dump of the bytes in field in the order they are stored in the zip file.
7837
7838=item 5
7839
7840A textual description of the field.
7841
7842=item 6
7843
7844Information about the contents of the field. See the description in the
7845L<Default Output> for more details.
7846
7847=back
7848
7849Here is the same zip file, C<test.zip>, dumped using the C<zipdetails>
7850C<-v> option:
7851
7852    $ zipdetails -v test.zip
7853
7854    0000 0003 0004 50 4B 03 04 LOCAL HEADER #1       04034B50 (67324752)
7855    0004 0004 0001 14          Extract Zip Spec      14 (20) '2.0'
7856    0005 0005 0001 00          Extract OS            00 (0) 'MS-DOS'
7857    0006 0007 0002 00 00       General Purpose Flag  0000 (0)
7858                               [Bits 1-2]            0 'Normal Compression'
7859    0008 0009 0002 08 00       Compression Method    0008 (8) 'Deflated'
7860    000A 000D 0004 72 A0 76 56 Modification Time     5676A072 (1450614898) 'Wed Mar 22 20:03:36 2023'
7861    000E 0011 0004 FF E7 0E F9 CRC                   F90EE7FF (4178503679)
7862    0012 0015 0004 0E 01 00 00 Compressed Size       0000010E (270)
7863    0016 0019 0004 BE 01 00 00 Uncompressed Size     000001BE (446)
7864    001A 001B 0002 09 00       Filename Length       0009 (9)
7865    001C 001D 0002 00 00       Extra Length          0000 (0)
7866    001E 0026 0009 6C 6F 72 65 Filename              'lorem.txt'
7867                   6D 2E 74 78
7868                   74
7869    0027 0134 010E ...         PAYLOAD
7870
7871    0135 0138 0004 50 4B 01 02 CENTRAL HEADER #1     02014B50 (33639248)
7872    0139 0139 0001 1E          Created Zip Spec      1E (30) '3.0'
7873    013A 013A 0001 03          Created OS            03 (3) 'Unix'
7874    013B 013B 0001 14          Extract Zip Spec      14 (20) '2.0'
7875    013C 013C 0001 00          Extract OS            00 (0) 'MS-DOS'
7876    013D 013E 0002 00 00       General Purpose Flag  0000 (0)
7877                               [Bits 1-2]            0 'Normal Compression'
7878    013F 0140 0002 08 00       Compression Method    0008 (8) 'Deflated'
7879    0141 0144 0004 72 A0 76 56 Modification Time     5676A072 (1450614898) 'Wed Mar 22 20:03:36 2023'
7880    0145 0148 0004 FF E7 0E F9 CRC                   F90EE7FF (4178503679)
7881    0149 014C 0004 0E 01 00 00 Compressed Size       0000010E (270)
7882    014D 0150 0004 BE 01 00 00 Uncompressed Size     000001BE (446)
7883    0151 0152 0002 09 00       Filename Length       0009 (9)
7884    0153 0154 0002 00 00       Extra Length          0000 (0)
7885    0155 0156 0002 00 00       Comment Length        0000 (0)
7886    0157 0158 0002 00 00       Disk Start            0000 (0)
7887    0159 015A 0002 01 00       Int File Attributes   0001 (1)
7888                               [Bit 0]               1 'Text Data'
7889    015B 015E 0004 00 00 ED 81 Ext File Attributes   81ED0000 (2179792896)
7890                               [Bits 16-24]          01ED (493) 'Unix attrib: rwxr-xr-x'
7891                               [Bits 28-31]          08 (8) 'Regular File'
7892    015F 0162 0004 00 00 00 00 Local Header Offset   00000000 (0)
7893    0163 016B 0009 6C 6F 72 65 Filename              'lorem.txt'
7894                   6D 2E 74 78
7895                   74
7896
7897    016C 016F 0004 50 4B 05 06 END CENTRAL HEADER    06054B50 (101010256)
7898    0170 0171 0002 00 00       Number of this disk   0000 (0)
7899    0172 0173 0002 00 00       Central Dir Disk no   0000 (0)
7900    0174 0175 0002 01 00       Entries in this disk  0001 (1)
7901    0176 0177 0002 01 00       Total Entries         0001 (1)
7902    0178 017B 0004 37 00 00 00 Size of Central Dir   00000037 (55)
7903    017C 017F 0004 35 01 00 00 Offset to Central Dir 00000135 (309)
7904    0180 0181 0002 00 00       Comment Length        0000 (0)
7905    #
7906    # Done
7907
7908=head2 Advanced Analysis
7909
7910If you have a corrupt or non-standard zip file, particulatly one where the
7911C<Central Directory> metadata at the end of the file is absent/incomplete, you
7912can use either the C<--walk> option or the C<--scan> option to search for
7913any zip metadata that is still present in the file.
7914
7915When either of these options is enabled, this program will bypass the
7916initial step of reading the C<Central Directory> at the end of the file and
7917simply scan the zip file sequentially from the start of the file looking
7918for zip metedata records. Although this can be error prone, for the most
7919part it will find any zip file metadata that is still present in the file.
7920
7921The difference between the two options is how aggressive the sequential
7922scan is: C<--walk> is optimistic, while C<--scan> is pessimistic.
7923
7924To understand the difference in more detail you need to know a bit about
7925how zip file metadata is structured. Under the hood, a zip file uses a
7926series of 4-byte signatures to flag the start of a each of the metadata
7927records it uses. When the C<--walk> or the C<--scan> option is enabled both
7928work identically by scanning the file from the beginning looking for any
7929the of these valid 4-byte metadata signatures. When a 4-byte signature is
7930found both options will blindly assume that it has found a vald metadata
7931record and display it.
7932
7933=head3 C<--walk>
7934
7935The C<--walk> option optimistically assumes that it has found a real zip
7936metatada record and so starts the scan for the next record directly after
7937the record it has just output.
7938
7939=head3 C<--scan>
7940
7941The C<--scan> option is pessimistic and assumes the 4-byte signature
7942sequence may have been a false-positive, so before starting the scan for
7943the next resord, it will rewind to the location in the file directly after
7944the 4-byte sequecce it just processed. This means it will rescan data that
7945has already been processed.  For very lage zip files the C<--scan> option
7946can be really realy slow, so trying the C<--walk> option first.
7947
7948B<Important Note>: If the zip file being processed contains one or more
7949nested zip files, and the outer zip file uses the C<STORE> compression
7950method, the C<--scan> option will display the zip metadata for both the
7951outer & inner zip files.
7952
7953=head2 Filename Encoding Issues
7954
7955Sometimes when displaying the contents of a zip file the filenames (or
7956comments) appear to be garbled. This section walks through the reasons and
7957mitigations that can be applied to work around these issues.
7958
7959=head3 Background
7960
7961When zip files were first created in the 1980's, there was no Unicode or
7962UTF-8. Issues around character set encoding interoperability were not a
7963major concern.
7964
7965Initially, the only official encoding supported in zip files was IBM Code
7966Page 437 (AKA C<CP437>). As time went on users in locales where C<CP437>
7967wasn't appropriate stored filenames in the encoding native to their locale.
7968If you were running a system that matched the locale of the zip file, all
7969was well. If not, you had to post-process the filenames after unzipping the
7970zip file.
7971
7972Fast forward to the introduction of Unicode and UTF-8 encoding. The
7973approach now used by all major zip implementations is to set the C<Language
7974encoding flag> (also known as C<EFS>) in the zip file metadata to signal
7975that a filename/comment is encoded in UTF-8.
7976
7977To ensure maximum interoperability when sharing zip files store 7-bit
7978filenames as-is in the zip file. For anything else the C<EFS> bit needs to
7979be set and the filename is encoded in UTF-8. Although this rule is kept to
7980for the most part, there are exceptions out in the wild.
7981
7982=head3 Dealing with Encoding Errors
7983
7984The most common filename encoding issue is where the C<EFS> bit is not set and
7985the filename is stored in a character set that doesnt't match the system
7986encoding. This mostly impacts legacy zip files that predate the
7987introduction of Unicode.
7988
7989To deal with this issue you first need to know what encoding was used in
7990the zip file. For example, if the filename is encoded in C<ISO-8859-1> you
7991can display the filenames using the C<--encoding> option
7992
7993    zipdetails --encoding ISO-8859-1 myfile.zip
7994
7995A less common variation of this is where the C<EFS> bit is set, signalling
7996that the filename will be encoded in UTF-8, but the filename is not encoded
7997in UTF-8. To deal with this scenarion, use the C<--no-language-encoding>
7998option along with the C<--encoding> option.
7999
8000
8001=head1 LIMITATIONS
8002
8003The following zip file features are not supported by this program:
8004
8005=over 5
8006
8007=item *
8008
8009Multi-part/Split/Spanned Zip Archives.
8010
8011This program cannot give an overall report on the combined parts of a
8012multi-part zip file.
8013
8014The best you can do is run with either the C<--scan> or C<--walk> options
8015against individual parts. Some will contains zipfile metadata which will be
8016detected and some will only contain compressed payload data.
8017
8018
8019=item *
8020
8021Encrypted Central Directory
8022
8023When pkzip I<Strong Encryption> is enabled in a zip file this program can
8024still parse most of the metadata in the zip file. The exception is when the
8025C<Central Directory> of a zip file is also encrypted. This program cannot
8026parse any metadata from an encrypted C<Central Directory>.
8027
8028=item *
8029
8030Corrupt Zip files
8031
8032When C<zipdetails> encounters a corrupt zip file, it will do one or more of
8033the following
8034
8035=over 5
8036
8037=item *
8038
8039Display details of the corruption and carry on
8040
8041=item *
8042
8043Display details of the corruption and terminate
8044
8045=item *
8046
8047Terminate with a generic message
8048
8049=back
8050
8051Which of the above is output is dependent in the severity of the
8052corruption.
8053
8054=back
8055
8056=head1 TODO
8057
8058=head2 JSON/YML Output
8059
8060Output some of the zip file metadata as a JSON or YML document.
8061
8062=head2 Corrupt Zip files
8063
8064Although the detection and reporting of most of the common corruption use-cases is
8065present in C<zipdetails>, there are likely to be other edge cases that need
8066to be supported.
8067
8068If you have a corrupt Zip file that isn't being processed properly, please
8069report it (see  L<"SUPPORT">).
8070
8071=head1 SUPPORT
8072
8073General feedback/questions/bug reports should be sent to
8074L<https://github.com/pmqs/zipdetails/issues>.
8075
8076=head1 SEE ALSO
8077
8078
8079The primary reference for Zip files is
8080L<APPNOTE.TXT|https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT>.
8081
8082An alternative reference is the Info-Zip appnote. This is available from
8083L<ftp://ftp.info-zip.org/pub/infozip/doc/>
8084
8085For details of WinZip AES encryption see L<AES Encryption Information:
8086Encryption Specification AE-1 and
8087AE-2|https://www.winzip.com/en/support/aes-encryption/>.
8088
8089The C<zipinfo> program that comes with the info-zip distribution
8090(L<http://www.info-zip.org/>) can also display details of the structure of a zip
8091file.
8092
8093
8094=head1 AUTHOR
8095
8096Paul Marquess F<pmqs@cpan.org>.
8097
8098=head1 COPYRIGHT
8099
8100Copyright (c) 2011-2024 Paul Marquess. All rights reserved.
8101
8102This program is free software; you can redistribute it and/or modify it under
8103the same terms as Perl itself.
8104