1package IO::Uncompress::Unzip;
2
3require 5.006 ;
4
# for the ZIP file format (PKWARE APPNOTE.TXT)
6
7use strict ;
8use warnings;
9use bytes;
10
11use IO::File;
12use IO::Uncompress::RawInflate  2.212 ;
13use IO::Compress::Base::Common  2.212 qw(:Status );
14use IO::Uncompress::Adapter::Inflate  2.212 ;
15use IO::Uncompress::Adapter::Identity 2.212 ;
16use IO::Compress::Zlib::Extra 2.212 ;
17use IO::Compress::Zip::Constants 2.212 ;
18
19use Compress::Raw::Zlib  2.212 () ;
20
BEGIN
{
    # Don't trigger any __DIE__ Hooks while probing for optional modules.
    local $SIG{__DIE__};

    # Each of these decompression adapters is optional.  Try to load it and
    # check its version; a failure at either step is deliberately ignored.
    for my $adapter (qw(Bunzip2 UnLzma UnXz UnZstd))
    {
        my $module = "IO::Uncompress::Adapter::$adapter";

        # Path form of the module name, as needed by a non-bareword require.
        (my $file = "$module.pm") =~ s{::}{/}g;

        eval { require $file ;
               $module->VERSION(2.212) } ;
    }
}
35
36
require Exporter ;

# Package globals: module version, inheritance and export tables, the
# exportable $UnzipError string, and the signature dispatch table that
# chkTrailer consults.
our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $UnzipError, %headerLookup);

$VERSION = '2.212';
$UnzipError = '';

@ISA    = qw(IO::Uncompress::RawInflate Exporter);
@EXPORT_OK = qw($UnzipError unzip );
# Start from RawInflate's export tags and add this module's own symbols to
# the 'all' tag.
%EXPORT_TAGS = %IO::Uncompress::RawInflate::EXPORT_TAGS ;
push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
Exporter::export_ok_tags('all');

# Maps a record signature to the routine that reads past that record.
# Plain commas (not '=>') separate key and value so that the signature
# constants are evaluated instead of being quoted as strings.
%headerLookup = (
        ZIP_CENTRAL_HDR_SIG,            \&skipCentralDirectory,
        ZIP_END_CENTRAL_HDR_SIG,        \&skipEndCentralDirectory,
        ZIP64_END_CENTRAL_REC_HDR_SIG,  \&skipCentralDirectory64Rec,
        ZIP64_END_CENTRAL_LOC_HDR_SIG,  \&skipCentralDirectory64Loc,
        ZIP64_ARCHIVE_EXTRA_SIG,        \&skipArchiveExtra,
        ZIP64_DIGITAL_SIGNATURE_SIG,    \&skipDigitalSignature,
        );

# Display names for the compression method IDs; used for the 'MethodName'
# entry of the header hash built by _readZipHeader.  The trailing () forces
# the constants to be called, since '=>' would otherwise quote them.
my %MethodNames = (
        ZIP_CM_DEFLATE()    => 'Deflated',
        ZIP_CM_BZIP2()      => 'Bzip2',
        ZIP_CM_LZMA()       => 'Lzma',
        ZIP_CM_STORE()      => 'Stored',
        ZIP_CM_XZ()         => 'Xz',
        ZIP_CM_ZSTD()       => 'Zstd',
    );
67
sub new
{
    # Constructor.  The object comes from createSelfTiedObject (which is also
    # handed a reference to $UnzipError); the caller's remaining arguments
    # are passed straight through to _create, whose result is returned.
    my $class = shift ;

    my $self = IO::Compress::Base::Common::createSelfTiedObject($class, \$UnzipError);

    return $self->_create(undef, 0, @_);
}
74
sub unzip
{
    # Functional interface: create a class-less object via
    # createSelfTiedObject and hand all of the caller's arguments to _inf.
    my $worker = IO::Compress::Base::Common::createSelfTiedObject(undef, \$UnzipError);

    return $worker->_inf(@_) ;
}
80
sub getExtraParams
{
    # Returns the list of options specific to this class.  Each option name
    # is paired with a two-element array: a Parse_* constant and a second
    # value (presumably the default -- confirm against
    # IO::Compress::Base::Common).
    return (
        # Zip header fields
        'name'   => [ IO::Compress::Base::Common::Parse_any(),     undef ],

        'stream' => [ IO::Compress::Base::Common::Parse_boolean(), 0 ],
        'efs'    => [ IO::Compress::Base::Common::Parse_boolean(), 0 ],

        # TODO - This means reading the central directory to get
        # 1. the local header offsets
        # 2. The compressed data length
    );
}
96
sub ckParams
{
    # Post-process the parsed options ($got): force crc32 on and copy the
    # 'name' and 'efs' option values into this object's UnzipData slot.
    # Always returns 1.
    my ($self, $got) = @_ ;

    # unzip always needs crc32
    $got->setValue('crc32' => 1);

    # [ key under UnzipData, option name ]
    for my $pair ( [ 'Name', 'name' ],
                   [ 'efs',  'efs'  ] )
    {
        my ($slot, $option) = @$pair ;
        *$self->{UnzipData}{$slot} = $got->getValue($option);
    }

    return 1;
}
110
sub mkUncomp
{
    # Validate the leading signature, then read the first wanted header and
    # store it in the Info slot.  Returns 1 on success, 0 when ckMagic
    # returns a false value, undef when readHeader returns a false value.
    my ($self, $got) = @_ ;

    my $magic = $self->ckMagic();
    return 0
        if ! $magic ;

    # The Info slot is assigned even when readHeader fails.
    my $info = $self->readHeader($magic);
    *$self->{Info} = $info ;

    return undef
        if ! $info ;

    return 1;
}
125
sub ckMagic
{
    # Read the first 4 bytes of input and check that they are a zip local
    # header signature.  Returns the 4 bytes on success; otherwise returns
    # whatever HeaderError returns.
    my $self = shift;

    # The bytes read are recorded as the pending header before validation.
    my $sig ;
    $self->smartReadExact(\$sig, 4);
    *$self->{HeaderPending} = $sig ;

    if (length $sig != 4) {
        return $self->HeaderError("Minimum header size is " .
                                  4 . " bytes");
    }

    if (! _isZipMagic($sig)) {
        return $self->HeaderError("Bad Magic");
    }

    *$self->{Type} = 'zip';

    return $sig ;
}
146
147
sub fastForward
{
    # Skip $offset bytes of input by reading and discarding them.
    # Returns 1 once everything has been skipped, 0 as soon as a read
    # comes up short.
    my $self = shift;
    my $offset = shift;

    # TODO - if Stream isn't enabled & reading from file, use seek

    my $buffer = '';
    my $chunk = 1024 * 16;

    while ($offset > 0)
    {
        # Last (or only) read: ask for exactly what remains.  This must
        # compare against the numeric value of $offset; comparing against
        # "length $offset" (its digit count) shrinks every read to a few bytes.
        $chunk = $offset
            if $offset < $chunk ;

        $offset -= $chunk;

        $self->smartReadExact(\$buffer, $chunk)
            or return 0;
    }

    return 1;
}
171
172
# Locate the local header of the member to extract.
#
# $magic is the 4-byte signature already consumed by the caller.  When no
# 'name' option was stored by ckParams, the first header read is returned.
# Otherwise members are skipped one at a time until a header whose Name
# equals the requested name is found.  Failure paths return undef or the
# result of saveErrorString called with undef as its first argument.
sub readHeader
{
    my $self = shift;
    my $magic = shift ;

    my $name =  *$self->{UnzipData}{Name} ;
    my $hdr = $self->_readZipHeader($magic) ;

    while (defined $hdr)
    {
        # NOTE(review): $hdr->{Name} is undef for an entry with a zero-length
        # filename, so this 'eq' can warn when a name was requested.
        if (! defined $name || $hdr->{Name} eq $name)
        {
            return $hdr ;
        }

        # skip the data
        # TODO - when Stream is off, use seek
        my $buffer;
        if (*$self->{ZipData}{Streaming}) {
            # Streamed member: the header does not give a usable length, so
            # the data is run through the decompressor until it reports end
            # of stream.  The decompressed output ($temp_buf) is discarded.
            while (1) {

                my $b;
                my $status = $self->smartRead(\$b, 1024 * 16);

                return $self->saveErrorString(undef, "Truncated file")
                    if $status <= 0 ;

                my $temp_buf ;
                my $out;

                $status = *$self->{Uncomp}->uncompr(\$b, \$temp_buf, 0, $out);

                return $self->saveErrorString(undef, *$self->{Uncomp}{Error},
                                                     *$self->{Uncomp}{ErrorNo})
                    if $self->saveStatus($status) == STATUS_ERROR;

                # Presumably uncompr leaves any unconsumed input in $b, which
                # is handed back to the input here -- confirm against the
                # adapter implementation.
                $self->pushBack($b)  ;

                if ($status == STATUS_ENDSTREAM) {
                    *$self->{Uncomp}->reset();
                    last;
                }
            }

            # skip the trailer
            $self->smartReadExact(\$buffer, $hdr->{TrailerLength})
                or return $self->saveErrorString(undef, "Truncated file");
        }
        else {
            # Non-streamed member: the compressed length is known, so just
            # read past that many bytes.  There is no trailer to pass on.
            my $c = $hdr->{CompressedLength}->get64bit();
            $self->fastForward($c)
                or return $self->saveErrorString(undef, "Truncated file");
            $buffer = '';
        }

        # Any chkTrailer status other than STATUS_OK is reported as a
        # truncated file.
        $self->chkTrailer($buffer) == STATUS_OK
            or return $self->saveErrorString(undef, "Truncated file");

        $hdr = $self->_readFullZipHeader();

        return $self->saveErrorString(undef, "Cannot find '$name'")
            if $self->smartEof();
    }

    return undef;
}
239
# Process what follows a member's data.
#
# $trailer holds the data descriptor bytes when the member was streamed;
# for non-streamed members it is not examined and the values saved by
# _readZipHeader are used instead.  The sizes and the running CRC are copied
# into the Info slot, checked in Strict mode, and then any central
# directory / end-of-archive records that follow are read past.
# Returns one of the STATUS_* values (or the result of TrailerError /
# saveErrorString on failure).
sub chkTrailer
{
    my $self = shift;
    my $trailer = shift;

    my ($sig, $CRC32, $cSize, $uSize) ;
    # NOTE(review): $cSizeHi / $uSizeHi are never used below.
    my ($cSizeHi, $uSizeHi) = (0, 0);
    if (*$self->{ZipData}{Streaming}) {
        # Data descriptor layout: signature, CRC32, compressed size,
        # uncompressed size.  The sizes are 8 bytes each for Zip64 members
        # and 4 bytes each otherwise.
        $sig   = unpack ("V", substr($trailer, 0, 4));
        $CRC32 = unpack ("V", substr($trailer, 4, 4));

        if (*$self->{ZipData}{Zip64} ) {
            $cSize = U64::newUnpack_V64 substr($trailer,  8, 8);
            $uSize = U64::newUnpack_V64 substr($trailer, 16, 8);
        }
        else {
            $cSize = U64::newUnpack_V32 substr($trailer,  8, 4);
            $uSize = U64::newUnpack_V32 substr($trailer, 12, 4);
        }

        return $self->TrailerError("Data Descriptor signature, got $sig")
            if $sig != ZIP_DATA_HDR_SIG;
    }
    else {
        # Values recorded from the local header by _readZipHeader.
        ($CRC32, $cSize, $uSize) =
            (*$self->{ZipData}{Crc32},
             *$self->{ZipData}{CompressedLen},
             *$self->{ZipData}{UnCompressedLen});
    }

    # Info gets the CRC32 slot kept up to date by filterUncompressed, not the
    # stored $CRC32 value.
    *$self->{Info}{CRC32} = *$self->{ZipData}{CRC32} ;
    *$self->{Info}{CompressedLength} = $cSize->get64bit();
    *$self->{Info}{UncompressedLength} = $uSize->get64bit();

    # Stored values are only compared with the observed ones in Strict mode.
    if (*$self->{Strict}) {
        return $self->TrailerError("CRC mismatch")
            if $CRC32  != *$self->{ZipData}{CRC32} ;

        return $self->TrailerError("CSIZE mismatch.")
            if ! $cSize->equal(*$self->{CompSize});

        return $self->TrailerError("USIZE mismatch.")
            if ! $uSize->equal(*$self->{UnCompSize});
    }

    # Never reassigned: leaving the loop below via 'last' returns STATUS_ERROR.
    my $reachedEnd = STATUS_ERROR ;
    # check for central directory or end of central directory
    while (1)
    {
        my $magic ;
        my $got = $self->smartRead(\$magic, 4);

        return $self->saveErrorString(STATUS_ERROR, "Truncated file")
            if $got != 4 && *$self->{Strict};

        if ($got == 0) {
            return STATUS_EOF ;
        }
        elsif ($got < 0) {
            return STATUS_ERROR ;
        }
        elsif ($got < 4) {
            # Fewer than 4 bytes left (non-Strict only): put them back.
            $self->pushBack($magic)  ;
            return STATUS_OK ;
        }

        my $sig = unpack("V", $magic) ;

        my $hdr;
        if ($hdr = $headerLookup{$sig})
        {
            # Known trailing record: call its skip routine.  A failure is
            # fatal only in Strict mode; otherwise the error is cleared.
            if (&$hdr($self, $magic) != STATUS_OK ) {
                if (*$self->{Strict}) {
                    return STATUS_ERROR ;
                }
                else {
                    $self->clearError();
                    return STATUS_OK ;
                }
            }

            if ($sig == ZIP_END_CENTRAL_HDR_SIG)
            {
                return STATUS_OK ;
                # NOTE(review): unreachable after the return above.
                last;
            }
        }
        elsif ($sig == ZIP_LOCAL_HDR_SIG)
        {
            # Another member follows: leave its signature for the next
            # header read.
            $self->pushBack($magic)  ;
            return STATUS_OK ;
        }
        else
        {
            # put the data back
            $self->pushBack($magic)  ;
            last;
        }
    }

    return $reachedEnd ;
}
342
sub skipCentralDirectory
{
    # Read past one central directory file header.  $magic is its 4-byte
    # signature, already consumed by the caller.  Returns STATUS_OK, or the
    # result of TrailerError / TruncatedTrailer when the input runs short.
    my ($self, $magic) = @_ ;

    # Fixed-size part of the 46-byte record, minus the signature.
    my $fixed;
    $self->smartReadExact(\$fixed, 46 - 4)
        or return $self->TrailerError("Minimum header size is " .
                                     46 . " bytes") ;

    # Only the signature and fixed part are recorded as the pending header.
    *$self->{HeaderPending} = $magic . $fixed ;

    # The three 16-bit lengths sit back to back at record offsets 28, 30
    # and 32; nothing else in the record is needed to step over it.
    my ($filename_length, $extra_length, $comment_length)
        = unpack("v v v", substr($fixed, 28-4, 6));

    for my $field ( [ $filename_length, "filename" ],
                    [ $extra_length,    "extra"    ],
                    [ $comment_length,  "comment"  ] )
    {
        my ($length, $what) = @$field ;

        next
            if ! $length ;

        my $discard ;
        $self->smartReadExact(\$discard, $length)
            or return $self->TruncatedTrailer($what);
    }

    return STATUS_OK ;
}
399
sub skipArchiveExtra
{
    # Read past an archive extra data record: a 4-byte length followed by
    # that many bytes.  $magic is the signature already consumed by the
    # caller.  On success the whole record is stored as the pending header
    # and STATUS_OK is returned.
    my ($self, $magic) = @_ ;

    my $buffer;
    if (! $self->smartReadExact(\$buffer, 4)) {
        return $self->TrailerError("Minimum header size is " .
                                     4 . " bytes") ;
    }

    my @record = ($magic, $buffer) ;

    my $size = unpack ("V", $buffer);

    if (! $self->smartReadExact(\$buffer, $size)) {
        return $self->TrailerError("Minimum header size is " .
                                     $size . " bytes") ;
    }

    push @record, $buffer ;
    *$self->{HeaderPending} = join '', @record ;

    return STATUS_OK ;
}
423
424
sub skipCentralDirectory64Rec
{
    # Read past a Zip64 end of central directory record.  $magic is the
    # signature already consumed by the caller (unused here).  The record
    # starts with an 8-byte little-endian size giving the number of bytes
    # that follow.  Returns STATUS_OK, or the result of TrailerError when
    # the input runs short.
    my $self = shift;
    my $magic = shift ;

    my $buffer;
    $self->smartReadExact(\$buffer, 8)
        or return $self->TrailerError("Minimum header size is " .
                                     8 . " bytes") ;

    # Combine the two 32-bit halves.  The high word is worth 2**32, i.e.
    # MAX32 + 1; multiplying by MAX32 itself under-counts whenever the high
    # word is non-zero.
    my ($sizeLo, $sizeHi)  = unpack ("V V", $buffer);
    my $size = $sizeHi * (U64::MAX32 + 1) + $sizeLo;

    $self->fastForward($size)
        or return $self->TrailerError("Minimum header size is " .
                                     $size . " bytes") ;

   # Layout of the bytes skipped above, for reference:
   #my $versionMadeBy      = unpack ("v",   substr($buffer,  0, 2));
   #my $extractVersion     = unpack ("v",   substr($buffer,  2, 2));
   #my $diskNumber         = unpack ("V",   substr($buffer,  4, 4));
   #my $cntrlDirDiskNo     = unpack ("V",   substr($buffer,  8, 4));
   #my $entriesInThisCD    = unpack ("V V", substr($buffer, 12, 8));
   #my $entriesInCD        = unpack ("V V", substr($buffer, 20, 8));
   #my $sizeOfCD           = unpack ("V V", substr($buffer, 28, 8));
   #my $offsetToCD         = unpack ("V V", substr($buffer, 36, 8));

    return STATUS_OK ;
}
458
sub skipCentralDirectory64Loc
{
    # Read past the 20-byte Zip64 end of central directory locator.  $magic
    # is its signature, already consumed by the caller.  The full record is
    # stored as the pending header.  Returns STATUS_OK, or the result of
    # TrailerError when the input runs short.
    my ($self, $magic) = @_ ;

    # Remaining 16 bytes: per the field notes in the original code, a
    # 4-byte disk number, an 8-byte offset and a 4-byte disk count.
    my $rest;
    if (! $self->smartReadExact(\$rest, 20 - 4)) {
        return $self->TrailerError("Minimum header size is " .
                                     20 . " bytes") ;
    }

    *$self->{HeaderPending} = $magic . $rest ;

    return STATUS_OK ;
}
478
sub skipEndCentralDirectory
{
    # Read past the end of central directory record, including its optional
    # comment.  $magic is the signature already consumed by the caller.
    # Returns STATUS_OK, or the result of TrailerError / TruncatedTrailer
    # when the input runs short.
    my ($self, $magic) = @_ ;

    # Fixed-size part of the 22-byte record, minus the signature.
    my $fixed;
    $self->smartReadExact(\$fixed, 22 - 4)
        or return $self->TrailerError("Minimum header size is " .
                                     22 . " bytes") ;

    # The comment, if any, is not included in the pending header.
    *$self->{HeaderPending} = $magic . $fixed ;

    # The last two bytes of the fixed part give the comment length.
    my $comment_length = unpack ("v", substr($fixed, 20-4, 2));

    return STATUS_OK
        if ! $comment_length ;

    my $comment ;
    $self->smartReadExact(\$comment, $comment_length)
        or return $self->TruncatedTrailer("comment");

    return STATUS_OK ;
}
512
513
sub _isZipMagic
{
    # True when the first 4 bytes of the argument, read as a little-endian
    # 32-bit value, equal the local file header signature.  Returns 0 for
    # anything shorter than 4 bytes.
    my ($candidate) = @_ ;

    return 0
        if length($candidate) < 4 ;

    return unpack("V", $candidate) == ZIP_LOCAL_HDR_SIG ;
}
521
522
sub _readFullZipHeader($)
{
    # Read a complete local file header, signature included.  Returns the
    # value produced by _readZipHeader, or the result of HeaderError when
    # the signature is missing or wrong.
    my ($self) = @_ ;

    # The bytes read are recorded as the pending header before validation.
    my $sig = '' ;
    $self->smartReadExact(\$sig, 4);
    *$self->{HeaderPending} = $sig ;

    if (length($sig) != 4) {
        return $self->HeaderError("Minimum header size is " .
                                  30 . " bytes");
    }

    if (! _isZipMagic($sig)) {
        return $self->HeaderError("Bad Magic");
    }

    my $hdr = $self->_readZipHeader($sig);

    # An undefined header also removes the Transparent slot.
    if (! defined $hdr) {
        delete *$self->{Transparent};
    }

    return $hdr ;
}
544
# Parse one local file header whose 4-byte signature ($magic) has already
# been read.
#
# Reads the remaining 26 fixed bytes, then the filename and extra field,
# records per-member state under *$self->{ZipData}, selects the
# decompression adapter for the member's compression method, and returns a
# hash ref describing the member.  Error paths return the result of
# HeaderError / TruncatedHeader / saveErrorString.
sub _readZipHeader($)
{
    my ($self, $magic) = @_ ;
    # NOTE(review): $HeaderCRC is only referenced by commented-out code below.
    my ($HeaderCRC) ;
    my ($buffer) = '' ;

    $self->smartReadExact(\$buffer, 30 - 4)
        or return $self->HeaderError("Minimum header size is " .
                                     30 . " bytes") ;

    # $keep accumulates the raw header bytes; it becomes the 'Header' entry
    # of the returned hash.
    my $keep = $magic . $buffer ;
    *$self->{HeaderPending} = $keep ;

    # Offsets are written as "position in full header - 4" because the
    # signature is not part of $buffer.
    my $extractVersion     = unpack ("v", substr($buffer, 4-4,  2));
    my $gpFlag             = unpack ("v", substr($buffer, 6-4,  2));
    my $compressedMethod   = unpack ("v", substr($buffer, 8-4,  2));
    my $lastModTime        = unpack ("V", substr($buffer, 10-4, 4));
    my $crc32              = unpack ("V", substr($buffer, 14-4, 4));
    my $compressedLength   = U64::newUnpack_V32 substr($buffer, 18-4, 4);
    my $uncompressedLength = U64::newUnpack_V32 substr($buffer, 22-4, 4);
    my $filename_length    = unpack ("v", substr($buffer, 26-4, 2));
    my $extra_length       = unpack ("v", substr($buffer, 28-4, 2));

    my $filename;
    my $extraField;
    my @EXTRA = ();

    # Some programs (some versions of LibreOffice) mark entries as streamed, but still fill out
    # compressedLength/uncompressedLength & crc32 in the local file header.
    # The expected data descriptor is not populated.
    # So only assume streaming if the Streaming bit is set AND the CRC32
    # field of the local header is zero (the test is on $crc32, not on the
    # compressed length).
    my $streamingMode = (($gpFlag & ZIP_GP_FLAG_STREAMING_MASK)  && $crc32 == 0) ? 1 : 0 ;

    my $efs_flag = ($gpFlag & ZIP_GP_FLAG_LANGUAGE_ENCODING) ? 1 : 0;

    return $self->HeaderError("Encrypted content not supported")
        if $gpFlag & (ZIP_GP_FLAG_ENCRYPTED_MASK|ZIP_GP_FLAG_STRONG_ENCRYPTED_MASK);

    return $self->HeaderError("Patch content not supported")
        if $gpFlag & ZIP_GP_FLAG_PATCHED_MASK;

    *$self->{ZipData}{Streaming} = $streamingMode;


    if ($filename_length)
    {
        $self->smartReadExact(\$filename, $filename_length)
            or return $self->TruncatedHeader("Filename");

        # Decode the name as UTF-8 only when the caller asked for it (efs
        # option), the entry's language-encoding flag is set, and the perl
        # is at least 5.8.4.  A decode failure croaks.
        if (*$self->{UnzipData}{efs} && $efs_flag && $] >= 5.008004)
        {
            require Encode;
            eval { $filename = Encode::decode_utf8($filename, 1) }
                or Carp::croak "Zip Filename not UTF-8" ;
        }

        $keep .= $filename ;
    }

    my $zip64 = 0 ;

    if ($extra_length)
    {
        $self->smartReadExact(\$extraField, $extra_length)
            or return $self->TruncatedHeader("Extra Field");

        my $bad = IO::Compress::Zlib::Extra::parseRawExtra($extraField,
                                                \@EXTRA, 1, 0);
        return $self->HeaderError($bad)
            if defined $bad;

        $keep .= $extraField ;

        # Index the parsed sub-fields by ID; each value is a reference to
        # that sub-field's data.
        my %Extra ;
        for (@EXTRA)
        {
            $Extra{$_->[0]} = \$_->[1];
        }

        if (defined $Extra{ZIP_EXTRA_ID_ZIP64()})
        {
            $zip64 = 1 ;

            my $buff = ${ $Extra{ZIP_EXTRA_ID_ZIP64()} };

            # This code assumes that all the fields in the Zip64
            # extra field aren't necessarily present. The spec says that
            # they only exist if the equivalent local headers are -1.

            if (! $streamingMode) {
                my $offset = 0 ;

                # Uncompressed size comes first, and only when the 32-bit
                # header field holds the "full" marker value.
                if (U64::full32 $uncompressedLength->get32bit() ) {
                    $uncompressedLength
                            = U64::newUnpack_V64 substr($buff, 0, 8);

                    $offset += 8 ;
                }

                # Compressed size follows, at offset 8 or 0 depending on
                # whether the uncompressed size was present.
                if (U64::full32 $compressedLength->get32bit() ) {

                    $compressedLength
                        = U64::newUnpack_V64 substr($buff, $offset, 8);

                    $offset += 8 ;
                }
           }
        }
    }

    *$self->{ZipData}{Zip64} = $zip64;

    # For non-streamed members the header values are saved so that
    # chkTrailer can use them, and the compressed length is recorded in the
    # CompressedInputLength / CompressedInputLengthRemaining slots.
    if (! $streamingMode) {
        *$self->{ZipData}{Streaming} = 0;
        *$self->{ZipData}{Crc32} = $crc32;
        *$self->{ZipData}{CompressedLen} = $compressedLength;
        *$self->{ZipData}{UnCompressedLen} = $uncompressedLength;
        *$self->{CompressedInputLengthRemaining} =
            *$self->{CompressedInputLength} = $compressedLength->get64bit();
    }

    # 'CRC32' is the running checksum of the uncompressed data (updated by
    # filterUncompressed); 'Crc32' above is the value stored in the header.
    *$self->{ZipData}{CRC32} = Compress::Raw::Zlib::crc32(undef);
    *$self->{ZipData}{Method} = $compressedMethod;
    if ($compressedMethod == ZIP_CM_DEFLATE)
    {
        *$self->{Type} = 'zip-deflate';
        my $obj = IO::Uncompress::Adapter::Inflate::mkUncompObject(1,0,0);

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_BZIP2)
    {
        # The adapter is optional (loaded in the BEGIN block); its $VERSION
        # is only defined if that load succeeded.  Same for Xz, Zstd, Lzma.
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::Bunzip2::VERSION ;

        *$self->{Type} = 'zip-bzip2';

        my $obj = IO::Uncompress::Adapter::Bunzip2::mkUncompObject();

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_XZ)
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::UnXz::VERSION ;

        *$self->{Type} = 'zip-xz';

        my $obj = IO::Uncompress::Adapter::UnXz::mkUncompObject();

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_ZSTD)
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::UnZstd::VERSION ;

        *$self->{Type} = 'zip-zstd';

        my $obj = IO::Uncompress::Adapter::UnZstd::mkUncompObject();

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_LZMA)
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::UnLzma::VERSION ;

        *$self->{Type} = 'zip-lzma';
        # The member data starts with a 4-byte LZMA header: two version
        # bytes, then a 16-bit little-endian properties size, followed by
        # that many bytes of properties.
        my $LzmaHeader;
        $self->smartReadExact(\$LzmaHeader, 4)
                or return $self->saveErrorString(undef, "Truncated file");
        my ($verHi, $verLo)   = unpack ("CC", substr($LzmaHeader, 0, 2));
        my $LzmaPropertiesSize   = unpack ("v", substr($LzmaHeader, 2, 2));


        my $LzmaPropertyData;
        $self->smartReadExact(\$LzmaPropertyData, $LzmaPropertiesSize)
                or return $self->saveErrorString(undef, "Truncated file");

        # The bytes just consumed are taken off the stored compressed
        # length so the remaining-input counters match what is left.
        if (! $streamingMode) {
            *$self->{ZipData}{CompressedLen}->subtract(4 + $LzmaPropertiesSize) ;
            *$self->{CompressedInputLengthRemaining} =
                *$self->{CompressedInputLength} = *$self->{ZipData}{CompressedLen}->get64bit();
        }

        my $obj =
            IO::Uncompress::Adapter::UnLzma::mkUncompZipObject($LzmaPropertyData);

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_STORE)
    {
        *$self->{Type} = 'zip-stored';

        my $obj =
        IO::Uncompress::Adapter::Identity::mkUncompObject($streamingMode,
                                                          $zip64);

        *$self->{Uncomp} = $obj;
    }
    else
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod");
    }

    # Description of the member handed back to the caller.
    return {
        'Type'               => 'zip',
        'FingerprintLength'  => 4,
        #'HeaderLength'       => $compressedMethod == 8 ? length $keep : 0,
        'HeaderLength'       => length $keep,
        'Zip64'              => $zip64,
        # Data descriptor size: none unless streamed; 24 bytes with Zip64
        # sizes, 16 otherwise.
        'TrailerLength'      => ! $streamingMode ? 0 : $zip64 ? 24 : 16,
        'Header'             => $keep,
        'CompressedLength'   => $compressedLength ,
        'UncompressedLength' => $uncompressedLength ,
        'CRC32'              => $crc32 ,
        'Name'               => $filename,
        'efs'                => $efs_flag, # language encoding flag
        'Time'               => _dosToUnixTime($lastModTime),
        'Stream'             => $streamingMode,

        'MethodID'           => $compressedMethod,
        'MethodName'         => $MethodNames{$compressedMethod} || 'Unknown',

#        'TextFlag'      => $flag & GZIP_FLG_FTEXT ? 1 : 0,
#        'HeaderCRCFlag' => $flag & GZIP_FLG_FHCRC ? 1 : 0,
#        'NameFlag'      => $flag & GZIP_FLG_FNAME ? 1 : 0,
#        'CommentFlag'   => $flag & GZIP_FLG_FCOMMENT ? 1 : 0,
#        'ExtraFlag'     => $flag & GZIP_FLG_FEXTRA ? 1 : 0,
#        'Comment'       => $comment,
#        'OsID'          => $os,
#        'OsName'        => defined $GZIP_OS_Names{$os}
#                                 ? $GZIP_OS_Names{$os} : "Unknown",
#        'HeaderCRC'     => $HeaderCRC,
#        'Flags'         => $flag,
#        'ExtraFlags'    => $xfl,
        'ExtraFieldRaw' => $extraField,
        'ExtraField'    => [ @EXTRA ],


      }
}
788
sub filterUncompressed
{
    # Refresh the running CRC32 kept under ZipData.  For deflated members
    # the value is taken from the Uncomp object; for every other method it
    # is computed here from the buffer the first argument refers to.
    my ($self, $data, $extra) = @_ ;

    *$self->{ZipData}{CRC32} =
        *$self->{ZipData}{Method} == ZIP_CM_DEFLATE
            ? *$self->{Uncomp}->crc32()
            : Compress::Raw::Zlib::crc32($$data, *$self->{ZipData}{CRC32}, $extra);
}
800
801
802# from Archive::Zip & info-zip
sub _dosToUnixTime
{
    # Convert a packed 32-bit DOS date/time value into a Unix time_t,
    # interpreted in the local time zone.  Returns 0 when the packed
    # fields do not form a valid date (for example an all-zero timestamp).
    my $dt = shift;

    # The year field counts from 1980.  The full four-digit year is passed
    # to timelocal so that it is never subject to Time::Local's handling of
    # two-digit years.
    my $year = ( ( $dt >> 25 ) & 0x7f ) + 1980;
    my $mon  = ( ( $dt >> 21 ) & 0x0f ) - 1;
    my $mday = ( ( $dt >> 16 ) & 0x1f );

    my $hour = ( ( $dt >> 11 ) & 0x1f );
    my $min  = ( ( $dt >> 5 ) & 0x3f );
    # Seconds are stored halved in the low 5 bits.
    my $sec  = ( ( $dt << 1 ) & 0x3e );

    use Time::Local ;

    # timelocal croaks on out-of-range fields instead of returning undef,
    # so trap that here; neither $@ nor any __DIE__ hook is disturbed.
    my $time_t = do {
        local $@;
        local $SIG{__DIE__};
        eval { Time::Local::timelocal( $sec, $min, $hour, $mday, $mon, $year) };
    };

    return 0 if ! defined $time_t;
    return $time_t;

}
821
822#sub scanCentralDirectory
823#{
824#    # Use cases
825#    # 1 32-bit CD
826#    # 2 64-bit CD
827#
828#    my $self = shift ;
829#
830#    my @CD = ();
831#    my $offset = $self->findCentralDirectoryOffset();
832#
833#    return 0
834#        if ! defined $offset;
835#
836#    $self->smarkSeek($offset, 0, SEEK_SET) ;
837#
838#    # Now walk the Central Directory Records
839#    my $buffer ;
840#    while ($self->smartReadExact(\$buffer, 46) &&
841#           unpack("V", $buffer) == ZIP_CENTRAL_HDR_SIG) {
842#
843#        my $compressedLength   = unpack ("V", substr($buffer, 20, 4));
844#        my $filename_length    = unpack ("v", substr($buffer, 28, 2));
845#        my $extra_length       = unpack ("v", substr($buffer, 30, 2));
846#        my $comment_length     = unpack ("v", substr($buffer, 32, 2));
847#
848#        $self->smarkSeek($filename_length + $extra_length + $comment_length, 0, SEEK_CUR)
849#            if $extra_length || $comment_length || $filename_length;
850#        push @CD, $compressedLength ;
851#    }
852#
853#}
854#
855#sub findCentralDirectoryOffset
856#{
857#    my $self = shift ;
858#
859#    # Most common use-case is where there is no comment, so
860#    # know exactly where the end of central directory record
861#    # should be.
862#
863#    $self->smarkSeek(-22, 0, SEEK_END) ;
864#
865#    my $buffer;
866#    $self->smartReadExact(\$buffer, 22) ;
867#
868#    my $zip64 = 0;
869#    my $centralDirOffset ;
870#    if ( unpack("V", $buffer) == ZIP_END_CENTRAL_HDR_SIG ) {
871#        $centralDirOffset = unpack ("V", substr($buffer, 16, 2));
872#    }
873#    else {
874#        die "xxxx";
875#    }
876#
877#    return $centralDirOffset ;
878#}
879#
880#sub is84BitCD
881#{
882#    # TODO
883#    my $self = shift ;
884#}
885
886
sub skip
{
    # Move the read position forward by $size bytes relative to the current
    # position.  $size is either a plain number or a U64 object.  Returns
    # whatever smartSeek returns.
    my $self = shift;
    my $size = shift;

    use Fcntl qw(SEEK_CUR);

    my $distance = ref $size eq 'U64' ? $size->get64bit() : $size ;

    # Every other smartSeek call in this file passes the whence constant as
    # the third argument, after a 0 second argument; do the same here so
    # SEEK_CUR is not taken for the second argument.
    return $self->smartSeek($distance, 0, SEEK_CUR);
}
901
902
sub scanCentralDirectory
{
    # Walk the central directory and return a list with one U64 compressed
    # length per entry, in directory order.  Returns an empty list when the
    # central directory cannot be located.  The read position in effect on
    # entry is restored before returning the list.
    my $self = shift;

    my $here = $self->tell();

    # Use cases
    # 1 32-bit CD
    # 2 64-bit CD

    my @CD = ();
    my $offset = $self->findCentralDirectoryOffset();

    return ()
        if ! defined $offset;

    # Jump to the first central directory record.
    $self->smartSeek($offset, 0, SEEK_SET) ;

    # Now walk the Central Directory Records
    my $buffer ;
    while ($self->smartReadExact(\$buffer, 46) &&
           unpack("V", $buffer) == ZIP_CENTRAL_HDR_SIG) {

        my $compressedLength   = unpack("V", substr($buffer, 20, 4));
        my $uncompressedLength = unpack("V", substr($buffer, 24, 4));
        my $filename_length    = unpack("v", substr($buffer, 28, 2));
        my $extra_length       = unpack("v", substr($buffer, 30, 2));
        my $comment_length     = unpack("v", substr($buffer, 32, 2));

        $self->skip($filename_length ) ;

        my $v64 = U64->new( $compressedLength );

        if (U64::full32 $compressedLength ) {
            # The 32-bit field is saturated, so the real length (if any) is
            # in the Zip64 extra field.  Read the extra data to look for it.
            $self->smartReadExact(\$buffer, $extra_length) ;
            die "xxx $offset $comment_length $filename_length $extra_length" . length($buffer)
                if length($buffer) != $extra_length;
            my $got = $self->get64Extra($buffer, U64::full32 $uncompressedLength);

            # If not Zip64 extra field, assume size is 0xFFFFFFFF
            $v64 = $got if defined $got;
        }
        else {
            $self->skip($extra_length) ;
        }

        $self->skip($comment_length ) ;

        push @CD, $v64 ;
    }

    $self->smartSeek($here, 0, SEEK_SET) ;

    return @CD;
}
958
sub get64Extra
{
    # Look up extra field ID 0x0001 in $buffer and unpack a 64-bit value
    # from it.  When $is_uncomp is true the value is taken 8 bytes into the
    # field, otherwise from its start.  Returns a U64 object, or undef when
    # the ID is not present.
    my ($self, $buffer, $is_uncomp) = @_ ;

    my $extra = IO::Compress::Zlib::Extra::findID(0x0001, $buffer);

    return undef
        if ! defined $extra ;

    my $start = $is_uncomp ? 8 : 0 ;

    return U64::newUnpack_V64(substr($extra, $start)) ;
}
978
sub offsetFromZip64
{
    # Given $here, the position of the end of central directory record,
    # follow the Zip64 locator found in the 20 bytes before it to the Zip64
    # end of central directory record, and return the 64-bit value held at
    # offset 36 of that record's body.  Dies if a seek or read fails or an
    # expected signature is missing.
    my ($self, $here) = @_ ;

    $self->smartSeek($here - 20, 0, SEEK_SET)
        or die "xx $!" ;

    my $buffer;
    my $got = 0;
    $self->smartReadExact(\$buffer, 20)
        or die "xxx $here $got $!" ;

    die "zzz"
        if unpack("V", $buffer) != ZIP64_END_CENTRAL_LOC_HDR_SIG ;

    # Bytes 8..15 of the locator give the position of the Zip64 end of
    # central directory record.
    my $recordPos = U64::Value_VV64 substr($buffer,  8, 8);

    $self->smartSeek($recordPos, 0, SEEK_SET) ;

    $self->smartReadExact(\$buffer, 4)
        or die "xxx" ;

    die "zzz"
        if unpack("V", $buffer) != ZIP64_END_CENTRAL_REC_HDR_SIG ;

    # 8-byte size of the record body, then the body itself.
    $self->smartReadExact(\$buffer, 8)
        or die "xxx" ;
    my $size  = U64::Value_VV64($buffer);
    $self->smartReadExact(\$buffer, $size)
        or die "xxx" ;

    return U64::Value_VV64 substr($buffer,  36, 8);
}
1018
# Packed (little-endian) form of the end of central directory signature,
# used to search for it in a raw buffer.
use constant Pack_ZIP_END_CENTRAL_HDR_SIG => pack("V", ZIP_END_CENTRAL_HDR_SIG);

sub findCentralDirectoryOffset
{
    # Locate the end of central directory record and return the offset of
    # the central directory stored in it.  When the stored 32-bit offset is
    # saturated, the value comes from offsetFromZip64 instead.  Returns
    # undef when no end of central directory signature is found; dies if a
    # read or seek fails.
    my $self = shift ;

    # Most common use-case is where there is no comment, so
    # know exactly where the end of central directory record
    # should be.

    $self->smartSeek(-22, 0, SEEK_END) ;
    my $here = $self->tell();

    my $buffer;
    $self->smartReadExact(\$buffer, 22)
        or die "xxx" ;

    my $centralDirOffset ;
    if ( unpack("V", $buffer) == ZIP_END_CENTRAL_HDR_SIG ) {
        $centralDirOffset = unpack("V", substr($buffer, 16,  4));
    }
    else {
        # The record is not at the very end, so search backwards from the
        # end of the input, widening the window by 1024 bytes each pass.
        $self->smartSeek(0, 0, SEEK_END) ;

        my $fileLen = $self->tell();
        my $want = 0 ;

        while(1) {
            $want += 1024;
            my $seekTo = $fileLen - $want;
            if ($seekTo < 0 ) {
                $seekTo = 0;
                $want = $fileLen ;
            }
            $self->smartSeek( $seekTo, 0, SEEK_SET)
                or die "xxx $!" ;

            # smartReadExact is given a reference to the buffer at every
            # other call site in this file; do the same here.
            $self->smartReadExact(\$buffer, $want)
                or die "xxx " ;
            my $pos = rindex( $buffer, Pack_ZIP_END_CENTRAL_HDR_SIG);

            if ($pos >= 0) {
                #$here = $self->tell();
                $here = $seekTo + $pos ;
                $centralDirOffset = unpack("V", substr($buffer, $pos + 16,  4));
                last ;
            }

            # The window already covers the whole input: give up.
            return undef
                if $want == $fileLen;
        }
    }

    $centralDirOffset = $self->offsetFromZip64($here)
        if U64::full32 $centralDirOffset ;

    return $centralDirOffset ;
}
1078
10791;
1080
1081__END__
1082
1083
1084=head1 NAME
1085
1086IO::Uncompress::Unzip - Read zip files/buffers
1087
1088=head1 SYNOPSIS
1089
1090    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1091
1092    my $status = unzip $input => $output [,OPTS]
1093        or die "unzip failed: $UnzipError\n";
1094
1095    my $z = IO::Uncompress::Unzip->new( $input [OPTS] )
1096        or die "unzip failed: $UnzipError\n";
1097
1098    $status = $z->read($buffer)
1099    $status = $z->read($buffer, $length)
1100    $status = $z->read($buffer, $length, $offset)
1101    $line = $z->getline()
1102    $char = $z->getc()
1103    $char = $z->ungetc()
1104    $char = $z->opened()
1105
1106    $status = $z->inflateSync()
1107
1108    $data = $z->trailingData()
1109    $status = $z->nextStream()
1110    $data = $z->getHeaderInfo()
1111    $z->tell()
1112    $z->seek($position, $whence)
1113    $z->binmode()
1114    $z->fileno()
1115    $z->eof()
1116    $z->close()
1117
1118    $UnzipError ;
1119
1120    # IO::File mode
1121
1122    <$z>
1123    read($z, $buffer);
1124    read($z, $buffer, $length);
1125    read($z, $buffer, $length, $offset);
1126    tell($z)
1127    seek($z, $position, $whence)
1128    binmode($z)
1129    fileno($z)
1130    eof($z)
1131    close($z)
1132
1133=head1 DESCRIPTION
1134
This module provides a Perl interface that allows the reading of
zip files/buffers.
1137
1138For writing zip files/buffers, see the companion module IO::Compress::Zip.
1139
1140The primary purpose of this module is to provide I<streaming> read access to
1141zip files and buffers.
1142
1143At present the following compression methods are supported by IO::Uncompress::Unzip
1144
1145=over 5
1146
1147=item Store (0)
1148
1149=item Deflate (8)
1150
1151=item Bzip2 (12)
1152
1153To read Bzip2 content, the module C<IO::Uncompress::Bunzip2> must
1154be installed.
1155
1156=item Lzma (14)
1157
1158To read LZMA content, the module C<IO::Uncompress::UnLzma> must
1159be installed.
1160
1161=item Xz (95)
1162
1163To read Xz content, the module C<IO::Uncompress::UnXz> must
1164be installed.
1165
1166=item Zstandard (93)
1167
1168To read Zstandard content, the module C<IO::Uncompress::UnZstd> must
1169be installed.
1170
1171=back
1172
1173=head1 Functional Interface
1174
1175A top-level function, C<unzip>, is provided to carry out
1176"one-shot" uncompression between buffers and/or files. For finer
1177control over the uncompression process, see the L</"OO Interface">
1178section.
1179
1180    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1181
1182    unzip $input_filename_or_reference => $output_filename_or_reference [,OPTS]
1183        or die "unzip failed: $UnzipError\n";
1184
1185The functional interface needs Perl5.005 or better.
1186
1187=head2 unzip $input_filename_or_reference => $output_filename_or_reference [, OPTS]
1188
1189C<unzip> expects at least two parameters,
1190C<$input_filename_or_reference> and C<$output_filename_or_reference>
1191and zero or more optional parameters (see L</Optional Parameters>)
1192
1193=head3 The C<$input_filename_or_reference> parameter
1194
1195The parameter, C<$input_filename_or_reference>, is used to define the
1196source of the compressed data.
1197
1198It can take one of the following forms:
1199
1200=over 5
1201
1202=item A filename
1203
1204If the C<$input_filename_or_reference> parameter is a simple scalar, it is
1205assumed to be a filename. This file will be opened for reading and the
1206input data will be read from it.
1207
1208=item A filehandle
1209
1210If the C<$input_filename_or_reference> parameter is a filehandle, the input
1211data will be read from it.  The string '-' can be used as an alias for
1212standard input.
1213
1214=item A scalar reference
1215
1216If C<$input_filename_or_reference> is a scalar reference, the input data
1217will be read from C<$$input_filename_or_reference>.
1218
1219=item An array reference
1220
1221If C<$input_filename_or_reference> is an array reference, each element in
1222the array must be a filename.
1223
1224The input data will be read from each file in turn.
1225
1226The complete array will be walked to ensure that it only
1227contains valid filenames before any data is uncompressed.
1228
1229=item An Input FileGlob string
1230
1231If C<$input_filename_or_reference> is a string that is delimited by the
1232characters "<" and ">" C<unzip> will assume that it is an
1233I<input fileglob string>. The input is the list of files that match the
1234fileglob.
1235
1236See L<File::GlobMapper|File::GlobMapper> for more details.
1237
1238=back
1239
1240If the C<$input_filename_or_reference> parameter is any other type,
1241C<undef> will be returned.
1242
1243=head3 The C<$output_filename_or_reference> parameter
1244
1245The parameter C<$output_filename_or_reference> is used to control the
1246destination of the uncompressed data. This parameter can take one of
1247these forms.
1248
1249=over 5
1250
1251=item A filename
1252
1253If the C<$output_filename_or_reference> parameter is a simple scalar, it is
1254assumed to be a filename.  This file will be opened for writing and the
1255uncompressed data will be written to it.
1256
1257=item A filehandle
1258
1259If the C<$output_filename_or_reference> parameter is a filehandle, the
1260uncompressed data will be written to it.  The string '-' can be used as
1261an alias for standard output.
1262
1263=item A scalar reference
1264
1265If C<$output_filename_or_reference> is a scalar reference, the
1266uncompressed data will be stored in C<$$output_filename_or_reference>.
1267
1268=item An Array Reference
1269
1270If C<$output_filename_or_reference> is an array reference,
1271the uncompressed data will be pushed onto the array.
1272
1273=item An Output FileGlob
1274
1275If C<$output_filename_or_reference> is a string that is delimited by the
1276characters "<" and ">" C<unzip> will assume that it is an
1277I<output fileglob string>. The output is the list of files that match the
1278fileglob.
1279
When C<$output_filename_or_reference> is a fileglob string,
C<$input_filename_or_reference> must also be a fileglob string. Anything
else is an error.
1283
1284See L<File::GlobMapper|File::GlobMapper> for more details.
1285
1286=back
1287
1288If the C<$output_filename_or_reference> parameter is any other type,
1289C<undef> will be returned.
1290
1291=head2 Notes
1292
1293When C<$input_filename_or_reference> maps to multiple compressed
1294files/buffers and C<$output_filename_or_reference> is
1295a single file/buffer, after uncompression C<$output_filename_or_reference> will contain a
1296concatenation of all the uncompressed data from each of the input
1297files/buffers.
1298
1299=head2 Optional Parameters
1300
1301The optional parameters for the one-shot function C<unzip>
1302are (for the most part) identical to those used with the OO interface defined in the
1303L</"Constructor Options"> section. The exceptions are listed below
1304
1305=over 5
1306
1307=item C<< AutoClose => 0|1 >>
1308
1309This option applies to any input or output data streams to
1310C<unzip> that are filehandles.
1311
1312If C<AutoClose> is specified, and the value is true, it will result in all
1313input and/or output filehandles being closed once C<unzip> has
1314completed.
1315
1316This parameter defaults to 0.
1317
1318=item C<< BinModeOut => 0|1 >>
1319
1320This option is now a no-op. All files will be written  in binmode.
1321
1322=item C<< Append => 0|1 >>
1323
1324The behaviour of this option is dependent on the type of output data
1325stream.
1326
1327=over 5
1328
1329=item * A Buffer
1330
If C<Append> is enabled, all uncompressed data will be appended to the end of
the output buffer. Otherwise the output buffer will be cleared before any
uncompressed data is written to it.
1334
1335=item * A Filename
1336
1337If C<Append> is enabled, the file will be opened in append mode. Otherwise
1338the contents of the file, if any, will be truncated before any uncompressed
1339data is written to it.
1340
1341=item * A Filehandle
1342
1343If C<Append> is enabled, the filehandle will be positioned to the end of
1344the file via a call to C<seek> before any uncompressed data is
1345written to it.  Otherwise the file pointer will not be moved.
1346
1347=back
1348
1349When C<Append> is specified, and set to true, it will I<append> all uncompressed
1350data to the output data stream.
1351
1352So when the output is a filehandle it will carry out a seek to the eof
1353before writing any uncompressed data. If the output is a filename, it will be opened for
1354appending. If the output is a buffer, all uncompressed data will be
1355appended to the existing buffer.
1356
1357Conversely when C<Append> is not specified, or it is present and is set to
1358false, it will operate as follows.
1359
1360When the output is a filename, it will truncate the contents of the file
1361before writing any uncompressed data. If the output is a filehandle
1362its position will not be changed. If the output is a buffer, it will be
1363wiped before any uncompressed data is output.
1364
1365Defaults to 0.
1366
1367=item C<< MultiStream => 0|1 >>
1368
1369If the input file/buffer contains multiple compressed data streams, this
1370option will uncompress the whole lot as a single data stream.
1371
1372Defaults to 0.
1373
1374=item C<< TrailingData => $scalar >>
1375
1376Returns the data, if any, that is present immediately after the compressed
1377data stream once uncompression is complete.
1378
1379This option can be used when there is useful information immediately
1380following the compressed data stream, and you don't know the length of the
1381compressed data stream.
1382
1383If the input is a buffer, C<trailingData> will return everything from the
1384end of the compressed data stream to the end of the buffer.
1385
1386If the input is a filehandle, C<trailingData> will return the data that is
1387left in the filehandle input buffer once the end of the compressed data
1388stream has been reached. You can then use the filehandle to read the rest
1389of the input file.
1390
1391Don't bother using C<trailingData> if the input is a filename.
1392
1393If you know the length of the compressed data stream before you start
1394uncompressing, you can avoid having to use C<trailingData> by setting the
1395C<InputLength> option.
1396
1397=back
1398
1399=head2 OneShot Examples
1400
1401Say you have a zip file, C<file1.zip>, that only contains a
1402single member, you can read it and write the uncompressed data to the
1403file C<file1.txt> like this.
1404
1405    use strict ;
1406    use warnings ;
1407    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1408
1409    my $input = "file1.zip";
1410    my $output = "file1.txt";
1411    unzip $input => $output
1412        or die "unzip failed: $UnzipError\n";
1413
1414If you have a zip file that contains multiple members and want to read a
1415specific member from the file, say C<"data1">, use the C<Name> option
1416
1417    use strict ;
1418    use warnings ;
1419    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1420
1421    my $input = "file1.zip";
1422    my $output = "file1.txt";
1423    unzip $input => $output, Name => "data1"
1424        or die "unzip failed: $UnzipError\n";
1425
1426Alternatively, if you want to read the  C<"data1"> member into memory, use
1427a scalar reference for the C<output> parameter.
1428
1429    use strict ;
1430    use warnings ;
1431    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1432
1433    my $input = "file1.zip";
1434    my $output ;
1435    unzip $input => \$output, Name => "data1"
1436        or die "unzip failed: $UnzipError\n";
1437    # $output now contains the uncompressed data
1438
1439To read from an existing Perl filehandle, C<$input>, and write the
1440uncompressed data to a buffer, C<$buffer>.
1441
1442    use strict ;
1443    use warnings ;
1444    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1445    use IO::File ;
1446
1447    my $input = IO::File->new( "<file1.zip" )
1448        or die "Cannot open 'file1.zip': $!\n" ;
1449    my $buffer ;
1450    unzip $input => \$buffer
1451        or die "unzip failed: $UnzipError\n";
1452
1453=head1 OO Interface
1454
1455=head2 Constructor
1456
1457The format of the constructor for IO::Uncompress::Unzip is shown below
1458
1459    my $z = IO::Uncompress::Unzip->new( $input [OPTS] )
1460        or die "IO::Uncompress::Unzip failed: $UnzipError\n";
1461
The constructor takes one mandatory parameter, C<$input>, defined below, and
zero or more C<OPTS>, defined in L</"Constructor Options">.
1464
1465Returns an C<IO::Uncompress::Unzip> object on success and undef on failure.
1466The variable C<$UnzipError> will contain an error message on failure.
1467
1468If you are running Perl 5.005 or better the object, C<$z>, returned from
1469IO::Uncompress::Unzip can be used exactly like an L<IO::File|IO::File> filehandle.
1470This means that all normal input file operations can be carried out with
1471C<$z>.  For example, to read a line from a compressed file/buffer you can
1472use either of these forms
1473
1474    $line = $z->getline();
1475    $line = <$z>;
1476
Below is a simple example of using the OO interface to read the compressed file
C<myfile.zip> and write its contents to stdout.
1479
1480    my $filename = "myfile.zip";
1481    my $z = IO::Uncompress::Unzip->new($filename)
1482        or die "IO::Uncompress::Unzip failed: $UnzipError\n";
1483
1484    while (<$z>) {
1485        print $_;
1486    }
1487    $z->close();
1488
1489See L</EXAMPLES> for further examples
1490
1491The mandatory parameter C<$input> is used to determine the source of the
1492compressed data. This parameter can take one of three forms.
1493
1494=over 5
1495
1496=item A filename
1497
1498If the C<$input> parameter is a scalar, it is assumed to be a filename. This
1499file will be opened for reading and the compressed data will be read from it.
1500
1501=item A filehandle
1502
1503If the C<$input> parameter is a filehandle, the compressed data will be
1504read from it.
1505The string '-' can be used as an alias for standard input.
1506
1507=item A scalar reference
1508
1509If C<$input> is a scalar reference, the compressed data will be read from
1510C<$$input>.
1511
1512=back
1513
1514=head2 Constructor Options
1515
1516The option names defined below are case insensitive and can be optionally
1517prefixed by a '-'.  So all of the following are valid
1518
1519    -AutoClose
1520    -autoclose
1521    AUTOCLOSE
1522    autoclose
1523
1524OPTS is a combination of the following options:
1525
1526=over 5
1527
1528=item C<< Name => "membername" >>
1529
1530Open "membername" from the zip file for reading.
1531
1532=item C<< Efs => 0| 1 >>
1533
1534When this option is set to true AND the zip archive being read has
1535the "Language Encoding Flag" (EFS) set, the member name is assumed to be encoded in UTF-8.
1536
If the member name in the zip archive is not valid UTF-8 when this option is true,
the script will die with an error message.
1539
1540Note that this option only works with Perl 5.8.4 or better.
1541
1542This option defaults to B<false>.
1543
1544=item C<< AutoClose => 0|1 >>
1545
1546This option is only valid when the C<$input> parameter is a filehandle. If
1547specified, and the value is true, it will result in the file being closed once
1548either the C<close> method is called or the IO::Uncompress::Unzip object is
1549destroyed.
1550
1551This parameter defaults to 0.
1552
1553=item C<< MultiStream => 0|1 >>
1554
1555Treats the complete zip file/buffer as a single compressed data
1556stream. When reading in multi-stream mode each member of the zip
1557file/buffer will be uncompressed in turn until the end of the file/buffer
1558is encountered.
1559
1560This parameter defaults to 0.
1561
1562=item C<< Prime => $string >>
1563
1564This option will uncompress the contents of C<$string> before processing the
1565input file/buffer.
1566
1567This option can be useful when the compressed data is embedded in another
1568file/data structure and it is not possible to work out where the compressed
1569data begins without having to read the first few bytes. If this is the
1570case, the uncompression can be I<primed> with these bytes using this
1571option.
1572
1573=item C<< Transparent => 0|1 >>
1574
1575If this option is set and the input file/buffer is not compressed data,
1576the module will allow reading of it anyway.
1577
1578In addition, if the input file/buffer does contain compressed data and
1579there is non-compressed data immediately following it, setting this option
1580will make this module treat the whole file/buffer as a single data stream.
1581
1582This option defaults to 1.
1583
1584=item C<< BlockSize => $num >>
1585
1586When reading the compressed input data, IO::Uncompress::Unzip will read it in
1587blocks of C<$num> bytes.
1588
1589This option defaults to 4096.
1590
1591=item C<< InputLength => $size >>
1592
1593When present this option will limit the number of compressed bytes read
1594from the input file/buffer to C<$size>. This option can be used in the
1595situation where there is useful data directly after the compressed data
1596stream and you know beforehand the exact length of the compressed data
1597stream.
1598
1599This option is mostly used when reading from a filehandle, in which case
1600the file pointer will be left pointing to the first byte directly after the
1601compressed data stream.
1602
1603This option defaults to off.
1604
1605=item C<< Append => 0|1 >>
1606
1607This option controls what the C<read> method does with uncompressed data.
1608
1609If set to 1, all uncompressed data will be appended to the output parameter
1610of the C<read> method.
1611
1612If set to 0, the contents of the output parameter of the C<read> method
1613will be overwritten by the uncompressed data.
1614
1615Defaults to 0.
1616
1617=item C<< Strict => 0|1 >>
1618
1619This option controls whether the extra checks defined below are used when
1620carrying out the decompression. When Strict is on, the extra tests are
1621carried out, when Strict is off they are not.
1622
1623The default for this option is off.
1624
1625=back
1626
1627=head1 Methods
1628
1629=head2 read
1630
1631Usage is
1632
1633    $status = $z->read($buffer)
1634
Reads a block of compressed data (the size of the compressed block is
determined by the C<BlockSize> option in the constructor), uncompresses it and
writes any uncompressed data into C<$buffer>. If the C<Append> parameter is
set in the constructor, the uncompressed data will be appended to the
C<$buffer> parameter. Otherwise C<$buffer> will be overwritten.
1640
1641Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
1642or a negative number on error.
1643
1644=head2 read
1645
1646Usage is
1647
1648    $status = $z->read($buffer, $length)
1649    $status = $z->read($buffer, $length, $offset)
1650
1651    $status = read($z, $buffer, $length)
1652    $status = read($z, $buffer, $length, $offset)
1653
1654Attempt to read C<$length> bytes of uncompressed data into C<$buffer>.
1655
1656The main difference between this form of the C<read> method and the
1657previous one, is that this one will attempt to return I<exactly> C<$length>
1658bytes. The only circumstances that this function will not is if end-of-file
1659or an IO error is encountered.
1660
1661Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
1662or a negative number on error.
1663
1664=head2 getline
1665
1666Usage is
1667
1668    $line = $z->getline()
1669    $line = <$z>
1670
1671Reads a single line.
1672
1673This method fully supports the use of the variable C<$/> (or
1674C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to
1675determine what constitutes an end of line. Paragraph mode, record mode and
1676file slurp mode are all supported.
1677
1678=head2 getc
1679
1680Usage is
1681
1682    $char = $z->getc()
1683
1684Read a single character.
1685
1686=head2 ungetc
1687
1688Usage is
1689
1690    $char = $z->ungetc($string)
1691
1692=head2 inflateSync
1693
1694Usage is
1695
1696    $status = $z->inflateSync()
1697
1698TODO
1699
1700=head2 getHeaderInfo
1701
1702Usage is
1703
1704    $hdr  = $z->getHeaderInfo();
1705    @hdrs = $z->getHeaderInfo();
1706
This method returns either a hash reference (in scalar context) or a list
of hash references (in array context) that contains information about each
of the header fields in the compressed data stream(s).
1710
1711=head2 tell
1712
1713Usage is
1714
1715    $z->tell()
1716    tell $z
1717
1718Returns the uncompressed file offset.
1719
1720=head2 eof
1721
1722Usage is
1723
1724    $z->eof();
1725    eof($z);
1726
1727Returns true if the end of the compressed input stream has been reached.
1728
1729=head2 seek
1730
1731    $z->seek($position, $whence);
1732    seek($z, $position, $whence);
1733
1734Provides a sub-set of the C<seek> functionality, with the restriction
1735that it is only legal to seek forward in the input file/buffer.
1736It is a fatal error to attempt to seek backward.
1737
1738Note that the implementation of C<seek> in this module does not provide
1739true random access to a compressed file/buffer. It  works by uncompressing
1740data from the current offset in the file/buffer until it reaches the
1741uncompressed offset specified in the parameters to C<seek>. For very small
1742files this may be acceptable behaviour. For large files it may cause an
1743unacceptable delay.
1744
The C<$whence> parameter takes one of the usual values, namely SEEK_SET,
SEEK_CUR or SEEK_END.
1747
1748Returns 1 on success, 0 on failure.
1749
1750=head2 binmode
1751
1752Usage is
1753
1754    $z->binmode
1755    binmode $z ;
1756
1757This is a noop provided for completeness.
1758
1759=head2 opened
1760
1761    $z->opened()
1762
Returns true if the object currently refers to an opened file/buffer.
1764
1765=head2 autoflush
1766
1767    my $prev = $z->autoflush()
1768    my $prev = $z->autoflush(EXPR)
1769
1770If the C<$z> object is associated with a file or a filehandle, this method
1771returns the current autoflush setting for the underlying filehandle. If
1772C<EXPR> is present, and is non-zero, it will enable flushing after every
1773write/print operation.
1774
1775If C<$z> is associated with a buffer, this method has no effect and always
1776returns C<undef>.
1777
1778B<Note> that the special variable C<$|> B<cannot> be used to set or
1779retrieve the autoflush setting.
1780
1781=head2 input_line_number
1782
1783    $z->input_line_number()
1784    $z->input_line_number(EXPR)
1785
1786Returns the current uncompressed line number. If C<EXPR> is present it has
1787the effect of setting the line number. Note that setting the line number
1788does not change the current position within the file/buffer being read.
1789
1790The contents of C<$/> are used to determine what constitutes a line
1791terminator.
1792
1793=head2 fileno
1794
1795    $z->fileno()
1796    fileno($z)
1797
1798If the C<$z> object is associated with a file or a filehandle, C<fileno>
1799will return the underlying file descriptor. Once the C<close> method is
1800called C<fileno> will return C<undef>.
1801
1802If the C<$z> object is associated with a buffer, this method will return
1803C<undef>.
1804
1805=head2 close
1806
1807    $z->close() ;
1808    close $z ;
1809
Closes the input file/buffer.
1811
1812For most versions of Perl this method will be automatically invoked if
1813the IO::Uncompress::Unzip object is destroyed (either explicitly or by the
1814variable with the reference to the object going out of scope). The
1815exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In
1816these cases, the C<close> method will be called automatically, but
1817not until global destruction of all live objects when the program is
1818terminating.
1819
1820Therefore, if you want your scripts to be able to run on all versions
1821of Perl, you should call C<close> explicitly and not rely on automatic
1822closing.
1823
1824Returns true on success, otherwise 0.
1825
1826If the C<AutoClose> option has been enabled when the IO::Uncompress::Unzip
1827object was created, and the object is associated with a file, the
1828underlying file will also be closed.
1829
1830=head2 nextStream
1831
1832Usage is
1833
1834    my $status = $z->nextStream();
1835
1836Skips to the next compressed data stream in the input file/buffer. If a new
1837compressed data stream is found, the eof marker will be cleared and C<$.>
1838will be reset to 0.
1839
1840If trailing data is present immediately after the zip archive and the
1841C<Transparent> option is enabled, this method will consider that trailing
1842data to be another member of the zip archive.
1843
1844Returns 1 if a new stream was found, 0 if none was found, and -1 if an
1845error was encountered.
1846
1847=head2 trailingData
1848
1849Usage is
1850
1851    my $data = $z->trailingData();
1852
1853Returns the data, if any, that is present immediately after the compressed
1854data stream once uncompression is complete. It only makes sense to call
1855this method once the end of the compressed data stream has been
1856encountered.
1857
1858This option can be used when there is useful information immediately
1859following the compressed data stream, and you don't know the length of the
1860compressed data stream.
1861
1862If the input is a buffer, C<trailingData> will return everything from the
1863end of the compressed data stream to the end of the buffer.
1864
1865If the input is a filehandle, C<trailingData> will return the data that is
1866left in the filehandle input buffer once the end of the compressed data
1867stream has been reached. You can then use the filehandle to read the rest
1868of the input file.
1869
1870Don't bother using C<trailingData> if the input is a filename.
1871
1872If you know the length of the compressed data stream before you start
1873uncompressing, you can avoid having to use C<trailingData> by setting the
1874C<InputLength> option in the constructor.
1875
1876=head1 Importing
1877
1878No symbolic constants are required by IO::Uncompress::Unzip at present.
1879
1880=over 5
1881
1882=item :all
1883
1884Imports C<unzip> and C<$UnzipError>.
1885Same as doing this
1886
1887    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1888
1889=back
1890
1891=head1 EXAMPLES
1892
1893=head2  Simple Read
1894
1895Say you have a zip file, C<file1.zip>, that only contains a
1896single member, you can read it and write the uncompressed data to the
1897file C<file1.txt> like this.
1898
1899    use strict ;
1900    use warnings ;
1901    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1902
1903    my $filename = "file1.zip";
1904    my $z = IO::Uncompress::Unzip->new($filename)
1905        or die "IO::Uncompress::Unzip failed: $UnzipError\n";
1906    open my $out, ">", "file1.txt";
1907
1908    while (<$z>) {
1909        print $out $_;
1910    }
1911    $z->close();
1912
If you have a zip file that contains multiple members and want to read a
specific member from the file, say C<"data1">, use the C<Name> option when
constructing the C<IO::Uncompress::Unzip> object.
1916
1917    use strict ;
1918    use warnings ;
1919    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1920
1921    my $filename = "file1.zip";
1922    my $z = IO::Uncompress::Unzip->new($filename, Name => "data1")
1923        or die "IO::Uncompress::Unzip failed: $UnzipError\n";
1924
1925=head2 Walking through a zip file
1926
1927The code below can be used to traverse a zip file, one compressed data
1928stream at a time.
1929
1930    use IO::Uncompress::Unzip qw($UnzipError);
1931
1932    my $zipfile = "somefile.zip";
1933    my $u = IO::Uncompress::Unzip->new( $zipfile )
1934        or die "Cannot open $zipfile: $UnzipError";
1935
1936    my $status;
1937    for ($status = 1; $status > 0; $status = $u->nextStream())
1938    {
1939
1940        my $name = $u->getHeaderInfo()->{Name};
1941        warn "Processing member $name\n" ;
1942
1943        my $buff;
1944        while (($status = $u->read($buff)) > 0) {
1945            # Do something here
1946        }
1947
1948        last if $status < 0;
1949    }
1950
1951    die "Error processing $zipfile: $!\n"
1952        if $status < 0 ;
1953
1954Each individual compressed data stream is read until the logical
1955end-of-file is reached. Then C<nextStream> is called. This will skip to the
1956start of the next compressed data stream and clear the end-of-file flag.
1957
1958It is also worth noting that C<nextStream> can be called at any time -- you
1959don't have to wait until you have exhausted a compressed data stream before
1960skipping to the next one.
1961
1962=head2 Unzipping a complete zip file to disk
1963
Daniel S. Sterling has written a script that uses C<IO::Uncompress::Unzip>
to read a zip file and unzip its contents to disk.
1966
1967The script is available from L<https://gist.github.com/eqhmcow/5389877>
1968
1969=head2 Working with Net::FTP
1970
1971See L<IO::Compress::FAQ|IO::Compress::FAQ/"Compressed files and Net::FTP">
1972
1973=head1 SUPPORT
1974
1975General feedback/questions/bug reports should be sent to
1976L<https://github.com/pmqs/IO-Compress/issues> (preferred) or
1977L<https://rt.cpan.org/Public/Dist/Display.html?Name=IO-Compress>.
1978
1979=head1 SEE ALSO
1980
1981L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzma>, L<IO::Uncompress::UnLzma>, L<IO::Compress::Xz>, L<IO::Uncompress::UnXz>, L<IO::Compress::Lzip>, L<IO::Uncompress::UnLzip>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Compress::Lzf>, L<IO::Uncompress::UnLzf>, L<IO::Compress::Zstd>, L<IO::Uncompress::UnZstd>, L<IO::Uncompress::AnyInflate>, L<IO::Uncompress::AnyUncompress>
1982
1983L<IO::Compress::FAQ|IO::Compress::FAQ>
1984
1985L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>,
1986L<Archive::Tar|Archive::Tar>,
1987L<IO::Zlib|IO::Zlib>
1988
1989For RFC 1950, 1951 and 1952 see
1990L<https://datatracker.ietf.org/doc/html/rfc1950>,
1991L<https://datatracker.ietf.org/doc/html/rfc1951> and
1992L<https://datatracker.ietf.org/doc/html/rfc1952>
1993
1994The I<zlib> compression library was written by Jean-loup Gailly
1995C<gzip@prep.ai.mit.edu> and Mark Adler C<madler@alumni.caltech.edu>.
1996
1997The primary site for the I<zlib> compression library is
1998L<http://www.zlib.org>.
1999
2000The primary site for the I<zlib-ng> compression library is
2001L<https://github.com/zlib-ng/zlib-ng>.
2002
2003The primary site for gzip is L<http://www.gzip.org>.
2004
2005=head1 AUTHOR
2006
2007This module was written by Paul Marquess, C<pmqs@cpan.org>.
2008
2009=head1 MODIFICATION HISTORY
2010
2011See the Changes file.
2012
2013=head1 COPYRIGHT AND LICENSE
2014
2015Copyright (c) 2005-2024 Paul Marquess. All rights reserved.
2016
2017This program is free software; you can redistribute it and/or
2018modify it under the same terms as Perl itself.
2019