1package IO::Uncompress::Unzip;
2
3require 5.006 ;
4
# Reads zip archives (the format is described in PKWARE's APPNOTE.TXT, not RFC 1952)
6
7use strict ;
8use warnings;
9use bytes;
10
11use IO::File;
12use IO::Uncompress::RawInflate  2.101 ;
13use IO::Compress::Base::Common  2.101 qw(:Status );
14use IO::Uncompress::Adapter::Inflate  2.101 ;
15use IO::Uncompress::Adapter::Identity 2.101 ;
16use IO::Compress::Zlib::Extra 2.101 ;
17use IO::Compress::Zip::Constants 2.101 ;
18
19use Compress::Raw::Zlib  2.101 () ;
20
BEGIN
{
    # Silence any caller-installed __DIE__ hook while we probe for modules.
    local $SIG{__DIE__};

    # These decompression adapters are optional. Each one is loaded and
    # imported if it is installed; a failure to load is deliberately ignored.
    for my $adapter (qw(Bunzip2 UnLzma UnXz UnZstd))
    {
        my $class = "IO::Uncompress::Adapter::$adapter";

        eval {
            require "IO/Uncompress/Adapter/$adapter.pm";
            $class->import();
        };
    }
}
35
36
require Exporter ;

# Package globals: version, inheritance/export tables, the last-error
# variable handed to callers, and the trailer-record dispatch table.
our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $UnzipError, %headerLookup);

$VERSION = '2.102';
$UnzipError = '';

# Export setup: start from RawInflate's tags, then add this module's own
# symbols to the 'all' tag and register that tag's contents as exportable.
@ISA    = qw(IO::Uncompress::RawInflate Exporter);
@EXPORT_OK = qw( $UnzipError unzip );
%EXPORT_TAGS = %IO::Uncompress::RawInflate::EXPORT_TAGS ;
push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
Exporter::export_ok_tags('all');

# Maps a record signature (as unpacked with "V") to the routine that skips
# over that record. chkTrailer consults this table after each member.
%headerLookup = (
        ZIP_CENTRAL_HDR_SIG,            \&skipCentralDirectory,
        ZIP_END_CENTRAL_HDR_SIG,        \&skipEndCentralDirectory,
        ZIP64_END_CENTRAL_REC_HDR_SIG,  \&skipCentralDirectory64Rec,
        ZIP64_END_CENTRAL_LOC_HDR_SIG,  \&skipCentralDirectory64Loc,
        ZIP64_ARCHIVE_EXTRA_SIG,        \&skipArchiveExtra,
        ZIP64_DIGITAL_SIGNATURE_SIG,    \&skipDigitalSignature,
        );

# Human-readable names for the compression method IDs; used to fill in the
# 'MethodName' entry of the header hash built by _readZipHeader.
my %MethodNames = (
        ZIP_CM_DEFLATE()    => 'Deflated',
        ZIP_CM_BZIP2()      => 'Bzip2',
        ZIP_CM_LZMA()       => 'Lzma',
        ZIP_CM_STORE()      => 'Stored',
        ZIP_CM_XZ()         => 'Xz',
        ZIP_CM_ZSTD()       => 'Zstd',
    );
67
# Constructor. Builds the self-tied object (wired to $UnzipError) and passes
# the caller's arguments straight through to _create; whatever _create
# returns is the constructor's result.
sub new
{
    my $class = shift ;

    my $unzipper =
        IO::Compress::Base::Common::createSelfTiedObject($class, \$UnzipError);

    return $unzipper->_create(undef, 0, @_);
}
74
# Functional interface. Creates a throw-away self-tied object (wired to
# $UnzipError) and hands all arguments to its _inf method, returning that
# method's result.
sub unzip
{
    return IO::Compress::Base::Common::createSelfTiedObject(undef, \$UnzipError)
               ->_inf(@_) ;
}
80
# Returns the option table specific to this class as a flat list of
#   option-name => [ parser-type, default-value ]
# pairs.
sub getExtraParams
{
    my $anyType  = IO::Compress::Base::Common::Parse_any ;
    my $boolType = IO::Compress::Base::Common::Parse_boolean ;

    return (
            # Zip header fields
            'name'    => [$anyType,  undef],

            'stream'  => [$boolType, 0],
            'efs'     => [$boolType, 0],

            # TODO - This means reading the central directory to get
            # 1. the local header offsets
            # 2. The compressed data length
        );
}
96
# Post-process the parsed options: force CRC32 checking on and copy the
# zip-specific options ('name', 'efs') into the object's UnzipData slot.
# Always returns 1.
sub ckParams
{
    my ($self, $got) = @_ ;

    # unzip always needs crc32
    $got->setValue('crc32' => 1);

    # [ UnzipData key, option name ]
    for my $mapping ( [ 'Name', 'name' ], [ 'efs', 'efs' ] )
    {
        my ($slot, $option) = @$mapping ;
        *$self->{UnzipData}{$slot} = $got->getValue($option);
    }

    return 1;
}
110
# Check the leading signature, then read the first local header and store
# it in the object's Info slot.
#
# Returns 1 on success, 0 when the signature check fails and undef when no
# header could be read.
sub mkUncomp
{
    my ($self, $got) = @_ ;

    my $magic = $self->ckMagic();
    return 0
        if ! $magic ;

    my $info = $self->readHeader($magic);
    *$self->{Info} = $info ;

    return undef
        if ! $info ;

    return 1;
}
125
# Read the first four bytes of input and verify that they are a zip local
# file header signature.
#
# The bytes read are always recorded in HeaderPending. On success the
# object's Type is set to 'zip' and the four signature bytes are returned;
# otherwise the result of HeaderError is returned.
sub ckMagic
{
    my ($self) = @_ ;

    my $signature ;
    $self->smartReadExact(\$signature, 4);

    *$self->{HeaderPending} = $signature ;

    if (length $signature != 4) {
        return $self->HeaderError("Minimum header size is " .
                                  4 . " bytes");
    }

    if (! _isZipMagic($signature)) {
        return $self->HeaderError("Bad Magic");
    }

    *$self->{Type} = 'zip';

    return $signature ;
}
146
147
# Discard $offset bytes of input by reading them in chunks of at most 16 KiB.
#
# Parameters:
#   $offset - number of bytes to skip.
#
# Returns 1 when the whole distance was skipped, 0 as soon as a chunk
# cannot be read in full.
sub fastForward
{
    my $self = shift;
    my $offset = shift;

    # TODO - if Stream isn't enabled & reading from file, use seek

    my $buffer = '';
    my $c = 1024 * 16;

    while ($offset > 0)
    {
        # Shrink the final chunk to exactly what is left. This used to
        # compare against "length $offset" (the number of decimal digits),
        # which made every skip crawl along a few bytes at a time.
        $c = $offset
            if $offset < $c ;

        $offset -= $c;

        $self->smartReadExact(\$buffer, $c)
            or return 0;
    }

    return 1;
}
171
172
# Read local file headers until the wanted member is found.
#
# Parameters:
#   $magic - the four signature bytes already consumed by the caller.
#
# If no 'name' option was given the first header is returned. Otherwise
# members are skipped (their data, data descriptor and any following
# records) until one whose Name equals the requested name is found.
#
# Returns the header hash on success. Returns undef when no header could be
# read, and the result of saveErrorString(undef, ...) when the input is
# truncated or the named member is not present.
sub readHeader
{
    my $self = shift;
    my $magic = shift ;

    my $name =  *$self->{UnzipData}{Name} ;
    my $hdr = $self->_readZipHeader($magic) ;

    while (defined $hdr)
    {
        if (! defined $name || $hdr->{Name} eq $name)
        {
            return $hdr ;
        }

        # skip the data
        # TODO - when Stream is off, use seek
        my $buffer;
        if (*$self->{ZipData}{Streaming}) {
            # The compressed size is not known up front, so the only way to
            # find the end of the member is to run the data through the
            # uncompressor until it reports end-of-stream.
            while (1) {

                my $b;
                my $status = $self->smartRead(\$b, 1024 * 16);

                return $self->saveErrorString(undef, "Truncated file")
                    if $status <= 0 ;

                my $temp_buf ;
                my $out;

                $status = *$self->{Uncomp}->uncompr(\$b, \$temp_buf, 0, $out);

                return $self->saveErrorString(undef, *$self->{Uncomp}{Error},
                                                     *$self->{Uncomp}{ErrorNo})
                    if $self->saveStatus($status) == STATUS_ERROR;

                # Whatever is still in $b after uncompr goes back onto the
                # input (presumably the unconsumed bytes - confirm against
                # the adapter's uncompr).
                $self->pushBack($b)  ;

                if ($status == STATUS_ENDSTREAM) {
                    *$self->{Uncomp}->reset();
                    last;
                }
            }

            # skip the trailer
            $self->smartReadExact(\$buffer, $hdr->{TrailerLength})
                or return $self->saveErrorString(undef, "Truncated file");
        }
        else {
            # Sizes are in the header, so just step over the member data.
            my $c = $hdr->{CompressedLength}->get64bit();
            $self->fastForward($c)
                or return $self->saveErrorString(undef, "Truncated file");
            $buffer = '';
        }

        # Validate the data descriptor (if any) and consume any records
        # that follow this member.
        $self->chkTrailer($buffer) == STATUS_OK
            or return $self->saveErrorString(undef, "Truncated file");

        $hdr = $self->_readFullZipHeader();

        return $self->saveErrorString(undef, "Cannot find '$name'")
            if $self->smartEof();
    }

    return undef;
}
239
# Validate the trailer of the member that has just been read and consume
# the records that follow it.
#
# Parameters:
#   $trailer - the raw data descriptor for a streamed member; not examined
#              when the member is not streamed.
#
# For a streamed member the CRC and sizes come from the data descriptor,
# otherwise from the values saved when the local header was read. The
# sizes and the computed CRC are copied into the Info hash. In Strict mode
# the CRC and both sizes are compared with what was actually seen.
#
# Afterwards the input is scanned record by record: known records are
# skipped via %headerLookup until the end of central directory record, the
# next local header, or unrecognised data is reached.
#
# Returns STATUS_OK, STATUS_EOF or STATUS_ERROR (or the result of
# TrailerError / saveErrorString on a validation failure).
sub chkTrailer
{
    my $self = shift;
    my $trailer = shift;

    my ($sig, $CRC32, $cSize, $uSize) ;
    if (*$self->{ZipData}{Streaming}) {
        $sig   = unpack ("V", substr($trailer, 0, 4));
        $CRC32 = unpack ("V", substr($trailer, 4, 4));

        # Zip64 data descriptors carry 8-byte sizes, classic ones 4-byte.
        if (*$self->{ZipData}{Zip64} ) {
            $cSize = U64::newUnpack_V64 substr($trailer,  8, 8);
            $uSize = U64::newUnpack_V64 substr($trailer, 16, 8);
        }
        else {
            $cSize = U64::newUnpack_V32 substr($trailer,  8, 4);
            $uSize = U64::newUnpack_V32 substr($trailer, 12, 4);
        }

        return $self->TrailerError("Data Descriptor signature, got $sig")
            if $sig != ZIP_DATA_HDR_SIG;
    }
    else {
        ($CRC32, $cSize, $uSize) =
            (*$self->{ZipData}{Crc32},
             *$self->{ZipData}{CompressedLen},
             *$self->{ZipData}{UnCompressedLen});
    }

    *$self->{Info}{CRC32} = *$self->{ZipData}{CRC32} ;
    *$self->{Info}{CompressedLength} = $cSize->get64bit();
    *$self->{Info}{UncompressedLength} = $uSize->get64bit();

    if (*$self->{Strict}) {
        return $self->TrailerError("CRC mismatch")
            if $CRC32  != *$self->{ZipData}{CRC32} ;

        return $self->TrailerError("CSIZE mismatch.")
            if ! $cSize->equal(*$self->{CompSize});

        return $self->TrailerError("USIZE mismatch.")
            if ! $uSize->equal(*$self->{UnCompSize});
    }

    # check for central directory or end of central directory
    while (1)
    {
        my $magic ;
        my $got = $self->smartRead(\$magic, 4);

        return $self->saveErrorString(STATUS_ERROR, "Truncated file")
            if $got != 4 && *$self->{Strict};

        if ($got == 0) {
            return STATUS_EOF ;
        }
        elsif ($got < 0) {
            return STATUS_ERROR ;
        }
        elsif ($got < 4) {
            # Too short to be a signature: leave it for the next reader.
            $self->pushBack($magic)  ;
            return STATUS_OK ;
        }

        my $sig = unpack("V", $magic) ;

        my $hdr;
        if ($hdr = $headerLookup{$sig})
        {
            if (&$hdr($self, $magic) != STATUS_OK ) {
                if (*$self->{Strict}) {
                    return STATUS_ERROR ;
                }
                else {
                    # Outside Strict mode a damaged trailing record is
                    # tolerated.
                    $self->clearError();
                    return STATUS_OK ;
                }
            }

            # The end of central directory record closes the archive.
            return STATUS_OK
                if $sig == ZIP_END_CENTRAL_HDR_SIG ;
        }
        elsif ($sig == ZIP_LOCAL_HDR_SIG)
        {
            # Another member follows: leave its signature for the header
            # reader.
            $self->pushBack($magic)  ;
            return STATUS_OK ;
        }
        else
        {
            # put the data back
            $self->pushBack($magic)  ;
            last;
        }
    }

    # Only reached when unrecognised data follows the member.
    return STATUS_ERROR ;
}
342
# Skip one central directory file header.
#
# Parameters:
#   $magic - the four signature bytes already consumed by the caller.
#
# Reads the 42 remaining fixed-size bytes, records signature plus fixed
# part in HeaderPending, then reads past the variable-length filename,
# extra field and comment.
#
# Returns STATUS_OK, or the result of TrailerError / TruncatedTrailer when
# the input ends early.
sub skipCentralDirectory
{
    my ($self, $magic) = @_ ;

    my $fixed;
    $self->smartReadExact(\$fixed, 46 - 4)
        or return $self->TrailerError("Minimum header size is " .
                                     46 . " bytes") ;

    *$self->{HeaderPending} = $magic . $fixed ;

    # Three consecutive 16-bit lengths start at offset 28 of the record
    # (offset 24 once the signature has been stripped).
    my ($filename_length, $extra_length, $comment_length)
        = unpack ("v v v", substr($fixed, 28-4, 6));

    for my $field ( [ $filename_length, "filename" ],
                    [ $extra_length,    "extra"    ],
                    [ $comment_length,  "comment"  ] )
    {
        my ($length, $what) = @$field ;

        next
            if ! $length ;

        my $discard ;
        $self->smartReadExact(\$discard, $length)
            or return $self->TruncatedTrailer($what);
    }

    return STATUS_OK ;
}
399
# Skip an archive extra data record: a 4-byte length followed by that many
# bytes of data.
#
# Parameters:
#   $magic - the four signature bytes already consumed by the caller.
#
# On success the complete record (signature, length, data) is stored in
# HeaderPending and STATUS_OK is returned; a short read returns the result
# of TrailerError.
sub skipArchiveExtra
{
    my ($self, $magic) = @_ ;

    my $chunk;
    $self->smartReadExact(\$chunk, 4)
        or return $self->TrailerError("Minimum header size is " .
                                     4 . " bytes") ;

    my $pending = $magic . $chunk ;
    my $size    = unpack ("V", $chunk);

    $self->smartReadExact(\$chunk, $size)
        or return $self->TrailerError("Minimum header size is " .
                                     $size . " bytes") ;

    *$self->{HeaderPending} = $pending . $chunk ;

    return STATUS_OK ;
}
423
424
# Skip a Zip64 end of central directory record.
#
# Parameters:
#   $magic - the four signature bytes already consumed by the caller
#            (not used here).
#
# The signature is followed by an 8-byte little-endian size giving the
# length of the rest of the record; that many bytes are then discarded.
#
# Returns STATUS_OK, or the result of TrailerError when the input ends
# early.
sub skipCentralDirectory64Rec
{
    my $self = shift;
    my $magic = shift ;

    my $buffer;
    $self->smartReadExact(\$buffer, 8)
        or return $self->TrailerError("Minimum header size is " .
                                     8 . " bytes") ;

    # Combine the two 32-bit halves with the same helper offsetFromZip64
    # uses for this field. The previous hand-rolled "$hi * U64::MAX32 + $lo"
    # appears to scale the high word by 2**32 - 1 rather than 2**32.
    my $size = U64::Value_VV64($buffer);

    $self->fastForward($size)
        or return $self->TrailerError("Minimum header size is " .
                                     $size . " bytes") ;

    # Layout of the skipped data, for reference:
    #   versionMadeBy    v     offset  0
    #   extractVersion   v     offset  2
    #   diskNumber       V     offset  4
    #   cntrlDirDiskNo   V     offset  8
    #   entriesInThisCD  V V   offset 12
    #   entriesInCD      V V   offset 20
    #   sizeOfCD         V V   offset 28
    #   offsetToCD       V V   offset 36

    return STATUS_OK ;
}
458
# Skip a Zip64 end of central directory locator (20 bytes including the
# signature).
#
# Parameters:
#   $magic - the four signature bytes already consumed by the caller.
#
# The whole record is stored in HeaderPending. Returns STATUS_OK, or the
# result of TrailerError when the input ends early.
sub skipCentralDirectory64Loc
{
    my ($self, $magic) = @_ ;

    my $rest;
    $self->smartReadExact(\$rest, 20 - 4)
        or return $self->TrailerError("Minimum header size is " .
                                     20 . " bytes") ;

    # Layout of $rest, for reference:
    #   startCdDisk  V     offset 0
    #   offsetToCD   V V   offset 4
    #   diskCount    V     offset 12
    *$self->{HeaderPending} = $magic . $rest ;

    return STATUS_OK ;
}
478
# Skip the end of central directory record, including its trailing comment.
#
# Parameters:
#   $magic - the four signature bytes already consumed by the caller.
#
# Signature plus the 18 fixed bytes are stored in HeaderPending; the
# comment, when present, is read and discarded.
#
# Returns STATUS_OK, or the result of TrailerError / TruncatedTrailer when
# the input ends early.
sub skipEndCentralDirectory
{
    my ($self, $magic) = @_ ;

    my $fixed;
    $self->smartReadExact(\$fixed, 22 - 4)
        or return $self->TrailerError("Minimum header size is " .
                                     22 . " bytes") ;

    *$self->{HeaderPending} = $magic . $fixed ;

    # The comment length is the final 16-bit field of the fixed part
    # (offset 20 of the record, 16 once the signature has been stripped).
    my $comment_length = unpack ("v", substr($fixed, 20-4, 2));

    return STATUS_OK
        if ! $comment_length ;

    my $comment ;
    $self->smartReadExact(\$comment, $comment_length)
        or return $self->TruncatedTrailer("comment");

    return STATUS_OK ;
}
512
513
# True when the first four bytes of the argument, read as a little-endian
# 32-bit value, equal the local file header signature. Returns 0 when the
# argument is shorter than four bytes.
sub _isZipMagic
{
    my ($candidate) = @_ ;

    if (length($candidate) >= 4) {
        return unpack("V", $candidate) == ZIP_LOCAL_HDR_SIG ;
    }

    return 0 ;
}
521
522
# Read a complete local file header, signature included.
#
# The four signature bytes are read and recorded in HeaderPending, checked
# with _isZipMagic, and the remainder is parsed by _readZipHeader. When
# that parse yields undef the object's Transparent entry is removed.
#
# Returns whatever _readZipHeader returns, or the result of HeaderError
# when the signature is short or wrong.
sub _readFullZipHeader($)
{
    my $self = shift ;

    my $signature = '' ;
    $self->smartReadExact(\$signature, 4);

    *$self->{HeaderPending} = $signature ;

    if (length $signature != 4) {
        return $self->HeaderError("Minimum header size is " .
                                  30 . " bytes");
    }

    if (! _isZipMagic($signature)) {
        return $self->HeaderError("Bad Magic");
    }

    my $header = $self->_readZipHeader($signature);

    defined $header
        or delete *$self->{Transparent} ;

    return $header ;
}
544
# Parse a local file header whose signature has already been read, and set
# up the uncompression object for the member that follows.
#
# Parameters:
#   $magic - the four signature bytes already consumed by the caller.
#
# Side effects: fills in the object's ZipData entries (Streaming, Zip64,
# CRC32, Method and, for non-streamed members, Crc32 / CompressedLen /
# UnCompressedLen), sets CompressedInputLength(Remaining) for non-streamed
# members, sets Type, and stores the adapter object in Uncomp.
#
# Returns a hash reference describing the member, or the result of
# HeaderError / TruncatedHeader / saveErrorString on failure. Croaks when
# the 'efs' option is on and a flagged filename is not valid UTF-8.
#
# NOTE: the ($) prototype is kept as found; the sub is only invoked as a
# method in this file, where prototypes are not applied.
sub _readZipHeader($)
{
    my ($self, $magic) = @_ ;
    my ($buffer) = '' ;

    $self->smartReadExact(\$buffer, 30 - 4)
        or return $self->HeaderError("Minimum header size is " .
                                     30 . " bytes") ;

    my $keep = $magic . $buffer ;
    *$self->{HeaderPending} = $keep ;

    # Offsets are written as "record offset - 4" because the signature is
    # not part of $buffer.
    my $extractVersion     = unpack ("v", substr($buffer, 4-4,  2));
    my $gpFlag             = unpack ("v", substr($buffer, 6-4,  2));
    my $compressedMethod   = unpack ("v", substr($buffer, 8-4,  2));
    my $lastModTime        = unpack ("V", substr($buffer, 10-4, 4));
    my $crc32              = unpack ("V", substr($buffer, 14-4, 4));
    my $compressedLength   = U64::newUnpack_V32 substr($buffer, 18-4, 4);
    my $uncompressedLength = U64::newUnpack_V32 substr($buffer, 22-4, 4);
    my $filename_length    = unpack ("v", substr($buffer, 26-4, 2));
    my $extra_length       = unpack ("v", substr($buffer, 28-4, 2));

    my $filename;
    my $extraField;
    my @EXTRA = ();

    # Some programs (some versions of LibreOffice) mark entries as streamed, but still fill out
    # compressedLength/uncompressedLength & crc32 in the local file header.
    # The expected data descriptor is not populated.
    # So only assume streaming if the Streaming bit is set AND the CRC32
    # field in the local header is zero.
    my $streamingMode = (($gpFlag & ZIP_GP_FLAG_STREAMING_MASK)  && $crc32 == 0) ? 1 : 0 ;

    my $efs_flag = ($gpFlag & ZIP_GP_FLAG_LANGUAGE_ENCODING) ? 1 : 0;

    return $self->HeaderError("Encrypted content not supported")
        if $gpFlag & (ZIP_GP_FLAG_ENCRYPTED_MASK|ZIP_GP_FLAG_STRONG_ENCRYPTED_MASK);

    return $self->HeaderError("Patch content not supported")
        if $gpFlag & ZIP_GP_FLAG_PATCHED_MASK;

    *$self->{ZipData}{Streaming} = $streamingMode;


    if ($filename_length)
    {
        $self->smartReadExact(\$filename, $filename_length)
            or return $self->TruncatedHeader("Filename");

        if (*$self->{UnzipData}{efs} && $efs_flag && $] >= 5.008004)
        {
            require Encode;

            # The eval block ends in an explicit true value so that success
            # is not judged by the decoded name itself; a member called "0"
            # would otherwise be reported as invalid UTF-8.
            eval { $filename = Encode::decode_utf8($filename, 1); 1 }
                or Carp::croak("Zip Filename not UTF-8") ;
        }

        $keep .= $filename ;
    }

    my $zip64 = 0 ;

    if ($extra_length)
    {
        $self->smartReadExact(\$extraField, $extra_length)
            or return $self->TruncatedHeader("Extra Field");

        my $bad = IO::Compress::Zlib::Extra::parseRawExtra($extraField,
                                                \@EXTRA, 1, 0);
        return $self->HeaderError($bad)
            if defined $bad;

        $keep .= $extraField ;

        # Index the sub-fields by ID; each value is a reference to the
        # sub-field's data.
        my %Extra ;
        for (@EXTRA)
        {
            $Extra{$_->[0]} = \$_->[1];
        }

        if (defined $Extra{ZIP_EXTRA_ID_ZIP64()})
        {
            $zip64 = 1 ;

            my $buff = ${ $Extra{ZIP_EXTRA_ID_ZIP64()} };

            # This code assumes that all the fields in the Zip64
            # extra field aren't necessarily present. The spec says that
            # they only exist if the equivalent local headers are -1.

            if (! $streamingMode) {
                my $offset = 0 ;

                if (U64::full32 $uncompressedLength->get32bit() ) {
                    $uncompressedLength
                            = U64::newUnpack_V64 substr($buff, 0, 8);

                    $offset += 8 ;
                }

                if (U64::full32 $compressedLength->get32bit() ) {

                    $compressedLength
                        = U64::newUnpack_V64 substr($buff, $offset, 8);

                    $offset += 8 ;
                }
           }
        }
    }

    *$self->{ZipData}{Zip64} = $zip64;

    if (! $streamingMode) {
        # Sizes and CRC are known up front; remember them for chkTrailer
        # and limit how much input the uncompressor may consume.
        *$self->{ZipData}{Streaming} = 0;
        *$self->{ZipData}{Crc32} = $crc32;
        *$self->{ZipData}{CompressedLen} = $compressedLength;
        *$self->{ZipData}{UnCompressedLen} = $uncompressedLength;
        *$self->{CompressedInputLengthRemaining} =
            *$self->{CompressedInputLength} = $compressedLength->get64bit();
    }

    # Running CRC of the uncompressed data, updated by filterUncompressed.
    *$self->{ZipData}{CRC32} = Compress::Raw::Zlib::crc32(undef);
    *$self->{ZipData}{Method} = $compressedMethod;
    if ($compressedMethod == ZIP_CM_DEFLATE)
    {
        *$self->{Type} = 'zip-deflate';
        my $obj = IO::Uncompress::Adapter::Inflate::mkUncompObject(1,0,0);

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_BZIP2)
    {
        # The optional adapters are only usable when their module loaded.
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::Bunzip2::VERSION ;

        *$self->{Type} = 'zip-bzip2';

        my $obj = IO::Uncompress::Adapter::Bunzip2::mkUncompObject();

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_XZ)
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::UnXz::VERSION ;

        *$self->{Type} = 'zip-xz';

        my $obj = IO::Uncompress::Adapter::UnXz::mkUncompObject();

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_ZSTD)
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::UnZstd::VERSION ;

        *$self->{Type} = 'zip-zstd';

        my $obj = IO::Uncompress::Adapter::UnZstd::mkUncompObject();

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_LZMA)
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::UnLzma::VERSION ;

        *$self->{Type} = 'zip-lzma';

        # The member data starts with a 4-byte LZMA header (two version
        # bytes and a 16-bit properties size) followed by the properties.
        my $LzmaHeader;
        $self->smartReadExact(\$LzmaHeader, 4)
                or return $self->saveErrorString(undef, "Truncated file");
        my ($verHi, $verLo)   = unpack ("CC", substr($LzmaHeader, 0, 2));
        my $LzmaPropertiesSize   = unpack ("v", substr($LzmaHeader, 2, 2));


        my $LzmaPropertyData;
        $self->smartReadExact(\$LzmaPropertyData, $LzmaPropertiesSize)
                or return $self->saveErrorString(undef, "Truncated file");

        if (! $streamingMode) {
            # The bytes just consumed count towards the compressed length.
            *$self->{ZipData}{CompressedLen}->subtract(4 + $LzmaPropertiesSize) ;
            *$self->{CompressedInputLengthRemaining} =
                *$self->{CompressedInputLength} = *$self->{ZipData}{CompressedLen}->get64bit();
        }

        my $obj =
            IO::Uncompress::Adapter::UnLzma::mkUncompZipObject($LzmaPropertyData);

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_STORE)
    {
        *$self->{Type} = 'zip-stored';

        my $obj =
        IO::Uncompress::Adapter::Identity::mkUncompObject($streamingMode,
                                                          $zip64);

        *$self->{Uncomp} = $obj;
    }
    else
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod");
    }

    return {
        'Type'               => 'zip',
        'FingerprintLength'  => 4,
        'HeaderLength'       => length $keep,
        'Zip64'              => $zip64,
        # Data descriptor size: signature + CRC + two sizes (4 or 8 bytes
        # each); there is none for a non-streamed member.
        'TrailerLength'      => ! $streamingMode ? 0 : $zip64 ? 24 : 16,
        'Header'             => $keep,
        'CompressedLength'   => $compressedLength ,
        'UncompressedLength' => $uncompressedLength ,
        'CRC32'              => $crc32 ,
        'Name'               => $filename,
        'efs'                => $efs_flag, # language encoding flag
        'Time'               => _dosToUnixTime($lastModTime),
        'Stream'             => $streamingMode,

        'MethodID'           => $compressedMethod,
        'MethodName'         => $MethodNames{$compressedMethod} || 'Unknown',

        'ExtraFieldRaw' => $extraField,
        'ExtraField'    => [ @EXTRA ],
      }
}
788
# Update the running CRC32 kept in ZipData.
#
# For deflated members the value is taken from the uncompression object;
# for every other method it is computed here from the data referenced by
# the first argument, continuing from the previous CRC value.
sub filterUncompressed
{
    my $self = shift ;

    my $zipData = *$self->{ZipData} ;

    if ($zipData->{Method} != ZIP_CM_DEFLATE) {
        $zipData->{CRC32} =
            Compress::Raw::Zlib::crc32(${$_[0]}, $zipData->{CRC32}, $_[1]);
    }
    else {
        $zipData->{CRC32} = *$self->{Uncomp}->crc32() ;
    }
}
800
801
802# from Archive::Zip & info-zip
# Convert a packed 32-bit MS-DOS date/time value to a Unix timestamp using
# POSIX::mktime (so the result depends on the local time zone).
# Returns 0 when mktime cannot represent the time.
sub _dosToUnixTime
{
    my ($dosStamp) = @_ ;

    use POSIX 'mktime';

    my $epoch = mktime(
        ( $dosStamp <<  1 ) & 0x3e,           # seconds (2-second units)
        ( $dosStamp >>  5 ) & 0x3f,           # minutes
        ( $dosStamp >> 11 ) & 0x1f,           # hours
        ( $dosStamp >> 16 ) & 0x1f,           # day of month
        ( ( $dosStamp >> 21 ) & 0x0f ) - 1,   # month, zero-based
        ( ( $dosStamp >> 25 ) & 0x7f ) + 80,  # years since 1900
        0, 0, -1 );

    return defined $epoch ? $epoch : 0 ;
}
822
823#sub scanCentralDirectory
824#{
825#    # Use cases
826#    # 1 32-bit CD
827#    # 2 64-bit CD
828#
829#    my $self = shift ;
830#
831#    my @CD = ();
832#    my $offset = $self->findCentralDirectoryOffset();
833#
834#    return 0
835#        if ! defined $offset;
836#
837#    $self->smarkSeek($offset, 0, SEEK_SET) ;
838#
839#    # Now walk the Central Directory Records
840#    my $buffer ;
841#    while ($self->smartReadExact(\$buffer, 46) &&
842#           unpack("V", $buffer) == ZIP_CENTRAL_HDR_SIG) {
843#
844#        my $compressedLength   = unpack ("V", substr($buffer, 20, 4));
845#        my $filename_length    = unpack ("v", substr($buffer, 28, 2));
846#        my $extra_length       = unpack ("v", substr($buffer, 30, 2));
847#        my $comment_length     = unpack ("v", substr($buffer, 32, 2));
848#
849#        $self->smarkSeek($filename_length + $extra_length + $comment_length, 0, SEEK_CUR)
850#            if $extra_length || $comment_length || $filename_length;
851#        push @CD, $compressedLength ;
852#    }
853#
854#}
855#
856#sub findCentralDirectoryOffset
857#{
858#    my $self = shift ;
859#
860#    # Most common use-case is where there is no comment, so
861#    # know exactly where the end of central directory record
862#    # should be.
863#
864#    $self->smarkSeek(-22, 0, SEEK_END) ;
865#
866#    my $buffer;
867#    $self->smartReadExact(\$buffer, 22) ;
868#
869#    my $zip64 = 0;
870#    my $centralDirOffset ;
871#    if ( unpack("V", $buffer) == ZIP_END_CENTRAL_HDR_SIG ) {
872#        $centralDirOffset = unpack ("V", substr($buffer, 16, 2));
873#    }
874#    else {
875#        die "xxxx";
876#    }
877#
878#    return $centralDirOffset ;
879#}
880#
881#sub is84BitCD
882#{
883#    # TODO
884#    my $self = shift ;
885#}
886
887
# Move the read position forward by $size bytes relative to the current
# position.
#
# Parameters:
#   $size - distance to skip, either a plain number or a U64 object.
#
# Returns whatever smartSeek returns.
sub skip
{
    my $self = shift;
    my $size = shift;

    use Fcntl qw(SEEK_CUR);

    my $distance = ref $size eq 'U64' ? $size->get64bit() : $size ;

    # Every other smartSeek call in this file passes (offset, 0, whence).
    # The old two-argument call here put SEEK_CUR in the middle slot, so
    # the whence was not being passed where the other callers pass it.
    return $self->smartSeek($distance, 0, SEEK_CUR);
}
902
903
# Walk the central directory and collect the compressed size of every
# entry.
#
# Returns a list of U64 objects, one per central directory entry in
# directory order, or an empty list when the central directory cannot be
# located. The read position is restored before returning.
sub scanCentralDirectory
{
    my $self = shift;

    my $here = $self->tell();

    # Use cases
    # 1 32-bit CD
    # 2 64-bit CD

    my @CD = ();
    my $offset = $self->findCentralDirectoryOffset();

    return ()
        if ! defined $offset;

    # Was "smarkSeek", a method that does not exist.
    $self->smartSeek($offset, 0, SEEK_SET) ;

    # Now walk the Central Directory Records
    my $buffer ;
    while ($self->smartReadExact(\$buffer, 46) &&
           unpack("V", $buffer) == ZIP_CENTRAL_HDR_SIG) {

        my $compressedLength   = unpack("V", substr($buffer, 20, 4));
        my $uncompressedLength = unpack("V", substr($buffer, 24, 4));
        my $filename_length    = unpack("v", substr($buffer, 28, 2));
        my $extra_length       = unpack("v", substr($buffer, 30, 2));
        my $comment_length     = unpack("v", substr($buffer, 32, 2));

        $self->skip($filename_length ) ;

        my $v64 = U64->new( $compressedLength );

        if (U64::full32 $compressedLength ) {
            # An all-ones 32-bit size means the real value lives in the
            # Zip64 extra field, so the extra data has to be read rather
            # than skipped.
            $self->smartReadExact(\$buffer, $extra_length) ;
            die "xxx $offset $comment_length $filename_length $extra_length" . length($buffer)
                if length($buffer) != $extra_length;
            my $got = $self->get64Extra($buffer, U64::full32 $uncompressedLength);

            # If not Zip64 extra field, assume size is 0xFFFFFFFF
            $v64 = $got if defined $got;
        }
        else {
            $self->skip($extra_length) ;
        }

        $self->skip($comment_length ) ;

        push @CD, $v64 ;
    }

    $self->smartSeek($here, 0, SEEK_SET) ;

    return @CD;
}
959
# Pull a 64-bit value out of the Zip64 extra sub-field (ID 0x0001) of a raw
# extra-field buffer.
#
# Parameters:
#   $buffer    - the raw extra field data.
#   $is_uncomp - when true the value is read from offset 8 of the
#                sub-field, otherwise from offset 0.
#
# Returns a U64 object, or undef when findID does not find the sub-field.
sub get64Extra
{
    my ($self, $buffer, $is_uncomp) = @_ ;

    my $zip64Data = IO::Compress::Zlib::Extra::findID(0x0001, $buffer);

    return undef
        if ! defined $zip64Data ;

    my $start = $is_uncomp ? 8 : 0 ;

    return U64::newUnpack_V64(substr($zip64Data, $start)) ;
}
979
# Find the central directory offset via the Zip64 structures.
#
# Parameters:
#   $here - file position of the end of central directory record.
#
# Reads the 20 bytes immediately before $here as a Zip64 locator, follows
# it to the Zip64 end of central directory record and returns the 8-byte
# value stored 36 bytes into that record's body. Dies when a seek or read
# fails or an expected signature is missing.
sub offsetFromZip64
{
    my ($self, $here) = @_ ;

    $self->smartSeek($here - 20, 0, SEEK_SET)
        or die "xx $!" ;

    my $record;
    my $got = 0;
    $self->smartReadExact(\$record, 20)
        or die "xxx $here $got $!" ;

    # Not a Zip64 locator.
    die "zzz"
        if unpack("V", $record) != ZIP64_END_CENTRAL_LOC_HDR_SIG ;

    my $recordPosition = U64::Value_VV64(substr($record,  8, 8));

    $self->smartSeek($recordPosition, 0, SEEK_SET) ;

    $self->smartReadExact(\$record, 4)
        or die "xxx" ;

    # Not a Zip64 end of central directory record.
    die "zzz"
        if unpack("V", $record) != ZIP64_END_CENTRAL_REC_HDR_SIG ;

    # An 8-byte size, then that many bytes of record body.
    $self->smartReadExact(\$record, 8)
        or die "xxx" ;
    my $size = U64::Value_VV64($record);

    $self->smartReadExact(\$record, $size)
        or die "xxx" ;

    return U64::Value_VV64(substr($record,  36, 8)) ;
}
1019
# The end of central directory signature as it appears on disk.
use constant Pack_ZIP_END_CENTRAL_HDR_SIG => pack("V", ZIP_END_CENTRAL_HDR_SIG);

# Locate the start of the central directory.
#
# First looks for the end of central directory record exactly 22 bytes
# before end of file (the no-comment case). Failing that, searches
# backwards from end of file, 1024 more bytes at a time, for the last
# occurrence of the signature. An all-ones 32-bit offset is resolved via
# offsetFromZip64.
#
# Returns the offset, or undef when no signature is found anywhere in the
# file. Dies when a seek or read fails.
sub findCentralDirectoryOffset
{
    my $self = shift ;

    # Most common use-case is where there is no comment, so
    # know exactly where the end of central directory record
    # should be.

    $self->smartSeek(-22, 0, SEEK_END) ;
    my $here = $self->tell();

    my $buffer;
    $self->smartReadExact(\$buffer, 22)
        or die "xxx" ;

    my $centralDirOffset ;
    if ( unpack("V", $buffer) == ZIP_END_CENTRAL_HDR_SIG ) {
        $centralDirOffset = unpack("V", substr($buffer, 16,  4));
    }
    else {
        $self->smartSeek(0, 0, SEEK_END) ;

        my $fileLen = $self->tell();
        my $want = 0 ;

        while(1) {
            # Widen the window; clamp it to the whole file once it would
            # reach past the start.
            $want += 1024;
            my $seekTo = $fileLen - $want;
            if ($seekTo < 0 ) {
                $seekTo = 0;
                $want = $fileLen ;
            }
            $self->smartSeek( $seekTo, 0, SEEK_SET)
                or die "xxx $!" ;

            # The buffer must be passed by reference, as in every other
            # smartReadExact call; it used to be passed by value here.
            $self->smartReadExact(\$buffer, $want)
                or die "xxx " ;
            my $pos = rindex( $buffer, Pack_ZIP_END_CENTRAL_HDR_SIG);

            if ($pos >= 0) {
                $here = $seekTo + $pos ;
                $centralDirOffset = unpack("V", substr($buffer, $pos + 16,  4));
                last ;
            }

            # The whole file has been searched without finding it.
            return undef
                if $want == $fileLen;
        }
    }

    $centralDirOffset = $self->offsetFromZip64($here)
        if U64::full32 $centralDirOffset ;

    return $centralDirOffset ;
}
1079
10801;
1081
1082__END__
1083
1084
1085=head1 NAME
1086
1087IO::Uncompress::Unzip - Read zip files/buffers
1088
1089=head1 SYNOPSIS
1090
1091    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1092
1093    my $status = unzip $input => $output [,OPTS]
1094        or die "unzip failed: $UnzipError\n";
1095
1096    my $z = IO::Uncompress::Unzip->new( $input [OPTS] )
1097        or die "unzip failed: $UnzipError\n";
1098
1099    $status = $z->read($buffer)
1100    $status = $z->read($buffer, $length)
1101    $status = $z->read($buffer, $length, $offset)
1102    $line = $z->getline()
1103    $char = $z->getc()
1104    $char = $z->ungetc()
1105    $char = $z->opened()
1106
1107    $status = $z->inflateSync()
1108
1109    $data = $z->trailingData()
1110    $status = $z->nextStream()
1111    $data = $z->getHeaderInfo()
1112    $z->tell()
1113    $z->seek($position, $whence)
1114    $z->binmode()
1115    $z->fileno()
1116    $z->eof()
1117    $z->close()
1118
1119    $UnzipError ;
1120
1121    # IO::File mode
1122
1123    <$z>
1124    read($z, $buffer);
1125    read($z, $buffer, $length);
1126    read($z, $buffer, $length, $offset);
1127    tell($z)
1128    seek($z, $position, $whence)
1129    binmode($z)
1130    fileno($z)
1131    eof($z)
1132    close($z)
1133
1134=head1 DESCRIPTION
1135
This module provides a Perl interface that allows the reading of
zip files/buffers.
1138
1139For writing zip files/buffers, see the companion module IO::Compress::Zip.
1140
1141The primary purpose of this module is to provide I<streaming> read access to
1142zip files and buffers.
1143
1144At present the following compression methods are supported by IO::Uncompress::Unzip
1145
1146=over 5
1147
1148=item Store (0)
1149
1150=item Deflate (8)
1151
1152=item Bzip2 (12)
1153
1154To read Bzip2 content, the module C<IO::Uncompress::Bunzip2> must
1155be installed.
1156
1157=item Lzma (14)
1158
1159To read LZMA content, the module C<IO::Uncompress::UnLzma> must
1160be installed.
1161
1162=item Xz (95)
1163
1164To read Xz content, the module C<IO::Uncompress::UnXz> must
1165be installed.
1166
1167=item Zstandard (93)
1168
1169To read Zstandard content, the module C<IO::Uncompress::UnZstd> must
1170be installed.
1171
1172=back
1173
1174=head1 Functional Interface
1175
1176A top-level function, C<unzip>, is provided to carry out
1177"one-shot" uncompression between buffers and/or files. For finer
1178control over the uncompression process, see the L</"OO Interface">
1179section.
1180
1181    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1182
1183    unzip $input_filename_or_reference => $output_filename_or_reference [,OPTS]
1184        or die "unzip failed: $UnzipError\n";
1185
1186The functional interface needs Perl5.005 or better.
1187
1188=head2 unzip $input_filename_or_reference => $output_filename_or_reference [, OPTS]
1189
1190C<unzip> expects at least two parameters,
1191C<$input_filename_or_reference> and C<$output_filename_or_reference>
1192and zero or more optional parameters (see L</Optional Parameters>)
1193
1194=head3 The C<$input_filename_or_reference> parameter
1195
1196The parameter, C<$input_filename_or_reference>, is used to define the
1197source of the compressed data.
1198
1199It can take one of the following forms:
1200
1201=over 5
1202
1203=item A filename
1204
1205If the C<$input_filename_or_reference> parameter is a simple scalar, it is
1206assumed to be a filename. This file will be opened for reading and the
1207input data will be read from it.
1208
1209=item A filehandle
1210
1211If the C<$input_filename_or_reference> parameter is a filehandle, the input
1212data will be read from it.  The string '-' can be used as an alias for
1213standard input.
1214
1215=item A scalar reference
1216
1217If C<$input_filename_or_reference> is a scalar reference, the input data
1218will be read from C<$$input_filename_or_reference>.
1219
1220=item An array reference
1221
1222If C<$input_filename_or_reference> is an array reference, each element in
1223the array must be a filename.
1224
1225The input data will be read from each file in turn.
1226
1227The complete array will be walked to ensure that it only
1228contains valid filenames before any data is uncompressed.
1229
1230=item An Input FileGlob string
1231
1232If C<$input_filename_or_reference> is a string that is delimited by the
1233characters "<" and ">" C<unzip> will assume that it is an
1234I<input fileglob string>. The input is the list of files that match the
1235fileglob.
1236
1237See L<File::GlobMapper|File::GlobMapper> for more details.
1238
1239=back
1240
1241If the C<$input_filename_or_reference> parameter is any other type,
1242C<undef> will be returned.
1243
1244=head3 The C<$output_filename_or_reference> parameter
1245
1246The parameter C<$output_filename_or_reference> is used to control the
1247destination of the uncompressed data. This parameter can take one of
1248these forms.
1249
1250=over 5
1251
1252=item A filename
1253
1254If the C<$output_filename_or_reference> parameter is a simple scalar, it is
1255assumed to be a filename.  This file will be opened for writing and the
1256uncompressed data will be written to it.
1257
1258=item A filehandle
1259
1260If the C<$output_filename_or_reference> parameter is a filehandle, the
1261uncompressed data will be written to it.  The string '-' can be used as
1262an alias for standard output.
1263
1264=item A scalar reference
1265
1266If C<$output_filename_or_reference> is a scalar reference, the
1267uncompressed data will be stored in C<$$output_filename_or_reference>.
1268
1269=item An Array Reference
1270
1271If C<$output_filename_or_reference> is an array reference,
1272the uncompressed data will be pushed onto the array.
1273
1274=item An Output FileGlob
1275
1276If C<$output_filename_or_reference> is a string that is delimited by the
1277characters "<" and ">" C<unzip> will assume that it is an
1278I<output fileglob string>. The output is the list of files that match the
1279fileglob.
1280
When C<$output_filename_or_reference> is a fileglob string,
1282C<$input_filename_or_reference> must also be a fileglob string. Anything
1283else is an error.
1284
1285See L<File::GlobMapper|File::GlobMapper> for more details.
1286
1287=back
1288
1289If the C<$output_filename_or_reference> parameter is any other type,
1290C<undef> will be returned.
1291
1292=head2 Notes
1293
1294When C<$input_filename_or_reference> maps to multiple compressed
1295files/buffers and C<$output_filename_or_reference> is
1296a single file/buffer, after uncompression C<$output_filename_or_reference> will contain a
1297concatenation of all the uncompressed data from each of the input
1298files/buffers.
1299
1300=head2 Optional Parameters
1301
1302The optional parameters for the one-shot function C<unzip>
1303are (for the most part) identical to those used with the OO interface defined in the
1304L</"Constructor Options"> section. The exceptions are listed below
1305
1306=over 5
1307
1308=item C<< AutoClose => 0|1 >>
1309
1310This option applies to any input or output data streams to
1311C<unzip> that are filehandles.
1312
1313If C<AutoClose> is specified, and the value is true, it will result in all
1314input and/or output filehandles being closed once C<unzip> has
1315completed.
1316
1317This parameter defaults to 0.
1318
1319=item C<< BinModeOut => 0|1 >>
1320
1321This option is now a no-op. All files will be written  in binmode.
1322
1323=item C<< Append => 0|1 >>
1324
1325The behaviour of this option is dependent on the type of output data
1326stream.
1327
1328=over 5
1329
1330=item * A Buffer
1331
If C<Append> is enabled, all uncompressed data will be appended to the end of
1333the output buffer. Otherwise the output buffer will be cleared before any
1334uncompressed data is written to it.
1335
1336=item * A Filename
1337
1338If C<Append> is enabled, the file will be opened in append mode. Otherwise
1339the contents of the file, if any, will be truncated before any uncompressed
1340data is written to it.
1341
1342=item * A Filehandle
1343
1344If C<Append> is enabled, the filehandle will be positioned to the end of
1345the file via a call to C<seek> before any uncompressed data is
1346written to it.  Otherwise the file pointer will not be moved.
1347
1348=back
1349
1350When C<Append> is specified, and set to true, it will I<append> all uncompressed
1351data to the output data stream.
1352
1353So when the output is a filehandle it will carry out a seek to the eof
1354before writing any uncompressed data. If the output is a filename, it will be opened for
1355appending. If the output is a buffer, all uncompressed data will be
1356appended to the existing buffer.
1357
1358Conversely when C<Append> is not specified, or it is present and is set to
1359false, it will operate as follows.
1360
1361When the output is a filename, it will truncate the contents of the file
1362before writing any uncompressed data. If the output is a filehandle
1363its position will not be changed. If the output is a buffer, it will be
1364wiped before any uncompressed data is output.
1365
1366Defaults to 0.
1367
1368=item C<< MultiStream => 0|1 >>
1369
1370If the input file/buffer contains multiple compressed data streams, this
1371option will uncompress the whole lot as a single data stream.
1372
1373Defaults to 0.
1374
1375=item C<< TrailingData => $scalar >>
1376
1377Returns the data, if any, that is present immediately after the compressed
1378data stream once uncompression is complete.
1379
1380This option can be used when there is useful information immediately
1381following the compressed data stream, and you don't know the length of the
1382compressed data stream.
1383
1384If the input is a buffer, C<trailingData> will return everything from the
1385end of the compressed data stream to the end of the buffer.
1386
1387If the input is a filehandle, C<trailingData> will return the data that is
1388left in the filehandle input buffer once the end of the compressed data
1389stream has been reached. You can then use the filehandle to read the rest
1390of the input file.
1391
1392Don't bother using C<trailingData> if the input is a filename.
1393
1394If you know the length of the compressed data stream before you start
1395uncompressing, you can avoid having to use C<trailingData> by setting the
1396C<InputLength> option.
1397
1398=back
1399
1400=head2 Examples
1401
1402Say you have a zip file, C<file1.zip>, that only contains a
1403single member, you can read it and write the uncompressed data to the
1404file C<file1.txt> like this.
1405
1406    use strict ;
1407    use warnings ;
1408    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1409
1410    my $input = "file1.zip";
1411    my $output = "file1.txt";
1412    unzip $input => $output
1413        or die "unzip failed: $UnzipError\n";
1414
1415If you have a zip file that contains multiple members and want to read a
1416specific member from the file, say C<"data1">, use the C<Name> option
1417
1418    use strict ;
1419    use warnings ;
1420    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1421
1422    my $input = "file1.zip";
1423    my $output = "file1.txt";
1424    unzip $input => $output, Name => "data1"
1425        or die "unzip failed: $UnzipError\n";
1426
1427Alternatively, if you want to read the  C<"data1"> member into memory, use
1428a scalar reference for the C<output> parameter.
1429
1430    use strict ;
1431    use warnings ;
1432    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1433
1434    my $input = "file1.zip";
1435    my $output ;
1436    unzip $input => \$output, Name => "data1"
1437        or die "unzip failed: $UnzipError\n";
1438    # $output now contains the uncompressed data
1439
1440To read from an existing Perl filehandle, C<$input>, and write the
1441uncompressed data to a buffer, C<$buffer>.
1442
1443    use strict ;
1444    use warnings ;
1445    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1446    use IO::File ;
1447
1448    my $input = IO::File->new( "<file1.zip" )
1449        or die "Cannot open 'file1.zip': $!\n" ;
1450    my $buffer ;
1451    unzip $input => \$buffer
1452        or die "unzip failed: $UnzipError\n";
1453
1454=head1 OO Interface
1455
1456=head2 Constructor
1457
1458The format of the constructor for IO::Uncompress::Unzip is shown below
1459
1460    my $z = IO::Uncompress::Unzip->new( $input [OPTS] )
1461        or die "IO::Uncompress::Unzip failed: $UnzipError\n";
1462
1463Returns an C<IO::Uncompress::Unzip> object on success and undef on failure.
1464The variable C<$UnzipError> will contain an error message on failure.
1465
1466If you are running Perl 5.005 or better the object, C<$z>, returned from
1467IO::Uncompress::Unzip can be used exactly like an L<IO::File|IO::File> filehandle.
1468This means that all normal input file operations can be carried out with
1469C<$z>.  For example, to read a line from a compressed file/buffer you can
1470use either of these forms
1471
1472    $line = $z->getline();
1473    $line = <$z>;
1474
1475The mandatory parameter C<$input> is used to determine the source of the
1476compressed data. This parameter can take one of three forms.
1477
1478=over 5
1479
1480=item A filename
1481
1482If the C<$input> parameter is a scalar, it is assumed to be a filename. This
1483file will be opened for reading and the compressed data will be read from it.
1484
1485=item A filehandle
1486
1487If the C<$input> parameter is a filehandle, the compressed data will be
1488read from it.
1489The string '-' can be used as an alias for standard input.
1490
1491=item A scalar reference
1492
1493If C<$input> is a scalar reference, the compressed data will be read from
1494C<$$input>.
1495
1496=back
1497
1498=head2 Constructor Options
1499
1500The option names defined below are case insensitive and can be optionally
1501prefixed by a '-'.  So all of the following are valid
1502
1503    -AutoClose
1504    -autoclose
1505    AUTOCLOSE
1506    autoclose
1507
1508OPTS is a combination of the following options:
1509
1510=over 5
1511
1512=item C<< Name => "membername" >>
1513
1514Open "membername" from the zip file for reading.
1515
1516=item C<< Efs => 0| 1 >>
1517
1518When this option is set to true AND the zip archive being read has
1519the "Language Encoding Flag" (EFS) set, the member name is assumed to be encoded in UTF-8.
1520
If the member name in the zip archive is not valid UTF-8 when this option is true,
1522the script will die with an error message.
1523
1524Note that this option only works with Perl 5.8.4 or better.
1525
1526This option defaults to B<false>.
1527
1528=item C<< AutoClose => 0|1 >>
1529
1530This option is only valid when the C<$input> parameter is a filehandle. If
1531specified, and the value is true, it will result in the file being closed once
1532either the C<close> method is called or the IO::Uncompress::Unzip object is
1533destroyed.
1534
1535This parameter defaults to 0.
1536
1537=item C<< MultiStream => 0|1 >>
1538
1539Treats the complete zip file/buffer as a single compressed data
1540stream. When reading in multi-stream mode each member of the zip
1541file/buffer will be uncompressed in turn until the end of the file/buffer
1542is encountered.
1543
1544This parameter defaults to 0.
1545
1546=item C<< Prime => $string >>
1547
1548This option will uncompress the contents of C<$string> before processing the
1549input file/buffer.
1550
1551This option can be useful when the compressed data is embedded in another
1552file/data structure and it is not possible to work out where the compressed
1553data begins without having to read the first few bytes. If this is the
1554case, the uncompression can be I<primed> with these bytes using this
1555option.
1556
1557=item C<< Transparent => 0|1 >>
1558
1559If this option is set and the input file/buffer is not compressed data,
1560the module will allow reading of it anyway.
1561
1562In addition, if the input file/buffer does contain compressed data and
1563there is non-compressed data immediately following it, setting this option
1564will make this module treat the whole file/buffer as a single data stream.
1565
1566This option defaults to 1.
1567
1568=item C<< BlockSize => $num >>
1569
1570When reading the compressed input data, IO::Uncompress::Unzip will read it in
1571blocks of C<$num> bytes.
1572
1573This option defaults to 4096.
1574
1575=item C<< InputLength => $size >>
1576
1577When present this option will limit the number of compressed bytes read
1578from the input file/buffer to C<$size>. This option can be used in the
1579situation where there is useful data directly after the compressed data
1580stream and you know beforehand the exact length of the compressed data
1581stream.
1582
1583This option is mostly used when reading from a filehandle, in which case
1584the file pointer will be left pointing to the first byte directly after the
1585compressed data stream.
1586
1587This option defaults to off.
1588
1589=item C<< Append => 0|1 >>
1590
1591This option controls what the C<read> method does with uncompressed data.
1592
1593If set to 1, all uncompressed data will be appended to the output parameter
1594of the C<read> method.
1595
1596If set to 0, the contents of the output parameter of the C<read> method
1597will be overwritten by the uncompressed data.
1598
1599Defaults to 0.
1600
1601=item C<< Strict => 0|1 >>
1602
1603This option controls whether the extra checks defined below are used when
1604carrying out the decompression. When Strict is on, the extra tests are
1605carried out, when Strict is off they are not.
1606
1607The default for this option is off.
1608
1609=back
1610
1611=head2 Examples
1612
1613TODO
1614
1615=head1 Methods
1616
1617=head2 read
1618
1619Usage is
1620
1621    $status = $z->read($buffer)
1622
Reads a block of compressed data (the size of the compressed block is
determined by the C<BlockSize> option in the constructor), uncompresses it and
1625writes any uncompressed data into C<$buffer>. If the C<Append> parameter is
1626set in the constructor, the uncompressed data will be appended to the
1627C<$buffer> parameter. Otherwise C<$buffer> will be overwritten.
1628
1629Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
1630or a negative number on error.
1631
1632=head2 read
1633
1634Usage is
1635
1636    $status = $z->read($buffer, $length)
1637    $status = $z->read($buffer, $length, $offset)
1638
1639    $status = read($z, $buffer, $length)
1640    $status = read($z, $buffer, $length, $offset)
1641
1642Attempt to read C<$length> bytes of uncompressed data into C<$buffer>.
1643
The main difference between this form of the C<read> method and the
previous one, is that this one will attempt to return I<exactly> C<$length>
bytes. The only circumstances in which it will not do so are when
end-of-file or an IO error is encountered.
1648
1649Returns the number of uncompressed bytes written to C<$buffer>, zero if eof
1650or a negative number on error.
1651
1652=head2 getline
1653
1654Usage is
1655
1656    $line = $z->getline()
1657    $line = <$z>
1658
1659Reads a single line.
1660
1661This method fully supports the use of the variable C<$/> (or
1662C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to
1663determine what constitutes an end of line. Paragraph mode, record mode and
1664file slurp mode are all supported.
1665
1666=head2 getc
1667
1668Usage is
1669
1670    $char = $z->getc()
1671
1672Read a single character.
1673
1674=head2 ungetc
1675
1676Usage is
1677
1678    $char = $z->ungetc($string)
1679
1680=head2 inflateSync
1681
1682Usage is
1683
1684    $status = $z->inflateSync()
1685
1686TODO
1687
1688=head2 getHeaderInfo
1689
1690Usage is
1691
1692    $hdr  = $z->getHeaderInfo();
1693    @hdrs = $z->getHeaderInfo();
1694
This method returns either a hash reference (in scalar context) or a list
of hash references (in array context) that contains information about each
of the header fields in the compressed data stream(s).
1698
1699=head2 tell
1700
1701Usage is
1702
1703    $z->tell()
1704    tell $z
1705
1706Returns the uncompressed file offset.
1707
1708=head2 eof
1709
1710Usage is
1711
1712    $z->eof();
1713    eof($z);
1714
1715Returns true if the end of the compressed input stream has been reached.
1716
1717=head2 seek
1718
1719    $z->seek($position, $whence);
1720    seek($z, $position, $whence);
1721
1722Provides a sub-set of the C<seek> functionality, with the restriction
1723that it is only legal to seek forward in the input file/buffer.
1724It is a fatal error to attempt to seek backward.
1725
1726Note that the implementation of C<seek> in this module does not provide
1727true random access to a compressed file/buffer. It  works by uncompressing
1728data from the current offset in the file/buffer until it reaches the
1729uncompressed offset specified in the parameters to C<seek>. For very small
1730files this may be acceptable behaviour. For large files it may cause an
1731unacceptable delay.
1732
The C<$whence> parameter takes one of the usual values, namely SEEK_SET,
SEEK_CUR or SEEK_END.
1735
1736Returns 1 on success, 0 on failure.
1737
1738=head2 binmode
1739
1740Usage is
1741
1742    $z->binmode
1743    binmode $z ;
1744
1745This is a noop provided for completeness.
1746
1747=head2 opened
1748
1749    $z->opened()
1750
Returns true if the object currently refers to an opened file/buffer.
1752
1753=head2 autoflush
1754
1755    my $prev = $z->autoflush()
1756    my $prev = $z->autoflush(EXPR)
1757
1758If the C<$z> object is associated with a file or a filehandle, this method
1759returns the current autoflush setting for the underlying filehandle. If
1760C<EXPR> is present, and is non-zero, it will enable flushing after every
1761write/print operation.
1762
1763If C<$z> is associated with a buffer, this method has no effect and always
1764returns C<undef>.
1765
1766B<Note> that the special variable C<$|> B<cannot> be used to set or
1767retrieve the autoflush setting.
1768
1769=head2 input_line_number
1770
1771    $z->input_line_number()
1772    $z->input_line_number(EXPR)
1773
1774Returns the current uncompressed line number. If C<EXPR> is present it has
1775the effect of setting the line number. Note that setting the line number
1776does not change the current position within the file/buffer being read.
1777
1778The contents of C<$/> are used to determine what constitutes a line
1779terminator.
1780
1781=head2 fileno
1782
1783    $z->fileno()
1784    fileno($z)
1785
1786If the C<$z> object is associated with a file or a filehandle, C<fileno>
1787will return the underlying file descriptor. Once the C<close> method is
1788called C<fileno> will return C<undef>.
1789
1790If the C<$z> object is associated with a buffer, this method will return
1791C<undef>.
1792
1793=head2 close
1794
1795    $z->close() ;
1796    close $z ;
1797
Closes the input file/buffer.
1799
1800For most versions of Perl this method will be automatically invoked if
1801the IO::Uncompress::Unzip object is destroyed (either explicitly or by the
1802variable with the reference to the object going out of scope). The
1803exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In
1804these cases, the C<close> method will be called automatically, but
1805not until global destruction of all live objects when the program is
1806terminating.
1807
1808Therefore, if you want your scripts to be able to run on all versions
1809of Perl, you should call C<close> explicitly and not rely on automatic
1810closing.
1811
1812Returns true on success, otherwise 0.
1813
1814If the C<AutoClose> option has been enabled when the IO::Uncompress::Unzip
1815object was created, and the object is associated with a file, the
1816underlying file will also be closed.
1817
1818=head2 nextStream
1819
1820Usage is
1821
1822    my $status = $z->nextStream();
1823
1824Skips to the next compressed data stream in the input file/buffer. If a new
1825compressed data stream is found, the eof marker will be cleared and C<$.>
1826will be reset to 0.
1827
1828If trailing data is present immediately after the zip archive and the
1829C<Transparent> option is enabled, this method will consider that trailing
1830data to be another member of the zip archive.
1831
1832Returns 1 if a new stream was found, 0 if none was found, and -1 if an
1833error was encountered.
1834
1835=head2 trailingData
1836
1837Usage is
1838
1839    my $data = $z->trailingData();
1840
1841Returns the data, if any, that is present immediately after the compressed
1842data stream once uncompression is complete. It only makes sense to call
1843this method once the end of the compressed data stream has been
1844encountered.
1845
1846This option can be used when there is useful information immediately
1847following the compressed data stream, and you don't know the length of the
1848compressed data stream.
1849
1850If the input is a buffer, C<trailingData> will return everything from the
1851end of the compressed data stream to the end of the buffer.
1852
1853If the input is a filehandle, C<trailingData> will return the data that is
1854left in the filehandle input buffer once the end of the compressed data
1855stream has been reached. You can then use the filehandle to read the rest
1856of the input file.
1857
1858Don't bother using C<trailingData> if the input is a filename.
1859
1860If you know the length of the compressed data stream before you start
1861uncompressing, you can avoid having to use C<trailingData> by setting the
1862C<InputLength> option in the constructor.
1863
1864=head1 Importing
1865
1866No symbolic constants are required by IO::Uncompress::Unzip at present.
1867
1868=over 5
1869
1870=item :all
1871
1872Imports C<unzip> and C<$UnzipError>.
1873Same as doing this
1874
1875    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;
1876
1877=back
1878
1879=head1 EXAMPLES
1880
1881=head2 Working with Net::FTP
1882
1883See L<IO::Compress::FAQ|IO::Compress::FAQ/"Compressed files and Net::FTP">
1884
1885=head2 Walking through a zip file
1886
1887The code below can be used to traverse a zip file, one compressed data
1888stream at a time.
1889
1890    use IO::Uncompress::Unzip qw($UnzipError);
1891
1892    my $zipfile = "somefile.zip";
1893    my $u = IO::Uncompress::Unzip->new( $zipfile )
1894        or die "Cannot open $zipfile: $UnzipError";
1895
1896    my $status;
1897    for ($status = 1; $status > 0; $status = $u->nextStream())
1898    {
1899
1900        my $name = $u->getHeaderInfo()->{Name};
1901        warn "Processing member $name\n" ;
1902
1903        my $buff;
1904        while (($status = $u->read($buff)) > 0) {
1905            # Do something here
1906        }
1907
1908        last if $status < 0;
1909    }
1910
1911    die "Error processing $zipfile: $!\n"
1912        if $status < 0 ;
1913
1914Each individual compressed data stream is read until the logical
1915end-of-file is reached. Then C<nextStream> is called. This will skip to the
1916start of the next compressed data stream and clear the end-of-file flag.
1917
1918It is also worth noting that C<nextStream> can be called at any time -- you
1919don't have to wait until you have exhausted a compressed data stream before
1920skipping to the next one.
1921
1922=head2 Unzipping a complete zip file to disk
1923
Daniel S. Sterling has written a script that uses C<IO::Uncompress::Unzip>
to read a zip file and unzip its contents to disk.
1926
1927The script is available from L<https://gist.github.com/eqhmcow/5389877>
1928
1929=head1 SUPPORT
1930
1931General feedback/questions/bug reports should be sent to
1932L<https://github.com/pmqs/IO-Compress/issues> (preferred) or
1933L<https://rt.cpan.org/Public/Dist/Display.html?Name=IO-Compress>.
1934
1935=head1 SEE ALSO
1936
1937L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzma>, L<IO::Uncompress::UnLzma>, L<IO::Compress::Xz>, L<IO::Uncompress::UnXz>, L<IO::Compress::Lzip>, L<IO::Uncompress::UnLzip>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Compress::Lzf>, L<IO::Uncompress::UnLzf>, L<IO::Compress::Zstd>, L<IO::Uncompress::UnZstd>, L<IO::Uncompress::AnyInflate>, L<IO::Uncompress::AnyUncompress>
1938
1939L<IO::Compress::FAQ|IO::Compress::FAQ>
1940
1941L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>,
1942L<Archive::Tar|Archive::Tar>,
1943L<IO::Zlib|IO::Zlib>
1944
1945For RFC 1950, 1951 and 1952 see
1946L<http://www.faqs.org/rfcs/rfc1950.html>,
1947L<http://www.faqs.org/rfcs/rfc1951.html> and
1948L<http://www.faqs.org/rfcs/rfc1952.html>
1949
1950The I<zlib> compression library was written by Jean-loup Gailly
1951C<gzip@prep.ai.mit.edu> and Mark Adler C<madler@alumni.caltech.edu>.
1952
1953The primary site for the I<zlib> compression library is
1954L<http://www.zlib.org>.
1955
1956The primary site for gzip is L<http://www.gzip.org>.
1957
1958=head1 AUTHOR
1959
1960This module was written by Paul Marquess, C<pmqs@cpan.org>.
1961
1962=head1 MODIFICATION HISTORY
1963
1964See the Changes file.
1965
1966=head1 COPYRIGHT AND LICENSE
1967
1968Copyright (c) 2005-2021 Paul Marquess. All rights reserved.
1969
1970This program is free software; you can redistribute it and/or
1971modify it under the same terms as Perl itself.
1972