package IO::Uncompress::Unzip;

require 5.006 ;

# for RFC1952

use strict ;
use warnings;
use bytes;

use IO::File;
use IO::Uncompress::RawInflate  2.101 ;
use IO::Compress::Base::Common  2.101 qw(:Status );
use IO::Uncompress::Adapter::Inflate  2.101 ;
use IO::Uncompress::Adapter::Identity 2.101 ;
use IO::Compress::Zlib::Extra 2.101 ;
use IO::Compress::Zip::Constants 2.101 ;

use Compress::Raw::Zlib  2.101 () ;

BEGIN
{
    # Don't trigger any __DIE__ Hooks.
    local $SIG{__DIE__};

    # The adapters below are optional; a failure to load one is ignored here
    # and detected later by checking the adapter's $VERSION.
    eval{ require IO::Uncompress::Adapter::Bunzip2 ;
          IO::Uncompress::Adapter::Bunzip2->import() } ;
    eval{ require IO::Uncompress::Adapter::UnLzma ;
          IO::Uncompress::Adapter::UnLzma->import() } ;
    eval{ require IO::Uncompress::Adapter::UnXz ;
          IO::Uncompress::Adapter::UnXz->import() } ;
    eval{ require IO::Uncompress::Adapter::UnZstd ;
          IO::Uncompress::Adapter::UnZstd->import() } ;
}


require Exporter ;

our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $UnzipError, %headerLookup);

$VERSION = '2.102';
$UnzipError = '';

@ISA    = qw(IO::Uncompress::RawInflate Exporter);
@EXPORT_OK = qw( $UnzipError unzip );
%EXPORT_TAGS = %IO::Uncompress::RawInflate::EXPORT_TAGS ;
push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
Exporter::export_ok_tags('all');

# Maps the signature of each record that may follow the file data to the
# routine that skips over it. Used by chkTrailer.
%headerLookup = (
        ZIP_CENTRAL_HDR_SIG,            \&skipCentralDirectory,
        ZIP_END_CENTRAL_HDR_SIG,        \&skipEndCentralDirectory,
        ZIP64_END_CENTRAL_REC_HDR_SIG,  \&skipCentralDirectory64Rec,
        ZIP64_END_CENTRAL_LOC_HDR_SIG,  \&skipCentralDirectory64Loc,
        ZIP64_ARCHIVE_EXTRA_SIG,        \&skipArchiveExtra,
        ZIP64_DIGITAL_SIGNATURE_SIG,    \&skipDigitalSignature,
        );

# Human readable names for the compression method IDs, reported as
# 'MethodName' in the header info hash.
my %MethodNames = (
        ZIP_CM_DEFLATE()    => 'Deflated',
        ZIP_CM_BZIP2()      => 'Bzip2',
        ZIP_CM_LZMA()       => 'Lzma',
        ZIP_CM_STORE()      => 'Stored',
        ZIP_CM_XZ()         => 'Xz',
        ZIP_CM_ZSTD()       => 'Zstd',
    );

# Constructor. All arguments are passed through to _create.
sub new
{
    my $class = shift ;
    my $obj = IO::Compress::Base::Common::createSelfTiedObject($class, \$UnzipError);
    $obj->_create(undef, 0, @_);
}

# One-shot functional interface. All arguments are passed through to _inf.
sub unzip
{
    my $obj = IO::Compress::Base::Common::createSelfTiedObject(undef, \$UnzipError);
    return $obj->_inf(@_) ;
}

# Returns the option specifications that are specific to this class.
sub getExtraParams
{

    return (
#            # Zip header fields
            'name'    => [IO::Compress::Base::Common::Parse_any,       undef],

            'stream'  => [IO::Compress::Base::Common::Parse_boolean,   0],
            'efs'     => [IO::Compress::Base::Common::Parse_boolean,   0],

            # TODO - This means reading the central directory to get
            # 1. the local header offsets
            # 2. The compressed data length
        );
}

# Records the parsed 'name' and 'efs' options and forces crc32 on.
# Always returns 1.
sub ckParams
{
    my $self = shift ;
    my $got = shift ;

    # unzip always needs crc32
    $got->setValue('crc32' => 1);

    *$self->{UnzipData}{Name} = $got->getValue('name');
    *$self->{UnzipData}{efs} = $got->getValue('efs');

    return 1;
}

# Checks the magic signature, then reads the local file header and stores
# the resulting info hash in *$self->{Info}.
# Returns 1 on success, 0 if the magic check fails, undef if readHeader fails.
sub mkUncomp
{
    my $self = shift ;
    my $got = shift ;

    my $magic = $self->ckMagic()
        or return 0;

    *$self->{Info} = $self->readHeader($magic)
        or return undef ;

    return 1;

}

# Reads the 4-byte signature and checks that it is a zip local file header.
# Returns the signature bytes on success, otherwise the result of HeaderError.
sub ckMagic
{
    my $self = shift;

    my $magic ;
    $self->smartReadExact(\$magic, 4);

    *$self->{HeaderPending} = $magic ;

    return $self->HeaderError("Minimum header size is " .
                              4 . " bytes")
        if length $magic != 4 ;

    return $self->HeaderError("Bad Magic")
        if ! _isZipMagic($magic) ;

    *$self->{Type} = 'zip';

    return $magic ;
}


# Reads and discards $offset bytes of input, at most 16k per read.
# Returns 1 on success, 0 if a read fails.
sub fastForward
{
    my $self = shift;
    my $offset = shift;

    # TODO - if Stream isn't enabled & reading from file, use seek

    my $buffer = '';
    my $c = 1024 * 16;

    while ($offset > 0)
    {
        # The final chunk is whatever is left. The comparison has to be on
        # the numeric value of $offset, not on the length of its string form.
        $c = $offset
            if $offset < $c ;

        $offset -= $c;

        $self->smartReadExact(\$buffer, $c)
            or return 0;
    }

    return 1;
}


# Reads local file headers, starting with the one introduced by $magic,
# until the wanted member is found. When no 'name' option was given the
# first member is the wanted one. The data of unwanted members is skipped.
# Returns the header info hash, or undef on failure.
sub readHeader
{
    my $self = shift;
    my $magic = shift ;

    my $name =  *$self->{UnzipData}{Name} ;
    my $hdr = $self->_readZipHeader($magic) ;

    while (defined $hdr)
    {
        if (! defined $name || $hdr->{Name} eq $name)
        {
            return $hdr ;
        }

        # skip the data
        # TODO - when Stream is off, use seek
        my $buffer;
        if (*$self->{ZipData}{Streaming}) {
            # The compressed length is not known up front, so the data has to
            # be uncompressed to find where it ends.
            while (1) {

                my $b;
                my $status = $self->smartRead(\$b, 1024 * 16);

                return $self->saveErrorString(undef, "Truncated file")
                    if $status <= 0 ;

                my $temp_buf ;
                my $out;

                $status = *$self->{Uncomp}->uncompr(\$b, \$temp_buf, 0, $out);

                return $self->saveErrorString(undef, *$self->{Uncomp}{Error},
                                                     *$self->{Uncomp}{ErrorNo})
                    if $self->saveStatus($status) == STATUS_ERROR;

                # whatever is left in $b goes back to the input
                $self->pushBack($b)  ;

                if ($status == STATUS_ENDSTREAM) {
                    *$self->{Uncomp}->reset();
                    last;
                }
            }

            # skip the trailer
            $self->smartReadExact(\$buffer, $hdr->{TrailerLength})
                or return $self->saveErrorString(undef, "Truncated file");
        }
        else {
            my $c = $hdr->{CompressedLength}->get64bit();
            $self->fastForward($c)
                or return $self->saveErrorString(undef, "Truncated file");
            $buffer = '';
        }

        $self->chkTrailer($buffer) == STATUS_OK
            or return $self->saveErrorString(undef, "Truncated file");

        $hdr = $self->_readFullZipHeader();

        return $self->saveErrorString(undef, "Cannot find '$name'")
            if $self->smartEof();
    }

    return undef;
}

# Processes the end of a member. $trailer holds the data descriptor when the
# member is streamed; otherwise the values saved by _readZipHeader are used.
# In Strict mode the CRC and sizes are verified. Any central directory
# records that follow are then skipped.
# Returns STATUS_OK, STATUS_EOF or STATUS_ERROR (or the result of
# TrailerError / saveErrorString).
sub chkTrailer
{
    my $self = shift;
    my $trailer = shift;

    my ($sig, $CRC32, $cSize, $uSize) ;
    if (*$self->{ZipData}{Streaming}) {
        $sig   = unpack ("V", substr($trailer, 0, 4));
        $CRC32 = unpack ("V", substr($trailer, 4, 4));

        if (*$self->{ZipData}{Zip64} ) {
            $cSize = U64::newUnpack_V64(substr($trailer,  8, 8));
            $uSize = U64::newUnpack_V64(substr($trailer, 16, 8));
        }
        else {
            $cSize = U64::newUnpack_V32(substr($trailer,  8, 4));
            $uSize = U64::newUnpack_V32(substr($trailer, 12, 4));
        }

        return $self->TrailerError("Data Descriptor signature, got $sig")
            if $sig != ZIP_DATA_HDR_SIG;
    }
    else {
        ($CRC32, $cSize, $uSize) =
            (*$self->{ZipData}{Crc32},
             *$self->{ZipData}{CompressedLen},
             *$self->{ZipData}{UnCompressedLen});
    }

    *$self->{Info}{CRC32} = *$self->{ZipData}{CRC32} ;
    *$self->{Info}{CompressedLength} = $cSize->get64bit();
    *$self->{Info}{UncompressedLength} = $uSize->get64bit();

    if (*$self->{Strict}) {
        return $self->TrailerError("CRC mismatch")
            if $CRC32  != *$self->{ZipData}{CRC32} ;

        return $self->TrailerError("CSIZE mismatch.")
            if ! $cSize->equal(*$self->{CompSize});

        return $self->TrailerError("USIZE mismatch.")
            if ! $uSize->equal(*$self->{UnCompSize});
    }

    my $reachedEnd = STATUS_ERROR ;
    # check for central directory or end of central directory
    while (1)
    {
        my $magic ;
        my $got = $self->smartRead(\$magic, 4);

        return $self->saveErrorString(STATUS_ERROR, "Truncated file")
            if $got != 4 && *$self->{Strict};

        if ($got == 0) {
            return STATUS_EOF ;
        }
        elsif ($got < 0) {
            return STATUS_ERROR ;
        }
        elsif ($got < 4) {
            $self->pushBack($magic)  ;
            return STATUS_OK ;
        }

        my $sig = unpack("V", $magic) ;

        my $hdr;
        if ($hdr = $headerLookup{$sig})
        {
            if ($hdr->($self, $magic) != STATUS_OK ) {
                if (*$self->{Strict}) {
                    return STATUS_ERROR ;
                }
                else {
                    $self->clearError();
                    return STATUS_OK ;
                }
            }

            # the end of central directory record is the last one
            if ($sig == ZIP_END_CENTRAL_HDR_SIG)
            {
                return STATUS_OK ;
            }
        }
        elsif ($sig == ZIP_LOCAL_HDR_SIG)
        {
            # another member follows; leave its signature for the next reader
            $self->pushBack($magic)  ;
            return STATUS_OK ;
        }
        else
        {
            # put the data back
            $self->pushBack($magic)  ;
            last;
        }
    }

    return $reachedEnd ;
}

# Skips one central directory file header (46 bytes fixed, followed by the
# filename, extra field and comment). $magic is the signature already read.
# Returns STATUS_OK, or the result of TrailerError / TruncatedTrailer.
sub skipCentralDirectory
{
    my $self = shift;
    my $magic = shift ;

    my $buffer;
    $self->smartReadExact(\$buffer, 46 - 4)
        or return $self->TrailerError("Minimum header size is " .
                                     46 . " bytes") ;

    my $keep = $magic . $buffer ;
    *$self->{HeaderPending} = $keep ;

   #my $versionMadeBy      = unpack ("v", substr($buffer, 4-4,  2));
   #my $extractVersion     = unpack ("v", substr($buffer, 6-4,  2));
   #my $gpFlag             = unpack ("v", substr($buffer, 8-4,  2));
   #my $compressedMethod   = unpack ("v", substr($buffer, 10-4, 2));
   #my $lastModTime        = unpack ("V", substr($buffer, 12-4, 4));
   #my $crc32              = unpack ("V", substr($buffer, 16-4, 4));
   #my $compressedLength   = unpack ("V", substr($buffer, 20-4, 4));
   #my $uncompressedLength = unpack ("V", substr($buffer, 24-4, 4));
    my $filename_length    = unpack ("v", substr($buffer, 28-4, 2));
    my $extra_length       = unpack ("v", substr($buffer, 30-4, 2));
    my $comment_length     = unpack ("v", substr($buffer, 32-4, 2));
   #my $disk_start         = unpack ("v", substr($buffer, 34-4, 2));
   #my $int_file_attrib    = unpack ("v", substr($buffer, 36-4, 2));
   #my $ext_file_attrib    = unpack ("V", substr($buffer, 38-4, 2));
   #my $lcl_hdr_offset     = unpack ("V", substr($buffer, 42-4, 2));


    my $filename;
    my $extraField;
    my $comment ;
    if ($filename_length)
    {
        $self->smartReadExact(\$filename, $filename_length)
            or return $self->TruncatedTrailer("filename");
        $keep .= $filename ;
    }

    if ($extra_length)
    {
        $self->smartReadExact(\$extraField, $extra_length)
            or return $self->TruncatedTrailer("extra");
        $keep .= $extraField ;
    }

    if ($comment_length)
    {
        $self->smartReadExact(\$comment, $comment_length)
            or return $self->TruncatedTrailer("comment");
        $keep .= $comment ;
    }

    return STATUS_OK ;
}

# Skips an archive extra data record: a 4-byte length followed by that many
# bytes. $magic is the signature already read.
# Returns STATUS_OK, or the result of TrailerError.
sub skipArchiveExtra
{
    my $self = shift;
    my $magic = shift ;

    my $buffer;
    $self->smartReadExact(\$buffer, 4)
        or return $self->TrailerError("Minimum header size is " .
                                     4 . " bytes") ;

    my $keep = $magic . $buffer ;

    my $size = unpack ("V", $buffer);

    $self->smartReadExact(\$buffer, $size)
        or return $self->TrailerError("Minimum header size is " .
                                     $size . " bytes") ;

    $keep .= $buffer ;
    *$self->{HeaderPending} = $keep ;

    return STATUS_OK ;
}


# Skips a Zip64 end of central directory record: an 8-byte length followed
# by that many bytes. $magic is the signature already read.
# Returns STATUS_OK, or the result of TrailerError.
sub skipCentralDirectory64Rec
{
    my $self = shift;
    my $magic = shift ;

    my $buffer;
    $self->smartReadExact(\$buffer, 8)
        or return $self->TrailerError("Minimum header size is " .
                                     8 . " bytes") ;

    my $keep = $magic . $buffer ;

    # Combine the two 32-bit halves the same way offsetFromZip64 does for
    # this field.
    my $size = U64::Value_VV64($buffer);

    $self->fastForward($size)
        or return $self->TrailerError("Minimum header size is " .
                                     $size . " bytes") ;

   #$keep .= $buffer ;
   #*$self->{HeaderPending} = $keep ;

   #my $versionMadeBy      = unpack ("v",   substr($buffer,  0, 2));
   #my $extractVersion     = unpack ("v",   substr($buffer,  2, 2));
   #my $diskNumber         = unpack ("V",   substr($buffer,  4, 4));
   #my $cntrlDirDiskNo     = unpack ("V",   substr($buffer,  8, 4));
   #my $entriesInThisCD    = unpack ("V V", substr($buffer, 12, 8));
   #my $entriesInCD        = unpack ("V V", substr($buffer, 20, 8));
   #my $sizeOfCD           = unpack ("V V", substr($buffer, 28, 8));
   #my $offsetToCD         = unpack ("V V", substr($buffer, 36, 8));

    return STATUS_OK ;
}

# Skips a Zip64 end of central directory locator (20 bytes including the
# signature). $magic is the signature already read.
# Returns STATUS_OK, or the result of TrailerError.
sub skipCentralDirectory64Loc
{
    my $self = shift;
    my $magic = shift ;

    my $buffer;
    $self->smartReadExact(\$buffer, 20 - 4)
        or return $self->TrailerError("Minimum header size is " .
                                     20 . " bytes") ;

    my $keep = $magic . $buffer ;
    *$self->{HeaderPending} = $keep ;

   #my $startCdDisk        = unpack ("V",   substr($buffer,  4-4, 4));
   #my $offsetToCD         = unpack ("V V", substr($buffer,  8-4, 8));
   #my $diskCount          = unpack ("V",   substr($buffer, 16-4, 4));

    return STATUS_OK ;
}

# Skips the end of central directory record (22 bytes fixed, followed by an
# optional comment). $magic is the signature already read.
# Returns STATUS_OK, or the result of TrailerError / TruncatedTrailer.
sub skipEndCentralDirectory
{
    my $self = shift;
    my $magic = shift ;


    my $buffer;
    $self->smartReadExact(\$buffer, 22 - 4)
        or return $self->TrailerError("Minimum header size is " .
                                     22 . " bytes") ;

    my $keep = $magic . $buffer ;
    *$self->{HeaderPending} = $keep ;

   #my $diskNumber         = unpack ("v", substr($buffer, 4-4,  2));
   #my $cntrlDirDiskNo     = unpack ("v", substr($buffer, 6-4,  2));
   #my $entriesInThisCD    = unpack ("v", substr($buffer, 8-4,  2));
   #my $entriesInCD        = unpack ("v", substr($buffer, 10-4, 2));
   #my $sizeOfCD           = unpack ("V", substr($buffer, 12-4, 4));
   #my $offsetToCD         = unpack ("V", substr($buffer, 16-4, 4));
    my $comment_length     = unpack ("v", substr($buffer, 20-4, 2));


    my $comment ;
    if ($comment_length)
    {
        $self->smartReadExact(\$comment, $comment_length)
            or return $self->TruncatedTrailer("comment");
        $keep .= $comment ;
    }

    return STATUS_OK ;
}


# Returns true if the first 4 bytes of $buffer are the local file header
# signature. Returns 0 when $buffer is shorter than 4 bytes.
sub _isZipMagic
{
    my $buffer = shift ;
    return 0 if length $buffer < 4 ;
    my $sig = unpack("V", $buffer) ;
    return $sig == ZIP_LOCAL_HDR_SIG ;
}


# Reads a complete local file header, signature included.
# Returns the header info hash, otherwise the result of HeaderError.
# NOTE: only ever called as a method, where the prototype has no effect.
sub _readFullZipHeader($)
{
    my ($self) = @_ ;
    my $magic = '' ;

    $self->smartReadExact(\$magic, 4);

    *$self->{HeaderPending} = $magic ;

    return $self->HeaderError("Minimum header size is " .
                              30 . " bytes")
        if length $magic != 4 ;


    return $self->HeaderError("Bad Magic")
        if ! _isZipMagic($magic) ;

    my $status = $self->_readZipHeader($magic);
    delete *$self->{Transparent} if ! defined $status ;
    return $status ;
}

# Reads the local file header that follows the signature $magic, records the
# member's properties in *$self->{ZipData} and creates the uncompression
# object for its compression method in *$self->{Uncomp}.
# Returns the header info hash, otherwise the result of HeaderError /
# TruncatedHeader / saveErrorString.
# NOTE: only ever called as a method, where the prototype has no effect.
sub _readZipHeader($)
{
    my ($self, $magic) = @_ ;
    my ($buffer) = '' ;

    $self->smartReadExact(\$buffer, 30 - 4)
        or return $self->HeaderError("Minimum header size is " .
                                     30 . " bytes") ;

    my $keep = $magic . $buffer ;
    *$self->{HeaderPending} = $keep ;

    my $extractVersion     = unpack ("v", substr($buffer, 4-4,  2));
    my $gpFlag             = unpack ("v", substr($buffer, 6-4,  2));
    my $compressedMethod   = unpack ("v", substr($buffer, 8-4,  2));
    my $lastModTime        = unpack ("V", substr($buffer, 10-4, 4));
    my $crc32              = unpack ("V", substr($buffer, 14-4, 4));
    my $compressedLength   = U64::newUnpack_V32(substr($buffer, 18-4, 4));
    my $uncompressedLength = U64::newUnpack_V32(substr($buffer, 22-4, 4));
    my $filename_length    = unpack ("v", substr($buffer, 26-4, 2));
    my $extra_length       = unpack ("v", substr($buffer, 28-4, 2));

    my $filename;
    my $extraField;
    my @EXTRA = ();

    # Some programs (some versions of LibreOffice) mark entries as streamed, but still fill out
    # compressedLength/uncompressedLength & crc32 in the local file header.
    # The expected data descriptor is not populated.
    # So only assume streaming if the Streaming bit is set AND the crc32
    # field in the local header is zero.
    my $streamingMode = (($gpFlag & ZIP_GP_FLAG_STREAMING_MASK)  && $crc32 == 0) ? 1 : 0 ;

    my $efs_flag = ($gpFlag & ZIP_GP_FLAG_LANGUAGE_ENCODING) ? 1 : 0;

    return $self->HeaderError("Encrypted content not supported")
        if $gpFlag & (ZIP_GP_FLAG_ENCRYPTED_MASK|ZIP_GP_FLAG_STRONG_ENCRYPTED_MASK);

    return $self->HeaderError("Patch content not supported")
        if $gpFlag & ZIP_GP_FLAG_PATCHED_MASK;

    *$self->{ZipData}{Streaming} = $streamingMode;


    if ($filename_length)
    {
        $self->smartReadExact(\$filename, $filename_length)
            or return $self->TruncatedHeader("Filename");

        if (*$self->{UnzipData}{efs} && $efs_flag && $] >= 5.008004)
        {
            require Encode;
            require Carp;
            # The eval returns 1 so that a decoded name that is false in
            # boolean context (e.g. "0") is not reported as a failure.
            eval { $filename = Encode::decode_utf8($filename, 1); 1 }
                or Carp::croak("Zip Filename not UTF-8") ;
        }

        $keep .= $filename ;
    }

    my $zip64 = 0 ;

    if ($extra_length)
    {
        $self->smartReadExact(\$extraField, $extra_length)
            or return $self->TruncatedHeader("Extra Field");

        my $bad = IO::Compress::Zlib::Extra::parseRawExtra($extraField,
                                                \@EXTRA, 1, 0);
        return $self->HeaderError($bad)
            if defined $bad;

        $keep .= $extraField ;

        my %Extra ;
        for (@EXTRA)
        {
            $Extra{$_->[0]} = \$_->[1];
        }

        if (defined $Extra{ZIP_EXTRA_ID_ZIP64()})
        {
            $zip64 = 1 ;

            my $buff = ${ $Extra{ZIP_EXTRA_ID_ZIP64()} };

            # This code assumes that all the fields in the Zip64
            # extra field aren't necessarily present. The spec says that
            # they only exist if the equivalent local headers are -1.

            if (! $streamingMode) {
                my $offset = 0 ;

                if (U64::full32($uncompressedLength->get32bit()) ) {
                    $uncompressedLength
                            = U64::newUnpack_V64(substr($buff, 0, 8));

                    $offset += 8 ;
                }

                if (U64::full32($compressedLength->get32bit()) ) {

                    $compressedLength
                        = U64::newUnpack_V64(substr($buff, $offset, 8));

                    $offset += 8 ;
                }
           }
        }
    }

    *$self->{ZipData}{Zip64} = $zip64;

    if (! $streamingMode) {
        *$self->{ZipData}{Streaming} = 0;
        *$self->{ZipData}{Crc32} = $crc32;
        *$self->{ZipData}{CompressedLen} = $compressedLength;
        *$self->{ZipData}{UnCompressedLen} = $uncompressedLength;
        *$self->{CompressedInputLengthRemaining} =
            *$self->{CompressedInputLength} = $compressedLength->get64bit();
    }

    *$self->{ZipData}{CRC32} = Compress::Raw::Zlib::crc32(undef);
    *$self->{ZipData}{Method} = $compressedMethod;
    if ($compressedMethod == ZIP_CM_DEFLATE)
    {
        *$self->{Type} = 'zip-deflate';
        my $obj = IO::Uncompress::Adapter::Inflate::mkUncompObject(1,0,0);

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_BZIP2)
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::Bunzip2::VERSION ;

        *$self->{Type} = 'zip-bzip2';

        my $obj = IO::Uncompress::Adapter::Bunzip2::mkUncompObject();

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_XZ)
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::UnXz::VERSION ;

        *$self->{Type} = 'zip-xz';

        my $obj = IO::Uncompress::Adapter::UnXz::mkUncompObject();

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_ZSTD)
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::UnZstd::VERSION ;

        *$self->{Type} = 'zip-zstd';

        my $obj = IO::Uncompress::Adapter::UnZstd::mkUncompObject();

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_LZMA)
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::UnLzma::VERSION ;

        *$self->{Type} = 'zip-lzma';

        # The compressed data is preceded by a 4-byte header (two version
        # bytes and a 2-byte properties size) and the properties themselves.
        my $LzmaHeader;
        $self->smartReadExact(\$LzmaHeader, 4)
                or return $self->saveErrorString(undef, "Truncated file");
        my ($verHi, $verLo)   = unpack ("CC", substr($LzmaHeader, 0, 2));
        my $LzmaPropertiesSize   = unpack ("v", substr($LzmaHeader, 2, 2));


        my $LzmaPropertyData;
        $self->smartReadExact(\$LzmaPropertyData, $LzmaPropertiesSize)
                or return $self->saveErrorString(undef, "Truncated file");

        if (! $streamingMode) {
            # the bytes just consumed are counted in the compressed length
            *$self->{ZipData}{CompressedLen}->subtract(4 + $LzmaPropertiesSize) ;
            *$self->{CompressedInputLengthRemaining} =
                *$self->{CompressedInputLength} = *$self->{ZipData}{CompressedLen}->get64bit();
        }

        my $obj =
            IO::Uncompress::Adapter::UnLzma::mkUncompZipObject($LzmaPropertyData);

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_STORE)
    {
        *$self->{Type} = 'zip-stored';

        my $obj =
        IO::Uncompress::Adapter::Identity::mkUncompObject($streamingMode,
                                                          $zip64);

        *$self->{Uncomp} = $obj;
    }
    else
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod");
    }

    return {
        'Type'               => 'zip',
        'FingerprintLength'  => 4,
        #'HeaderLength'       => $compressedMethod == 8 ? length $keep : 0,
        'HeaderLength'       => length $keep,
        'Zip64'              => $zip64,
        'TrailerLength'      => ! $streamingMode ? 0 : $zip64 ? 24 : 16,
        'Header'             => $keep,
        'CompressedLength'   => $compressedLength ,
        'UncompressedLength' => $uncompressedLength ,
        'CRC32'              => $crc32 ,
        'Name'               => $filename,
        'efs'                => $efs_flag, # language encoding flag
        'Time'               => _dosToUnixTime($lastModTime),
        'Stream'             => $streamingMode,

        'MethodID'           => $compressedMethod,
        'MethodName'         => $MethodNames{$compressedMethod} || 'Unknown',

        'ExtraFieldRaw'      => $extraField,
        'ExtraField'         => [ @EXTRA ],


      }
}

# Updates the running CRC32 of the uncompressed data. For deflate the value
# is taken from the uncompression object; for every other method it is
# computed here from the buffer referenced by the first argument.
sub filterUncompressed
{
    my $self = shift ;

    if (*$self->{ZipData}{Method} == ZIP_CM_DEFLATE) {
        *$self->{ZipData}{CRC32} = *$self->{Uncomp}->crc32() ;
    }
    else {
        *$self->{ZipData}{CRC32} = Compress::Raw::Zlib::crc32(${$_[0]}, *$self->{ZipData}{CRC32}, $_[1]);
    }
}


# from Archive::Zip & info-zip
# Converts a packed 32-bit DOS date/time into a time_t via mktime.
# Returns 0 when mktime cannot represent the value.
sub _dosToUnixTime
{
    my $dt = shift;

    my $year = ( ( $dt >> 25 ) & 0x7f ) + 80;
    my $mon  = ( ( $dt >> 21 ) & 0x0f ) - 1;
    my $mday = ( ( $dt >> 16 ) & 0x1f );

    my $hour = ( ( $dt >> 11 ) & 0x1f );
    my $min  = ( ( $dt >> 5 ) & 0x3f );
    my $sec  = ( ( $dt << 1 ) & 0x3e );


    use POSIX 'mktime';

    my $time_t = mktime( $sec, $min, $hour, $mday, $mon, $year, 0, 0, -1 );
    return 0 if ! defined $time_t;
    return $time_t;
}


# Moves the input position forward by $size, which is either a plain number
# or a U64 object.
sub skip
{
    my $self = shift;
    my $size = shift;

    use Fcntl qw(SEEK_CUR);

    my $bytes = ref $size eq 'U64' ? $size->get64bit() : $size ;

    # smartSeek is called as (offset, flag, whence) everywhere else in this
    # file, so the whence value has to be the third argument.
    return $self->smartSeek($bytes, 0, SEEK_CUR);
}


# Walks the central directory and returns a list with one U64 per entry
# holding that entry's compressed size. Returns an empty list when the
# central directory cannot be located. The input position on entry is
# restored before returning.
sub scanCentralDirectory
{
    my $self = shift;

    my $here = $self->tell();

    # Use cases
    # 1 32-bit CD
    # 2 64-bit CD

    my @CD = ();
    my $offset = $self->findCentralDirectoryOffset();

    return ()
        if ! defined $offset;

    $self->smartSeek($offset, 0, SEEK_SET) ;

    # Now walk the Central Directory Records
    my $buffer ;
    while ($self->smartReadExact(\$buffer, 46) &&
           unpack("V", $buffer) == ZIP_CENTRAL_HDR_SIG) {

        my $compressedLength   = unpack("V", substr($buffer, 20, 4));
        my $uncompressedLength = unpack("V", substr($buffer, 24, 4));
        my $filename_length    = unpack("v", substr($buffer, 28, 2));
        my $extra_length       = unpack("v", substr($buffer, 30, 2));
        my $comment_length     = unpack("v", substr($buffer, 32, 2));

        $self->skip($filename_length ) ;

        my $v64 = U64->new( $compressedLength );

        if (U64::full32($compressedLength) ) {
            # the real size lives in the Zip64 extra field
            $self->smartReadExact(\$buffer, $extra_length) ;
            die "Truncated extra field in central directory (offset $offset, " .
                "comment $comment_length, filename $filename_length, " .
                "extra $extra_length, got " . length($buffer) . ")"
                if length($buffer) != $extra_length;
            my $got = $self->get64Extra($buffer, U64::full32($uncompressedLength));

            # If not Zip64 extra field, assume size is 0xFFFFFFFF
            $v64 = $got if defined $got;
        }
        else {
            $self->skip($extra_length) ;
        }

        $self->skip($comment_length ) ;

        push @CD, $v64 ;
    }

    $self->smartSeek($here, 0, SEEK_SET) ;

    return @CD;
}

# Extracts the compressed size from the Zip64 extra field (ID 0x0001) found
# in the raw extra data $buffer. When $is_uncomp is true the value is read
# 8 bytes into the field, after the uncompressed size.
# Returns a U64, or undef when there is no Zip64 extra field.
sub get64Extra
{
    my $self = shift ;

    my $buffer = shift;
    my $is_uncomp = shift ;

    my $extra = IO::Compress::Zlib::Extra::findID(0x0001, $buffer);

    return undef
        if ! defined $extra;

    return U64::newUnpack_V64(substr($extra,  $is_uncomp ? 8 : 0)) ;
}

# Given $here, the position of the end of central directory record, reads
# the Zip64 locator that sits in the 20 bytes before it, follows it to the
# Zip64 end of central directory record and returns the central directory
# offset stored there. Dies when any of the records cannot be read or has
# the wrong signature.
sub offsetFromZip64
{
    my $self = shift ;
    my $here = shift;

    $self->smartSeek($here - 20, 0, SEEK_SET)
        or die "Cannot seek to Zip64 end of central directory locator: $!" ;

    my $buffer;
    $self->smartReadExact(\$buffer, 20)
        or die "Cannot read Zip64 end of central directory locator at $here: $!" ;

    if ( unpack("V", $buffer) == ZIP64_END_CENTRAL_LOC_HDR_SIG ) {
        my $cd64 = U64::Value_VV64(substr($buffer,  8, 8));

        $self->smartSeek($cd64, 0, SEEK_SET) ;

        $self->smartReadExact(\$buffer, 4)
            or die "Cannot read Zip64 end of central directory record signature" ;

        if ( unpack("V", $buffer) == ZIP64_END_CENTRAL_REC_HDR_SIG ) {

            $self->smartReadExact(\$buffer, 8)
                or die "Cannot read Zip64 end of central directory record size" ;
            my $size = U64::Value_VV64($buffer);
            $self->smartReadExact(\$buffer, $size)
                or die "Cannot read Zip64 end of central directory record" ;

            # $buffer starts 12 bytes into the record, so 36 here is the
            # field that sits 48 bytes from the start of the record.
            return U64::Value_VV64(substr($buffer,  36, 8));
        }

        die "Zip64 end of central directory record not found";
    }

    die "Zip64 end of central directory locator not found";
}

use constant Pack_ZIP_END_CENTRAL_HDR_SIG => pack("V", ZIP_END_CENTRAL_HDR_SIG);

# Locates the end of central directory record and returns the offset of the
# central directory it records. A value of 0xFFFFFFFF is resolved through
# the Zip64 records. Returns undef when no end of central directory
# signature is found; dies when a seek or read fails.
sub findCentralDirectoryOffset
{
    my $self = shift ;

    # Most common use-case is where there is no comment, so
    # know exactly where the end of central directory record
    # should be.

    $self->smartSeek(-22, 0, SEEK_END) ;
    my $here = $self->tell();

    my $buffer;
    $self->smartReadExact(\$buffer, 22)
        or die "Cannot read end of central directory record" ;

    my $centralDirOffset ;
    if ( unpack("V", $buffer) == ZIP_END_CENTRAL_HDR_SIG ) {
        $centralDirOffset = unpack("V", substr($buffer, 16,  4));
    }
    else {
        # There is a trailing comment. Search backwards from the end of the
        # file, looking at 1k more on each pass.
        $self->smartSeek(0, 0, SEEK_END) ;

        my $fileLen = $self->tell();
        my $want = 0 ;

        while(1) {
            $want += 1024;
            my $seekTo = $fileLen - $want;
            if ($seekTo < 0 ) {
                $seekTo = 0;
                $want = $fileLen ;
            }
            $self->smartSeek( $seekTo, 0, SEEK_SET)
                or die "Cannot seek to $seekTo: $!" ;
            # smartReadExact takes a reference to the buffer, as in every
            # other call in this file.
            $self->smartReadExact(\$buffer, $want)
                or die "Cannot read $want bytes at $seekTo" ;
            my $pos = rindex( $buffer, Pack_ZIP_END_CENTRAL_HDR_SIG);

            if ($pos >= 0) {
                #$here = $self->tell();
                $here = $seekTo + $pos ;
                $centralDirOffset = unpack("V", substr($buffer, $pos + 16,  4));
                last ;
            }

            return undef
                if $want == $fileLen;
        }
    }

    $centralDirOffset = $self->offsetFromZip64($here)
        if U64::full32($centralDirOffset) ;

    return $centralDirOffset ;
}

1;

__END__


=head1 NAME

IO::Uncompress::Unzip - Read zip files/buffers

=head1 SYNOPSIS

    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;

    my $status = unzip $input => $output [,OPTS]
        or die "unzip failed: $UnzipError\n";

    my $z = IO::Uncompress::Unzip->new( $input [OPTS] )
        or die "unzip failed: $UnzipError\n";

    $status = $z->read($buffer)
    $status = $z->read($buffer, $length)
    $status = $z->read($buffer, $length, $offset)
    $line = $z->getline()
    $char = $z->getc()
    $char = $z->ungetc()
    $char = $z->opened()

    $status = $z->inflateSync()

    $data = $z->trailingData()
    $status = $z->nextStream()
    $data = $z->getHeaderInfo()
    $z->tell()
    $z->seek($position, $whence)
    $z->binmode()
    $z->fileno()
    $z->eof()
    $z->close()

    $UnzipError ;

    # IO::File mode

    <$z>
    read($z, $buffer);
    read($z, $buffer, $length);
    read($z, $buffer, $length, $offset);
    tell($z)
    seek($z, $position, $whence)
    binmode($z)
    fileno($z)
    eof($z)
    close($z)

=head1 DESCRIPTION

This module provides a Perl interface that allows the reading of
zip files/buffers.

For writing zip files/buffers, see the companion module IO::Compress::Zip.
1140 1141The primary purpose of this module is to provide I<streaming> read access to 1142zip files and buffers. 1143 1144At present the following compression methods are supported by IO::Uncompress::Unzip 1145 1146=over 5 1147 1148=item Store (0) 1149 1150=item Deflate (8) 1151 1152=item Bzip2 (12) 1153 1154To read Bzip2 content, the module C<IO::Uncompress::Bunzip2> must 1155be installed. 1156 1157=item Lzma (14) 1158 1159To read LZMA content, the module C<IO::Uncompress::UnLzma> must 1160be installed. 1161 1162=item Xz (95) 1163 1164To read Xz content, the module C<IO::Uncompress::UnXz> must 1165be installed. 1166 1167=item Zstandard (93) 1168 1169To read Zstandard content, the module C<IO::Uncompress::UnZstd> must 1170be installed. 1171 1172=back 1173 1174=head1 Functional Interface 1175 1176A top-level function, C<unzip>, is provided to carry out 1177"one-shot" uncompression between buffers and/or files. For finer 1178control over the uncompression process, see the L</"OO Interface"> 1179section. 1180 1181 use IO::Uncompress::Unzip qw(unzip $UnzipError) ; 1182 1183 unzip $input_filename_or_reference => $output_filename_or_reference [,OPTS] 1184 or die "unzip failed: $UnzipError\n"; 1185 1186The functional interface needs Perl5.005 or better. 1187 1188=head2 unzip $input_filename_or_reference => $output_filename_or_reference [, OPTS] 1189 1190C<unzip> expects at least two parameters, 1191C<$input_filename_or_reference> and C<$output_filename_or_reference> 1192and zero or more optional parameters (see L</Optional Parameters>) 1193 1194=head3 The C<$input_filename_or_reference> parameter 1195 1196The parameter, C<$input_filename_or_reference>, is used to define the 1197source of the compressed data. 1198 1199It can take one of the following forms: 1200 1201=over 5 1202 1203=item A filename 1204 1205If the C<$input_filename_or_reference> parameter is a simple scalar, it is 1206assumed to be a filename. 
This file will be opened for reading and the 1207input data will be read from it. 1208 1209=item A filehandle 1210 1211If the C<$input_filename_or_reference> parameter is a filehandle, the input 1212data will be read from it. The string '-' can be used as an alias for 1213standard input. 1214 1215=item A scalar reference 1216 1217If C<$input_filename_or_reference> is a scalar reference, the input data 1218will be read from C<$$input_filename_or_reference>. 1219 1220=item An array reference 1221 1222If C<$input_filename_or_reference> is an array reference, each element in 1223the array must be a filename. 1224 1225The input data will be read from each file in turn. 1226 1227The complete array will be walked to ensure that it only 1228contains valid filenames before any data is uncompressed. 1229 1230=item An Input FileGlob string 1231 1232If C<$input_filename_or_reference> is a string that is delimited by the 1233characters "<" and ">" C<unzip> will assume that it is an 1234I<input fileglob string>. The input is the list of files that match the 1235fileglob. 1236 1237See L<File::GlobMapper|File::GlobMapper> for more details. 1238 1239=back 1240 1241If the C<$input_filename_or_reference> parameter is any other type, 1242C<undef> will be returned. 1243 1244=head3 The C<$output_filename_or_reference> parameter 1245 1246The parameter C<$output_filename_or_reference> is used to control the 1247destination of the uncompressed data. This parameter can take one of 1248these forms. 1249 1250=over 5 1251 1252=item A filename 1253 1254If the C<$output_filename_or_reference> parameter is a simple scalar, it is 1255assumed to be a filename. This file will be opened for writing and the 1256uncompressed data will be written to it. 1257 1258=item A filehandle 1259 1260If the C<$output_filename_or_reference> parameter is a filehandle, the 1261uncompressed data will be written to it. The string '-' can be used as 1262an alias for standard output. 
1263 1264=item A scalar reference 1265 1266If C<$output_filename_or_reference> is a scalar reference, the 1267uncompressed data will be stored in C<$$output_filename_or_reference>. 1268 1269=item An Array Reference 1270 1271If C<$output_filename_or_reference> is an array reference, 1272the uncompressed data will be pushed onto the array. 1273 1274=item An Output FileGlob 1275 1276If C<$output_filename_or_reference> is a string that is delimited by the 1277characters "<" and ">" C<unzip> will assume that it is an 1278I<output fileglob string>. The output is the list of files that match the 1279fileglob. 1280 1281When C<$output_filename_or_reference> is a fileglob string, 1282C<$input_filename_or_reference> must also be a fileglob string. Anything 1283else is an error. 1284 1285See L<File::GlobMapper|File::GlobMapper> for more details. 1286 1287=back 1288 1289If the C<$output_filename_or_reference> parameter is any other type, 1290C<undef> will be returned. 1291 1292=head2 Notes 1293 1294When C<$input_filename_or_reference> maps to multiple compressed 1295files/buffers and C<$output_filename_or_reference> is 1296a single file/buffer, after uncompression C<$output_filename_or_reference> will contain a 1297concatenation of all the uncompressed data from each of the input 1298files/buffers. 1299 1300=head2 Optional Parameters 1301 1302The optional parameters for the one-shot function C<unzip> 1303are (for the most part) identical to those used with the OO interface defined in the 1304L</"Constructor Options"> section. The exceptions are listed below 1305 1306=over 5 1307 1308=item C<< AutoClose => 0|1 >> 1309 1310This option applies to any input or output data streams to 1311C<unzip> that are filehandles. 1312 1313If C<AutoClose> is specified, and the value is true, it will result in all 1314input and/or output filehandles being closed once C<unzip> has 1315completed. 1316 1317This parameter defaults to 0.
1318 1319=item C<< BinModeOut => 0|1 >> 1320 1321This option is now a no-op. All files will be written in binmode. 1322 1323=item C<< Append => 0|1 >> 1324 1325The behaviour of this option is dependent on the type of output data 1326stream. 1327 1328=over 5 1329 1330=item * A Buffer 1331 1332If C<Append> is enabled, all uncompressed data will be appended to the end of 1333the output buffer. Otherwise the output buffer will be cleared before any 1334uncompressed data is written to it. 1335 1336=item * A Filename 1337 1338If C<Append> is enabled, the file will be opened in append mode. Otherwise 1339the contents of the file, if any, will be truncated before any uncompressed 1340data is written to it. 1341 1342=item * A Filehandle 1343 1344If C<Append> is enabled, the filehandle will be positioned to the end of 1345the file via a call to C<seek> before any uncompressed data is 1346written to it. Otherwise the file pointer will not be moved. 1347 1348=back 1349 1350When C<Append> is specified, and set to true, it will I<append> all uncompressed 1351data to the output data stream. 1352 1353So when the output is a filehandle it will carry out a seek to the eof 1354before writing any uncompressed data. If the output is a filename, it will be opened for 1355appending. If the output is a buffer, all uncompressed data will be 1356appended to the existing buffer. 1357 1358Conversely when C<Append> is not specified, or it is present and is set to 1359false, it will operate as follows. 1360 1361When the output is a filename, it will truncate the contents of the file 1362before writing any uncompressed data. If the output is a filehandle 1363its position will not be changed. If the output is a buffer, it will be 1364wiped before any uncompressed data is output. 1365 1366Defaults to 0. 1367 1368=item C<< MultiStream => 0|1 >> 1369 1370If the input file/buffer contains multiple compressed data streams, this 1371option will uncompress the whole lot as a single data stream.
1372 1373Defaults to 0. 1374 1375=item C<< TrailingData => $scalar >> 1376 1377Returns the data, if any, that is present immediately after the compressed 1378data stream once uncompression is complete. 1379 1380This option can be used when there is useful information immediately 1381following the compressed data stream, and you don't know the length of the 1382compressed data stream. 1383 1384If the input is a buffer, C<trailingData> will return everything from the 1385end of the compressed data stream to the end of the buffer. 1386 1387If the input is a filehandle, C<trailingData> will return the data that is 1388left in the filehandle input buffer once the end of the compressed data 1389stream has been reached. You can then use the filehandle to read the rest 1390of the input file. 1391 1392Don't bother using C<trailingData> if the input is a filename. 1393 1394If you know the length of the compressed data stream before you start 1395uncompressing, you can avoid having to use C<trailingData> by setting the 1396C<InputLength> option. 1397 1398=back 1399 1400=head2 Examples 1401 1402Say you have a zip file, C<file1.zip>, that only contains a 1403single member, you can read it and write the uncompressed data to the 1404file C<file1.txt> like this. 
1405 1406 use strict ; 1407 use warnings ; 1408 use IO::Uncompress::Unzip qw(unzip $UnzipError) ; 1409 1410 my $input = "file1.zip"; 1411 my $output = "file1.txt"; 1412 unzip $input => $output 1413 or die "unzip failed: $UnzipError\n"; 1414 1415If you have a zip file that contains multiple members and want to read a 1416specific member from the file, say C<"data1">, use the C<Name> option 1417 1418 use strict ; 1419 use warnings ; 1420 use IO::Uncompress::Unzip qw(unzip $UnzipError) ; 1421 1422 my $input = "file1.zip"; 1423 my $output = "file1.txt"; 1424 unzip $input => $output, Name => "data1" 1425 or die "unzip failed: $UnzipError\n"; 1426 1427Alternatively, if you want to read the C<"data1"> member into memory, use 1428a scalar reference for the C<output> parameter. 1429 1430 use strict ; 1431 use warnings ; 1432 use IO::Uncompress::Unzip qw(unzip $UnzipError) ; 1433 1434 my $input = "file1.zip"; 1435 my $output ; 1436 unzip $input => \$output, Name => "data1" 1437 or die "unzip failed: $UnzipError\n"; 1438 # $output now contains the uncompressed data 1439 1440To read from an existing Perl filehandle, C<$input>, and write the 1441uncompressed data to a buffer, C<$buffer>. 1442 1443 use strict ; 1444 use warnings ; 1445 use IO::Uncompress::Unzip qw(unzip $UnzipError) ; 1446 use IO::File ; 1447 1448 my $input = IO::File->new( "<file1.zip" ) 1449 or die "Cannot open 'file1.zip': $!\n" ; 1450 my $buffer ; 1451 unzip $input => \$buffer 1452 or die "unzip failed: $UnzipError\n"; 1453 1454=head1 OO Interface 1455 1456=head2 Constructor 1457 1458The format of the constructor for IO::Uncompress::Unzip is shown below 1459 1460 my $z = IO::Uncompress::Unzip->new( $input [OPTS] ) 1461 or die "IO::Uncompress::Unzip failed: $UnzipError\n"; 1462 1463Returns an C<IO::Uncompress::Unzip> object on success and undef on failure. 1464The variable C<$UnzipError> will contain an error message on failure. 
1465 1466If you are running Perl 5.005 or better the object, C<$z>, returned from 1467IO::Uncompress::Unzip can be used exactly like an L<IO::File|IO::File> filehandle. 1468This means that all normal input file operations can be carried out with 1469C<$z>. For example, to read a line from a compressed file/buffer you can 1470use either of these forms 1471 1472 $line = $z->getline(); 1473 $line = <$z>; 1474 1475The mandatory parameter C<$input> is used to determine the source of the 1476compressed data. This parameter can take one of three forms. 1477 1478=over 5 1479 1480=item A filename 1481 1482If the C<$input> parameter is a scalar, it is assumed to be a filename. This 1483file will be opened for reading and the compressed data will be read from it. 1484 1485=item A filehandle 1486 1487If the C<$input> parameter is a filehandle, the compressed data will be 1488read from it. 1489The string '-' can be used as an alias for standard input. 1490 1491=item A scalar reference 1492 1493If C<$input> is a scalar reference, the compressed data will be read from 1494C<$$input>. 1495 1496=back 1497 1498=head2 Constructor Options 1499 1500The option names defined below are case insensitive and can be optionally 1501prefixed by a '-'. So all of the following are valid 1502 1503 -AutoClose 1504 -autoclose 1505 AUTOCLOSE 1506 autoclose 1507 1508OPTS is a combination of the following options: 1509 1510=over 5 1511 1512=item C<< Name => "membername" >> 1513 1514Open "membername" from the zip file for reading. 1515 1516=item C<< Efs => 0| 1 >> 1517 1518When this option is set to true AND the zip archive being read has 1519the "Language Encoding Flag" (EFS) set, the member name is assumed to be encoded in UTF-8. 1520 1521If the member name in the zip archive is not valid UTF-8 when this option is true, 1522the script will die with an error message. 1523 1524Note that this option only works with Perl 5.8.4 or better. 1525 1526This option defaults to B<false>.
1527 1528=item C<< AutoClose => 0|1 >> 1529 1530This option is only valid when the C<$input> parameter is a filehandle. If 1531specified, and the value is true, it will result in the file being closed once 1532either the C<close> method is called or the IO::Uncompress::Unzip object is 1533destroyed. 1534 1535This parameter defaults to 0. 1536 1537=item C<< MultiStream => 0|1 >> 1538 1539Treats the complete zip file/buffer as a single compressed data 1540stream. When reading in multi-stream mode each member of the zip 1541file/buffer will be uncompressed in turn until the end of the file/buffer 1542is encountered. 1543 1544This parameter defaults to 0. 1545 1546=item C<< Prime => $string >> 1547 1548This option will uncompress the contents of C<$string> before processing the 1549input file/buffer. 1550 1551This option can be useful when the compressed data is embedded in another 1552file/data structure and it is not possible to work out where the compressed 1553data begins without having to read the first few bytes. If this is the 1554case, the uncompression can be I<primed> with these bytes using this 1555option. 1556 1557=item C<< Transparent => 0|1 >> 1558 1559If this option is set and the input file/buffer is not compressed data, 1560the module will allow reading of it anyway. 1561 1562In addition, if the input file/buffer does contain compressed data and 1563there is non-compressed data immediately following it, setting this option 1564will make this module treat the whole file/buffer as a single data stream. 1565 1566This option defaults to 1. 1567 1568=item C<< BlockSize => $num >> 1569 1570When reading the compressed input data, IO::Uncompress::Unzip will read it in 1571blocks of C<$num> bytes. 1572 1573This option defaults to 4096. 1574 1575=item C<< InputLength => $size >> 1576 1577When present this option will limit the number of compressed bytes read 1578from the input file/buffer to C<$size>. 
This option can be used in the 1579situation where there is useful data directly after the compressed data 1580stream and you know beforehand the exact length of the compressed data 1581stream. 1582 1583This option is mostly used when reading from a filehandle, in which case 1584the file pointer will be left pointing to the first byte directly after the 1585compressed data stream. 1586 1587This option defaults to off. 1588 1589=item C<< Append => 0|1 >> 1590 1591This option controls what the C<read> method does with uncompressed data. 1592 1593If set to 1, all uncompressed data will be appended to the output parameter 1594of the C<read> method. 1595 1596If set to 0, the contents of the output parameter of the C<read> method 1597will be overwritten by the uncompressed data. 1598 1599Defaults to 0. 1600 1601=item C<< Strict => 0|1 >> 1602 1603This option controls whether the extra checks defined below are used when 1604carrying out the decompression. When Strict is on, the extra tests are 1605carried out, when Strict is off they are not. 1606 1607The default for this option is off. 1608 1609=back 1610 1611=head2 Examples 1612 1613TODO 1614 1615=head1 Methods 1616 1617=head2 read 1618 1619Usage is 1620 1621 $status = $z->read($buffer) 1622 1623Reads a block of compressed data (the size of the compressed block is 1624determined by the C<BlockSize> option in the constructor), uncompresses it and 1625writes any uncompressed data into C<$buffer>. If the C<Append> parameter is 1626set in the constructor, the uncompressed data will be appended to the 1627C<$buffer> parameter. Otherwise C<$buffer> will be overwritten. 1628 1629Returns the number of uncompressed bytes written to C<$buffer>, zero if eof 1630or a negative number on error.
1631 1632=head2 read 1633 1634Usage is 1635 1636 $status = $z->read($buffer, $length) 1637 $status = $z->read($buffer, $length, $offset) 1638 1639 $status = read($z, $buffer, $length) 1640 $status = read($z, $buffer, $length, $offset) 1641 1642Attempt to read C<$length> bytes of uncompressed data into C<$buffer>. 1643 1644The main difference between this form of the C<read> method and the 1645previous one, is that this one will attempt to return I<exactly> C<$length> 1646bytes. The only circumstances in which it will not do so are when end-of-file 1647or an IO error is encountered. 1648 1649Returns the number of uncompressed bytes written to C<$buffer>, zero if eof 1650or a negative number on error. 1651 1652=head2 getline 1653 1654Usage is 1655 1656 $line = $z->getline() 1657 $line = <$z> 1658 1659Reads a single line. 1660 1661This method fully supports the use of the variable C<$/> (or 1662C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to 1663determine what constitutes an end of line. Paragraph mode, record mode and 1664file slurp mode are all supported. 1665 1666=head2 getc 1667 1668Usage is 1669 1670 $char = $z->getc() 1671 1672Read a single character. 1673 1674=head2 ungetc 1675 1676Usage is 1677 1678 $char = $z->ungetc($string) 1679 1680=head2 inflateSync 1681 1682Usage is 1683 1684 $status = $z->inflateSync() 1685 1686TODO 1687 1688=head2 getHeaderInfo 1689 1690Usage is 1691 1692 $hdr = $z->getHeaderInfo(); 1693 @hdrs = $z->getHeaderInfo(); 1694 1695This method returns either a hash reference (in scalar context) or a list 1696of hash references (in array context) that contains information about each 1697of the header fields in the compressed data stream(s). 1698 1699=head2 tell 1700 1701Usage is 1702 1703 $z->tell() 1704 tell $z 1705 1706Returns the uncompressed file offset. 1707 1708=head2 eof 1709 1710Usage is 1711 1712 $z->eof(); 1713 eof($z); 1714 1715Returns true if the end of the compressed input stream has been reached.
1716 1717=head2 seek 1718 1719 $z->seek($position, $whence); 1720 seek($z, $position, $whence); 1721 1722Provides a sub-set of the C<seek> functionality, with the restriction 1723that it is only legal to seek forward in the input file/buffer. 1724It is a fatal error to attempt to seek backward. 1725 1726Note that the implementation of C<seek> in this module does not provide 1727true random access to a compressed file/buffer. It works by uncompressing 1728data from the current offset in the file/buffer until it reaches the 1729uncompressed offset specified in the parameters to C<seek>. For very small 1730files this may be acceptable behaviour. For large files it may cause an 1731unacceptable delay. 1732 1733The C<$whence> parameter takes one of the usual values, namely SEEK_SET, 1734SEEK_CUR or SEEK_END. 1735 1736Returns 1 on success, 0 on failure. 1737 1738=head2 binmode 1739 1740Usage is 1741 1742 $z->binmode 1743 binmode $z ; 1744 1745This is a noop provided for completeness. 1746 1747=head2 opened 1748 1749 $z->opened() 1750 1751Returns true if the object currently refers to an opened file/buffer. 1752 1753=head2 autoflush 1754 1755 my $prev = $z->autoflush() 1756 my $prev = $z->autoflush(EXPR) 1757 1758If the C<$z> object is associated with a file or a filehandle, this method 1759returns the current autoflush setting for the underlying filehandle. If 1760C<EXPR> is present, and is non-zero, it will enable flushing after every 1761write/print operation. 1762 1763If C<$z> is associated with a buffer, this method has no effect and always 1764returns C<undef>. 1765 1766B<Note> that the special variable C<$|> B<cannot> be used to set or 1767retrieve the autoflush setting. 1768 1769=head2 input_line_number 1770 1771 $z->input_line_number() 1772 $z->input_line_number(EXPR) 1773 1774Returns the current uncompressed line number. If C<EXPR> is present it has 1775the effect of setting the line number.
Note that setting the line number 1776does not change the current position within the file/buffer being read. 1777 1778The contents of C<$/> are used to determine what constitutes a line 1779terminator. 1780 1781=head2 fileno 1782 1783 $z->fileno() 1784 fileno($z) 1785 1786If the C<$z> object is associated with a file or a filehandle, C<fileno> 1787will return the underlying file descriptor. Once the C<close> method is 1788called C<fileno> will return C<undef>. 1789 1790If the C<$z> object is associated with a buffer, this method will return 1791C<undef>. 1792 1793=head2 close 1794 1795 $z->close() ; 1796 close $z ; 1797 1798Closes the input file/buffer. 1799 1800For most versions of Perl this method will be automatically invoked if 1801the IO::Uncompress::Unzip object is destroyed (either explicitly or by the 1802variable with the reference to the object going out of scope). The 1803exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In 1804these cases, the C<close> method will be called automatically, but 1805not until global destruction of all live objects when the program is 1806terminating. 1807 1808Therefore, if you want your scripts to be able to run on all versions 1809of Perl, you should call C<close> explicitly and not rely on automatic 1810closing. 1811 1812Returns true on success, otherwise 0. 1813 1814If the C<AutoClose> option has been enabled when the IO::Uncompress::Unzip 1815object was created, and the object is associated with a file, the 1816underlying file will also be closed. 1817 1818=head2 nextStream 1819 1820Usage is 1821 1822 my $status = $z->nextStream(); 1823 1824Skips to the next compressed data stream in the input file/buffer. If a new 1825compressed data stream is found, the eof marker will be cleared and C<$.> 1826will be reset to 0.
1827 1828If trailing data is present immediately after the zip archive and the 1829C<Transparent> option is enabled, this method will consider that trailing 1830data to be another member of the zip archive. 1831 1832Returns 1 if a new stream was found, 0 if none was found, and -1 if an 1833error was encountered. 1834 1835=head2 trailingData 1836 1837Usage is 1838 1839 my $data = $z->trailingData(); 1840 1841Returns the data, if any, that is present immediately after the compressed 1842data stream once uncompression is complete. It only makes sense to call 1843this method once the end of the compressed data stream has been 1844encountered. 1845 1846This option can be used when there is useful information immediately 1847following the compressed data stream, and you don't know the length of the 1848compressed data stream. 1849 1850If the input is a buffer, C<trailingData> will return everything from the 1851end of the compressed data stream to the end of the buffer. 1852 1853If the input is a filehandle, C<trailingData> will return the data that is 1854left in the filehandle input buffer once the end of the compressed data 1855stream has been reached. You can then use the filehandle to read the rest 1856of the input file. 1857 1858Don't bother using C<trailingData> if the input is a filename. 1859 1860If you know the length of the compressed data stream before you start 1861uncompressing, you can avoid having to use C<trailingData> by setting the 1862C<InputLength> option in the constructor. 1863 1864=head1 Importing 1865 1866No symbolic constants are required by IO::Uncompress::Unzip at present. 1867 1868=over 5 1869 1870=item :all 1871 1872Imports C<unzip> and C<$UnzipError>. 
1873Same as doing this 1874 1875 use IO::Uncompress::Unzip qw(unzip $UnzipError) ; 1876 1877=back 1878 1879=head1 EXAMPLES 1880 1881=head2 Working with Net::FTP 1882 1883See L<IO::Compress::FAQ|IO::Compress::FAQ/"Compressed files and Net::FTP"> 1884 1885=head2 Walking through a zip file 1886 1887The code below can be used to traverse a zip file, one compressed data 1888stream at a time. 1889 1890 use IO::Uncompress::Unzip qw($UnzipError); 1891 1892 my $zipfile = "somefile.zip"; 1893 my $u = IO::Uncompress::Unzip->new( $zipfile ) 1894 or die "Cannot open $zipfile: $UnzipError"; 1895 1896 my $status; 1897 for ($status = 1; $status > 0; $status = $u->nextStream()) 1898 { 1899 1900 my $name = $u->getHeaderInfo()->{Name}; 1901 warn "Processing member $name\n" ; 1902 1903 my $buff; 1904 while (($status = $u->read($buff)) > 0) { 1905 # Do something here 1906 } 1907 1908 last if $status < 0; 1909 } 1910 1911 die "Error processing $zipfile: $!\n" 1912 if $status < 0 ; 1913 1914Each individual compressed data stream is read until the logical 1915end-of-file is reached. Then C<nextStream> is called. This will skip to the 1916start of the next compressed data stream and clear the end-of-file flag. 1917 1918It is also worth noting that C<nextStream> can be called at any time -- you 1919don't have to wait until you have exhausted a compressed data stream before 1920skipping to the next one. 1921 1922=head2 Unzipping a complete zip file to disk 1923 1924Daniel S. Sterling has written a script that uses C<IO::Uncompress::Unzip> 1925to read a zip file and unzip its contents to disk. 1926 1927The script is available from L<https://gist.github.com/eqhmcow/5389877> 1928 1929=head1 SUPPORT 1930 1931General feedback/questions/bug reports should be sent to 1932L<https://github.com/pmqs/IO-Compress/issues> (preferred) or 1933L<https://rt.cpan.org/Public/Dist/Display.html?Name=IO-Compress>.
1934 1935=head1 SEE ALSO 1936 1937L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzma>, L<IO::Uncompress::UnLzma>, L<IO::Compress::Xz>, L<IO::Uncompress::UnXz>, L<IO::Compress::Lzip>, L<IO::Uncompress::UnLzip>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Compress::Lzf>, L<IO::Uncompress::UnLzf>, L<IO::Compress::Zstd>, L<IO::Uncompress::UnZstd>, L<IO::Uncompress::AnyInflate>, L<IO::Uncompress::AnyUncompress> 1938 1939L<IO::Compress::FAQ|IO::Compress::FAQ> 1940 1941L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>, 1942L<Archive::Tar|Archive::Tar>, 1943L<IO::Zlib|IO::Zlib> 1944 1945For RFC 1950, 1951 and 1952 see 1946L<http://www.faqs.org/rfcs/rfc1950.html>, 1947L<http://www.faqs.org/rfcs/rfc1951.html> and 1948L<http://www.faqs.org/rfcs/rfc1952.html> 1949 1950The I<zlib> compression library was written by Jean-loup Gailly 1951C<gzip@prep.ai.mit.edu> and Mark Adler C<madler@alumni.caltech.edu>. 1952 1953The primary site for the I<zlib> compression library is 1954L<http://www.zlib.org>. 1955 1956The primary site for gzip is L<http://www.gzip.org>. 1957 1958=head1 AUTHOR 1959 1960This module was written by Paul Marquess, C<pmqs@cpan.org>. 1961 1962=head1 MODIFICATION HISTORY 1963 1964See the Changes file. 1965 1966=head1 COPYRIGHT AND LICENSE 1967 1968Copyright (c) 2005-2021 Paul Marquess. All rights reserved. 1969 1970This program is free software; you can redistribute it and/or 1971modify it under the same terms as Perl itself. 1972