package IO::Uncompress::Unzip;

require 5.006 ;

# for RFC1952

use strict ;
use warnings;
use bytes;

use IO::File;
use IO::Uncompress::RawInflate  2.212 ;
use IO::Compress::Base::Common  2.212 qw(:Status );
use IO::Uncompress::Adapter::Inflate  2.212 ;
use IO::Uncompress::Adapter::Identity 2.212 ;
use IO::Compress::Zlib::Extra 2.212 ;
use IO::Compress::Zip::Constants 2.212 ;

use Compress::Raw::Zlib  2.212 () ;

# The adapters below are optional. Each one is loaded on a best-effort
# basis; _readZipHeader later checks the adapter's $VERSION to decide
# whether the matching compression method can be handled.
BEGIN
{
    # Don't trigger any __DIE__ Hooks.
    local $SIG{__DIE__};

    eval{ require IO::Uncompress::Adapter::Bunzip2 ;
          IO::Uncompress::Adapter::Bunzip2->VERSION(2.212) } ;
    eval{ require IO::Uncompress::Adapter::UnLzma ;
          IO::Uncompress::Adapter::UnLzma->VERSION(2.212) } ;
    eval{ require IO::Uncompress::Adapter::UnXz ;
          IO::Uncompress::Adapter::UnXz->VERSION(2.212) } ;
    eval{ require IO::Uncompress::Adapter::UnZstd ;
          IO::Uncompress::Adapter::UnZstd->VERSION(2.212) } ;
}


require Exporter ;

our ($VERSION, @ISA, @EXPORT_OK, %EXPORT_TAGS, $UnzipError, %headerLookup);

$VERSION = '2.212';
$UnzipError = '';

@ISA    = qw(IO::Uncompress::RawInflate Exporter);
@EXPORT_OK = qw($UnzipError unzip );
%EXPORT_TAGS = %IO::Uncompress::RawInflate::EXPORT_TAGS ;
push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
Exporter::export_ok_tags('all');

# Dispatch table used by chkTrailer: maps the 4-byte signature of each
# record that can follow the member data onto the sub that consumes it.
%headerLookup = (
        ZIP_CENTRAL_HDR_SIG,            \&skipCentralDirectory,
        ZIP_END_CENTRAL_HDR_SIG,        \&skipEndCentralDirectory,
        ZIP64_END_CENTRAL_REC_HDR_SIG,  \&skipCentralDirectory64Rec,
        ZIP64_END_CENTRAL_LOC_HDR_SIG,  \&skipCentralDirectory64Loc,
        ZIP64_ARCHIVE_EXTRA_SIG,        \&skipArchiveExtra,
        ZIP64_DIGITAL_SIGNATURE_SIG,    \&skipDigitalSignature,
        );

# Human readable names reported as 'MethodName' by _readZipHeader.
my %MethodNames = (
        ZIP_CM_DEFLATE()    => 'Deflated',
        ZIP_CM_BZIP2()      => 'Bzip2',
        ZIP_CM_LZMA()       => 'Lzma',
        ZIP_CM_STORE()      => 'Stored',
        ZIP_CM_XZ()         => 'Xz',
        ZIP_CM_ZSTD()       => 'Zstd',
    );

# Constructor. Creates the object with createSelfTiedObject (bound to
# $UnzipError) and passes the caller's arguments on to _create.
sub new
{
    my $class = shift ;
    my $obj = IO::Compress::Base::Common::createSelfTiedObject($class, \$UnzipError);
    $obj->_create(undef, 0, @_);
}

# Functional ("one-shot") interface; delegates everything to _inf.
sub unzip
{
    my $obj = IO::Compress::Base::Common::createSelfTiedObject(undef, \$UnzipError);
    return $obj->_inf(@_) ;
}

# Returns the option specifications this class adds: 'name', 'stream'
# and 'efs'.
sub getExtraParams
{

    return (
#            # Zip header fields
            'name'    => [IO::Compress::Base::Common::Parse_any,       undef],

            'stream'  => [IO::Compress::Base::Common::Parse_boolean,   0],
            'efs'     => [IO::Compress::Base::Common::Parse_boolean,   0],

            # TODO - This means reading the central directory to get
            # 1. the local header offsets
            # 2. The compressed data length
        );
}

# Post-processes the parsed options in $got: forces 'crc32' on and
# records the 'name' and 'efs' values in the object. Always returns 1.
sub ckParams
{
    my $self = shift ;
    my $got = shift ;

    # unzip always needs crc32
    $got->setValue('crc32' => 1);

    *$self->{UnzipData}{Name} = $got->getValue('name');
    *$self->{UnzipData}{efs} = $got->getValue('efs');

    return 1;
}

# Checks the magic number, then reads the first wanted local header
# into *$self->{Info}. Returns 1 on success, 0 when the magic check
# fails and undef when the header cannot be read.
sub mkUncomp
{
    my $self = shift ;
    my $got = shift ;

    my $magic = $self->ckMagic()
        or return 0;

    *$self->{Info} = $self->readHeader($magic)
        or return undef ;

    return 1;

}

# Reads 4 bytes and verifies they are a zip local file header
# signature. Returns the 4 bytes on success; otherwise returns whatever
# HeaderError returns. The bytes read are kept in HeaderPending.
sub ckMagic
{
    my $self = shift;

    my $magic ;
    $self->smartReadExact(\$magic, 4);

    *$self->{HeaderPending} = $magic ;

    return $self->HeaderError("Minimum header size is " .
                              4 . " bytes")
        if length $magic != 4 ;

    return $self->HeaderError("Bad Magic")
        if ! _isZipMagic($magic) ;

    *$self->{Type} = 'zip';

    return $magic ;
}


# Discards $offset bytes of input by reading them in chunks of at most
# 16 KiB. Returns 1 when all the bytes were consumed, 0 as soon as a
# read fails.
sub fastForward
{
    my $self = shift;
    my $offset = shift;

    # TODO - if Stream isn't enabled & reading from file, use seek

    my $buffer = '';
    my $c = 1024 * 16;

    while ($offset > 0)
    {
        # Clamp the final chunk to the number of bytes still to skip.
        # (This used to compare against "length $offset", i.e. the number
        # of digits in $offset, so data was skipped a few bytes per read.)
        $c = $offset
            if $offset < $c ;

        $offset -= $c;

        $self->smartReadExact(\$buffer, $c)
            or return 0;
    }

    return 1;
}


# Reads local headers, starting with the one introduced by $magic, until
# one matches the requested member name (or takes the first one when no
# name was requested). The data, trailer and any directory records of
# every member that does not match are skipped.
# Returns the header hash reference, or undef on failure.
sub readHeader
{
    my $self = shift;
    my $magic = shift ;

    my $name =  *$self->{UnzipData}{Name} ;
    my $hdr = $self->_readZipHeader($magic) ;

    while (defined $hdr)
    {
        if (! defined $name || $hdr->{Name} eq $name)
        {
            return $hdr ;
        }

        # skip the data
        # TODO - when Stream is off, use seek
        my $buffer;
        if (*$self->{ZipData}{Streaming}) {
            # The compressed length is not known up front, so run the
            # data through the uncompressor until it reports end-of-stream.
            while (1) {

                my $b;
                my $status = $self->smartRead(\$b, 1024 * 16);

                return $self->saveErrorString(undef, "Truncated file")
                    if $status <= 0 ;

                my $temp_buf ;
                my $out;

                $status = *$self->{Uncomp}->uncompr(\$b, \$temp_buf, 0, $out);

                return $self->saveErrorString(undef, *$self->{Uncomp}{Error},
                                                     *$self->{Uncomp}{ErrorNo})
                    if $self->saveStatus($status) == STATUS_ERROR;

                # Hand back whatever is left in $b after uncompr
                $self->pushBack($b)  ;

                if ($status == STATUS_ENDSTREAM) {
                    *$self->{Uncomp}->reset();
                    last;
                }
            }

            # skip the trailer
            $self->smartReadExact(\$buffer, $hdr->{TrailerLength})
                or return $self->saveErrorString(undef, "Truncated file");
        }
        else {
            my $c = $hdr->{CompressedLength}->get64bit();
            $self->fastForward($c)
                or return $self->saveErrorString(undef, "Truncated file");
            $buffer = '';
        }

        $self->chkTrailer($buffer) == STATUS_OK
            or return $self->saveErrorString(undef, "Truncated file");

        $hdr = $self->_readFullZipHeader();

        return $self->saveErrorString(undef, "Cannot find '$name'")
            if $self->smartEof();
    }

    return undef;
}

# Validates the trailer of the current member and then consumes any
# central directory style records that follow it.
#
# $trailer is the raw data descriptor when the member is streamed; for
# non-streamed members the values saved from the local header are used.
#
# Returns STATUS_OK when another local header follows, when the end of
# central directory record has been consumed, or when fewer than 4 bytes
# remain (non-strict mode); STATUS_EOF when nothing follows;
# STATUS_ERROR otherwise.
sub chkTrailer
{
    my $self = shift;
    my $trailer = shift;

    my ($sig, $CRC32, $cSize, $uSize) ;
    my ($cSizeHi, $uSizeHi) = (0, 0);
    if (*$self->{ZipData}{Streaming}) {
        $sig   = unpack ("V", substr($trailer, 0, 4));
        $CRC32 = unpack ("V", substr($trailer, 4, 4));

        # Zip64 data descriptors carry 8-byte sizes, others 4-byte sizes
        if (*$self->{ZipData}{Zip64} ) {
            $cSize = U64::newUnpack_V64 substr($trailer,  8, 8);
            $uSize = U64::newUnpack_V64 substr($trailer, 16, 8);
        }
        else {
            $cSize = U64::newUnpack_V32 substr($trailer,  8, 4);
            $uSize = U64::newUnpack_V32 substr($trailer, 12, 4);
        }

        return $self->TrailerError("Data Descriptor signature, got $sig")
            if $sig != ZIP_DATA_HDR_SIG;
    }
    else {
        ($CRC32, $cSize, $uSize) =
            (*$self->{ZipData}{Crc32},
             *$self->{ZipData}{CompressedLen},
             *$self->{ZipData}{UnCompressedLen});
    }

    # Note: {ZipData}{CRC32} is the running CRC maintained by
    # filterUncompressed; {ZipData}{Crc32} is the value from the header.
    *$self->{Info}{CRC32} = *$self->{ZipData}{CRC32} ;
    *$self->{Info}{CompressedLength} = $cSize->get64bit();
    *$self->{Info}{UncompressedLength} = $uSize->get64bit();

    if (*$self->{Strict}) {
        return $self->TrailerError("CRC mismatch")
            if $CRC32  != *$self->{ZipData}{CRC32} ;

        return $self->TrailerError("CSIZE mismatch.")
            if ! $cSize->equal(*$self->{CompSize});

        return $self->TrailerError("USIZE mismatch.")
            if ! $uSize->equal(*$self->{UnCompSize});
    }

    my $reachedEnd = STATUS_ERROR ;
    # check for central directory or end of central directory
    while (1)
    {
        my $magic ;
        my $got = $self->smartRead(\$magic, 4);

        return $self->saveErrorString(STATUS_ERROR, "Truncated file")
            if $got != 4 && *$self->{Strict};

        if ($got == 0) {
            return STATUS_EOF ;
        }
        elsif ($got < 0) {
            return STATUS_ERROR ;
        }
        elsif ($got < 4) {
            $self->pushBack($magic)  ;
            return STATUS_OK ;
        }

        my $sig = unpack("V", $magic) ;

        my $hdr;
        if ($hdr = $headerLookup{$sig})
        {
            if (&$hdr($self, $magic) != STATUS_OK ) {
                if (*$self->{Strict}) {
                    return STATUS_ERROR ;
                }
                else {
                    $self->clearError();
                    return STATUS_OK ;
                }
            }

            # The end of central directory record is the last thing
            # consumed here.
            if ($sig == ZIP_END_CENTRAL_HDR_SIG)
            {
                return STATUS_OK ;
            }
        }
        elsif ($sig == ZIP_LOCAL_HDR_SIG)
        {
            # Start of the next member; leave it for the header reader
            $self->pushBack($magic)  ;
            return STATUS_OK ;
        }
        else
        {
            # put the data back
            $self->pushBack($magic)  ;
            last;
        }
    }

    return $reachedEnd ;
}

# Consumes one central directory file header: the fixed 46 bytes (the
# 4-byte signature is supplied in $magic) plus the variable length
# filename, extra field and comment. Returns STATUS_OK on success.
sub skipCentralDirectory
{
    my $self = shift;
    my $magic = shift ;

    my $buffer;
    $self->smartReadExact(\$buffer, 46 - 4)
        or return $self->TrailerError("Minimum header size is " .
                                     46 . " bytes") ;

    my $keep = $magic . $buffer ;
    *$self->{HeaderPending} = $keep ;

    # Offsets are relative to the start of the record, hence the "-4"
    # to allow for the signature that is not part of $buffer.
   #my $versionMadeBy      = unpack ("v", substr($buffer,  4-4,  2));
   #my $extractVersion     = unpack ("v", substr($buffer,  6-4,  2));
   #my $gpFlag             = unpack ("v", substr($buffer,  8-4,  2));
   #my $compressedMethod   = unpack ("v", substr($buffer, 10-4,  2));
   #my $lastModTime        = unpack ("V", substr($buffer, 12-4,  4));
   #my $crc32              = unpack ("V", substr($buffer, 16-4,  4));
    my $compressedLength   = unpack ("V", substr($buffer, 20-4,  4));
    my $uncompressedLength = unpack ("V", substr($buffer, 24-4,  4));
    my $filename_length    = unpack ("v", substr($buffer, 28-4,  2));
    my $extra_length       = unpack ("v", substr($buffer, 30-4,  2));
    my $comment_length     = unpack ("v", substr($buffer, 32-4,  2));
   #my $disk_start         = unpack ("v", substr($buffer, 34-4,  2));
   #my $int_file_attrib    = unpack ("v", substr($buffer, 36-4,  2));
   #my $ext_file_attrib    = unpack ("V", substr($buffer, 38-4,  2));
   #my $lcl_hdr_offset     = unpack ("V", substr($buffer, 42-4,  2));


    my $filename;
    my $extraField;
    my $comment ;
    if ($filename_length)
    {
        $self->smartReadExact(\$filename, $filename_length)
            or return $self->TruncatedTrailer("filename");
        $keep .= $filename ;
    }

    if ($extra_length)
    {
        $self->smartReadExact(\$extraField, $extra_length)
            or return $self->TruncatedTrailer("extra");
        $keep .= $extraField ;
    }

    if ($comment_length)
    {
        $self->smartReadExact(\$comment, $comment_length)
            or return $self->TruncatedTrailer("comment");
        $keep .= $comment ;
    }

    return STATUS_OK ;
}

# Consumes an archive extra data record: a 4-byte length followed by
# that many bytes. Returns STATUS_OK on success.
sub skipArchiveExtra
{
    my $self = shift;
    my $magic = shift ;

    my $buffer;
    $self->smartReadExact(\$buffer, 4)
        or return $self->TrailerError("Minimum header size is " .
                                     4 . " bytes") ;

    my $keep = $magic . $buffer ;

    my $size = unpack ("V", $buffer);

    $self->smartReadExact(\$buffer, $size)
        or return $self->TrailerError("Minimum header size is " .
                                     $size . " bytes") ;

    $keep .= $buffer ;
    *$self->{HeaderPending} = $keep ;

    return STATUS_OK ;
}


# Consumes a Zip64 end of central directory record: an 8-byte size
# followed by that many bytes, which are skipped with fastForward.
# Returns STATUS_OK on success.
sub skipCentralDirectory64Rec
{
    my $self = shift;
    my $magic = shift ;

    my $buffer;
    $self->smartReadExact(\$buffer, 8)
        or return $self->TrailerError("Minimum header size is " .
                                     8 . " bytes") ;

    my $keep = $magic . $buffer ;

    my ($sizeLo, $sizeHi)  = unpack ("V V", $buffer);
    # NOTE(review): U64::MAX32 is defined outside this file. If it is
    # 0xFFFFFFFF rather than 2**32, this is off by $sizeHi whenever the
    # high word is non-zero. offsetFromZip64 decodes the same field with
    # U64::Value_VV64 -- confirm which is intended.
    my $size = $sizeHi * U64::MAX32 + $sizeLo;

    $self->fastForward($size)
        or return $self->TrailerError("Minimum header size is " .
                                     $size . " bytes") ;

    #$keep .= $buffer ;
    #*$self->{HeaderPending} = $keep ;

   #my $versionMadeBy      = unpack ("v",   substr($buffer,  0, 2));
   #my $extractVersion     = unpack ("v",   substr($buffer,  2, 2));
   #my $diskNumber         = unpack ("V",   substr($buffer,  4, 4));
   #my $cntrlDirDiskNo     = unpack ("V",   substr($buffer,  8, 4));
   #my $entriesInThisCD    = unpack ("V V", substr($buffer, 12, 8));
   #my $entriesInCD        = unpack ("V V", substr($buffer, 20, 8));
   #my $sizeOfCD           = unpack ("V V", substr($buffer, 28, 8));
   #my $offsetToCD         = unpack ("V V", substr($buffer, 36, 8));

    return STATUS_OK ;
}

# Consumes the fixed-size (20 byte) Zip64 end of central directory
# locator. Returns STATUS_OK on success.
sub skipCentralDirectory64Loc
{
    my $self = shift;
    my $magic = shift ;

    my $buffer;
    $self->smartReadExact(\$buffer, 20 - 4)
        or return $self->TrailerError("Minimum header size is " .
                                     20 . " bytes") ;

    my $keep = $magic . $buffer ;
    *$self->{HeaderPending} = $keep ;

   #my $startCdDisk        = unpack ("V",   substr($buffer,  4-4, 4));
   #my $offsetToCD         = unpack ("V V", substr($buffer,  8-4, 8));
   #my $diskCount          = unpack ("V",   substr($buffer, 16-4, 4));

    return STATUS_OK ;
}

# Consumes the end of central directory record: the fixed 22 bytes plus
# the optional archive comment. Returns STATUS_OK on success.
sub skipEndCentralDirectory
{
    my $self = shift;
    my $magic = shift ;


    my $buffer;
    $self->smartReadExact(\$buffer, 22 - 4)
        or return $self->TrailerError("Minimum header size is " .
                                     22 . " bytes") ;

    my $keep = $magic . $buffer ;
    *$self->{HeaderPending} = $keep ;

   #my $diskNumber         = unpack ("v", substr($buffer,  4-4, 2));
   #my $cntrlDirDiskNo     = unpack ("v", substr($buffer,  6-4, 2));
   #my $entriesInThisCD    = unpack ("v", substr($buffer,  8-4, 2));
   #my $entriesInCD        = unpack ("v", substr($buffer, 10-4, 2));
   #my $sizeOfCD           = unpack ("V", substr($buffer, 12-4, 4));
   #my $offsetToCD         = unpack ("V", substr($buffer, 16-4, 4));
    my $comment_length     = unpack ("v", substr($buffer, 20-4, 2));


    my $comment ;
    if ($comment_length)
    {
        $self->smartReadExact(\$comment, $comment_length)
            or return $self->TruncatedTrailer("comment");
        $keep .= $comment ;
    }

    return STATUS_OK ;
}


# True when the first 4 bytes of $buffer, read as a little-endian
# 32-bit value, equal the local file header signature.
sub _isZipMagic
{
    my $buffer = shift ;
    return 0 if length $buffer < 4 ;
    my $sig = unpack("V", $buffer) ;
    return $sig == ZIP_LOCAL_HDR_SIG ;
}


# Reads a complete local header, signature included, from the current
# position. Returns the header hash reference from _readZipHeader, or
# the result of HeaderError when the signature is missing or wrong.
sub _readFullZipHeader($)
{
    my ($self) = @_ ;
    my $magic = '' ;

    $self->smartReadExact(\$magic, 4);

    *$self->{HeaderPending} = $magic ;

    return $self->HeaderError("Minimum header size is " .
                              30 . " bytes")
        if length $magic != 4 ;


    return $self->HeaderError("Bad Magic")
        if ! _isZipMagic($magic) ;

    my $status = $self->_readZipHeader($magic);
    delete *$self->{Transparent} if ! defined $status ;
    return $status ;
}

# Parses the remainder of a local file header whose 4-byte signature
# ($magic) has already been read, creates the uncompression adapter for
# the member's compression method and records the sizes and CRC needed
# later by chkTrailer.
#
# Returns a hash reference describing the member, or the result of
# HeaderError/TruncatedHeader/saveErrorString on failure.
sub _readZipHeader($)
{
    my ($self, $magic) = @_ ;
    my ($HeaderCRC) ;
    my ($buffer) = '' ;

    $self->smartReadExact(\$buffer, 30 - 4)
        or return $self->HeaderError("Minimum header size is " .
                                     30 . " bytes") ;

    my $keep = $magic . $buffer ;
    *$self->{HeaderPending} = $keep ;

    # Offsets are relative to the start of the header, hence the "-4"
    # to allow for the signature that is not part of $buffer.
    my $extractVersion     = unpack ("v", substr($buffer, 4-4,  2));
    my $gpFlag             = unpack ("v", substr($buffer, 6-4,  2));
    my $compressedMethod   = unpack ("v", substr($buffer, 8-4,  2));
    my $lastModTime        = unpack ("V", substr($buffer, 10-4, 4));
    my $crc32              = unpack ("V", substr($buffer, 14-4, 4));
    my $compressedLength   = U64::newUnpack_V32 substr($buffer, 18-4, 4);
    my $uncompressedLength = U64::newUnpack_V32 substr($buffer, 22-4, 4);
    my $filename_length    = unpack ("v", substr($buffer, 26-4, 2));
    my $extra_length       = unpack ("v", substr($buffer, 28-4, 2));

    my $filename;
    my $extraField;
    my @EXTRA = ();

    # Some programs (some versions of LibreOffice) mark entries as streamed, but still fill out
    # compressedLength/uncompressedLength & crc32 in the local file header.
    # The expected data descriptor is not populated.
    # So only assume streaming if the Streaming bit is set AND the CRC-32
    # field in the local header is zero.
    my $streamingMode = (($gpFlag & ZIP_GP_FLAG_STREAMING_MASK)  && $crc32 == 0) ? 1 : 0 ;

    my $efs_flag = ($gpFlag & ZIP_GP_FLAG_LANGUAGE_ENCODING) ? 1 : 0;

    return $self->HeaderError("Encrypted content not supported")
        if $gpFlag & (ZIP_GP_FLAG_ENCRYPTED_MASK|ZIP_GP_FLAG_STRONG_ENCRYPTED_MASK);

    return $self->HeaderError("Patch content not supported")
        if $gpFlag & ZIP_GP_FLAG_PATCHED_MASK;

    *$self->{ZipData}{Streaming} = $streamingMode;


    if ($filename_length)
    {
        $self->smartReadExact(\$filename, $filename_length)
            or return $self->TruncatedHeader("Filename");

        # Only decode the name when the caller asked for it (efs option)
        # and the member has the language encoding flag set.
        if (*$self->{UnzipData}{efs} && $efs_flag && $] >= 5.008004)
        {
            require Encode;
            eval { $filename = Encode::decode_utf8($filename, 1) }
                or Carp::croak "Zip Filename not UTF-8" ;
        }

        $keep .= $filename ;
    }

    my $zip64 = 0 ;

    if ($extra_length)
    {
        $self->smartReadExact(\$extraField, $extra_length)
            or return $self->TruncatedHeader("Extra Field");

        my $bad = IO::Compress::Zlib::Extra::parseRawExtra($extraField,
                                                \@EXTRA, 1, 0);
        return $self->HeaderError($bad)
            if defined $bad;

        $keep .= $extraField ;

        # Index the extra subfields by ID
        my %Extra ;
        for (@EXTRA)
        {
            $Extra{$_->[0]} = \$_->[1];
        }

        if (defined $Extra{ZIP_EXTRA_ID_ZIP64()})
        {
            $zip64 = 1 ;

            my $buff = ${ $Extra{ZIP_EXTRA_ID_ZIP64()} };

            # This code assumes that all the fields in the Zip64
            # extra field aren't necessarily present. The spec says that
            # they only exist if the equivalent local headers are -1.

            if (! $streamingMode) {
                my $offset = 0 ;

                if (U64::full32 $uncompressedLength->get32bit() ) {
                    $uncompressedLength
                            = U64::newUnpack_V64 substr($buff, 0, 8);

                    $offset += 8 ;
                }

                if (U64::full32 $compressedLength->get32bit() ) {

                    $compressedLength
                        = U64::newUnpack_V64 substr($buff, $offset, 8);

                    $offset += 8 ;
                }
           }
        }
    }

    *$self->{ZipData}{Zip64} = $zip64;

    if (! $streamingMode) {
        *$self->{ZipData}{Streaming} = 0;
        *$self->{ZipData}{Crc32} = $crc32;
        *$self->{ZipData}{CompressedLen} = $compressedLength;
        *$self->{ZipData}{UnCompressedLen} = $uncompressedLength;
        *$self->{CompressedInputLengthRemaining} =
            *$self->{CompressedInputLength} = $compressedLength->get64bit();
    }

    # Running CRC of the uncompressed data; see filterUncompressed
    *$self->{ZipData}{CRC32} = Compress::Raw::Zlib::crc32(undef);
    *$self->{ZipData}{Method} = $compressedMethod;
    if ($compressedMethod == ZIP_CM_DEFLATE)
    {
        *$self->{Type} = 'zip-deflate';
        my $obj = IO::Uncompress::Adapter::Inflate::mkUncompObject(1,0,0);

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_BZIP2)
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::Bunzip2::VERSION ;

        *$self->{Type} = 'zip-bzip2';

        my $obj = IO::Uncompress::Adapter::Bunzip2::mkUncompObject();

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_XZ)
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::UnXz::VERSION ;

        *$self->{Type} = 'zip-xz';

        my $obj = IO::Uncompress::Adapter::UnXz::mkUncompObject();

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_ZSTD)
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::UnZstd::VERSION ;

        *$self->{Type} = 'zip-zstd';

        my $obj = IO::Uncompress::Adapter::UnZstd::mkUncompObject();

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_LZMA)
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::UnLzma::VERSION ;

        *$self->{Type} = 'zip-lzma';

        # The LZMA data is preceded by a 4-byte header (two version
        # bytes and a 2-byte properties size) and the properties.
        my $LzmaHeader;
        $self->smartReadExact(\$LzmaHeader, 4)
                or return $self->saveErrorString(undef, "Truncated file");
        my ($verHi, $verLo)   = unpack ("CC", substr($LzmaHeader, 0, 2));
        my $LzmaPropertiesSize   = unpack ("v", substr($LzmaHeader, 2, 2));


        my $LzmaPropertyData;
        $self->smartReadExact(\$LzmaPropertyData, $LzmaPropertiesSize)
                or return $self->saveErrorString(undef, "Truncated file");

        # The bytes just consumed are counted in the compressed length
        if (! $streamingMode) {
            *$self->{ZipData}{CompressedLen}->subtract(4 + $LzmaPropertiesSize) ;
            *$self->{CompressedInputLengthRemaining} =
                *$self->{CompressedInputLength} = *$self->{ZipData}{CompressedLen}->get64bit();
        }

        my $obj =
            IO::Uncompress::Adapter::UnLzma::mkUncompZipObject($LzmaPropertyData);

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_STORE)
    {
        *$self->{Type} = 'zip-stored';

        my $obj =
        IO::Uncompress::Adapter::Identity::mkUncompObject($streamingMode,
                                                          $zip64);

        *$self->{Uncomp} = $obj;
    }
    else
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod");
    }

    return {
        'Type'               => 'zip',
        'FingerprintLength'  => 4,
        #'HeaderLength'       => $compressedMethod == 8 ? length $keep : 0,
        'HeaderLength'       => length $keep,
        'Zip64'              => $zip64,
        'TrailerLength'      => ! $streamingMode ? 0 : $zip64 ? 24 : 16,
        'Header'             => $keep,
        'CompressedLength'   => $compressedLength ,
        'UncompressedLength' => $uncompressedLength ,
        'CRC32'              => $crc32 ,
        'Name'               => $filename,
        'efs'                => $efs_flag, # language encoding flag
        'Time'               => _dosToUnixTime($lastModTime),
        'Stream'             => $streamingMode,

        'MethodID'           => $compressedMethod,
        'MethodName'         => $MethodNames{$compressedMethod} || 'Unknown',

#        'TextFlag'      => $flag & GZIP_FLG_FTEXT ? 1 : 0,
#        'HeaderCRCFlag' => $flag & GZIP_FLG_FHCRC ? 1 : 0,
#        'NameFlag'      => $flag & GZIP_FLG_FNAME ? 1 : 0,
#        'CommentFlag'   => $flag & GZIP_FLG_FCOMMENT ? 1 : 0,
#        'ExtraFlag'     => $flag & GZIP_FLG_FEXTRA ? 1 : 0,
#        'Comment'       => $comment,
#        'OsID'          => $os,
#        'OsName'        => defined $GZIP_OS_Names{$os}
#                                 ? $GZIP_OS_Names{$os} : "Unknown",
#        'HeaderCRC'     => $HeaderCRC,
#        'Flags'         => $flag,
#        'ExtraFlags'    => $xfl,
        'ExtraFieldRaw' => $extraField,
        'ExtraField'    => [ @EXTRA ],


      }
}

# Updates the running CRC-32 for the member. For deflate the value is
# taken from the uncompression object; for every other method it is
# computed here from the buffer referenced by $_[0], with $_[1] passed
# through as the final argument to crc32.
sub filterUncompressed
{
    my $self = shift ;

    if (*$self->{ZipData}{Method} == ZIP_CM_DEFLATE) {
        *$self->{ZipData}{CRC32} = *$self->{Uncomp}->crc32() ;
    }
    else {
        *$self->{ZipData}{CRC32} = Compress::Raw::Zlib::crc32(${$_[0]}, *$self->{ZipData}{CRC32}, $_[1]);
    }
}


# from Archive::Zip & info-zip
#
# Converts a packed 32-bit DOS date/time into a Unix time using the
# local time zone. Returns 0 when the fields cannot be converted, for
# example the all-zero stamp whose month and day are both 0.
sub _dosToUnixTime
{
    my $dt = shift;

    my $year = ( ( $dt >> 25 ) & 0x7f ) + 80;
    my $mon  = ( ( $dt >> 21 ) & 0x0f ) - 1;
    my $mday = ( ( $dt >> 16 ) & 0x1f );

    my $hour = ( ( $dt >> 11 ) & 0x1f );
    my $min  = ( ( $dt >> 5 ) & 0x3f );
    my $sec  = ( ( $dt << 1 ) & 0x3e );

    use Time::Local ;

    # timelocal croaks on out-of-range fields. A bad timestamp must not
    # abort reading the archive, so trap the failure and fall through to
    # the "return 0" below. $@ and any __DIE__ hook are left untouched.
    my $time_t = do {
        local ($@, $SIG{__DIE__});
        eval { Time::Local::timelocal( $sec, $min, $hour, $mday, $mon, $year) };
    };
    return 0 if ! defined $time_t;
    return $time_t;

}


# Moves the input position forward by $size bytes, relative to the
# current position. $size may be a plain number or a U64 object.
sub skip
{
    my $self = shift;
    my $size = shift;

    use Fcntl qw(SEEK_CUR);

    # Pass whence as the third argument, matching the
    # (offset, 0, whence) form of every other smartSeek call in this
    # file. It was previously passed in the second position.
    if (ref $size eq 'U64') {
        $self->smartSeek($size->get64bit(), 0, SEEK_CUR);
    }
    else {
        $self->smartSeek($size, 0, SEEK_CUR);
    }

}


# Walks the central directory and returns a list with one U64 object
# per entry holding that entry's compressed length. Returns an empty
# list when the central directory cannot be located. The position
# recorded on entry is restored before returning.
sub scanCentralDirectory
{
    my $self = shift;

    my $here = $self->tell();

    # Use cases
    # 1 32-bit CD
    # 2 64-bit CD

    my @CD = ();
    my $offset = $self->findCentralDirectoryOffset();

    return ()
        if ! defined $offset;

    $self->smartSeek($offset, 0, SEEK_SET) ;

    # Now walk the Central Directory Records
    my $buffer ;
    while ($self->smartReadExact(\$buffer, 46) &&
           unpack("V", $buffer) == ZIP_CENTRAL_HDR_SIG) {

        my $compressedLength   = unpack("V", substr($buffer, 20, 4));
        my $uncompressedLength = unpack("V", substr($buffer, 24, 4));
        my $filename_length    = unpack("v", substr($buffer, 28, 2));
        my $extra_length       = unpack("v", substr($buffer, 30, 2));
        my $comment_length     = unpack("v", substr($buffer, 32, 2));

        $self->skip($filename_length ) ;

        my $v64 = U64->new( $compressedLength );

        # A 32-bit length of 0xFFFFFFFF means the real value lives in
        # the Zip64 extra field.
        if (U64::full32 $compressedLength ) {
            $self->smartReadExact(\$buffer, $extra_length) ;
            die "xxx $offset $comment_length $filename_length $extra_length" . length($buffer)
                if length($buffer) != $extra_length;
            my $got = $self->get64Extra($buffer, U64::full32 $uncompressedLength);

            # If not Zip64 extra field, assume size is 0xFFFFFFFF
            $v64 = $got if defined $got;
        }
        else {
            $self->skip($extra_length) ;
        }

        $self->skip($comment_length ) ;

        push @CD, $v64 ;
    }

    $self->smartSeek($here, 0, SEEK_SET) ;

    return @CD;
}

# Extracts the 64-bit compressed length from the Zip64 extra subfield
# (ID 0x0001) found in $buffer. When $is_uncomp is true the value is
# read from byte 8 of the subfield, otherwise from byte 0. Returns a
# U64 object, or undef when the subfield is not present.
sub get64Extra
{
    my $self = shift ;

    my $buffer = shift;
    my $is_uncomp = shift ;

    my $extra = IO::Compress::Zlib::Extra::findID(0x0001, $buffer);

    if (! defined $extra)
    {
        return undef;
    }
    else
    {
        my $u64 = U64::newUnpack_V64(substr($extra,  $is_uncomp ? 8 : 0)) ;
        return $u64;
    }
}

# Given $here, the offset of the end of central directory record, reads
# the Zip64 locator expected in the 20 bytes before it, follows it to
# the Zip64 end of central directory record and returns the central
# directory offset stored there. Dies when any step fails.
sub offsetFromZip64
{
    my $self = shift ;
    my $here = shift;

    $self->smartSeek($here - 20, 0, SEEK_SET)
        or die "xx $!" ;

    my $buffer;
    my $got = 0;
    $self->smartReadExact(\$buffer, 20)
        or die "xxx $here $got $!" ;

    if ( unpack("V", $buffer) == ZIP64_END_CENTRAL_LOC_HDR_SIG ) {
        my $cd64 = U64::Value_VV64 substr($buffer,  8, 8);

        $self->smartSeek($cd64, 0, SEEK_SET) ;

        $self->smartReadExact(\$buffer, 4)
            or die "xxx" ;

        if ( unpack("V", $buffer) == ZIP64_END_CENTRAL_REC_HDR_SIG ) {

            $self->smartReadExact(\$buffer, 8)
                or die "xxx" ;
            my $size  = U64::Value_VV64($buffer);
            $self->smartReadExact(\$buffer, $size)
                or die "xxx" ;

            # Byte 36 of the record body (after the signature and size)
            # holds the 8-byte central directory offset.
            my $cd64 =  U64::Value_VV64 substr($buffer,  36, 8);

            return $cd64 ;
        }

        die "zzz";
    }

    die "zzz";
}

use constant Pack_ZIP_END_CENTRAL_HDR_SIG => pack("V", ZIP_END_CENTRAL_HDR_SIG);

# Locates the end of central directory record and returns the central
# directory offset it contains. The record is first looked for in the
# last 22 bytes; failing that, the tail of the input is searched in
# windows that grow by 1024 bytes. Returns undef when no record is
# found. A 32-bit offset of 0xFFFFFFFF is resolved via offsetFromZip64.
sub findCentralDirectoryOffset
{
    my $self = shift ;

    # Most common use-case is where there is no comment, so
    # know exactly where the end of central directory record
    # should be.

    $self->smartSeek(-22, 0, SEEK_END) ;
    my $here = $self->tell();

    my $buffer;
    $self->smartReadExact(\$buffer, 22)
        or die "xxx" ;

    my $centralDirOffset ;
    if ( unpack("V", $buffer) == ZIP_END_CENTRAL_HDR_SIG ) {
        $centralDirOffset = unpack("V", substr($buffer, 16,  4));
    }
    else {
        $self->smartSeek(0, 0, SEEK_END) ;

        my $fileLen = $self->tell();
        my $want = 0 ;

        while(1) {
            $want += 1024;
            my $seekTo = $fileLen - $want;
            if ($seekTo < 0 ) {
                $seekTo = 0;
                $want = $fileLen ;
            }
            $self->smartSeek( $seekTo, 0, SEEK_SET)
                or die "xxx $!" ;

            # smartReadExact takes a reference to the buffer, as at every
            # other call site in this file.
            $self->smartReadExact(\$buffer, $want)
                or die "xxx " ;
            my $pos = rindex( $buffer, Pack_ZIP_END_CENTRAL_HDR_SIG);

            if ($pos >= 0) {
                #$here = $self->tell();
                $here = $seekTo + $pos ;
                $centralDirOffset = unpack("V", substr($buffer, $pos + 16,  4));
                last ;
            }

            return undef
                if $want == $fileLen;
        }
    }

    $centralDirOffset = $self->offsetFromZip64($here)
        if U64::full32 $centralDirOffset ;

    return $centralDirOffset ;
}

1;

__END__


=head1 NAME

IO::Uncompress::Unzip - Read zip files/buffers

=head1 SYNOPSIS

    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;

    my $status = unzip $input => $output [,OPTS]
        or die "unzip failed: $UnzipError\n";

    my $z = IO::Uncompress::Unzip->new( $input [OPTS] )
        or die "unzip failed: $UnzipError\n";

    $status = $z->read($buffer)
    $status = $z->read($buffer, $length)
    $status = $z->read($buffer, $length, $offset)
    $line = $z->getline()
    $char = $z->getc()
    $char = $z->ungetc()
    $char = $z->opened()

    $status = $z->inflateSync()

    $data = $z->trailingData()
    $status = $z->nextStream()
    $data = $z->getHeaderInfo()
    $z->tell()
    $z->seek($position, $whence)
    $z->binmode()
    $z->fileno()
    $z->eof()
    $z->close()

    $UnzipError ;

    # IO::File mode

    <$z>
    read($z, $buffer);
    read($z, $buffer, $length);
    read($z, $buffer, $length, $offset);
    tell($z)
    seek($z, $position, $whence)
    binmode($z)
    fileno($z)
    eof($z)
    close($z)

=head1 DESCRIPTION

This module provides a Perl interface that allows the reading of
zip files/buffers.

For writing zip files/buffers, see the companion module IO::Compress::Zip.

The primary purpose of this module is to provide I<streaming> read access to
zip files and buffers.

At present the following compression methods are supported by IO::Uncompress::Unzip:

=over 5

=item Store (0)

=item Deflate (8)

=item Bzip2 (12)

To read Bzip2 content, the module C<IO::Uncompress::Bunzip2> must
be installed.

=item Lzma (14)

To read LZMA content, the module C<IO::Uncompress::UnLzma> must
be installed.

=item Xz (95)

To read Xz content, the module C<IO::Uncompress::UnXz> must
be installed.

=item Zstandard (93)

To read Zstandard content, the module C<IO::Uncompress::UnZstd> must
be installed.

=back

=head1 Functional Interface

A top-level function, C<unzip>, is provided to carry out
"one-shot" uncompression between buffers and/or files. For finer
control over the uncompression process, see the L</"OO Interface">
section.

    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;

    unzip $input_filename_or_reference => $output_filename_or_reference [,OPTS]
        or die "unzip failed: $UnzipError\n";

The functional interface needs Perl 5.006 or better.

=head2 unzip $input_filename_or_reference => $output_filename_or_reference [, OPTS]

C<unzip> expects at least two parameters,
C<$input_filename_or_reference> and C<$output_filename_or_reference>
and zero or more optional parameters (see L</Optional Parameters>).

=head3 The C<$input_filename_or_reference> parameter

The parameter, C<$input_filename_or_reference>, is used to define the
source of the compressed data.

It can take one of the following forms:

=over 5

=item A filename

If the C<$input_filename_or_reference> parameter is a simple scalar, it is
assumed to be a filename.
This file will be opened for reading and the
input data will be read from it.

=item A filehandle

If the C<$input_filename_or_reference> parameter is a filehandle, the input
data will be read from it. The string '-' can be used as an alias for
standard input.

=item A scalar reference

If C<$input_filename_or_reference> is a scalar reference, the input data
will be read from C<$$input_filename_or_reference>.

=item An array reference

If C<$input_filename_or_reference> is an array reference, each element in
the array must be a filename.

The input data will be read from each file in turn.

The complete array will be walked to ensure that it only
contains valid filenames before any data is uncompressed.

=item An Input FileGlob string

If C<$input_filename_or_reference> is a string that is delimited by the
characters "<" and ">", C<unzip> will assume that it is an
I<input fileglob string>. The input is the list of files that match the
fileglob.

See L<File::GlobMapper|File::GlobMapper> for more details.

=back

If the C<$input_filename_or_reference> parameter is any other type,
C<undef> will be returned.

=head3 The C<$output_filename_or_reference> parameter

The parameter C<$output_filename_or_reference> is used to control the
destination of the uncompressed data. This parameter can take one of
these forms.

=over 5

=item A filename

If the C<$output_filename_or_reference> parameter is a simple scalar, it is
assumed to be a filename. This file will be opened for writing and the
uncompressed data will be written to it.

=item A filehandle

If the C<$output_filename_or_reference> parameter is a filehandle, the
uncompressed data will be written to it. The string '-' can be used as
an alias for standard output.
1262 1263=item A scalar reference 1264 1265If C<$output_filename_or_reference> is a scalar reference, the 1266uncompressed data will be stored in C<$$output_filename_or_reference>. 1267 1268=item An Array Reference 1269 1270If C<$output_filename_or_reference> is an array reference, 1271the uncompressed data will be pushed onto the array. 1272 1273=item An Output FileGlob 1274 1275If C<$output_filename_or_reference> is a string that is delimited by the 1276characters "<" and ">" C<unzip> will assume that it is an 1277I<output fileglob string>. The output is the list of files that match the 1278fileglob. 1279 1280When C<$output_filename_or_reference> is a fileglob string, 1281C<$input_filename_or_reference> must also be a fileglob string. Anything 1282else is an error. 1283 1284See L<File::GlobMapper|File::GlobMapper> for more details. 1285 1286=back 1287 1288If the C<$output_filename_or_reference> parameter is any other type, 1289C<undef> will be returned. 1290 1291=head2 Notes 1292 1293When C<$input_filename_or_reference> maps to multiple compressed 1294files/buffers and C<$output_filename_or_reference> is 1295a single file/buffer, after uncompression C<$output_filename_or_reference> will contain a 1296concatenation of all the uncompressed data from each of the input 1297files/buffers. 1298 1299=head2 Optional Parameters 1300 1301The optional parameters for the one-shot function C<unzip> 1302are (for the most part) identical to those used with the OO interface defined in the 1303L</"Constructor Options"> section. The exceptions are listed below 1304 1305=over 5 1306 1307=item C<< AutoClose => 0|1 >> 1308 1309This option applies to any input or output data streams to 1310C<unzip> that are filehandles. 1311 1312If C<AutoClose> is specified, and the value is true, it will result in all 1313input and/or output filehandles being closed once C<unzip> has 1314completed. 1315 1316This parameter defaults to 0. 
1317 1318=item C<< BinModeOut => 0|1 >> 1319 1320This option is now a no-op. All files will be written in binmode. 1321 1322=item C<< Append => 0|1 >> 1323 1324The behaviour of this option is dependent on the type of output data 1325stream. 1326 1327=over 5 1328 1329=item * A Buffer 1330 1331If C<Append> is enabled, all uncompressed data will be appended to the end of 1332the output buffer. Otherwise the output buffer will be cleared before any 1333uncompressed data is written to it. 1334 1335=item * A Filename 1336 1337If C<Append> is enabled, the file will be opened in append mode. Otherwise 1338the contents of the file, if any, will be truncated before any uncompressed 1339data is written to it. 1340 1341=item * A Filehandle 1342 1343If C<Append> is enabled, the filehandle will be positioned to the end of 1344the file via a call to C<seek> before any uncompressed data is 1345written to it. Otherwise the file pointer will not be moved. 1346 1347=back 1348 1349When C<Append> is specified, and set to true, it will I<append> all uncompressed 1350data to the output data stream. 1351 1352So when the output is a filehandle it will carry out a seek to the eof 1353before writing any uncompressed data. If the output is a filename, it will be opened for 1354appending. If the output is a buffer, all uncompressed data will be 1355appended to the existing buffer. 1356 1357Conversely when C<Append> is not specified, or it is present and is set to 1358false, it will operate as follows. 1359 1360When the output is a filename, it will truncate the contents of the file 1361before writing any uncompressed data. If the output is a filehandle 1362its position will not be changed. If the output is a buffer, it will be 1363wiped before any uncompressed data is output. 1364 1365Defaults to 0. 1366 1367=item C<< MultiStream => 0|1 >> 1368 1369If the input file/buffer contains multiple compressed data streams, this 1370option will uncompress the whole lot as a single data stream. 
1371 1372Defaults to 0. 1373 1374=item C<< TrailingData => $scalar >> 1375 1376Returns the data, if any, that is present immediately after the compressed 1377data stream once uncompression is complete. 1378 1379This option can be used when there is useful information immediately 1380following the compressed data stream, and you don't know the length of the 1381compressed data stream. 1382 1383If the input is a buffer, C<trailingData> will return everything from the 1384end of the compressed data stream to the end of the buffer. 1385 1386If the input is a filehandle, C<trailingData> will return the data that is 1387left in the filehandle input buffer once the end of the compressed data 1388stream has been reached. You can then use the filehandle to read the rest 1389of the input file. 1390 1391Don't bother using C<trailingData> if the input is a filename. 1392 1393If you know the length of the compressed data stream before you start 1394uncompressing, you can avoid having to use C<trailingData> by setting the 1395C<InputLength> option. 1396 1397=back 1398 1399=head2 OneShot Examples 1400 1401Say you have a zip file, C<file1.zip>, that only contains a 1402single member, you can read it and write the uncompressed data to the 1403file C<file1.txt> like this. 
1404 1405 use strict ; 1406 use warnings ; 1407 use IO::Uncompress::Unzip qw(unzip $UnzipError) ; 1408 1409 my $input = "file1.zip"; 1410 my $output = "file1.txt"; 1411 unzip $input => $output 1412 or die "unzip failed: $UnzipError\n"; 1413 1414If you have a zip file that contains multiple members and want to read a 1415specific member from the file, say C<"data1">, use the C<Name> option 1416 1417 use strict ; 1418 use warnings ; 1419 use IO::Uncompress::Unzip qw(unzip $UnzipError) ; 1420 1421 my $input = "file1.zip"; 1422 my $output = "file1.txt"; 1423 unzip $input => $output, Name => "data1" 1424 or die "unzip failed: $UnzipError\n"; 1425 1426Alternatively, if you want to read the C<"data1"> member into memory, use 1427a scalar reference for the C<output> parameter. 1428 1429 use strict ; 1430 use warnings ; 1431 use IO::Uncompress::Unzip qw(unzip $UnzipError) ; 1432 1433 my $input = "file1.zip"; 1434 my $output ; 1435 unzip $input => \$output, Name => "data1" 1436 or die "unzip failed: $UnzipError\n"; 1437 # $output now contains the uncompressed data 1438 1439To read from an existing Perl filehandle, C<$input>, and write the 1440uncompressed data to a buffer, C<$buffer>. 1441 1442 use strict ; 1443 use warnings ; 1444 use IO::Uncompress::Unzip qw(unzip $UnzipError) ; 1445 use IO::File ; 1446 1447 my $input = IO::File->new( "<file1.zip" ) 1448 or die "Cannot open 'file1.zip': $!\n" ; 1449 my $buffer ; 1450 unzip $input => \$buffer 1451 or die "unzip failed: $UnzipError\n"; 1452 1453=head1 OO Interface 1454 1455=head2 Constructor 1456 1457The format of the constructor for IO::Uncompress::Unzip is shown below 1458 1459 my $z = IO::Uncompress::Unzip->new( $input [OPTS] ) 1460 or die "IO::Uncompress::Unzip failed: $UnzipError\n"; 1461 1462The constructor takes one mandatory parameter, C<$input>, defined below, and 1463zero or more C<OPTS>, defined in L<Constructor Options>. 1464 1465Returns an C<IO::Uncompress::Unzip> object on success and undef on failure. 
1466The variable C<$UnzipError> will contain an error message on failure. 1467 1468If you are running Perl 5.005 or better the object, C<$z>, returned from 1469IO::Uncompress::Unzip can be used exactly like an L<IO::File|IO::File> filehandle. 1470This means that all normal input file operations can be carried out with 1471C<$z>. For example, to read a line from a compressed file/buffer you can 1472use either of these forms 1473 1474 $line = $z->getline(); 1475 $line = <$z>; 1476 1477Below is a simple example of using the OO interface to read the compressed file 1478C<myfile.zip> and write its contents to stdout. 1479 1480 my $filename = "myfile.zip"; 1481 my $z = IO::Uncompress::Unzip->new($filename) 1482 or die "IO::Uncompress::Unzip failed: $UnzipError\n"; 1483 1484 while (<$z>) { 1485 print $_; 1486 } 1487 $z->close(); 1488 1489See L</EXAMPLES> for further examples 1490 1491The mandatory parameter C<$input> is used to determine the source of the 1492compressed data. This parameter can take one of three forms. 1493 1494=over 5 1495 1496=item A filename 1497 1498If the C<$input> parameter is a scalar, it is assumed to be a filename. This 1499file will be opened for reading and the compressed data will be read from it. 1500 1501=item A filehandle 1502 1503If the C<$input> parameter is a filehandle, the compressed data will be 1504read from it. 1505The string '-' can be used as an alias for standard input. 1506 1507=item A scalar reference 1508 1509If C<$input> is a scalar reference, the compressed data will be read from 1510C<$$input>. 1511 1512=back 1513 1514=head2 Constructor Options 1515 1516The option names defined below are case insensitive and can be optionally 1517prefixed by a '-'. So all of the following are valid 1518 1519 -AutoClose 1520 -autoclose 1521 AUTOCLOSE 1522 autoclose 1523 1524OPTS is a combination of the following options: 1525 1526=over 5 1527 1528=item C<< Name => "membername" >> 1529 1530Open "membername" from the zip file for reading. 
1531 1532=item C<< Efs => 0|1 >> 1533 1534When this option is set to true AND the zip archive being read has 1535the "Language Encoding Flag" (EFS) set, the member name is assumed to be encoded in UTF-8. 1536 1537If the member name in the zip archive is not valid UTF-8 when this option is true, 1538the script will die with an error message. 1539 1540Note that this option only works with Perl 5.8.4 or better. 1541 1542This option defaults to B<false>. 1543 1544=item C<< AutoClose => 0|1 >> 1545 1546This option is only valid when the C<$input> parameter is a filehandle. If 1547specified, and the value is true, it will result in the file being closed once 1548either the C<close> method is called or the IO::Uncompress::Unzip object is 1549destroyed. 1550 1551This parameter defaults to 0. 1552 1553=item C<< MultiStream => 0|1 >> 1554 1555Treats the complete zip file/buffer as a single compressed data 1556stream. When reading in multi-stream mode each member of the zip 1557file/buffer will be uncompressed in turn until the end of the file/buffer 1558is encountered. 1559 1560This parameter defaults to 0. 1561 1562=item C<< Prime => $string >> 1563 1564This option will uncompress the contents of C<$string> before processing the 1565input file/buffer. 1566 1567This option can be useful when the compressed data is embedded in another 1568file/data structure and it is not possible to work out where the compressed 1569data begins without having to read the first few bytes. If this is the 1570case, the uncompression can be I<primed> with these bytes using this 1571option. 1572 1573=item C<< Transparent => 0|1 >> 1574 1575If this option is set and the input file/buffer is not compressed data, 1576the module will allow reading of it anyway. 1577 1578In addition, if the input file/buffer does contain compressed data and 1579there is non-compressed data immediately following it, setting this option 1580will make this module treat the whole file/buffer as a single data stream. 
1582 1583This option defaults to 1. 1584 1585=item C<< BlockSize => $num >> 1586 1587When reading the compressed input data, IO::Uncompress::Unzip will read it in 1588blocks of C<$num> bytes. 1589 1590This option defaults to 4096. 1591 1592=item C<< InputLength => $size >> 1593 1594When present this option will limit the number of compressed bytes read 1595from the input file/buffer to C<$size>. This option can be used in the 1596situation where there is useful data directly after the compressed data 1597stream and you know beforehand the exact length of the compressed data 1598stream. 1599 1600This option is mostly used when reading from a filehandle, in which case 1601the file pointer will be left pointing to the first byte directly after the 1602compressed data stream. 1603 1604This option defaults to off. 1605 1606=item C<< Append => 0|1 >> 1607 1608This option controls what the C<read> method does with uncompressed data. 1609 1610If set to 1, all uncompressed data will be appended to the output parameter 1611of the C<read> method. 1612 1613If set to 0, the contents of the output parameter of the C<read> method 1614will be overwritten by the uncompressed data. 1615 1616Defaults to 0. 1617 1618=item C<< Strict => 0|1 >> 1619 1620This option controls whether the extra checks defined below are used when 1621carrying out the decompression. When Strict is on, the extra tests are 1622carried out, when Strict is off they are not. 1623 1624The default for this option is off. 1625 1626=back 1627 1628=head1 Methods 1629 1630=head2 read 1631 1632Usage is 1633 1634 $status = $z->read($buffer) 1635 1636Reads a block of compressed data (the size of the compressed block is 1637determined by the C<BlockSize> option in the constructor), uncompresses it and 1638writes any uncompressed data into C<$buffer>. If the C<Append> parameter is 1639set in the constructor, the uncompressed data will be appended to the 1640C<$buffer> parameter. Otherwise C<$buffer> will be overwritten. 
1640 1641Returns the number of uncompressed bytes written to C<$buffer>, zero if eof 1642or a negative number on error. 1643 1644=head2 read 1645 1646Usage is 1647 1648 $status = $z->read($buffer, $length) 1649 $status = $z->read($buffer, $length, $offset) 1650 1651 $status = read($z, $buffer, $length) 1652 $status = read($z, $buffer, $length, $offset) 1653 1654Attempt to read C<$length> bytes of uncompressed data into C<$buffer>. 1655 1656The main difference between this form of the C<read> method and the 1657previous one, is that this one will attempt to return I<exactly> C<$length> 1658bytes. The only circumstances that this function will not is if end-of-file 1659or an IO error is encountered. 1660 1661Returns the number of uncompressed bytes written to C<$buffer>, zero if eof 1662or a negative number on error. 1663 1664=head2 getline 1665 1666Usage is 1667 1668 $line = $z->getline() 1669 $line = <$z> 1670 1671Reads a single line. 1672 1673This method fully supports the use of the variable C<$/> (or 1674C<$INPUT_RECORD_SEPARATOR> or C<$RS> when C<English> is in use) to 1675determine what constitutes an end of line. Paragraph mode, record mode and 1676file slurp mode are all supported. 1677 1678=head2 getc 1679 1680Usage is 1681 1682 $char = $z->getc() 1683 1684Read a single character. 1685 1686=head2 ungetc 1687 1688Usage is 1689 1690 $char = $z->ungetc($string) 1691 1692=head2 inflateSync 1693 1694Usage is 1695 1696 $status = $z->inflateSync() 1697 1698TODO 1699 1700=head2 getHeaderInfo 1701 1702Usage is 1703 1704 $hdr = $z->getHeaderInfo(); 1705 @hdrs = $z->getHeaderInfo(); 1706 1707This method returns either a hash reference (in scalar context) or a list 1708of hash references (in array context) that contains information about each 1709of the header fields in the compressed data stream(s). 1710 1711=head2 tell 1712 1713Usage is 1714 1715 $z->tell() 1716 tell $z 1717 1718Returns the uncompressed file offset. 
1719 1720=head2 eof 1721 1722Usage is 1723 1724 $z->eof(); 1725 eof($z); 1726 1727Returns true if the end of the compressed input stream has been reached. 1728 1729=head2 seek 1730 1731 $z->seek($position, $whence); 1732 seek($z, $position, $whence); 1733 1734Provides a sub-set of the C<seek> functionality, with the restriction 1735that it is only legal to seek forward in the input file/buffer. 1736It is a fatal error to attempt to seek backward. 1737 1738Note that the implementation of C<seek> in this module does not provide 1739true random access to a compressed file/buffer. It works by uncompressing 1740data from the current offset in the file/buffer until it reaches the 1741uncompressed offset specified in the parameters to C<seek>. For very small 1742files this may be acceptable behaviour. For large files it may cause an 1743unacceptable delay. 1744 1745The C<$whence> parameter takes one of the usual values, namely SEEK_SET, 1746SEEK_CUR or SEEK_END. 1747 1748Returns 1 on success, 0 on failure. 1749 1750=head2 binmode 1751 1752Usage is 1753 1754 $z->binmode 1755 binmode $z ; 1756 1757This is a noop provided for completeness. 1758 1759=head2 opened 1760 1761 $z->opened() 1762 1763Returns true if the object currently refers to an opened file/buffer. 1764 1765=head2 autoflush 1766 1767 my $prev = $z->autoflush() 1768 my $prev = $z->autoflush(EXPR) 1769 1770If the C<$z> object is associated with a file or a filehandle, this method 1771returns the current autoflush setting for the underlying filehandle. If 1772C<EXPR> is present, and is non-zero, it will enable flushing after every 1773write/print operation. 1774 1775If C<$z> is associated with a buffer, this method has no effect and always 1776returns C<undef>. 1777 1778B<Note> that the special variable C<$|> B<cannot> be used to set or 1779retrieve the autoflush setting. 
1780 1781=head2 input_line_number 1782 1783 $z->input_line_number() 1784 $z->input_line_number(EXPR) 1785 1786Returns the current uncompressed line number. If C<EXPR> is present it has 1787the effect of setting the line number. Note that setting the line number 1788does not change the current position within the file/buffer being read. 1789 1790The contents of C<$/> are used to determine what constitutes a line 1791terminator. 1792 1793=head2 fileno 1794 1795 $z->fileno() 1796 fileno($z) 1797 1798If the C<$z> object is associated with a file or a filehandle, C<fileno> 1799will return the underlying file descriptor. Once the C<close> method is 1800called C<fileno> will return C<undef>. 1801 1802If the C<$z> object is associated with a buffer, this method will return 1803C<undef>. 1804 1805=head2 close 1806 1807 $z->close() ; 1808 close $z ; 1809 1810Closes the output file/buffer. 1811 1812For most versions of Perl this method will be automatically invoked if 1813the IO::Uncompress::Unzip object is destroyed (either explicitly or by the 1814variable with the reference to the object going out of scope). The 1815exceptions are Perl versions 5.005 through 5.00504 and 5.8.0. In 1816these cases, the C<close> method will be called automatically, but 1817not until global destruction of all live objects when the program is 1818terminating. 1819 1820Therefore, if you want your scripts to be able to run on all versions 1821of Perl, you should call C<close> explicitly and not rely on automatic 1822closing. 1823 1824Returns true on success, otherwise 0. 1825 1826If the C<AutoClose> option has been enabled when the IO::Uncompress::Unzip 1827object was created, and the object is associated with a file, the 1828underlying file will also be closed. 1829 1830=head2 nextStream 1831 1832Usage is 1833 1834 my $status = $z->nextStream(); 1835 1836Skips to the next compressed data stream in the input file/buffer. 
If a new 1837compressed data stream is found, the eof marker will be cleared and C<$.> 1838will be reset to 0. 1839 1840If trailing data is present immediately after the zip archive and the 1841C<Transparent> option is enabled, this method will consider that trailing 1842data to be another member of the zip archive. 1843 1844Returns 1 if a new stream was found, 0 if none was found, and -1 if an 1845error was encountered. 1846 1847=head2 trailingData 1848 1849Usage is 1850 1851 my $data = $z->trailingData(); 1852 1853Returns the data, if any, that is present immediately after the compressed 1854data stream once uncompression is complete. It only makes sense to call 1855this method once the end of the compressed data stream has been 1856encountered. 1857 1858This option can be used when there is useful information immediately 1859following the compressed data stream, and you don't know the length of the 1860compressed data stream. 1861 1862If the input is a buffer, C<trailingData> will return everything from the 1863end of the compressed data stream to the end of the buffer. 1864 1865If the input is a filehandle, C<trailingData> will return the data that is 1866left in the filehandle input buffer once the end of the compressed data 1867stream has been reached. You can then use the filehandle to read the rest 1868of the input file. 1869 1870Don't bother using C<trailingData> if the input is a filename. 1871 1872If you know the length of the compressed data stream before you start 1873uncompressing, you can avoid having to use C<trailingData> by setting the 1874C<InputLength> option in the constructor. 1875 1876=head1 Importing 1877 1878No symbolic constants are required by IO::Uncompress::Unzip at present. 1879 1880=over 5 1881 1882=item :all 1883 1884Imports C<unzip> and C<$UnzipError>. 
1885Same as doing this 1886 1887 use IO::Uncompress::Unzip qw(unzip $UnzipError) ; 1888 1889=back 1890 1891=head1 EXAMPLES 1892 1893=head2 Simple Read 1894 1895Say you have a zip file, C<file1.zip>, that only contains a 1896single member, you can read it and write the uncompressed data to the 1897file C<file1.txt> like this. 1898 1899 use strict ; 1900 use warnings ; 1901 use IO::Uncompress::Unzip qw(unzip $UnzipError) ; 1902 1903 my $filename = "file1.zip"; 1904 my $z = IO::Uncompress::Unzip->new($filename) 1905 or die "IO::Uncompress::Unzip failed: $UnzipError\n"; 1906 open my $out, ">", "file1.txt"; 1907 1908 while (<$z>) { 1909 print $out $_; 1910 } 1911 $z->close(); 1912 1913If you have a zip file that contains multiple members and want to read a 1914specific member from the file, say C<"data1">, use the C<Name> option when 1915constructing the object. 1916 1917 use strict ; 1918 use warnings ; 1919 use IO::Uncompress::Unzip qw(unzip $UnzipError) ; 1920 1921 my $filename = "file1.zip"; 1922 my $z = IO::Uncompress::Unzip->new($filename, Name => "data1") 1923 or die "IO::Uncompress::Unzip failed: $UnzipError\n"; 1924 1925=head2 Walking through a zip file 1926 1927The code below can be used to traverse a zip file, one compressed data 1928stream at a time. 1929 1930 use IO::Uncompress::Unzip qw($UnzipError); 1931 1932 my $zipfile = "somefile.zip"; 1933 my $u = IO::Uncompress::Unzip->new( $zipfile ) 1934 or die "Cannot open $zipfile: $UnzipError"; 1935 1936 my $status; 1937 for ($status = 1; $status > 0; $status = $u->nextStream()) 1938 { 1939 1940 my $name = $u->getHeaderInfo()->{Name}; 1941 warn "Processing member $name\n" ; 1942 1943 my $buff; 1944 while (($status = $u->read($buff)) > 0) { 1945 # Do something here 1946 } 1947 1948 last if $status < 0; 1949 } 1950 1951 die "Error processing $zipfile: $!\n" 1952 if $status < 0 ; 1953 1954Each individual compressed data stream is read until the logical 1955end-of-file is reached. Then C<nextStream> is called. 
This will skip to the 1956start of the next compressed data stream and clear the end-of-file flag. 1957 1958It is also worth noting that C<nextStream> can be called at any time -- you 1959don't have to wait until you have exhausted a compressed data stream before 1960skipping to the next one. 1961 1962=head2 Unzipping a complete zip file to disk 1963 1964Daniel S. Sterling has written a script that uses C<IO::Uncompress::Unzip> 1965to read a zip file and unzip its contents to disk. 1966 1967The script is available from L<https://gist.github.com/eqhmcow/5389877> 1968 1969=head2 Working with Net::FTP 1970 1971See L<IO::Compress::FAQ|IO::Compress::FAQ/"Compressed files and Net::FTP"> 1972 1973=head1 SUPPORT 1974 1975General feedback/questions/bug reports should be sent to 1976L<https://github.com/pmqs/IO-Compress/issues> (preferred) or 1977L<https://rt.cpan.org/Public/Dist/Display.html?Name=IO-Compress>. 1978 1979=head1 SEE ALSO 1980 1981L<Compress::Zlib>, L<IO::Compress::Gzip>, L<IO::Uncompress::Gunzip>, L<IO::Compress::Deflate>, L<IO::Uncompress::Inflate>, L<IO::Compress::RawDeflate>, L<IO::Uncompress::RawInflate>, L<IO::Compress::Bzip2>, L<IO::Uncompress::Bunzip2>, L<IO::Compress::Lzma>, L<IO::Uncompress::UnLzma>, L<IO::Compress::Xz>, L<IO::Uncompress::UnXz>, L<IO::Compress::Lzip>, L<IO::Uncompress::UnLzip>, L<IO::Compress::Lzop>, L<IO::Uncompress::UnLzop>, L<IO::Compress::Lzf>, L<IO::Uncompress::UnLzf>, L<IO::Compress::Zstd>, L<IO::Uncompress::UnZstd>, L<IO::Uncompress::AnyInflate>, L<IO::Uncompress::AnyUncompress> 1982 1983L<IO::Compress::FAQ|IO::Compress::FAQ> 1984 1985L<File::GlobMapper|File::GlobMapper>, L<Archive::Zip|Archive::Zip>, 1986L<Archive::Tar|Archive::Tar>, 1987L<IO::Zlib|IO::Zlib> 1988 1989For RFC 1950, 1951 and 1952 see 1990L<https://datatracker.ietf.org/doc/html/rfc1950>, 1991L<https://datatracker.ietf.org/doc/html/rfc1951> and 1992L<https://datatracker.ietf.org/doc/html/rfc1952> 1993 1994The I<zlib> compression library was written by Jean-loup 
Gailly 1995C<gzip@prep.ai.mit.edu> and Mark Adler C<madler@alumni.caltech.edu>. 1996 1997The primary site for the I<zlib> compression library is 1998L<http://www.zlib.org>. 1999 2000The primary site for the I<zlib-ng> compression library is 2001L<https://github.com/zlib-ng/zlib-ng>. 2002 2003The primary site for gzip is L<http://www.gzip.org>. 2004 2005=head1 AUTHOR 2006 2007This module was written by Paul Marquess, C<pmqs@cpan.org>. 2008 2009=head1 MODIFICATION HISTORY 2010 2011See the Changes file. 2012 2013=head1 COPYRIGHT AND LICENSE 2014 2015Copyright (c) 2005-2024 Paul Marquess. All rights reserved. 2016 2017This program is free software; you can redistribute it and/or 2018modify it under the same terms as Perl itself. 2019