1### the gnu tar specification: 2### https://www.gnu.org/software/tar/manual/tar.html 3### 4### and the pax format spec, which tar derives from: 5### https://www.opengroup.org/onlinepubs/007904975/utilities/pax.html 6 7package Archive::Tar; 8require 5.005_03; 9 10use Cwd; 11use IO::Zlib; 12use IO::File; 13use Carp qw(carp croak); 14use File::Spec (); 15use File::Spec::Unix (); 16use File::Path (); 17 18use Archive::Tar::File; 19use Archive::Tar::Constant; 20 21require Exporter; 22 23use strict; 24use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD 25 $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING $SAME_PERMISSIONS 26 $INSECURE_EXTRACT_MODE $ZERO_PAD_NUMBERS @ISA @EXPORT $RESOLVE_SYMLINK 27 $EXTRACT_BLOCK_SIZE 28 ]; 29 30@ISA = qw[Exporter]; 31@EXPORT = qw[ COMPRESS_GZIP COMPRESS_BZIP COMPRESS_XZ ]; 32$DEBUG = 0; 33$WARN = 1; 34$FOLLOW_SYMLINK = 0; 35$VERSION = "3.02_001"; 36$CHOWN = 1; 37$CHMOD = 1; 38$SAME_PERMISSIONS = $> == 0 ? 1 : 0; 39$DO_NOT_USE_PREFIX = 0; 40$INSECURE_EXTRACT_MODE = 0; 41$ZERO_PAD_NUMBERS = 0; 42$RESOLVE_SYMLINK = $ENV{'PERL5_AT_RESOLVE_SYMLINK'} || 'speed'; 43$EXTRACT_BLOCK_SIZE = 1024 * 1024 * 1024; 44 45BEGIN { 46 use Config; 47 $HAS_PERLIO = $Config::Config{useperlio}; 48 49 ### try and load IO::String anyway, so you can dynamically 50 ### switch between perlio and IO::String 51 $HAS_IO_STRING = eval { 52 require IO::String; 53 IO::String->import; 54 1; 55 } || 0; 56} 57 58=head1 NAME 59 60Archive::Tar - module for manipulations of tar archives 61 62=head1 SYNOPSIS 63 64 use Archive::Tar; 65 my $tar = Archive::Tar->new; 66 67 $tar->read('origin.tgz'); 68 $tar->extract(); 69 70 $tar->add_files('file/foo.pl', 'docs/README'); 71 $tar->add_data('file/baz.txt', 'This is the contents now'); 72 73 $tar->rename('oldname', 'new/file/name'); 74 $tar->chown('/', 'root'); 75 $tar->chown('/', 'root:root'); 76 $tar->chmod('/tmp', '1777'); 77 78 $tar->write('files.tar'); # plain tar 79 $tar->write('files.tgz', COMPRESS_GZIP); # gzip compressed 80 $tar->write('files.tbz', COMPRESS_BZIP); # bzip2 compressed 81 $tar->write('files.txz', COMPRESS_XZ); # xz compressed 82 83=head1 DESCRIPTION 84 85Archive::Tar provides an object oriented mechanism for handling tar 86files. It provides class methods for quick and easy files handling 87while also allowing for the creation of tar file objects for custom 88manipulation. If you have the IO::Zlib module installed, 89Archive::Tar will also support compressed or gzipped tar files. 90 91An object of class Archive::Tar represents a .tar(.gz) archive full 92of files and things. 93 94=head1 Object Methods 95 96=head2 Archive::Tar->new( [$file, $compressed] ) 97 98Returns a new Tar object. If given any arguments, C<new()> calls the 99C<read()> method automatically, passing on the arguments provided to 100the C<read()> method. 101 102If C<new()> is invoked with arguments and the C<read()> method fails 103for any reason, C<new()> returns undef. 104 105=cut 106 107my $tmpl = { 108 _data => [ ], 109 _file => 'Unknown', 110}; 111 112### install get/set accessors for this object. 113for my $key ( keys %$tmpl ) { 114 no strict 'refs'; 115 *{__PACKAGE__."::$key"} = sub { 116 my $self = shift; 117 $self->{$key} = $_[0] if @_; 118 return $self->{$key}; 119 } 120} 121 122sub new { 123 my $class = shift; 124 $class = ref $class if ref $class; 125 126 ### copying $tmpl here since a shallow copy makes it use the 127 ### same aref, causing for files to remain in memory always. 128 my $obj = bless { _data => [ ], _file => 'Unknown', _error => '' }, $class; 129 130 if (@_) { 131 unless ( $obj->read( @_ ) ) { 132 $obj->_error(qq[No data could be read from file]); 133 return; 134 } 135 } 136 137 return $obj; 138} 139 140=head2 $tar->read ( $filename|$handle, [$compressed, {opt => 'val'}] ) 141 142Read the given tar file into memory. 143The first argument can either be the name of a file or a reference to 144an already open filehandle (or an IO::Zlib object if it's compressed) 145 146The C<read> will I<replace> any previous content in C<$tar>! 147 148The second argument may be considered optional, but remains for 149backwards compatibility. Archive::Tar now looks at the file 150magic to determine what class should be used to open the file 151and will transparently Do The Right Thing. 152 153Archive::Tar will warn if you try to pass a bzip2 / xz compressed file and the 154IO::Uncompress::Bunzip2 / IO::Uncompress::UnXz are not available and simply return. 155 156Note that you can currently B<not> pass a C<gzip> compressed 157filehandle, which is not opened with C<IO::Zlib>, a C<bzip2> compressed 158filehandle, which is not opened with C<IO::Uncompress::Bunzip2>, a C<xz> compressed 159filehandle, which is not opened with C<IO::Uncompress::UnXz>, nor a string 160containing the full archive information (either compressed or 161uncompressed). These are worth while features, but not currently 162implemented. See the C<TODO> section. 163 164The third argument can be a hash reference with options. Note that 165all options are case-sensitive. 166 167=over 4 168 169=item limit 170 171Do not read more than C<limit> files. This is useful if you have 172very big archives, and are only interested in the first few files. 173 174=item filter 175 176Can be set to a regular expression. Only files with names that match 177the expression will be read. 178 179=item md5 180 181Set to 1 and the md5sum of files will be returned (instead of file data) 182 my $iter = Archive::Tar->iter( $file, 1, {md5 => 1} ); 183 while( my $f = $iter->() ) { 184 print $f->data . "\t" . $f->full_path . $/; 185 } 186 187=item extract 188 189If set to true, immediately extract entries when reading them. This 190gives you the same memory break as the C<extract_archive> function. 191Note however that entries will not be read into memory, but written 192straight to disk. This means no C<Archive::Tar::File> objects are 193created for you to inspect. 194 195=back 196 197All files are stored internally as C<Archive::Tar::File> objects. 198Please consult the L<Archive::Tar::File> documentation for details. 199 200Returns the number of files read in scalar context, and a list of 201C<Archive::Tar::File> objects in list context. 202 203=cut 204 205sub read { 206 my $self = shift; 207 my $file = shift; 208 my $gzip = shift || 0; 209 my $opts = shift || {}; 210 211 unless( defined $file ) { 212 $self->_error( qq[No file to read from!] ); 213 return; 214 } else { 215 $self->_file( $file ); 216 } 217 218 my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) ) 219 or return; 220 221 my $data = $self->_read_tar( $handle, $opts ) or return; 222 223 $self->_data( $data ); 224 225 return wantarray ? @$data : scalar @$data; 226} 227 228sub _get_handle { 229 my $self = shift; 230 my $file = shift; return unless defined $file; 231 my $compress = shift || 0; 232 my $mode = shift || READ_ONLY->( ZLIB ); # default to read only 233 234 ### Check if file is a file handle or IO glob 235 if ( ref $file ) { 236 return $file if eval{ *$file{IO} }; 237 return $file if eval{ $file->isa(q{IO::Handle}) }; 238 $file = q{}.$file; 239 } 240 241 ### get a FH opened to the right class, so we can use it transparently 242 ### throughout the program 243 my $fh; 244 { ### reading magic only makes sense if we're opening a file for 245 ### reading. otherwise, just use what the user requested. 246 my $magic = ''; 247 if( MODE_READ->($mode) ) { 248 open my $tmp, $file or do { 249 $self->_error( qq[Could not open '$file' for reading: $!] ); 250 return; 251 }; 252 253 ### read the first 6 bytes of the file to figure out which class to 254 ### use to open the file. 255 sysread( $tmp, $magic, 6 ); 256 close $tmp; 257 } 258 259 ### is it xz? 260 ### if you asked specifically for xz compression, or if we're in 261 ### read mode and the magic numbers add up, use xz 262 if( XZ and ( 263 ($compress eq COMPRESS_XZ) or 264 ( MODE_READ->($mode) and $magic =~ XZ_MAGIC_NUM ) 265 ) 266 ) { 267 if( MODE_READ->($mode) ) { 268 $fh = IO::Uncompress::UnXz->new( $file ) or do { 269 $self->_error( qq[Could not read '$file': ] . 270 $IO::Uncompress::UnXz::UnXzError 271 ); 272 return; 273 }; 274 } else { 275 $fh = IO::Compress::Xz->new( $file ) or do { 276 $self->_error( qq[Could not write to '$file': ] . 277 $IO::Compress::Xz::XzError 278 ); 279 return; 280 }; 281 } 282 283 ### is it bzip? 284 ### if you asked specifically for bzip compression, or if we're in 285 ### read mode and the magic numbers add up, use bzip 286 } elsif( BZIP and ( 287 ($compress eq COMPRESS_BZIP) or 288 ( MODE_READ->($mode) and $magic =~ BZIP_MAGIC_NUM ) 289 ) 290 ) { 291 292 ### different reader/writer modules, different error vars... sigh 293 if( MODE_READ->($mode) ) { 294 $fh = IO::Uncompress::Bunzip2->new( $file, MultiStream => 1 ) or do { 295 $self->_error( qq[Could not read '$file': ] . 296 $IO::Uncompress::Bunzip2::Bunzip2Error 297 ); 298 return; 299 }; 300 301 } else { 302 $fh = IO::Compress::Bzip2->new( $file ) or do { 303 $self->_error( qq[Could not write to '$file': ] . 304 $IO::Compress::Bzip2::Bzip2Error 305 ); 306 return; 307 }; 308 } 309 310 ### is it gzip? 311 ### if you asked for compression, if you wanted to read or the gzip 312 ### magic number is present (redundant with read) 313 } elsif( ZLIB and ( 314 $compress or MODE_READ->($mode) or $magic =~ GZIP_MAGIC_NUM 315 ) 316 ) { 317 $fh = IO::Zlib->new; 318 319 unless( $fh->open( $file, $mode ) ) { 320 $self->_error(qq[Could not create filehandle for '$file': $!]); 321 return; 322 } 323 324 ### is it plain tar? 325 } else { 326 $fh = IO::File->new; 327 328 unless( $fh->open( $file, $mode ) ) { 329 $self->_error(qq[Could not create filehandle for '$file': $!]); 330 return; 331 } 332 333 ### enable bin mode on tar archives 334 binmode $fh; 335 } 336 } 337 338 return $fh; 339} 340 341 342sub _read_tar { 343 my $self = shift; 344 my $handle = shift or return; 345 my $opts = shift || {}; 346 347 my $count = $opts->{limit} || 0; 348 my $filter = $opts->{filter}; 349 my $md5 = $opts->{md5} || 0; # cdrake 350 my $filter_cb = $opts->{filter_cb}; 351 my $extract = $opts->{extract} || 0; 352 353 ### set a cap on the amount of files to extract ### 354 my $limit = 0; 355 $limit = 1 if $count > 0; 356 357 my $tarfile = [ ]; 358 my $chunk; 359 my $read = 0; 360 my $real_name; # to set the name of a file when 361 # we're encountering @longlink 362 my $data; 363 364 LOOP: 365 while( $handle->read( $chunk, HEAD ) ) { 366 ### IO::Zlib doesn't support this yet 367 my $offset; 368 if ( ref($handle) ne 'IO::Zlib' ) { 369 local $@; 370 $offset = eval { tell $handle } || 'unknown'; 371 $@ = ''; 372 } 373 else { 374 $offset = 'unknown'; 375 } 376 377 unless( $read++ ) { 378 my $gzip = GZIP_MAGIC_NUM; 379 if( $chunk =~ /$gzip/ ) { 380 $self->_error( qq[Cannot read compressed format in tar-mode] ); 381 return; 382 } 383 384 ### size is < HEAD, which means a corrupted file, as the minimum 385 ### length is _at least_ HEAD 386 if (length $chunk != HEAD) { 387 $self->_error( qq[Cannot read enough bytes from the tarfile] ); 388 return; 389 } 390 } 391 392 ### if we can't read in all bytes... ### 393 last if length $chunk != HEAD; 394 395 ### Apparently this should really be two blocks of 512 zeroes, 396 ### but GNU tar sometimes gets it wrong. See comment in the 397 ### source code (tar.c) to GNU cpio. 398 next if $chunk eq TAR_END; 399 400 ### according to the posix spec, the last 12 bytes of the header are 401 ### null bytes, to pad it to a 512 byte block. That means if these 402 ### bytes are NOT null bytes, it's a corrupt header. See: 403 ### www.koders.com/c/fidCE473AD3D9F835D690259D60AD5654591D91D5BA.aspx 404 ### line 111 405 { my $nulls = join '', "\0" x 12; 406 unless( $nulls eq substr( $chunk, 500, 12 ) ) { 407 $self->_error( qq[Invalid header block at offset $offset] ); 408 next LOOP; 409 } 410 } 411 412 ### pass the realname, so we can set it 'proper' right away 413 ### some of the heuristics are done on the name, so important 414 ### to set it ASAP 415 my $entry; 416 { my %extra_args = (); 417 $extra_args{'name'} = $$real_name if defined $real_name; 418 419 unless( $entry = Archive::Tar::File->new( chunk => $chunk, 420 %extra_args ) 421 ) { 422 $self->_error( qq[Couldn't read chunk at offset $offset] ); 423 next LOOP; 424 } 425 } 426 427 ### ignore labels: 428 ### https://www.gnu.org/software/tar/manual/html_chapter/Media.html#SEC159 429 next if $entry->is_label; 430 431 if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) { 432 433 if ( $entry->is_file && !$entry->validate ) { 434 ### sometimes the chunk is rather fux0r3d and a whole 512 435 ### bytes ends up in the ->name area. 436 ### clean it up, if need be 437 my $name = $entry->name; 438 $name = substr($name, 0, 100) if length $name > 100; 439 $name =~ s/\n/ /g; 440 441 $self->_error( $name . qq[: checksum error] ); 442 next LOOP; 443 } 444 445 my $block = BLOCK_SIZE->( $entry->size ); 446 447 $data = $entry->get_content_by_ref; 448 449 my $skip = 0; 450 my $ctx; # cdrake 451 ### skip this entry if we're filtering 452 453 if($md5) { # cdrake 454 $ctx = Digest::MD5->new; # cdrake 455 $skip=5; # cdrake 456 457 } elsif ($filter && $entry->name !~ $filter) { 458 $skip = 1; 459 460 } elsif ($filter_cb && ! $filter_cb->($entry)) { 461 $skip = 2; 462 463 ### skip this entry if it's a pax header. This is a special file added 464 ### by, among others, git-generated tarballs. It holds comments and is 465 ### not meant for extracting. See #38932: pax_global_header extracted 466 } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) { 467 $skip = 3; 468 } 469 470 if ($skip) { 471 # 472 # Since we're skipping, do not allocate memory for the 473 # whole file. Read it 64 BLOCKS at a time. Do not 474 # complete the skip yet because maybe what we read is a 475 # longlink and it won't get skipped after all 476 # 477 my $amt = $block; 478 my $fsz=$entry->size; # cdrake 479 while ($amt > 0) { 480 $$data = ''; 481 my $this = 64 * BLOCK; 482 $this = $amt if $this > $amt; 483 if( $handle->read( $$data, $this ) < $this ) { 484 $self->_error( qq[Read error on tarfile (missing data) ']. 485 $entry->full_path ."' at offset $offset" ); 486 next LOOP; 487 } 488 $amt -= $this; 489 $fsz -= $this; # cdrake 490 substr ($$data, $fsz) = "" if ($fsz<0); # remove external junk prior to md5 # cdrake 491 $ctx->add($$data) if($skip==5); # cdrake 492 } 493 $$data = $ctx->hexdigest if($skip==5 && !$entry->is_longlink && !$entry->is_unknown && !$entry->is_label ) ; # cdrake 494 } else { 495 496 ### just read everything into memory 497 ### can't do lazy loading since IO::Zlib doesn't support 'seek' 498 ### this is because Compress::Zlib doesn't support it =/ 499 ### this reads in the whole data in one read() call. 500 if ( $handle->read( $$data, $block ) < $block ) { 501 $self->_error( qq[Read error on tarfile (missing data) ']. 502 $entry->full_path ."' at offset $offset" ); 503 next LOOP; 504 } 505 ### throw away trailing garbage ### 506 substr ($$data, $entry->size) = "" if defined $$data; 507 } 508 509 ### part II of the @LongLink munging -- need to do /after/ 510 ### the checksum check. 511 if( $entry->is_longlink ) { 512 ### weird thing in tarfiles -- if the file is actually a 513 ### @LongLink, the data part seems to have a trailing ^@ 514 ### (unprintable) char. to display, pipe output through less. 515 ### but that doesn't *always* happen.. so check if the last 516 ### character is a control character, and if so remove it 517 ### at any rate, we better remove that character here, or tests 518 ### like 'eq' and hash lookups based on names will SO not work 519 ### remove it by calculating the proper size, and then 520 ### tossing out everything that's longer than that size. 521 522 ### count number of nulls 523 my $nulls = $$data =~ tr/\0/\0/; 524 525 ### cut data + size by that many bytes 526 $entry->size( $entry->size - $nulls ); 527 substr ($$data, $entry->size) = ""; 528 } 529 } 530 531 ### clean up of the entries.. posix tar /apparently/ has some 532 ### weird 'feature' that allows for filenames > 255 characters 533 ### they'll put a header in with as name '././@LongLink' and the 534 ### contents will be the name of the /next/ file in the archive 535 ### pretty crappy and kludgy if you ask me 536 537 ### set the name for the next entry if this is a @LongLink; 538 ### this is one ugly hack =/ but needed for direct extraction 539 if( $entry->is_longlink ) { 540 $real_name = $data; 541 next LOOP; 542 } elsif ( defined $real_name ) { 543 $entry->name( $$real_name ); 544 $entry->prefix(''); 545 undef $real_name; 546 } 547 548 if ($filter && $entry->name !~ $filter) { 549 next LOOP; 550 551 } elsif ($filter_cb && ! $filter_cb->($entry)) { 552 next LOOP; 553 554 ### skip this entry if it's a pax header. This is a special file added 555 ### by, among others, git-generated tarballs. It holds comments and is 556 ### not meant for extracting. See #38932: pax_global_header extracted 557 } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) { 558 next LOOP; 559 } 560 561 if ( $extract && !$entry->is_longlink 562 && !$entry->is_unknown 563 && !$entry->is_label ) { 564 $self->_extract_file( $entry ) or return; 565 } 566 567 ### Guard against tarfiles with garbage at the end 568 last LOOP if $entry->name eq ''; 569 570 ### push only the name on the rv if we're extracting 571 ### -- for extract_archive 572 push @$tarfile, ($extract ? $entry->name : $entry); 573 574 if( $limit ) { 575 $count-- unless $entry->is_longlink || $entry->is_dir; 576 last LOOP unless $count; 577 } 578 } continue { 579 undef $data; 580 } 581 582 return $tarfile; 583} 584 585=head2 $tar->contains_file( $filename ) 586 587Check if the archive contains a certain file. 588It will return true if the file is in the archive, false otherwise. 589 590Note however, that this function does an exact match using C<eq> 591on the full path. So it cannot compensate for case-insensitive file- 592systems or compare 2 paths to see if they would point to the same 593underlying file. 594 595=cut 596 597sub contains_file { 598 my $self = shift; 599 my $full = shift; 600 601 return unless defined $full; 602 603 ### don't warn if the entry isn't there.. that's what this function 604 ### is for after all. 605 local $WARN = 0; 606 return 1 if $self->_find_entry($full); 607 return; 608} 609 610=head2 $tar->extract( [@filenames] ) 611 612Write files whose names are equivalent to any of the names in 613C<@filenames> to disk, creating subdirectories as necessary. This 614might not work too well under VMS. 615Under MacPerl, the file's modification time will be converted to the 616MacOS zero of time, and appropriate conversions will be done to the 617path. However, the length of each element of the path is not 618inspected to see whether it's longer than MacOS currently allows (32 619characters). 620 621If C<extract> is called without a list of file names, the entire 622contents of the archive are extracted. 623 624Returns a list of filenames extracted. 625 626=cut 627 628sub extract { 629 my $self = shift; 630 my @args = @_; 631 my @files; 632 my $hashmap; 633 634 # use the speed optimization for all extracted files 635 local($self->{cwd}) = cwd() unless $self->{cwd}; 636 637 ### you requested the extraction of only certain files 638 if( @args ) { 639 for my $file ( @args ) { 640 641 ### it's already an object? 642 if( UNIVERSAL::isa( $file, 'Archive::Tar::File' ) ) { 643 push @files, $file; 644 next; 645 646 ### go find it then 647 } else { 648 649 # create hash-map once to speed up lookup 650 $hashmap = $hashmap || { 651 map { $_->full_path, $_ } @{$self->_data} 652 }; 653 654 if (exists $hashmap->{$file}) { 655 ### we found the file you're looking for 656 push @files, $hashmap->{$file}; 657 } else { 658 return $self->_error( 659 qq[Could not find '$file' in archive] ); 660 } 661 } 662 } 663 664 ### just grab all the file items 665 } else { 666 @files = $self->get_files; 667 } 668 669 ### nothing found? that's an error 670 unless( scalar @files ) { 671 $self->_error( qq[No files found for ] . $self->_file ); 672 return; 673 } 674 675 ### now extract them 676 for my $entry ( @files ) { 677 unless( $self->_extract_file( $entry ) ) { 678 $self->_error(q[Could not extract ']. $entry->full_path .q['] ); 679 return; 680 } 681 } 682 683 return @files; 684} 685 686=head2 $tar->extract_file( $file, [$extract_path] ) 687 688Write an entry, whose name is equivalent to the file name provided to 689disk. Optionally takes a second parameter, which is the full native 690path (including filename) the entry will be written to. 691 692For example: 693 694 $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' ); 695 696 $tar->extract_file( $at_file_object, 'name/i/want/to/give/it' ); 697 698Returns true on success, false on failure. 699 700=cut 701 702sub extract_file { 703 my $self = shift; 704 my $file = shift; return unless defined $file; 705 my $alt = shift; 706 707 my $entry = $self->_find_entry( $file ) 708 or $self->_error( qq[Could not find an entry for '$file'] ), return; 709 710 return $self->_extract_file( $entry, $alt ); 711} 712 713sub _extract_file { 714 my $self = shift; 715 my $entry = shift or return; 716 my $alt = shift; 717 718 ### you wanted an alternate extraction location ### 719 my $name = defined $alt ? $alt : $entry->full_path; 720 721 ### splitpath takes a bool at the end to indicate 722 ### that it's splitting a dir 723 my ($vol,$dirs,$file); 724 if ( defined $alt ) { # It's a local-OS path 725 ($vol,$dirs,$file) = File::Spec->splitpath( $alt, 726 $entry->is_dir ); 727 } else { 728 ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name, 729 $entry->is_dir ); 730 } 731 732 my $dir; 733 ### is $name an absolute path? ### 734 if( $vol || File::Spec->file_name_is_absolute( $dirs ) ) { 735 736 ### absolute names are not allowed to be in tarballs under 737 ### strict mode, so only allow it if a user tells us to do it 738 if( not defined $alt and not $INSECURE_EXTRACT_MODE ) { 739 $self->_error( 740 q[Entry ']. $entry->full_path .q[' is an absolute path. ]. 741 q[Not extracting absolute paths under SECURE EXTRACT MODE] 742 ); 743 return; 744 } 745 746 ### user asked us to, it's fine. 747 $dir = File::Spec->catpath( $vol, $dirs, "" ); 748 749 ### it's a relative path ### 750 } else { 751 my $cwd = (ref $self and defined $self->{cwd}) 752 ? $self->{cwd} 753 : cwd(); 754 755 my @dirs = defined $alt 756 ? File::Spec->splitdir( $dirs ) # It's a local-OS path 757 : File::Spec::Unix->splitdir( $dirs ); # it's UNIX-style, likely 758 # straight from the tarball 759 760 if( not defined $alt and 761 not $INSECURE_EXTRACT_MODE 762 ) { 763 764 ### paths that leave the current directory are not allowed under 765 ### strict mode, so only allow it if a user tells us to do this. 766 if( grep { $_ eq '..' } @dirs ) { 767 768 $self->_error( 769 q[Entry ']. $entry->full_path .q[' is attempting to leave ]. 770 q[the current working directory. Not extracting under ]. 771 q[SECURE EXTRACT MODE] 772 ); 773 return; 774 } 775 776 ### the archive may be asking us to extract into a symlink. This 777 ### is not sane and a possible security issue, as outlined here: 778 ### https://rt.cpan.org/Ticket/Display.html?id=30380 779 ### https://bugzilla.redhat.com/show_bug.cgi?id=295021 780 ### https://issues.rpath.com/browse/RPL-1716 781 my $full_path = $cwd; 782 for my $d ( @dirs ) { 783 $full_path = File::Spec->catdir( $full_path, $d ); 784 785 ### we've already checked this one, and it's safe. Move on. 786 next if ref $self and $self->{_link_cache}->{$full_path}; 787 788 if( -l $full_path ) { 789 my $to = readlink $full_path; 790 my $diag = "symlinked directory ($full_path => $to)"; 791 792 $self->_error( 793 q[Entry ']. $entry->full_path .q[' is attempting to ]. 794 qq[extract to a $diag. This is considered a security ]. 795 q[vulnerability and not allowed under SECURE EXTRACT ]. 796 q[MODE] 797 ); 798 return; 799 } 800 801 ### XXX keep a cache if possible, so the stats become cheaper: 802 $self->{_link_cache}->{$full_path} = 1 if ref $self; 803 } 804 } 805 806 ### '.' is the directory delimiter on VMS, which has to be escaped 807 ### or changed to '_' on vms. vmsify is used, because older versions 808 ### of vmspath do not handle this properly. 809 ### Must not add a '/' to an empty directory though. 810 map { length() ? VMS::Filespec::vmsify($_.'/') : $_ } @dirs if ON_VMS; 811 812 my ($cwd_vol,$cwd_dir,$cwd_file) 813 = File::Spec->splitpath( $cwd ); 814 my @cwd = File::Spec->splitdir( $cwd_dir ); 815 push @cwd, $cwd_file if length $cwd_file; 816 817 ### We need to pass '' as the last element to catpath. Craig Berry 818 ### explains why (msgid <p0624083dc311ae541393@[172.16.52.1]>): 819 ### The root problem is that splitpath on UNIX always returns the 820 ### final path element as a file even if it is a directory, and of 821 ### course there is no way it can know the difference without checking 822 ### against the filesystem, which it is documented as not doing. When 823 ### you turn around and call catpath, on VMS you have to know which bits 824 ### are directory bits and which bits are file bits. In this case we 825 ### know the result should be a directory. I had thought you could omit 826 ### the file argument to catpath in such a case, but apparently on UNIX 827 ### you can't. 828 $dir = File::Spec->catpath( 829 $cwd_vol, File::Spec->catdir( @cwd, @dirs ), '' 830 ); 831 832 ### catdir() returns undef if the path is longer than 255 chars on 833 ### older VMS systems. 834 unless ( defined $dir ) { 835 $^W && $self->_error( qq[Could not compose a path for '$dirs'\n] ); 836 return; 837 } 838 839 } 840 841 if( -e $dir && !-d _ ) { 842 $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] ); 843 return; 844 } 845 846 unless ( -d _ ) { 847 eval { File::Path::mkpath( $dir, 0, 0777 ) }; 848 if( $@ ) { 849 my $fp = $entry->full_path; 850 $self->_error(qq[Could not create directory '$dir' for '$fp': $@]); 851 return; 852 } 853 854 ### XXX chown here? that might not be the same as in the archive 855 ### as we're only chown'ing to the owner of the file we're extracting 856 ### not to the owner of the directory itself, which may or may not 857 ### be another entry in the archive 858 ### Answer: no, gnu tar doesn't do it either, it'd be the wrong 859 ### way to go. 860 #if( $CHOWN && CAN_CHOWN ) { 861 # chown $entry->uid, $entry->gid, $dir or 862 # $self->_error( qq[Could not set uid/gid on '$dir'] ); 863 #} 864 } 865 866 ### we're done if we just needed to create a dir ### 867 return 1 if $entry->is_dir; 868 869 my $full = File::Spec->catfile( $dir, $file ); 870 871 if( $entry->is_unknown ) { 872 $self->_error( qq[Unknown file type for file '$full'] ); 873 return; 874 } 875 876 ### If a file system already contains a block device with the same name as 877 ### the being extracted regular file, we would write the file's content 878 ### to the block device. So remove the existing file (block device) now. 879 ### If an archive contains multiple same-named entries, the last one 880 ### should replace the previous ones. So remove the old file now. 881 ### If the old entry is a symlink to a file outside of the CWD, the new 882 ### entry would create a file there. This is CVE-2018-12015 883 ### <https://rt.cpan.org/Ticket/Display.html?id=125523>. 884 if (-l $full || -e _) { 885 if (!unlink $full) { 886 $self->_error( qq[Could not remove old file '$full': $!] ); 887 return; 888 } 889 } 890 if( length $entry->type && $entry->is_file ) { 891 my $fh = IO::File->new; 892 $fh->open( $full, '>' ) or ( 893 $self->_error( qq[Could not open file '$full': $!] ), 894 return 895 ); 896 897 if( $entry->size ) { 898 binmode $fh; 899 my $offset = 0; 900 my $content = $entry->get_content_by_ref(); 901 while ($offset < $entry->size) { 902 my $written 903 = syswrite $fh, $$content, $EXTRACT_BLOCK_SIZE, $offset; 904 if (defined $written) { 905 $offset += $written; 906 } else { 907 $self->_error( qq[Could not write data to '$full': $!] ); 908 return; 909 } 910 } 911 } 912 913 close $fh or ( 914 $self->_error( qq[Could not close file '$full'] ), 915 return 916 ); 917 918 } else { 919 $self->_make_special_file( $entry, $full ) or return; 920 } 921 922 ### only update the timestamp if it's not a symlink; that will change the 923 ### timestamp of the original. This addresses bug #33669: Could not update 924 ### timestamp warning on symlinks 925 if( not -l $full ) { 926 utime time, $entry->mtime - TIME_OFFSET, $full or 927 $self->_error( qq[Could not update timestamp] ); 928 } 929 930 if( $CHOWN && CAN_CHOWN->() and not -l $full ) { 931 CORE::chown( $entry->uid, $entry->gid, $full ) or 932 $self->_error( qq[Could not set uid/gid on '$full'] ); 933 } 934 935 ### only chmod if we're allowed to, but never chmod symlinks, since they'll 936 ### change the perms on the file they're linking too... 937 if( $CHMOD and not -l $full ) { 938 my $mode = $entry->mode; 939 unless ($SAME_PERMISSIONS) { 940 $mode &= ~(oct(7000) | umask); 941 } 942 CORE::chmod( $mode, $full ) or 943 $self->_error( qq[Could not chown '$full' to ] . $entry->mode ); 944 } 945 946 return 1; 947} 948 949sub _make_special_file { 950 my $self = shift; 951 my $entry = shift or return; 952 my $file = shift; return unless defined $file; 953 954 my $err; 955 956 if( $entry->is_symlink ) { 957 my $fail; 958 if( ON_UNIX ) { 959 symlink( $entry->linkname, $file ) or $fail++; 960 961 } else { 962 $self->_extract_special_file_as_plain_file( $entry, $file ) 963 or $fail++; 964 } 965 966 $err = qq[Making symbolic link '$file' to '] . 967 $entry->linkname .q[' failed] if $fail; 968 969 } elsif ( $entry->is_hardlink ) { 970 my $fail; 971 if( ON_UNIX ) { 972 link( $entry->linkname, $file ) or $fail++; 973 974 } else { 975 $self->_extract_special_file_as_plain_file( $entry, $file ) 976 or $fail++; 977 } 978 979 $err = qq[Making hard link from '] . $entry->linkname . 980 qq[' to '$file' failed] if $fail; 981 982 } elsif ( $entry->is_fifo ) { 983 ON_UNIX && !system('mknod', $file, 'p') or 984 $err = qq[Making fifo ']. $entry->name .qq[' failed]; 985 986 } elsif ( $entry->is_blockdev or $entry->is_chardev ) { 987 my $mode = $entry->is_blockdev ? 'b' : 'c'; 988 989 ON_UNIX && !system('mknod', $file, $mode, 990 $entry->devmajor, $entry->devminor) or 991 $err = qq[Making block device ']. $entry->name .qq[' (maj=] . 992 $entry->devmajor . qq[ min=] . $entry->devminor . 993 qq[) failed.]; 994 995 } elsif ( $entry->is_socket ) { 996 ### the original doesn't do anything special for sockets.... ### 997 1; 998 } 999 1000 return $err ? $self->_error( $err ) : 1; 1001} 1002 1003### don't know how to make symlinks, let's just extract the file as 1004### a plain file 1005sub _extract_special_file_as_plain_file { 1006 my $self = shift; 1007 my $entry = shift or return; 1008 my $file = shift; return unless defined $file; 1009 1010 my $err; 1011 TRY: { 1012 my $orig = $self->_find_entry( $entry->linkname, $entry ); 1013 1014 unless( $orig ) { 1015 $err = qq[Could not find file '] . $entry->linkname . 1016 qq[' in memory.]; 1017 last TRY; 1018 } 1019 1020 ### clone the entry, make it appear as a normal file ### 1021 my $clone = $orig->clone; 1022 $clone->_downgrade_to_plainfile; 1023 $self->_extract_file( $clone, $file ) or last TRY; 1024 1025 return 1; 1026 } 1027 1028 return $self->_error($err); 1029} 1030 1031=head2 $tar->list_files( [\@properties] ) 1032 1033Returns a list of the names of all the files in the archive. 1034 1035If C<list_files()> is passed an array reference as its first argument 1036it returns a list of hash references containing the requested 1037properties of each file. The following list of properties is 1038supported: name, size, mtime (last modified date), mode, uid, gid, 1039linkname, uname, gname, devmajor, devminor, prefix. 1040 1041Passing an array reference containing only one element, 'name', is 1042special cased to return a list of names rather than a list of hash 1043references, making it equivalent to calling C<list_files> without 1044arguments. 1045 1046=cut 1047 1048sub list_files { 1049 my $self = shift; 1050 my $aref = shift || [ ]; 1051 1052 unless( $self->_data ) { 1053 $self->read() or return; 1054 } 1055 1056 if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) { 1057 return map { $_->full_path } @{$self->_data}; 1058 } else { 1059 1060 #my @rv; 1061 #for my $obj ( @{$self->_data} ) { 1062 # push @rv, { map { $_ => $obj->$_() } @$aref }; 1063 #} 1064 #return @rv; 1065 1066 ### this does the same as the above.. just needs a +{ } 1067 ### to make sure perl doesn't confuse it for a block 1068 return map { my $o=$_; 1069 +{ map { $_ => $o->$_() } @$aref } 1070 } @{$self->_data}; 1071 } 1072} 1073 1074sub _find_entry { 1075 my $self = shift; 1076 my $file = shift; 1077 1078 unless( defined $file ) { 1079 $self->_error( qq[No file specified] ); 1080 return; 1081 } 1082 1083 ### it's an object already 1084 return $file if UNIVERSAL::isa( $file, 'Archive::Tar::File' ); 1085 1086seach_entry: 1087 if($self->_data){ 1088 for my $entry ( @{$self->_data} ) { 1089 my $path = $entry->full_path; 1090 return $entry if $path eq $file; 1091 } 1092 } 1093 1094 if($Archive::Tar::RESOLVE_SYMLINK!~/none/){ 1095 if(my $link_entry = shift()){#fallback mode when symlinks are using relative notations ( ../a/./b/text.bin ) 1096 $file = _symlinks_resolver( $link_entry->name, $file ); 1097 goto seach_entry if $self->_data; 1098 1099 #this will be slower than never, but won't failed! 1100 1101 my $iterargs = $link_entry->{'_archive'}; 1102 if($Archive::Tar::RESOLVE_SYMLINK=~/speed/ && @$iterargs==3){ 1103 #faster but whole archive will be read in memory 1104 #read whole archive and share data 1105 my $archive = Archive::Tar->new; 1106 $archive->read( @$iterargs ); 1107 push @$iterargs, $archive; #take a trace for destruction 1108 if($archive->_data){ 1109 $self->_data( $archive->_data ); 1110 goto seach_entry; 1111 } 1112 }#faster 1113 1114 {#slower but lower memory usage 1115 # $iterargs = [$filename, $compressed, $opts]; 1116 my $next = Archive::Tar->iter( @$iterargs ); 1117 while(my $e = $next->()){ 1118 if($e->full_path eq $file){ 1119 undef $next; 1120 return $e; 1121 } 1122 } 1123 }#slower 1124 } 1125 } 1126 1127 $self->_error( qq[No such file in archive: '$file'] ); 1128 return; 1129} 1130 1131=head2 $tar->get_files( [@filenames] ) 1132 1133Returns the C<Archive::Tar::File> objects matching the filenames 1134provided. If no filename list was passed, all C<Archive::Tar::File> 1135objects in the current Tar object are returned. 1136 1137Please refer to the C<Archive::Tar::File> documentation on how to 1138handle these objects. 1139 1140=cut 1141 1142sub get_files { 1143 my $self = shift; 1144 1145 return @{ $self->_data } unless @_; 1146 1147 my @list; 1148 for my $file ( @_ ) { 1149 push @list, grep { defined } $self->_find_entry( $file ); 1150 } 1151 1152 return @list; 1153} 1154 1155=head2 $tar->get_content( $file ) 1156 1157Return the content of the named file. 1158 1159=cut 1160 1161sub get_content { 1162 my $self = shift; 1163 my $entry = $self->_find_entry( shift ) or return; 1164 1165 return $entry->data; 1166} 1167 1168=head2 $tar->replace_content( $file, $content ) 1169 1170Make the string $content be the content for the file named $file. 1171 1172=cut 1173 1174sub replace_content { 1175 my $self = shift; 1176 my $entry = $self->_find_entry( shift ) or return; 1177 1178 return $entry->replace_content( shift ); 1179} 1180 1181=head2 $tar->rename( $file, $new_name ) 1182 1183Rename the file of the in-memory archive to $new_name. 1184 1185Note that you must specify a Unix path for $new_name, since per tar 1186standard, all files in the archive must be Unix paths. 1187 1188Returns true on success and false on failure. 1189 1190=cut 1191 1192sub rename { 1193 my $self = shift; 1194 my $file = shift; return unless defined $file; 1195 my $new = shift; return unless defined $new; 1196 1197 my $entry = $self->_find_entry( $file ) or return; 1198 1199 return $entry->rename( $new ); 1200} 1201 1202=head2 $tar->chmod( $file, $mode ) 1203 1204Change mode of $file to $mode. 1205 1206Returns true on success and false on failure. 1207 1208=cut 1209 1210sub chmod { 1211 my $self = shift; 1212 my $file = shift; return unless defined $file; 1213 my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/; 1214 my @args = ("$mode"); 1215 1216 my $entry = $self->_find_entry( $file ) or return; 1217 my $x = $entry->chmod( @args ); 1218 return $x; 1219} 1220 1221=head2 $tar->chown( $file, $uname [, $gname] ) 1222 1223Change owner $file to $uname and $gname. 1224 1225Returns true on success and false on failure. 1226 1227=cut 1228 1229sub chown { 1230 my $self = shift; 1231 my $file = shift; return unless defined $file; 1232 my $uname = shift; return unless defined $uname; 1233 my @args = ($uname); 1234 push(@args, shift); 1235 1236 my $entry = $self->_find_entry( $file ) or return; 1237 my $x = $entry->chown( @args ); 1238 return $x; 1239} 1240 1241=head2 $tar->remove (@filenamelist) 1242 1243Removes any entries with names matching any of the given filenames 1244from the in-memory archive. Returns a list of C<Archive::Tar::File> 1245objects that remain. 1246 1247=cut 1248 1249sub remove { 1250 my $self = shift; 1251 my @list = @_; 1252 1253 my %seen = map { $_->full_path => $_ } @{$self->_data}; 1254 delete $seen{ $_ } for @list; 1255 1256 $self->_data( [values %seen] ); 1257 1258 return values %seen; 1259} 1260 1261=head2 $tar->clear 1262 1263C<clear> clears the current in-memory archive. This effectively gives 1264you a 'blank' object, ready to be filled again. Note that C<clear> 1265only has effect on the object, not the underlying tarfile. 1266 1267=cut 1268 1269sub clear { 1270 my $self = shift or return; 1271 1272 $self->_data( [] ); 1273 $self->_file( '' ); 1274 1275 return 1; 1276} 1277 1278 1279=head2 $tar->write ( [$file, $compressed, $prefix] ) 1280 1281Write the in-memory archive to disk. The first argument can either 1282be the name of a file or a reference to an already open filehandle (a 1283GLOB reference). 1284 1285The second argument is used to indicate compression. You can 1286compress using C<gzip>, C<bzip2> or C<xz>. If you pass a digit, it's assumed 1287to be the C<gzip> compression level (between 1 and 9), but the use of 1288constants is preferred: 1289 1290 # write a gzip compressed file 1291 $tar->write( 'out.tgz', COMPRESS_GZIP ); 1292 1293 # write a bzip compressed file 1294 $tar->write( 'out.tbz', COMPRESS_BZIP ); 1295 1296 # write a xz compressed file 1297 $tar->write( 'out.txz', COMPRESS_XZ ); 1298 1299Note that when you pass in a filehandle, the compression argument 1300is ignored, as all files are printed verbatim to your filehandle. 1301If you wish to enable compression with filehandles, use an 1302C<IO::Zlib>, C<IO::Compress::Bzip2> or C<IO::Compress::Xz> filehandle instead. 1303 1304The third argument is an optional prefix. All files will be tucked 1305away in the directory you specify as prefix. So if you have files 1306'a' and 'b' in your archive, and you specify 'foo' as prefix, they 1307will be written to the archive as 'foo/a' and 'foo/b'. 1308 1309If no arguments are given, C<write> returns the entire formatted 1310archive as a string, which could be useful if you'd like to stuff the 1311archive into a socket or a pipe to gzip or something. 1312 1313 1314=cut 1315 1316sub write { 1317 my $self = shift; 1318 my $file = shift; $file = '' unless defined $file; 1319 my $gzip = shift || 0; 1320 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix; 1321 my $dummy = ''; 1322 1323 ### only need a handle if we have a file to print to ### 1324 my $handle = length($file) 1325 ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) ) 1326 or return ) 1327 : $HAS_PERLIO ? do { open my $h, '>', \$dummy; $h } 1328 : $HAS_IO_STRING ? IO::String->new 1329 : __PACKAGE__->no_string_support(); 1330 1331 ### Addresses: #41798: Nonempty $\ when writing a TAR file produces a 1332 ### corrupt TAR file. Must clear out $\ to make sure no garbage is 1333 ### printed to the archive 1334 local $\; 1335 1336 for my $entry ( @{$self->_data} ) { 1337 ### entries to be written to the tarfile ### 1338 my @write_me; 1339 1340 ### only now will we change the object to reflect the current state 1341 ### of the name and prefix fields -- this needs to be limited to 1342 ### write() only! 1343 my $clone = $entry->clone; 1344 1345 1346 ### so, if you don't want use to use the prefix, we'll stuff 1347 ### everything in the name field instead 1348 if( $DO_NOT_USE_PREFIX ) { 1349 1350 ### you might have an extended prefix, if so, set it in the clone 1351 ### XXX is ::Unix right? 1352 $clone->name( length $ext_prefix 1353 ? File::Spec::Unix->catdir( $ext_prefix, 1354 $clone->full_path) 1355 : $clone->full_path ); 1356 $clone->prefix( '' ); 1357 1358 ### otherwise, we'll have to set it properly -- prefix part in the 1359 ### prefix and name part in the name field. 1360 } else { 1361 1362 ### split them here, not before! 1363 my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path ); 1364 1365 ### you might have an extended prefix, if so, set it in the clone 1366 ### XXX is ::Unix right? 1367 $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix ) 1368 if length $ext_prefix; 1369 1370 $clone->prefix( $prefix ); 1371 $clone->name( $name ); 1372 } 1373 1374 ### names are too long, and will get truncated if we don't add a 1375 ### '@LongLink' file... 1376 my $make_longlink = ( length($clone->name) > NAME_LENGTH or 1377 length($clone->prefix) > PREFIX_LENGTH 1378 ) || 0; 1379 1380 ### perhaps we need to make a longlink file? 1381 if( $make_longlink ) { 1382 my $longlink = Archive::Tar::File->new( 1383 data => LONGLINK_NAME, 1384 $clone->full_path, 1385 { type => LONGLINK } 1386 ); 1387 1388 unless( $longlink ) { 1389 $self->_error( qq[Could not create 'LongLink' entry for ] . 1390 qq[oversize file '] . $clone->full_path ."'" ); 1391 return; 1392 }; 1393 1394 push @write_me, $longlink; 1395 } 1396 1397 push @write_me, $clone; 1398 1399 ### write the one, optionally 2 a::t::file objects to the handle 1400 for my $clone (@write_me) { 1401 1402 ### if the file is a symlink, there are 2 options: 1403 ### either we leave the symlink intact, but then we don't write any 1404 ### data OR we follow the symlink, which means we actually make a 1405 ### copy. if we do the latter, we have to change the TYPE of the 1406 ### clone to 'FILE' 1407 my $link_ok = $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK; 1408 my $data_ok = !$clone->is_symlink && $clone->has_content; 1409 1410 ### downgrade to a 'normal' file if it's a symlink we're going to 1411 ### treat as a regular file 1412 $clone->_downgrade_to_plainfile if $link_ok; 1413 1414 ### get the header for this block 1415 my $header = $self->_format_tar_entry( $clone ); 1416 unless( $header ) { 1417 $self->_error(q[Could not format header for: ] . 1418 $clone->full_path ); 1419 return; 1420 } 1421 1422 unless( print $handle $header ) { 1423 $self->_error(q[Could not write header for: ] . 1424 $clone->full_path); 1425 return; 1426 } 1427 1428 if( $link_ok or $data_ok ) { 1429 unless( print $handle $clone->data ) { 1430 $self->_error(q[Could not write data for: ] . 1431 $clone->full_path); 1432 return; 1433 } 1434 1435 ### pad the end of the clone if required ### 1436 print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK 1437 } 1438 1439 } ### done writing these entries 1440 } 1441 1442 ### write the end markers ### 1443 print $handle TAR_END x 2 or 1444 return $self->_error( qq[Could not write tar end markers] ); 1445 1446 ### did you want it written to a file, or returned as a string? ### 1447 my $rv = length($file) ? 1 1448 : $HAS_PERLIO ? $dummy 1449 : do { seek $handle, 0, 0; local $/; <$handle> }; 1450 1451 ### make sure to close the handle if we created it 1452 if ( $file ne $handle ) { 1453 unless( close $handle ) { 1454 $self->_error( qq[Could not write tar] ); 1455 return; 1456 } 1457 } 1458 1459 return $rv; 1460} 1461 1462sub _format_tar_entry { 1463 my $self = shift; 1464 my $entry = shift or return; 1465 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix; 1466 my $no_prefix = shift || 0; 1467 1468 my $file = $entry->name; 1469 my $prefix = $entry->prefix; $prefix = '' unless defined $prefix; 1470 1471 ### remove the prefix from the file name 1472 ### not sure if this is still needed --kane 1473 ### no it's not -- Archive::Tar::File->_new_from_file will take care of 1474 ### this for us. Even worse, this would break if we tried to add a file 1475 ### like x/x. 1476 #if( length $prefix ) { 1477 # $file =~ s/^$match//; 1478 #} 1479 1480 $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix) 1481 if length $ext_prefix; 1482 1483 ### not sure why this is... ### 1484 my $l = PREFIX_LENGTH; # is ambiguous otherwise... 1485 substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH; 1486 1487 my $f1 = "%06o"; my $f2 = $ZERO_PAD_NUMBERS ? "%011o" : "%11o"; 1488 1489 ### this might be optimizable with a 'changed' flag in the file objects ### 1490 my $tar = pack ( 1491 PACK, 1492 $file, 1493 1494 (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]), 1495 (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]), 1496 1497 "", # checksum field - space padded a bit down 1498 1499 (map { $entry->$_() } qw[type linkname magic]), 1500 1501 $entry->version || TAR_VERSION, 1502 1503 (map { $entry->$_() } qw[uname gname]), 1504 (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]), 1505 1506 ($no_prefix ? '' : $prefix) 1507 ); 1508 1509 ### add the checksum ### 1510 my $checksum_fmt = $ZERO_PAD_NUMBERS ? "%06o\0" : "%06o\0"; 1511 substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar)); 1512 1513 return $tar; 1514} 1515 1516=head2 $tar->add_files( @filenamelist ) 1517 1518Takes a list of filenames and adds them to the in-memory archive. 1519 1520The path to the file is automatically converted to a Unix like 1521equivalent for use in the archive, and, if on MacOS, the file's 1522modification time is converted from the MacOS epoch to the Unix epoch. 1523So tar archives created on MacOS with B<Archive::Tar> can be read 1524both with I<tar> on Unix and applications like I<suntar> or 1525I<Stuffit Expander> on MacOS. 1526 1527Be aware that the file's type/creator and resource fork will be lost, 1528which is usually what you want in cross-platform archives. 1529 1530Instead of a filename, you can also pass it an existing C<Archive::Tar::File> 1531object from, for example, another archive. The object will be clone, and 1532effectively be a copy of the original, not an alias. 1533 1534Returns a list of C<Archive::Tar::File> objects that were just added. 1535 1536=cut 1537 1538sub add_files { 1539 my $self = shift; 1540 my @files = @_ or return; 1541 1542 my @rv; 1543 for my $file ( @files ) { 1544 1545 ### you passed an Archive::Tar::File object 1546 ### clone it so we don't accidentally have a reference to 1547 ### an object from another archive 1548 if( UNIVERSAL::isa( $file,'Archive::Tar::File' ) ) { 1549 push @rv, $file->clone; 1550 next; 1551 } 1552 1553 eval { 1554 if( utf8::is_utf8( $file )) { 1555 utf8::encode( $file ); 1556 } 1557 }; 1558 1559 unless( -e $file || -l $file ) { 1560 $self->_error( qq[No such file: '$file'] ); 1561 next; 1562 } 1563 1564 my $obj = Archive::Tar::File->new( file => $file ); 1565 unless( $obj ) { 1566 $self->_error( qq[Unable to add file: '$file'] ); 1567 next; 1568 } 1569 1570 push @rv, $obj; 1571 } 1572 1573 push @{$self->{_data}}, @rv; 1574 1575 return @rv; 1576} 1577 1578=head2 $tar->add_data ( $filename, $data, [$opthashref] ) 1579 1580Takes a filename, a scalar full of data and optionally a reference to 1581a hash with specific options. 1582 1583Will add a file to the in-memory archive, with name C<$filename> and 1584content C<$data>. Specific properties can be set using C<$opthashref>. 1585The following list of properties is supported: name, size, mtime 1586(last modified date), mode, uid, gid, linkname, uname, gname, 1587devmajor, devminor, prefix, type. (On MacOS, the file's path and 1588modification times are converted to Unix equivalents.) 1589 1590Valid values for the file type are the following constants defined by 1591Archive::Tar::Constant: 1592 1593=over 4 1594 1595=item FILE 1596 1597Regular file. 1598 1599=item HARDLINK 1600 1601=item SYMLINK 1602 1603Hard and symbolic ("soft") links; linkname should specify target. 1604 1605=item CHARDEV 1606 1607=item BLOCKDEV 1608 1609Character and block devices. devmajor and devminor should specify the major 1610and minor device numbers. 1611 1612=item DIR 1613 1614Directory. 1615 1616=item FIFO 1617 1618FIFO (named pipe). 1619 1620=item SOCKET 1621 1622Socket. 1623 1624=back 1625 1626Returns the C<Archive::Tar::File> object that was just added, or 1627C<undef> on failure. 1628 1629=cut 1630 1631sub add_data { 1632 my $self = shift; 1633 my ($file, $data, $opt) = @_; 1634 1635 my $obj = Archive::Tar::File->new( data => $file, $data, $opt ); 1636 unless( $obj ) { 1637 $self->_error( qq[Unable to add file: '$file'] ); 1638 return; 1639 } 1640 1641 push @{$self->{_data}}, $obj; 1642 1643 return $obj; 1644} 1645 1646=head2 $tar->error( [$BOOL] ) 1647 1648Returns the current error string (usually, the last error reported). 1649If a true value was specified, it will give the C<Carp::longmess> 1650equivalent of the error, in effect giving you a stacktrace. 1651 1652For backwards compatibility, this error is also available as 1653C<$Archive::Tar::error> although it is much recommended you use the 1654method call instead. 1655 1656=cut 1657 1658{ 1659 $error = ''; 1660 my $longmess; 1661 1662 sub _error { 1663 my $self = shift; 1664 my $msg = $error = shift; 1665 $longmess = Carp::longmess($error); 1666 if (ref $self) { 1667 $self->{_error} = $error; 1668 $self->{_longmess} = $longmess; 1669 } 1670 1671 ### set Archive::Tar::WARN to 0 to disable printing 1672 ### of errors 1673 if( $WARN ) { 1674 carp $DEBUG ? $longmess : $msg; 1675 } 1676 1677 return; 1678 } 1679 1680 sub error { 1681 my $self = shift; 1682 if (ref $self) { 1683 return shift() ? $self->{_longmess} : $self->{_error}; 1684 } else { 1685 return shift() ? $longmess : $error; 1686 } 1687 } 1688} 1689 1690=head2 $tar->setcwd( $cwd ); 1691 1692C<Archive::Tar> needs to know the current directory, and it will run 1693C<Cwd::cwd()> I<every> time it extracts a I<relative> entry from the 1694tarfile and saves it in the file system. (As of version 1.30, however, 1695C<Archive::Tar> will use the speed optimization described below 1696automatically, so it's only relevant if you're using C<extract_file()>). 1697 1698Since C<Archive::Tar> doesn't change the current directory internally 1699while it is extracting the items in a tarball, all calls to C<Cwd::cwd()> 1700can be avoided if we can guarantee that the current directory doesn't 1701get changed externally. 1702 1703To use this performance boost, set the current directory via 1704 1705 use Cwd; 1706 $tar->setcwd( cwd() ); 1707 1708once before calling a function like C<extract_file> and 1709C<Archive::Tar> will use the current directory setting from then on 1710and won't call C<Cwd::cwd()> internally. 1711 1712To switch back to the default behaviour, use 1713 1714 $tar->setcwd( undef ); 1715 1716and C<Archive::Tar> will call C<Cwd::cwd()> internally again. 1717 1718If you're using C<Archive::Tar>'s C<extract()> method, C<setcwd()> will 1719be called for you. 1720 1721=cut 1722 1723sub setcwd { 1724 my $self = shift; 1725 my $cwd = shift; 1726 1727 $self->{cwd} = $cwd; 1728} 1729 1730=head1 Class Methods 1731 1732=head2 Archive::Tar->create_archive($file, $compressed, @filelist) 1733 1734Creates a tar file from the list of files provided. The first 1735argument can either be the name of the tar file to create or a 1736reference to an open file handle (e.g. a GLOB reference). 1737 1738The second argument is used to indicate compression. You can 1739compress using C<gzip>, C<bzip2> or C<xz>. If you pass a digit, it's assumed 1740to be the C<gzip> compression level (between 1 and 9), but the use of 1741constants is preferred: 1742 1743 # write a gzip compressed file 1744 Archive::Tar->create_archive( 'out.tgz', COMPRESS_GZIP, @filelist ); 1745 1746 # write a bzip compressed file 1747 Archive::Tar->create_archive( 'out.tbz', COMPRESS_BZIP, @filelist ); 1748 1749 # write a xz compressed file 1750 Archive::Tar->create_archive( 'out.txz', COMPRESS_XZ, @filelist ); 1751 1752Note that when you pass in a filehandle, the compression argument 1753is ignored, as all files are printed verbatim to your filehandle. 1754If you wish to enable compression with filehandles, use an 1755C<IO::Zlib>, C<IO::Compress::Bzip2> or C<IO::Compress::Xz> filehandle instead. 1756 1757The remaining arguments list the files to be included in the tar file. 1758These files must all exist. Any files which don't exist or can't be 1759read are silently ignored. 1760 1761If the archive creation fails for any reason, C<create_archive> will 1762return false. Please use the C<error> method to find the cause of the 1763failure. 1764 1765Note that this method does not write C<on the fly> as it were; it 1766still reads all the files into memory before writing out the archive. 1767Consult the FAQ below if this is a problem. 1768 1769=cut 1770 1771sub create_archive { 1772 my $class = shift; 1773 1774 my $file = shift; return unless defined $file; 1775 my $gzip = shift || 0; 1776 my @files = @_; 1777 1778 unless( @files ) { 1779 return $class->_error( qq[Cowardly refusing to create empty archive!] ); 1780 } 1781 1782 my $tar = $class->new; 1783 $tar->add_files( @files ); 1784 return $tar->write( $file, $gzip ); 1785} 1786 1787=head2 Archive::Tar->iter( $filename, [ $compressed, {opt => $val} ] ) 1788 1789Returns an iterator function that reads the tar file without loading 1790it all in memory. Each time the function is called it will return the 1791next file in the tarball. The files are returned as 1792C<Archive::Tar::File> objects. The iterator function returns the 1793empty list once it has exhausted the files contained. 1794 1795The second argument can be a hash reference with options, which are 1796identical to the arguments passed to C<read()>. 1797 1798Example usage: 1799 1800 my $next = Archive::Tar->iter( "example.tar.gz", 1, {filter => qr/\.pm$/} ); 1801 1802 while( my $f = $next->() ) { 1803 print $f->name, "\n"; 1804 1805 $f->extract or warn "Extraction failed"; 1806 1807 # .... 1808 } 1809 1810=cut 1811 1812 1813sub iter { 1814 my $class = shift; 1815 my $filename = shift; 1816 return unless defined $filename; 1817 my $compressed = shift || 0; 1818 my $opts = shift || {}; 1819 1820 ### get a handle to read from. 1821 my $handle = $class->_get_handle( 1822 $filename, 1823 $compressed, 1824 READ_ONLY->( ZLIB ) 1825 ) or return; 1826 1827 my @data; 1828 my $CONSTRUCT_ARGS = [ $filename, $compressed, $opts ]; 1829 return sub { 1830 return shift(@data) if @data; # more than one file returned? 1831 return unless $handle; # handle exhausted? 1832 1833 ### read data, should only return file 1834 my $tarfile = $class->_read_tar($handle, { %$opts, limit => 1 }); 1835 @data = @$tarfile if ref $tarfile && ref $tarfile eq 'ARRAY'; 1836 if($Archive::Tar::RESOLVE_SYMLINK!~/none/){ 1837 foreach(@data){ 1838 #may refine this heuristic for ON_UNIX? 1839 if($_->linkname){ 1840 #is there a better slot to store/share it ? 1841 $_->{'_archive'} = $CONSTRUCT_ARGS; 1842 } 1843 } 1844 } 1845 1846 ### return one piece of data 1847 return shift(@data) if @data; 1848 1849 ### data is exhausted, free the filehandle 1850 undef $handle; 1851 if(@$CONSTRUCT_ARGS == 4){ 1852 #free archive in memory 1853 undef $CONSTRUCT_ARGS->[-1]; 1854 } 1855 return; 1856 }; 1857} 1858 1859=head2 Archive::Tar->list_archive($file, $compressed, [\@properties]) 1860 1861Returns a list of the names of all the files in the archive. The 1862first argument can either be the name of the tar file to list or a 1863reference to an open file handle (e.g. a GLOB reference). 1864 1865If C<list_archive()> is passed an array reference as its third 1866argument it returns a list of hash references containing the requested 1867properties of each file. The following list of properties is 1868supported: full_path, name, size, mtime (last modified date), mode, 1869uid, gid, linkname, uname, gname, devmajor, devminor, prefix, type. 1870 1871See C<Archive::Tar::File> for details about supported properties. 1872 1873Passing an array reference containing only one element, 'name', is 1874special cased to return a list of names rather than a list of hash 1875references. 1876 1877=cut 1878 1879sub list_archive { 1880 my $class = shift; 1881 my $file = shift; return unless defined $file; 1882 my $gzip = shift || 0; 1883 1884 my $tar = $class->new($file, $gzip); 1885 return unless $tar; 1886 1887 return $tar->list_files( @_ ); 1888} 1889 1890=head2 Archive::Tar->extract_archive($file, $compressed) 1891 1892Extracts the contents of the tar file. The first argument can either 1893be the name of the tar file to create or a reference to an open file 1894handle (e.g. a GLOB reference). All relative paths in the tar file will 1895be created underneath the current working directory. 1896 1897C<extract_archive> will return a list of files it extracted. 1898If the archive extraction fails for any reason, C<extract_archive> 1899will return false. Please use the C<error> method to find the cause 1900of the failure. 1901 1902=cut 1903 1904sub extract_archive { 1905 my $class = shift; 1906 my $file = shift; return unless defined $file; 1907 my $gzip = shift || 0; 1908 1909 my $tar = $class->new( ) or return; 1910 1911 return $tar->read( $file, $gzip, { extract => 1 } ); 1912} 1913 1914=head2 $bool = Archive::Tar->has_io_string 1915 1916Returns true if we currently have C<IO::String> support loaded. 1917 1918Either C<IO::String> or C<perlio> support is needed to support writing 1919stringified archives. Currently, C<perlio> is the preferred method, if 1920available. 1921 1922See the C<GLOBAL VARIABLES> section to see how to change this preference. 1923 1924=cut 1925 1926sub has_io_string { return $HAS_IO_STRING; } 1927 1928=head2 $bool = Archive::Tar->has_perlio 1929 1930Returns true if we currently have C<perlio> support loaded. 1931 1932This requires C<perl-5.8> or higher, compiled with C<perlio> 1933 1934Either C<IO::String> or C<perlio> support is needed to support writing 1935stringified archives. Currently, C<perlio> is the preferred method, if 1936available. 1937 1938See the C<GLOBAL VARIABLES> section to see how to change this preference. 1939 1940=cut 1941 1942sub has_perlio { return $HAS_PERLIO; } 1943 1944=head2 $bool = Archive::Tar->has_zlib_support 1945 1946Returns true if C<Archive::Tar> can extract C<zlib> compressed archives 1947 1948=cut 1949 1950sub has_zlib_support { return ZLIB } 1951 1952=head2 $bool = Archive::Tar->has_bzip2_support 1953 1954Returns true if C<Archive::Tar> can extract C<bzip2> compressed archives 1955 1956=cut 1957 1958sub has_bzip2_support { return BZIP } 1959 1960=head2 $bool = Archive::Tar->has_xz_support 1961 1962Returns true if C<Archive::Tar> can extract C<xz> compressed archives 1963 1964=cut 1965 1966sub has_xz_support { return XZ } 1967 1968=head2 Archive::Tar->can_handle_compressed_files 1969 1970A simple checking routine, which will return true if C<Archive::Tar> 1971is able to uncompress compressed archives on the fly with C<IO::Zlib>, 1972C<IO::Compress::Bzip2> and C<IO::Compress::Xz> or false if not both are installed. 1973 1974You can use this as a shortcut to determine whether C<Archive::Tar> 1975will do what you think before passing compressed archives to its 1976C<read> method. 1977 1978=cut 1979 1980sub can_handle_compressed_files { return ZLIB && BZIP ? 1 : 0 } 1981 1982sub no_string_support { 1983 croak("You have to install IO::String to support writing archives to strings"); 1984} 1985 1986sub _symlinks_resolver{ 1987 my ($src, $trg) = @_; 1988 my @src = split /[\/\\]/, $src; 1989 my @trg = split /[\/\\]/, $trg; 1990 pop @src; #strip out current object name 1991 if(@trg and $trg[0] eq ''){ 1992 shift @trg; 1993 #restart path from scratch 1994 @src = ( ); 1995 } 1996 foreach my $part ( @trg ){ 1997 next if $part eq '.'; #ignore current 1998 if($part eq '..'){ 1999 #got to parent 2000 pop @src; 2001 } 2002 else{ 2003 #append it 2004 push @src, $part; 2005 } 2006 } 2007 my $path = join('/', @src); 2008 warn "_symlinks_resolver('$src','$trg') = $path" if $DEBUG; 2009 return $path; 2010} 2011 20121; 2013 2014__END__ 2015 2016=head1 GLOBAL VARIABLES 2017 2018=head2 $Archive::Tar::FOLLOW_SYMLINK 2019 2020Set this variable to C<1> to make C<Archive::Tar> effectively make a 2021copy of the file when extracting. Default is C<0>, which 2022means the symlink stays intact. Of course, you will have to pack the 2023file linked to as well. 2024 2025This option is checked when you write out the tarfile using C<write> 2026or C<create_archive>. 2027 2028This works just like C</bin/tar>'s C<-h> option. 2029 2030=head2 $Archive::Tar::CHOWN 2031 2032By default, C<Archive::Tar> will try to C<chown> your files if it is 2033able to. In some cases, this may not be desired. In that case, set 2034this variable to C<0> to disable C<chown>-ing, even if it were 2035possible. 2036 2037The default is C<1>. 2038 2039=head2 $Archive::Tar::CHMOD 2040 2041By default, C<Archive::Tar> will try to C<chmod> your files to 2042whatever mode was specified for the particular file in the archive. 2043In some cases, this may not be desired. In that case, set this 2044variable to C<0> to disable C<chmod>-ing. 2045 2046The default is C<1>. 2047 2048=head2 $Archive::Tar::SAME_PERMISSIONS 2049 2050When, C<$Archive::Tar::CHMOD> is enabled, this setting controls whether 2051the permissions on files from the archive are used without modification 2052of if they are filtered by removing any setid bits and applying the 2053current umask. 2054 2055The default is C<1> for the root user and C<0> for normal users. 2056 2057=head2 $Archive::Tar::DO_NOT_USE_PREFIX 2058 2059By default, C<Archive::Tar> will try to put paths that are over 2060100 characters in the C<prefix> field of your tar header, as 2061defined per POSIX-standard. However, some (older) tar programs 2062do not implement this spec. To retain compatibility with these older 2063or non-POSIX compliant versions, you can set the C<$DO_NOT_USE_PREFIX> 2064variable to a true value, and C<Archive::Tar> will use an alternate 2065way of dealing with paths over 100 characters by using the 2066C<GNU Extended Header> feature. 2067 2068Note that clients who do not support the C<GNU Extended Header> 2069feature will not be able to read these archives. Such clients include 2070tars on C<Solaris>, C<Irix> and C<AIX>. 2071 2072The default is C<0>. 2073 2074=head2 $Archive::Tar::DEBUG 2075 2076Set this variable to C<1> to always get the C<Carp::longmess> output 2077of the warnings, instead of the regular C<carp>. This is the same 2078message you would get by doing: 2079 2080 $tar->error(1); 2081 2082Defaults to C<0>. 2083 2084=head2 $Archive::Tar::WARN 2085 2086Set this variable to C<0> if you do not want any warnings printed. 2087Personally I recommend against doing this, but people asked for the 2088option. Also, be advised that this is of course not threadsafe. 2089 2090Defaults to C<1>. 2091 2092=head2 $Archive::Tar::error 2093 2094Holds the last reported error. Kept for historical reasons, but its 2095use is very much discouraged. Use the C<error()> method instead: 2096 2097 warn $tar->error unless $tar->extract; 2098 2099Note that in older versions of this module, the C<error()> method 2100would return an effectively global value even when called an instance 2101method as above. This has since been fixed, and multiple instances of 2102C<Archive::Tar> now have separate error strings. 2103 2104=head2 $Archive::Tar::INSECURE_EXTRACT_MODE 2105 2106This variable indicates whether C<Archive::Tar> should allow 2107files to be extracted outside their current working directory. 2108 2109Allowing this could have security implications, as a malicious 2110tar archive could alter or replace any file the extracting user 2111has permissions to. Therefor, the default is to not allow 2112insecure extractions. 2113 2114If you trust the archive, or have other reasons to allow the 2115archive to write files outside your current working directory, 2116set this variable to C<true>. 2117 2118Note that this is a backwards incompatible change from version 2119C<1.36> and before. 2120 2121=head2 $Archive::Tar::HAS_PERLIO 2122 2123This variable holds a boolean indicating if we currently have 2124C<perlio> support loaded. This will be enabled for any perl 2125greater than C<5.8> compiled with C<perlio>. 2126 2127If you feel strongly about disabling it, set this variable to 2128C<false>. Note that you will then need C<IO::String> installed 2129to support writing stringified archives. 2130 2131Don't change this variable unless you B<really> know what you're 2132doing. 2133 2134=head2 $Archive::Tar::HAS_IO_STRING 2135 2136This variable holds a boolean indicating if we currently have 2137C<IO::String> support loaded. This will be enabled for any perl 2138that has a loadable C<IO::String> module. 2139 2140If you feel strongly about disabling it, set this variable to 2141C<false>. Note that you will then need C<perlio> support from 2142your perl to be able to write stringified archives. 2143 2144Don't change this variable unless you B<really> know what you're 2145doing. 2146 2147=head2 $Archive::Tar::ZERO_PAD_NUMBERS 2148 2149This variable holds a boolean indicating if we will create 2150zero padded numbers for C<size>, C<mtime> and C<checksum>. 2151The default is C<0>, indicating that we will create space padded 2152numbers. Added for compatibility with C<busybox> implementations. 2153 2154=head2 Tuning the way RESOLVE_SYMLINK will works 2155 2156You can tune the behaviour by setting the $Archive::Tar::RESOLVE_SYMLINK variable, 2157or $ENV{PERL5_AT_RESOLVE_SYMLINK} before loading the module Archive::Tar. 2158 2159Values can be one of the following: 2160 2161=over 4 2162 2163=item none 2164 2165Disable this mechanism and failed as it was in previous version (<1.88) 2166 2167=item speed (default) 2168 2169If you prefer speed 2170this will read again the whole archive using read() so all entries 2171will be available 2172 2173=item memory 2174 2175If you prefer memory 2176 2177=back 2178 2179Limitation: It won't work for terminal, pipe or sockets or every non seekable 2180source. 2181 2182=head2 $Archive::Tar::EXTRACT_BLOCK_SIZE 2183 2184This variable holds an integer with the block size that should be used when 2185writing files during extraction. It defaults to 1 GiB. Please note that this 2186cannot be arbitrarily large since some operating systems limit the number of 2187bytes that can be written in one call to C<write(2)>, so if this is too large, 2188extraction may fail with an error. 2189 2190=cut 2191 2192=head1 FAQ 2193 2194=over 4 2195 2196=item What's the minimum perl version required to run Archive::Tar? 2197 2198You will need perl version 5.005_03 or newer. 2199 2200=item Isn't Archive::Tar slow? 2201 2202Yes it is. It's pure perl, so it's a lot slower then your C</bin/tar> 2203However, it's very portable. If speed is an issue, consider using 2204C</bin/tar> instead. 2205 2206=item Isn't Archive::Tar heavier on memory than /bin/tar? 2207 2208Yes it is, see previous answer. Since C<Compress::Zlib> and therefore 2209C<IO::Zlib> doesn't support C<seek> on their filehandles, there is little 2210choice but to read the archive into memory. 2211This is ok if you want to do in-memory manipulation of the archive. 2212 2213If you just want to extract, use the C<extract_archive> class method 2214instead. It will optimize and write to disk immediately. 2215 2216Another option is to use the C<iter> class method to iterate over 2217the files in the tarball without reading them all in memory at once. 2218 2219=item Can you lazy-load data instead? 2220 2221In some cases, yes. You can use the C<iter> class method to iterate 2222over the files in the tarball without reading them all in memory at once. 2223 2224=item How much memory will an X kb tar file need? 2225 2226Probably more than X kb, since it will all be read into memory. If 2227this is a problem, and you don't need to do in memory manipulation 2228of the archive, consider using the C<iter> class method, or C</bin/tar> 2229instead. 2230 2231=item What do you do with unsupported filetypes in an archive? 2232 2233C<Unix> has a few filetypes that aren't supported on other platforms, 2234like C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just 2235try to make a copy of the original file, rather than throwing an error. 2236 2237This does require you to read the entire archive in to memory first, 2238since otherwise we wouldn't know what data to fill the copy with. 2239(This means that you cannot use the class methods, including C<iter> 2240on archives that have incompatible filetypes and still expect things 2241to work). 2242 2243For other filetypes, like C<chardevs> and C<blockdevs> we'll warn that 2244the extraction of this particular item didn't work. 2245 2246=item I'm using WinZip, or some other non-POSIX client, and files are not being extracted properly! 2247 2248By default, C<Archive::Tar> is in a completely POSIX-compatible 2249mode, which uses the POSIX-specification of C<tar> to store files. 2250For paths greater than 100 characters, this is done using the 2251C<POSIX header prefix>. Non-POSIX-compatible clients may not support 2252this part of the specification, and may only support the C<GNU Extended 2253Header> functionality. To facilitate those clients, you can set the 2254C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. See the 2255C<GLOBAL VARIABLES> section for details on this variable. 2256 2257Note that GNU tar earlier than version 1.14 does not cope well with 2258the C<POSIX header prefix>. If you use such a version, consider setting 2259the C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. 2260 2261=item How do I extract only files that have property X from an archive? 2262 2263Sometimes, you might not wish to extract a complete archive, just 2264the files that are relevant to you, based on some criteria. 2265 2266You can do this by filtering a list of C<Archive::Tar::File> objects 2267based on your criteria. For example, to extract only files that have 2268the string C<foo> in their title, you would use: 2269 2270 $tar->extract( 2271 grep { $_->full_path =~ /foo/ } $tar->get_files 2272 ); 2273 2274This way, you can filter on any attribute of the files in the archive. 2275Consult the C<Archive::Tar::File> documentation on how to use these 2276objects. 2277 2278=item How do I access .tar.Z files? 2279 2280The C<Archive::Tar> module can optionally use C<Compress::Zlib> (via 2281the C<IO::Zlib> module) to access tar files that have been compressed 2282with C<gzip>. Unfortunately tar files compressed with the Unix C<compress> 2283utility cannot be read by C<Compress::Zlib> and so cannot be directly 2284accesses by C<Archive::Tar>. 2285 2286If the C<uncompress> or C<gunzip> programs are available, you can use 2287one of these workarounds to read C<.tar.Z> files from C<Archive::Tar> 2288 2289Firstly with C<uncompress> 2290 2291 use Archive::Tar; 2292 2293 open F, "uncompress -c $filename |"; 2294 my $tar = Archive::Tar->new(*F); 2295 ... 2296 2297and this with C<gunzip> 2298 2299 use Archive::Tar; 2300 2301 open F, "gunzip -c $filename |"; 2302 my $tar = Archive::Tar->new(*F); 2303 ... 2304 2305Similarly, if the C<compress> program is available, you can use this to 2306write a C<.tar.Z> file 2307 2308 use Archive::Tar; 2309 use IO::File; 2310 2311 my $fh = IO::File->new( "| compress -c >$filename" ); 2312 my $tar = Archive::Tar->new(); 2313 ... 2314 $tar->write($fh); 2315 $fh->close ; 2316 2317=item How do I handle Unicode strings? 2318 2319C<Archive::Tar> uses byte semantics for any files it reads from or writes 2320to disk. This is not a problem if you only deal with files and never 2321look at their content or work solely with byte strings. But if you use 2322Unicode strings with character semantics, some additional steps need 2323to be taken. 2324 2325For example, if you add a Unicode string like 2326 2327 # Problem 2328 $tar->add_data('file.txt', "Euro: \x{20AC}"); 2329 2330then there will be a problem later when the tarfile gets written out 2331to disk via C<< $tar->write() >>: 2332 2333 Wide character in print at .../Archive/Tar.pm line 1014. 2334 2335The data was added as a Unicode string and when writing it out to disk, 2336the C<:utf8> line discipline wasn't set by C<Archive::Tar>, so Perl 2337tried to convert the string to ISO-8859 and failed. The written file 2338now contains garbage. 2339 2340For this reason, Unicode strings need to be converted to UTF-8-encoded 2341bytestrings before they are handed off to C<add_data()>: 2342 2343 use Encode; 2344 my $data = "Accented character: \x{20AC}"; 2345 $data = encode('utf8', $data); 2346 2347 $tar->add_data('file.txt', $data); 2348 2349A opposite problem occurs if you extract a UTF8-encoded file from a 2350tarball. Using C<get_content()> on the C<Archive::Tar::File> object 2351will return its content as a bytestring, not as a Unicode string. 2352 2353If you want it to be a Unicode string (because you want character 2354semantics with operations like regular expression matching), you need 2355to decode the UTF8-encoded content and have Perl convert it into 2356a Unicode string: 2357 2358 use Encode; 2359 my $data = $tar->get_content(); 2360 2361 # Make it a Unicode string 2362 $data = decode('utf8', $data); 2363 2364There is no easy way to provide this functionality in C<Archive::Tar>, 2365because a tarball can contain many files, and each of which could be 2366encoded in a different way. 2367 2368=back 2369 2370=head1 CAVEATS 2371 2372The AIX tar does not fill all unused space in the tar archive with 0x00. 2373This sometimes leads to warning messages from C<Archive::Tar>. 2374 2375 Invalid header block at offset nnn 2376 2377A fix for that problem is scheduled to be released in the following levels 2378of AIX, all of which should be coming out in the 4th quarter of 2009: 2379 2380 AIX 5.3 TL7 SP10 2381 AIX 5.3 TL8 SP8 2382 AIX 5.3 TL9 SP5 2383 AIX 5.3 TL10 SP2 2384 2385 AIX 6.1 TL0 SP11 2386 AIX 6.1 TL1 SP7 2387 AIX 6.1 TL2 SP6 2388 AIX 6.1 TL3 SP3 2389 2390The IBM APAR number for this problem is IZ50240 (Reported component ID: 23915765G0300 / AIX 5.3). It is possible to get an ifix for that problem. 2392If you need an ifix please contact your local IBM AIX support. 2393 2394=head1 TODO 2395 2396=over 4 2397 2398=item Check if passed in handles are open for read/write 2399 2400Currently I don't know of any portable pure perl way to do this. 2401Suggestions welcome. 2402 2403=item Allow archives to be passed in as string 2404 2405Currently, we only allow opened filehandles or filenames, but 2406not strings. The internals would need some reworking to facilitate 2407stringified archives. 2408 2409=item Facilitate processing an opened filehandle of a compressed archive 2410 2411Currently, we only support this if the filehandle is an IO::Zlib object. 2412Environments, like apache, will present you with an opened filehandle 2413to an uploaded file, which might be a compressed archive. 2414 2415=back 2416 2417=head1 SEE ALSO 2418 2419=over 4 2420 2421=item The GNU tar specification 2422 2423L<https://www.gnu.org/software/tar/manual/tar.html> 2424 2425=item The PAX format specification 2426 2427The specification which tar derives from; L<https://pubs.opengroup.org/onlinepubs/007904975/utilities/pax.html> 2428 2429=back 2430 2431=head1 AUTHOR 2432 2433This module by Jos Boumans E<lt>kane@cpan.orgE<gt>. 2434 2435Please reports bugs to E<lt>bug-archive-tar@rt.cpan.orgE<gt>. 2436 2437=head1 ACKNOWLEDGEMENTS 2438 2439Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney, Gisle Aas, 2440Rainer Tammer and especially Andrew Savige for their help and suggestions. 2441 2442=head1 COPYRIGHT 2443 2444This module is copyright (c) 2002 - 2009 Jos Boumans 2445E<lt>kane@cpan.orgE<gt>. All rights reserved. 2446 2447This library is free software; you may redistribute and/or modify 2448it under the same terms as Perl itself. 2449 2450=cut 2451