1### the gnu tar specification: 2### http://www.gnu.org/software/tar/manual/tar.html 3### 4### and the pax format spec, which tar derives from: 5### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html 6 7package Archive::Tar; 8require 5.005_03; 9 10use Cwd; 11use IO::Zlib; 12use IO::File; 13use Carp qw(carp croak); 14use File::Spec (); 15use File::Spec::Unix (); 16use File::Path (); 17 18use Archive::Tar::File; 19use Archive::Tar::Constant; 20 21require Exporter; 22 23use strict; 24use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD 25 $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING $SAME_PERMISSIONS 26 $INSECURE_EXTRACT_MODE $ZERO_PAD_NUMBERS @ISA @EXPORT $RESOLVE_SYMLINK 27 ]; 28 29@ISA = qw[Exporter]; 30@EXPORT = qw[ COMPRESS_GZIP COMPRESS_BZIP ]; 31$DEBUG = 0; 32$WARN = 1; 33$FOLLOW_SYMLINK = 0; 34$VERSION = "2.32"; 35$CHOWN = 1; 36$CHMOD = 1; 37$SAME_PERMISSIONS = $> == 0 ? 1 : 0; 38$DO_NOT_USE_PREFIX = 0; 39$INSECURE_EXTRACT_MODE = 0; 40$ZERO_PAD_NUMBERS = 0; 41$RESOLVE_SYMLINK = $ENV{'PERL5_AT_RESOLVE_SYMLINK'} || 'speed'; 42 43BEGIN { 44 use Config; 45 $HAS_PERLIO = $Config::Config{useperlio}; 46 47 ### try and load IO::String anyway, so you can dynamically 48 ### switch between perlio and IO::String 49 $HAS_IO_STRING = eval { 50 require IO::String; 51 import IO::String; 52 1; 53 } || 0; 54} 55 56=head1 NAME 57 58Archive::Tar - module for manipulations of tar archives 59 60=head1 SYNOPSIS 61 62 use Archive::Tar; 63 my $tar = Archive::Tar->new; 64 65 $tar->read('origin.tgz'); 66 $tar->extract(); 67 68 $tar->add_files('file/foo.pl', 'docs/README'); 69 $tar->add_data('file/baz.txt', 'This is the contents now'); 70 71 $tar->rename('oldname', 'new/file/name'); 72 $tar->chown('/', 'root'); 73 $tar->chown('/', 'root:root'); 74 $tar->chmod('/tmp', '1777'); 75 76 $tar->write('files.tar'); # plain tar 77 $tar->write('files.tgz', COMPRESS_GZIP); # gzip compressed 78 $tar->write('files.tbz', COMPRESS_BZIP); # bzip2 compressed 79 80=head1 DESCRIPTION 81 82Archive::Tar provides an object oriented mechanism for handling tar 83files. It provides class methods for quick and easy files handling 84while also allowing for the creation of tar file objects for custom 85manipulation. If you have the IO::Zlib module installed, 86Archive::Tar will also support compressed or gzipped tar files. 87 88An object of class Archive::Tar represents a .tar(.gz) archive full 89of files and things. 90 91=head1 Object Methods 92 93=head2 Archive::Tar->new( [$file, $compressed] ) 94 95Returns a new Tar object. If given any arguments, C<new()> calls the 96C<read()> method automatically, passing on the arguments provided to 97the C<read()> method. 98 99If C<new()> is invoked with arguments and the C<read()> method fails 100for any reason, C<new()> returns undef. 101 102=cut 103 104my $tmpl = { 105 _data => [ ], 106 _file => 'Unknown', 107}; 108 109### install get/set accessors for this object. 110for my $key ( keys %$tmpl ) { 111 no strict 'refs'; 112 *{__PACKAGE__."::$key"} = sub { 113 my $self = shift; 114 $self->{$key} = $_[0] if @_; 115 return $self->{$key}; 116 } 117} 118 119sub new { 120 my $class = shift; 121 $class = ref $class if ref $class; 122 123 ### copying $tmpl here since a shallow copy makes it use the 124 ### same aref, causing for files to remain in memory always. 125 my $obj = bless { _data => [ ], _file => 'Unknown', _error => '' }, $class; 126 127 if (@_) { 128 unless ( $obj->read( @_ ) ) { 129 $obj->_error(qq[No data could be read from file]); 130 return; 131 } 132 } 133 134 return $obj; 135} 136 137=head2 $tar->read ( $filename|$handle, [$compressed, {opt => 'val'}] ) 138 139Read the given tar file into memory. 140The first argument can either be the name of a file or a reference to 141an already open filehandle (or an IO::Zlib object if it's compressed) 142 143The C<read> will I<replace> any previous content in C<$tar>! 144 145The second argument may be considered optional, but remains for 146backwards compatibility. Archive::Tar now looks at the file 147magic to determine what class should be used to open the file 148and will transparently Do The Right Thing. 149 150Archive::Tar will warn if you try to pass a bzip2 compressed file and the 151IO::Zlib / IO::Uncompress::Bunzip2 modules are not available and simply return. 152 153Note that you can currently B<not> pass a C<gzip> compressed 154filehandle, which is not opened with C<IO::Zlib>, a C<bzip2> compressed 155filehandle, which is not opened with C<IO::Uncompress::Bunzip2>, nor a string 156containing the full archive information (either compressed or 157uncompressed). These are worth while features, but not currently 158implemented. See the C<TODO> section. 159 160The third argument can be a hash reference with options. Note that 161all options are case-sensitive. 162 163=over 4 164 165=item limit 166 167Do not read more than C<limit> files. This is useful if you have 168very big archives, and are only interested in the first few files. 169 170=item filter 171 172Can be set to a regular expression. Only files with names that match 173the expression will be read. 174 175=item md5 176 177Set to 1 and the md5sum of files will be returned (instead of file data) 178 my $iter = Archive::Tar->iter( $file, 1, {md5 => 1} ); 179 while( my $f = $iter->() ) { 180 print $f->data . "\t" . $f->full_path . $/; 181 } 182 183=item extract 184 185If set to true, immediately extract entries when reading them. This 186gives you the same memory break as the C<extract_archive> function. 187Note however that entries will not be read into memory, but written 188straight to disk. This means no C<Archive::Tar::File> objects are 189created for you to inspect. 190 191=back 192 193All files are stored internally as C<Archive::Tar::File> objects. 194Please consult the L<Archive::Tar::File> documentation for details. 195 196Returns the number of files read in scalar context, and a list of 197C<Archive::Tar::File> objects in list context. 198 199=cut 200 201sub read { 202 my $self = shift; 203 my $file = shift; 204 my $gzip = shift || 0; 205 my $opts = shift || {}; 206 207 unless( defined $file ) { 208 $self->_error( qq[No file to read from!] ); 209 return; 210 } else { 211 $self->_file( $file ); 212 } 213 214 my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) ) 215 or return; 216 217 my $data = $self->_read_tar( $handle, $opts ) or return; 218 219 $self->_data( $data ); 220 221 return wantarray ? @$data : scalar @$data; 222} 223 224sub _get_handle { 225 my $self = shift; 226 my $file = shift; return unless defined $file; 227 my $compress = shift || 0; 228 my $mode = shift || READ_ONLY->( ZLIB ); # default to read only 229 230 ### Check if file is a file handle or IO glob 231 if ( ref $file ) { 232 return $file if eval{ *$file{IO} }; 233 return $file if eval{ $file->isa(q{IO::Handle}) }; 234 $file = q{}.$file; 235 } 236 237 ### get a FH opened to the right class, so we can use it transparently 238 ### throughout the program 239 my $fh; 240 { ### reading magic only makes sense if we're opening a file for 241 ### reading. otherwise, just use what the user requested. 242 my $magic = ''; 243 if( MODE_READ->($mode) ) { 244 open my $tmp, $file or do { 245 $self->_error( qq[Could not open '$file' for reading: $!] ); 246 return; 247 }; 248 249 ### read the first 4 bites of the file to figure out which class to 250 ### use to open the file. 251 sysread( $tmp, $magic, 4 ); 252 close $tmp; 253 } 254 255 ### is it bzip? 256 ### if you asked specifically for bzip compression, or if we're in 257 ### read mode and the magic numbers add up, use bzip 258 if( BZIP and ( 259 ($compress eq COMPRESS_BZIP) or 260 ( MODE_READ->($mode) and $magic =~ BZIP_MAGIC_NUM ) 261 ) 262 ) { 263 264 ### different reader/writer modules, different error vars... sigh 265 if( MODE_READ->($mode) ) { 266 $fh = IO::Uncompress::Bunzip2->new( $file, MultiStream => 1 ) or do { 267 $self->_error( qq[Could not read '$file': ] . 268 $IO::Uncompress::Bunzip2::Bunzip2Error 269 ); 270 return; 271 }; 272 273 } else { 274 $fh = IO::Compress::Bzip2->new( $file ) or do { 275 $self->_error( qq[Could not write to '$file': ] . 276 $IO::Compress::Bzip2::Bzip2Error 277 ); 278 return; 279 }; 280 } 281 282 ### is it gzip? 283 ### if you asked for compression, if you wanted to read or the gzip 284 ### magic number is present (redundant with read) 285 } elsif( ZLIB and ( 286 $compress or MODE_READ->($mode) or $magic =~ GZIP_MAGIC_NUM 287 ) 288 ) { 289 $fh = IO::Zlib->new; 290 291 unless( $fh->open( $file, $mode ) ) { 292 $self->_error(qq[Could not create filehandle for '$file': $!]); 293 return; 294 } 295 296 ### is it plain tar? 297 } else { 298 $fh = IO::File->new; 299 300 unless( $fh->open( $file, $mode ) ) { 301 $self->_error(qq[Could not create filehandle for '$file': $!]); 302 return; 303 } 304 305 ### enable bin mode on tar archives 306 binmode $fh; 307 } 308 } 309 310 return $fh; 311} 312 313 314sub _read_tar { 315 my $self = shift; 316 my $handle = shift or return; 317 my $opts = shift || {}; 318 319 my $count = $opts->{limit} || 0; 320 my $filter = $opts->{filter}; 321 my $md5 = $opts->{md5} || 0; # cdrake 322 my $filter_cb = $opts->{filter_cb}; 323 my $extract = $opts->{extract} || 0; 324 325 ### set a cap on the amount of files to extract ### 326 my $limit = 0; 327 $limit = 1 if $count > 0; 328 329 my $tarfile = [ ]; 330 my $chunk; 331 my $read = 0; 332 my $real_name; # to set the name of a file when 333 # we're encountering @longlink 334 my $data; 335 336 LOOP: 337 while( $handle->read( $chunk, HEAD ) ) { 338 ### IO::Zlib doesn't support this yet 339 my $offset; 340 if ( ref($handle) ne 'IO::Zlib' ) { 341 local $@; 342 $offset = eval { tell $handle } || 'unknown'; 343 $@ = ''; 344 } 345 else { 346 $offset = 'unknown'; 347 } 348 349 unless( $read++ ) { 350 my $gzip = GZIP_MAGIC_NUM; 351 if( $chunk =~ /$gzip/ ) { 352 $self->_error( qq[Cannot read compressed format in tar-mode] ); 353 return; 354 } 355 356 ### size is < HEAD, which means a corrupted file, as the minimum 357 ### length is _at least_ HEAD 358 if (length $chunk != HEAD) { 359 $self->_error( qq[Cannot read enough bytes from the tarfile] ); 360 return; 361 } 362 } 363 364 ### if we can't read in all bytes... ### 365 last if length $chunk != HEAD; 366 367 ### Apparently this should really be two blocks of 512 zeroes, 368 ### but GNU tar sometimes gets it wrong. See comment in the 369 ### source code (tar.c) to GNU cpio. 370 next if $chunk eq TAR_END; 371 372 ### according to the posix spec, the last 12 bytes of the header are 373 ### null bytes, to pad it to a 512 byte block. That means if these 374 ### bytes are NOT null bytes, it's a corrupt header. See: 375 ### www.koders.com/c/fidCE473AD3D9F835D690259D60AD5654591D91D5BA.aspx 376 ### line 111 377 { my $nulls = join '', "\0" x 12; 378 unless( $nulls eq substr( $chunk, 500, 12 ) ) { 379 $self->_error( qq[Invalid header block at offset $offset] ); 380 next LOOP; 381 } 382 } 383 384 ### pass the realname, so we can set it 'proper' right away 385 ### some of the heuristics are done on the name, so important 386 ### to set it ASAP 387 my $entry; 388 { my %extra_args = (); 389 $extra_args{'name'} = $$real_name if defined $real_name; 390 391 unless( $entry = Archive::Tar::File->new( chunk => $chunk, 392 %extra_args ) 393 ) { 394 $self->_error( qq[Couldn't read chunk at offset $offset] ); 395 next LOOP; 396 } 397 } 398 399 ### ignore labels: 400 ### http://www.gnu.org/software/tar/manual/html_chapter/Media.html#SEC159 401 next if $entry->is_label; 402 403 if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) { 404 405 if ( $entry->is_file && !$entry->validate ) { 406 ### sometimes the chunk is rather fux0r3d and a whole 512 407 ### bytes ends up in the ->name area. 408 ### clean it up, if need be 409 my $name = $entry->name; 410 $name = substr($name, 0, 100) if length $name > 100; 411 $name =~ s/\n/ /g; 412 413 $self->_error( $name . qq[: checksum error] ); 414 next LOOP; 415 } 416 417 my $block = BLOCK_SIZE->( $entry->size ); 418 419 $data = $entry->get_content_by_ref; 420 421 my $skip = 0; 422 my $ctx; # cdrake 423 ### skip this entry if we're filtering 424 425 if($md5) { # cdrake 426 $ctx = Digest::MD5->new; # cdrake 427 $skip=5; # cdrake 428 429 } elsif ($filter && $entry->name !~ $filter) { 430 $skip = 1; 431 432 } elsif ($filter_cb && ! $filter_cb->($entry)) { 433 $skip = 2; 434 435 ### skip this entry if it's a pax header. This is a special file added 436 ### by, among others, git-generated tarballs. It holds comments and is 437 ### not meant for extracting. See #38932: pax_global_header extracted 438 } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) { 439 $skip = 3; 440 } 441 442 if ($skip) { 443 # 444 # Since we're skipping, do not allocate memory for the 445 # whole file. Read it 64 BLOCKS at a time. Do not 446 # complete the skip yet because maybe what we read is a 447 # longlink and it won't get skipped after all 448 # 449 my $amt = $block; 450 my $fsz=$entry->size; # cdrake 451 while ($amt > 0) { 452 $$data = ''; 453 my $this = 64 * BLOCK; 454 $this = $amt if $this > $amt; 455 if( $handle->read( $$data, $this ) < $this ) { 456 $self->_error( qq[Read error on tarfile (missing data) ']. 457 $entry->full_path ."' at offset $offset" ); 458 next LOOP; 459 } 460 $amt -= $this; 461 $fsz -= $this; # cdrake 462 substr ($$data, $fsz) = "" if ($fsz<0); # remove external junk prior to md5 # cdrake 463 $ctx->add($$data) if($skip==5); # cdrake 464 } 465 $$data = $ctx->hexdigest if($skip==5 && !$entry->is_longlink && !$entry->is_unknown && !$entry->is_label ) ; # cdrake 466 } else { 467 468 ### just read everything into memory 469 ### can't do lazy loading since IO::Zlib doesn't support 'seek' 470 ### this is because Compress::Zlib doesn't support it =/ 471 ### this reads in the whole data in one read() call. 472 if ( $handle->read( $$data, $block ) < $block ) { 473 $self->_error( qq[Read error on tarfile (missing data) ']. 474 $entry->full_path ."' at offset $offset" ); 475 next LOOP; 476 } 477 ### throw away trailing garbage ### 478 substr ($$data, $entry->size) = "" if defined $$data; 479 } 480 481 ### part II of the @LongLink munging -- need to do /after/ 482 ### the checksum check. 483 if( $entry->is_longlink ) { 484 ### weird thing in tarfiles -- if the file is actually a 485 ### @LongLink, the data part seems to have a trailing ^@ 486 ### (unprintable) char. to display, pipe output through less. 487 ### but that doesn't *always* happen.. so check if the last 488 ### character is a control character, and if so remove it 489 ### at any rate, we better remove that character here, or tests 490 ### like 'eq' and hash lookups based on names will SO not work 491 ### remove it by calculating the proper size, and then 492 ### tossing out everything that's longer than that size. 493 494 ### count number of nulls 495 my $nulls = $$data =~ tr/\0/\0/; 496 497 ### cut data + size by that many bytes 498 $entry->size( $entry->size - $nulls ); 499 substr ($$data, $entry->size) = ""; 500 } 501 } 502 503 ### clean up of the entries.. posix tar /apparently/ has some 504 ### weird 'feature' that allows for filenames > 255 characters 505 ### they'll put a header in with as name '././@LongLink' and the 506 ### contents will be the name of the /next/ file in the archive 507 ### pretty crappy and kludgy if you ask me 508 509 ### set the name for the next entry if this is a @LongLink; 510 ### this is one ugly hack =/ but needed for direct extraction 511 if( $entry->is_longlink ) { 512 $real_name = $data; 513 next LOOP; 514 } elsif ( defined $real_name ) { 515 $entry->name( $$real_name ); 516 $entry->prefix(''); 517 undef $real_name; 518 } 519 520 if ($filter && $entry->name !~ $filter) { 521 next LOOP; 522 523 } elsif ($filter_cb && ! $filter_cb->($entry)) { 524 next LOOP; 525 526 ### skip this entry if it's a pax header. This is a special file added 527 ### by, among others, git-generated tarballs. It holds comments and is 528 ### not meant for extracting. See #38932: pax_global_header extracted 529 } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) { 530 next LOOP; 531 } 532 533 if ( $extract && !$entry->is_longlink 534 && !$entry->is_unknown 535 && !$entry->is_label ) { 536 $self->_extract_file( $entry ) or return; 537 } 538 539 ### Guard against tarfiles with garbage at the end 540 last LOOP if $entry->name eq ''; 541 542 ### push only the name on the rv if we're extracting 543 ### -- for extract_archive 544 push @$tarfile, ($extract ? $entry->name : $entry); 545 546 if( $limit ) { 547 $count-- unless $entry->is_longlink || $entry->is_dir; 548 last LOOP unless $count; 549 } 550 } continue { 551 undef $data; 552 } 553 554 return $tarfile; 555} 556 557=head2 $tar->contains_file( $filename ) 558 559Check if the archive contains a certain file. 560It will return true if the file is in the archive, false otherwise. 561 562Note however, that this function does an exact match using C<eq> 563on the full path. So it cannot compensate for case-insensitive file- 564systems or compare 2 paths to see if they would point to the same 565underlying file. 566 567=cut 568 569sub contains_file { 570 my $self = shift; 571 my $full = shift; 572 573 return unless defined $full; 574 575 ### don't warn if the entry isn't there.. that's what this function 576 ### is for after all. 577 local $WARN = 0; 578 return 1 if $self->_find_entry($full); 579 return; 580} 581 582=head2 $tar->extract( [@filenames] ) 583 584Write files whose names are equivalent to any of the names in 585C<@filenames> to disk, creating subdirectories as necessary. This 586might not work too well under VMS. 587Under MacPerl, the file's modification time will be converted to the 588MacOS zero of time, and appropriate conversions will be done to the 589path. However, the length of each element of the path is not 590inspected to see whether it's longer than MacOS currently allows (32 591characters). 592 593If C<extract> is called without a list of file names, the entire 594contents of the archive are extracted. 595 596Returns a list of filenames extracted. 597 598=cut 599 600sub extract { 601 my $self = shift; 602 my @args = @_; 603 my @files; 604 my $hashmap; 605 606 # use the speed optimization for all extracted files 607 local($self->{cwd}) = cwd() unless $self->{cwd}; 608 609 ### you requested the extraction of only certain files 610 if( @args ) { 611 for my $file ( @args ) { 612 613 ### it's already an object? 614 if( UNIVERSAL::isa( $file, 'Archive::Tar::File' ) ) { 615 push @files, $file; 616 next; 617 618 ### go find it then 619 } else { 620 621 # create hash-map once to speed up lookup 622 $hashmap = $hashmap || { 623 map { $_->full_path, $_ } @{$self->_data} 624 }; 625 626 if (exists $hashmap->{$file}) { 627 ### we found the file you're looking for 628 push @files, $hashmap->{$file}; 629 } else { 630 return $self->_error( 631 qq[Could not find '$file' in archive] ); 632 } 633 } 634 } 635 636 ### just grab all the file items 637 } else { 638 @files = $self->get_files; 639 } 640 641 ### nothing found? that's an error 642 unless( scalar @files ) { 643 $self->_error( qq[No files found for ] . $self->_file ); 644 return; 645 } 646 647 ### now extract them 648 for my $entry ( @files ) { 649 unless( $self->_extract_file( $entry ) ) { 650 $self->_error(q[Could not extract ']. $entry->full_path .q['] ); 651 return; 652 } 653 } 654 655 return @files; 656} 657 658=head2 $tar->extract_file( $file, [$extract_path] ) 659 660Write an entry, whose name is equivalent to the file name provided to 661disk. Optionally takes a second parameter, which is the full native 662path (including filename) the entry will be written to. 663 664For example: 665 666 $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' ); 667 668 $tar->extract_file( $at_file_object, 'name/i/want/to/give/it' ); 669 670Returns true on success, false on failure. 671 672=cut 673 674sub extract_file { 675 my $self = shift; 676 my $file = shift; return unless defined $file; 677 my $alt = shift; 678 679 my $entry = $self->_find_entry( $file ) 680 or $self->_error( qq[Could not find an entry for '$file'] ), return; 681 682 return $self->_extract_file( $entry, $alt ); 683} 684 685sub _extract_file { 686 my $self = shift; 687 my $entry = shift or return; 688 my $alt = shift; 689 690 ### you wanted an alternate extraction location ### 691 my $name = defined $alt ? $alt : $entry->full_path; 692 693 ### splitpath takes a bool at the end to indicate 694 ### that it's splitting a dir 695 my ($vol,$dirs,$file); 696 if ( defined $alt ) { # It's a local-OS path 697 ($vol,$dirs,$file) = File::Spec->splitpath( $alt, 698 $entry->is_dir ); 699 } else { 700 ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name, 701 $entry->is_dir ); 702 } 703 704 my $dir; 705 ### is $name an absolute path? ### 706 if( $vol || File::Spec->file_name_is_absolute( $dirs ) ) { 707 708 ### absolute names are not allowed to be in tarballs under 709 ### strict mode, so only allow it if a user tells us to do it 710 if( not defined $alt and not $INSECURE_EXTRACT_MODE ) { 711 $self->_error( 712 q[Entry ']. $entry->full_path .q[' is an absolute path. ]. 713 q[Not extracting absolute paths under SECURE EXTRACT MODE] 714 ); 715 return; 716 } 717 718 ### user asked us to, it's fine. 719 $dir = File::Spec->catpath( $vol, $dirs, "" ); 720 721 ### it's a relative path ### 722 } else { 723 my $cwd = (ref $self and defined $self->{cwd}) 724 ? $self->{cwd} 725 : cwd(); 726 727 my @dirs = defined $alt 728 ? File::Spec->splitdir( $dirs ) # It's a local-OS path 729 : File::Spec::Unix->splitdir( $dirs ); # it's UNIX-style, likely 730 # straight from the tarball 731 732 if( not defined $alt and 733 not $INSECURE_EXTRACT_MODE 734 ) { 735 736 ### paths that leave the current directory are not allowed under 737 ### strict mode, so only allow it if a user tells us to do this. 738 if( grep { $_ eq '..' } @dirs ) { 739 740 $self->_error( 741 q[Entry ']. $entry->full_path .q[' is attempting to leave ]. 742 q[the current working directory. Not extracting under ]. 743 q[SECURE EXTRACT MODE] 744 ); 745 return; 746 } 747 748 ### the archive may be asking us to extract into a symlink. This 749 ### is not sane and a possible security issue, as outlined here: 750 ### https://rt.cpan.org/Ticket/Display.html?id=30380 751 ### https://bugzilla.redhat.com/show_bug.cgi?id=295021 752 ### https://issues.rpath.com/browse/RPL-1716 753 my $full_path = $cwd; 754 for my $d ( @dirs ) { 755 $full_path = File::Spec->catdir( $full_path, $d ); 756 757 ### we've already checked this one, and it's safe. Move on. 758 next if ref $self and $self->{_link_cache}->{$full_path}; 759 760 if( -l $full_path ) { 761 my $to = readlink $full_path; 762 my $diag = "symlinked directory ($full_path => $to)"; 763 764 $self->_error( 765 q[Entry ']. $entry->full_path .q[' is attempting to ]. 766 qq[extract to a $diag. This is considered a security ]. 767 q[vulnerability and not allowed under SECURE EXTRACT ]. 768 q[MODE] 769 ); 770 return; 771 } 772 773 ### XXX keep a cache if possible, so the stats become cheaper: 774 $self->{_link_cache}->{$full_path} = 1 if ref $self; 775 } 776 } 777 778 ### '.' is the directory delimiter on VMS, which has to be escaped 779 ### or changed to '_' on vms. vmsify is used, because older versions 780 ### of vmspath do not handle this properly. 781 ### Must not add a '/' to an empty directory though. 782 map { length() ? VMS::Filespec::vmsify($_.'/') : $_ } @dirs if ON_VMS; 783 784 my ($cwd_vol,$cwd_dir,$cwd_file) 785 = File::Spec->splitpath( $cwd ); 786 my @cwd = File::Spec->splitdir( $cwd_dir ); 787 push @cwd, $cwd_file if length $cwd_file; 788 789 ### We need to pass '' as the last element to catpath. Craig Berry 790 ### explains why (msgid <p0624083dc311ae541393@[172.16.52.1]>): 791 ### The root problem is that splitpath on UNIX always returns the 792 ### final path element as a file even if it is a directory, and of 793 ### course there is no way it can know the difference without checking 794 ### against the filesystem, which it is documented as not doing. When 795 ### you turn around and call catpath, on VMS you have to know which bits 796 ### are directory bits and which bits are file bits. In this case we 797 ### know the result should be a directory. I had thought you could omit 798 ### the file argument to catpath in such a case, but apparently on UNIX 799 ### you can't. 800 $dir = File::Spec->catpath( 801 $cwd_vol, File::Spec->catdir( @cwd, @dirs ), '' 802 ); 803 804 ### catdir() returns undef if the path is longer than 255 chars on 805 ### older VMS systems. 806 unless ( defined $dir ) { 807 $^W && $self->_error( qq[Could not compose a path for '$dirs'\n] ); 808 return; 809 } 810 811 } 812 813 if( -e $dir && !-d _ ) { 814 $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] ); 815 return; 816 } 817 818 unless ( -d _ ) { 819 eval { File::Path::mkpath( $dir, 0, 0777 ) }; 820 if( $@ ) { 821 my $fp = $entry->full_path; 822 $self->_error(qq[Could not create directory '$dir' for '$fp': $@]); 823 return; 824 } 825 826 ### XXX chown here? that might not be the same as in the archive 827 ### as we're only chown'ing to the owner of the file we're extracting 828 ### not to the owner of the directory itself, which may or may not 829 ### be another entry in the archive 830 ### Answer: no, gnu tar doesn't do it either, it'd be the wrong 831 ### way to go. 832 #if( $CHOWN && CAN_CHOWN ) { 833 # chown $entry->uid, $entry->gid, $dir or 834 # $self->_error( qq[Could not set uid/gid on '$dir'] ); 835 #} 836 } 837 838 ### we're done if we just needed to create a dir ### 839 return 1 if $entry->is_dir; 840 841 my $full = File::Spec->catfile( $dir, $file ); 842 843 if( $entry->is_unknown ) { 844 $self->_error( qq[Unknown file type for file '$full'] ); 845 return; 846 } 847 848 ### If a file system already contains a block device with the same name as 849 ### the being extracted regular file, we would write the file's content 850 ### to the block device. So remove the existing file (block device) now. 851 ### If an archive contains multiple same-named entries, the last one 852 ### should replace the previous ones. So remove the old file now. 853 ### If the old entry is a symlink to a file outside of the CWD, the new 854 ### entry would create a file there. This is CVE-2018-12015 855 ### <https://rt.cpan.org/Ticket/Display.html?id=125523>. 856 if (-l $full || -e _) { 857 if (!unlink $full) { 858 $self->_error( qq[Could not remove old file '$full': $!] ); 859 return; 860 } 861 } 862 if( length $entry->type && $entry->is_file ) { 863 my $fh = IO::File->new; 864 $fh->open( $full, '>' ) or ( 865 $self->_error( qq[Could not open file '$full': $!] ), 866 return 867 ); 868 869 if( $entry->size ) { 870 binmode $fh; 871 syswrite $fh, $entry->data or ( 872 $self->_error( qq[Could not write data to '$full'] ), 873 return 874 ); 875 } 876 877 close $fh or ( 878 $self->_error( qq[Could not close file '$full'] ), 879 return 880 ); 881 882 } else { 883 $self->_make_special_file( $entry, $full ) or return; 884 } 885 886 ### only update the timestamp if it's not a symlink; that will change the 887 ### timestamp of the original. This addresses bug #33669: Could not update 888 ### timestamp warning on symlinks 889 if( not -l $full ) { 890 utime time, $entry->mtime - TIME_OFFSET, $full or 891 $self->_error( qq[Could not update timestamp] ); 892 } 893 894 if( $CHOWN && CAN_CHOWN->() and not -l $full ) { 895 chown $entry->uid, $entry->gid, $full or 896 $self->_error( qq[Could not set uid/gid on '$full'] ); 897 } 898 899 ### only chmod if we're allowed to, but never chmod symlinks, since they'll 900 ### change the perms on the file they're linking too... 901 if( $CHMOD and not -l $full ) { 902 my $mode = $entry->mode; 903 unless ($SAME_PERMISSIONS) { 904 $mode &= ~(oct(7000) | umask); 905 } 906 chmod $mode, $full or 907 $self->_error( qq[Could not chown '$full' to ] . $entry->mode ); 908 } 909 910 return 1; 911} 912 913sub _make_special_file { 914 my $self = shift; 915 my $entry = shift or return; 916 my $file = shift; return unless defined $file; 917 918 my $err; 919 920 if( $entry->is_symlink ) { 921 my $fail; 922 if( ON_UNIX ) { 923 symlink( $entry->linkname, $file ) or $fail++; 924 925 } else { 926 $self->_extract_special_file_as_plain_file( $entry, $file ) 927 or $fail++; 928 } 929 930 $err = qq[Making symbolic link '$file' to '] . 931 $entry->linkname .q[' failed] if $fail; 932 933 } elsif ( $entry->is_hardlink ) { 934 my $fail; 935 if( ON_UNIX ) { 936 link( $entry->linkname, $file ) or $fail++; 937 938 } else { 939 $self->_extract_special_file_as_plain_file( $entry, $file ) 940 or $fail++; 941 } 942 943 $err = qq[Making hard link from '] . $entry->linkname . 944 qq[' to '$file' failed] if $fail; 945 946 } elsif ( $entry->is_fifo ) { 947 ON_UNIX && !system('mknod', $file, 'p') or 948 $err = qq[Making fifo ']. $entry->name .qq[' failed]; 949 950 } elsif ( $entry->is_blockdev or $entry->is_chardev ) { 951 my $mode = $entry->is_blockdev ? 'b' : 'c'; 952 953 ON_UNIX && !system('mknod', $file, $mode, 954 $entry->devmajor, $entry->devminor) or 955 $err = qq[Making block device ']. $entry->name .qq[' (maj=] . 956 $entry->devmajor . qq[ min=] . $entry->devminor . 957 qq[) failed.]; 958 959 } elsif ( $entry->is_socket ) { 960 ### the original doesn't do anything special for sockets.... ### 961 1; 962 } 963 964 return $err ? $self->_error( $err ) : 1; 965} 966 967### don't know how to make symlinks, let's just extract the file as 968### a plain file 969sub _extract_special_file_as_plain_file { 970 my $self = shift; 971 my $entry = shift or return; 972 my $file = shift; return unless defined $file; 973 974 my $err; 975 TRY: { 976 my $orig = $self->_find_entry( $entry->linkname, $entry ); 977 978 unless( $orig ) { 979 $err = qq[Could not find file '] . $entry->linkname . 980 qq[' in memory.]; 981 last TRY; 982 } 983 984 ### clone the entry, make it appear as a normal file ### 985 my $clone = $orig->clone; 986 $clone->_downgrade_to_plainfile; 987 $self->_extract_file( $clone, $file ) or last TRY; 988 989 return 1; 990 } 991 992 return $self->_error($err); 993} 994 995=head2 $tar->list_files( [\@properties] ) 996 997Returns a list of the names of all the files in the archive. 998 999If C<list_files()> is passed an array reference as its first argument 1000it returns a list of hash references containing the requested 1001properties of each file. The following list of properties is 1002supported: name, size, mtime (last modified date), mode, uid, gid, 1003linkname, uname, gname, devmajor, devminor, prefix. 1004 1005Passing an array reference containing only one element, 'name', is 1006special cased to return a list of names rather than a list of hash 1007references, making it equivalent to calling C<list_files> without 1008arguments. 1009 1010=cut 1011 1012sub list_files { 1013 my $self = shift; 1014 my $aref = shift || [ ]; 1015 1016 unless( $self->_data ) { 1017 $self->read() or return; 1018 } 1019 1020 if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) { 1021 return map { $_->full_path } @{$self->_data}; 1022 } else { 1023 1024 #my @rv; 1025 #for my $obj ( @{$self->_data} ) { 1026 # push @rv, { map { $_ => $obj->$_() } @$aref }; 1027 #} 1028 #return @rv; 1029 1030 ### this does the same as the above.. just needs a +{ } 1031 ### to make sure perl doesn't confuse it for a block 1032 return map { my $o=$_; 1033 +{ map { $_ => $o->$_() } @$aref } 1034 } @{$self->_data}; 1035 } 1036} 1037 1038sub _find_entry { 1039 my $self = shift; 1040 my $file = shift; 1041 1042 unless( defined $file ) { 1043 $self->_error( qq[No file specified] ); 1044 return; 1045 } 1046 1047 ### it's an object already 1048 return $file if UNIVERSAL::isa( $file, 'Archive::Tar::File' ); 1049 1050seach_entry: 1051 if($self->_data){ 1052 for my $entry ( @{$self->_data} ) { 1053 my $path = $entry->full_path; 1054 return $entry if $path eq $file; 1055 } 1056 } 1057 1058 if($Archive::Tar::RESOLVE_SYMLINK!~/none/){ 1059 if(my $link_entry = shift()){#fallback mode when symlinks are using relative notations ( ../a/./b/text.bin ) 1060 $file = _symlinks_resolver( $link_entry->name, $file ); 1061 goto seach_entry if $self->_data; 1062 1063 #this will be slower than never, but won't failed! 1064 1065 my $iterargs = $link_entry->{'_archive'}; 1066 if($Archive::Tar::RESOLVE_SYMLINK=~/speed/ && @$iterargs==3){ 1067 #faster but whole archive will be read in memory 1068 #read whole archive and share data 1069 my $archive = Archive::Tar->new; 1070 $archive->read( @$iterargs ); 1071 push @$iterargs, $archive; #take a trace for destruction 1072 if($archive->_data){ 1073 $self->_data( $archive->_data ); 1074 goto seach_entry; 1075 } 1076 }#faster 1077 1078 {#slower but lower memory usage 1079 # $iterargs = [$filename, $compressed, $opts]; 1080 my $next = Archive::Tar->iter( @$iterargs ); 1081 while(my $e = $next->()){ 1082 if($e->full_path eq $file){ 1083 undef $next; 1084 return $e; 1085 } 1086 } 1087 }#slower 1088 } 1089 } 1090 1091 $self->_error( qq[No such file in archive: '$file'] ); 1092 return; 1093} 1094 1095=head2 $tar->get_files( [@filenames] ) 1096 1097Returns the C<Archive::Tar::File> objects matching the filenames 1098provided. If no filename list was passed, all C<Archive::Tar::File> 1099objects in the current Tar object are returned. 1100 1101Please refer to the C<Archive::Tar::File> documentation on how to 1102handle these objects. 1103 1104=cut 1105 1106sub get_files { 1107 my $self = shift; 1108 1109 return @{ $self->_data } unless @_; 1110 1111 my @list; 1112 for my $file ( @_ ) { 1113 push @list, grep { defined } $self->_find_entry( $file ); 1114 } 1115 1116 return @list; 1117} 1118 1119=head2 $tar->get_content( $file ) 1120 1121Return the content of the named file. 1122 1123=cut 1124 1125sub get_content { 1126 my $self = shift; 1127 my $entry = $self->_find_entry( shift ) or return; 1128 1129 return $entry->data; 1130} 1131 1132=head2 $tar->replace_content( $file, $content ) 1133 1134Make the string $content be the content for the file named $file. 1135 1136=cut 1137 1138sub replace_content { 1139 my $self = shift; 1140 my $entry = $self->_find_entry( shift ) or return; 1141 1142 return $entry->replace_content( shift ); 1143} 1144 1145=head2 $tar->rename( $file, $new_name ) 1146 1147Rename the file of the in-memory archive to $new_name. 1148 1149Note that you must specify a Unix path for $new_name, since per tar 1150standard, all files in the archive must be Unix paths. 1151 1152Returns true on success and false on failure. 1153 1154=cut 1155 1156sub rename { 1157 my $self = shift; 1158 my $file = shift; return unless defined $file; 1159 my $new = shift; return unless defined $new; 1160 1161 my $entry = $self->_find_entry( $file ) or return; 1162 1163 return $entry->rename( $new ); 1164} 1165 1166=head2 $tar->chmod( $file, $mode ) 1167 1168Change mode of $file to $mode. 1169 1170Returns true on success and false on failure. 1171 1172=cut 1173 1174sub chmod { 1175 my $self = shift; 1176 my $file = shift; return unless defined $file; 1177 my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/; 1178 my @args = ("$mode"); 1179 1180 my $entry = $self->_find_entry( $file ) or return; 1181 my $x = $entry->chmod( @args ); 1182 return $x; 1183} 1184 1185=head2 $tar->chown( $file, $uname [, $gname] ) 1186 1187Change owner $file to $uname and $gname. 1188 1189Returns true on success and false on failure. 1190 1191=cut 1192 1193sub chown { 1194 my $self = shift; 1195 my $file = shift; return unless defined $file; 1196 my $uname = shift; return unless defined $uname; 1197 my @args = ($uname); 1198 push(@args, shift); 1199 1200 my $entry = $self->_find_entry( $file ) or return; 1201 my $x = $entry->chown( @args ); 1202 return $x; 1203} 1204 1205=head2 $tar->remove (@filenamelist) 1206 1207Removes any entries with names matching any of the given filenames 1208from the in-memory archive. Returns a list of C<Archive::Tar::File> 1209objects that remain. 1210 1211=cut 1212 1213sub remove { 1214 my $self = shift; 1215 my @list = @_; 1216 1217 my %seen = map { $_->full_path => $_ } @{$self->_data}; 1218 delete $seen{ $_ } for @list; 1219 1220 $self->_data( [values %seen] ); 1221 1222 return values %seen; 1223} 1224 1225=head2 $tar->clear 1226 1227C<clear> clears the current in-memory archive. This effectively gives 1228you a 'blank' object, ready to be filled again. Note that C<clear> 1229only has effect on the object, not the underlying tarfile. 1230 1231=cut 1232 1233sub clear { 1234 my $self = shift or return; 1235 1236 $self->_data( [] ); 1237 $self->_file( '' ); 1238 1239 return 1; 1240} 1241 1242 1243=head2 $tar->write ( [$file, $compressed, $prefix] ) 1244 1245Write the in-memory archive to disk. The first argument can either 1246be the name of a file or a reference to an already open filehandle (a 1247GLOB reference). 1248 1249The second argument is used to indicate compression. You can either 1250compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed 1251to be the C<gzip> compression level (between 1 and 9), but the use of 1252constants is preferred: 1253 1254 # write a gzip compressed file 1255 $tar->write( 'out.tgz', COMPRESS_GZIP ); 1256 1257 # write a bzip compressed file 1258 $tar->write( 'out.tbz', COMPRESS_BZIP ); 1259 1260Note that when you pass in a filehandle, the compression argument 1261is ignored, as all files are printed verbatim to your filehandle. 1262If you wish to enable compression with filehandles, use an 1263C<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead. 1264 1265The third argument is an optional prefix. All files will be tucked 1266away in the directory you specify as prefix. So if you have files 1267'a' and 'b' in your archive, and you specify 'foo' as prefix, they 1268will be written to the archive as 'foo/a' and 'foo/b'. 1269 1270If no arguments are given, C<write> returns the entire formatted 1271archive as a string, which could be useful if you'd like to stuff the 1272archive into a socket or a pipe to gzip or something. 1273 1274 1275=cut 1276 1277sub write { 1278 my $self = shift; 1279 my $file = shift; $file = '' unless defined $file; 1280 my $gzip = shift || 0; 1281 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix; 1282 my $dummy = ''; 1283 1284 ### only need a handle if we have a file to print to ### 1285 my $handle = length($file) 1286 ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) ) 1287 or return ) 1288 : $HAS_PERLIO ? do { open my $h, '>', \$dummy; $h } 1289 : $HAS_IO_STRING ? IO::String->new 1290 : __PACKAGE__->no_string_support(); 1291 1292 ### Addresses: #41798: Nonempty $\ when writing a TAR file produces a 1293 ### corrupt TAR file. Must clear out $\ to make sure no garbage is 1294 ### printed to the archive 1295 local $\; 1296 1297 for my $entry ( @{$self->_data} ) { 1298 ### entries to be written to the tarfile ### 1299 my @write_me; 1300 1301 ### only now will we change the object to reflect the current state 1302 ### of the name and prefix fields -- this needs to be limited to 1303 ### write() only! 1304 my $clone = $entry->clone; 1305 1306 1307 ### so, if you don't want use to use the prefix, we'll stuff 1308 ### everything in the name field instead 1309 if( $DO_NOT_USE_PREFIX ) { 1310 1311 ### you might have an extended prefix, if so, set it in the clone 1312 ### XXX is ::Unix right? 1313 $clone->name( length $ext_prefix 1314 ? File::Spec::Unix->catdir( $ext_prefix, 1315 $clone->full_path) 1316 : $clone->full_path ); 1317 $clone->prefix( '' ); 1318 1319 ### otherwise, we'll have to set it properly -- prefix part in the 1320 ### prefix and name part in the name field. 1321 } else { 1322 1323 ### split them here, not before! 1324 my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path ); 1325 1326 ### you might have an extended prefix, if so, set it in the clone 1327 ### XXX is ::Unix right? 1328 $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix ) 1329 if length $ext_prefix; 1330 1331 $clone->prefix( $prefix ); 1332 $clone->name( $name ); 1333 } 1334 1335 ### names are too long, and will get truncated if we don't add a 1336 ### '@LongLink' file... 1337 my $make_longlink = ( length($clone->name) > NAME_LENGTH or 1338 length($clone->prefix) > PREFIX_LENGTH 1339 ) || 0; 1340 1341 ### perhaps we need to make a longlink file? 1342 if( $make_longlink ) { 1343 my $longlink = Archive::Tar::File->new( 1344 data => LONGLINK_NAME, 1345 $clone->full_path, 1346 { type => LONGLINK } 1347 ); 1348 1349 unless( $longlink ) { 1350 $self->_error( qq[Could not create 'LongLink' entry for ] . 1351 qq[oversize file '] . $clone->full_path ."'" ); 1352 return; 1353 }; 1354 1355 push @write_me, $longlink; 1356 } 1357 1358 push @write_me, $clone; 1359 1360 ### write the one, optionally 2 a::t::file objects to the handle 1361 for my $clone (@write_me) { 1362 1363 ### if the file is a symlink, there are 2 options: 1364 ### either we leave the symlink intact, but then we don't write any 1365 ### data OR we follow the symlink, which means we actually make a 1366 ### copy. if we do the latter, we have to change the TYPE of the 1367 ### clone to 'FILE' 1368 my $link_ok = $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK; 1369 my $data_ok = !$clone->is_symlink && $clone->has_content; 1370 1371 ### downgrade to a 'normal' file if it's a symlink we're going to 1372 ### treat as a regular file 1373 $clone->_downgrade_to_plainfile if $link_ok; 1374 1375 ### get the header for this block 1376 my $header = $self->_format_tar_entry( $clone ); 1377 unless( $header ) { 1378 $self->_error(q[Could not format header for: ] . 1379 $clone->full_path ); 1380 return; 1381 } 1382 1383 unless( print $handle $header ) { 1384 $self->_error(q[Could not write header for: ] . 1385 $clone->full_path); 1386 return; 1387 } 1388 1389 if( $link_ok or $data_ok ) { 1390 unless( print $handle $clone->data ) { 1391 $self->_error(q[Could not write data for: ] . 1392 $clone->full_path); 1393 return; 1394 } 1395 1396 ### pad the end of the clone if required ### 1397 print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK 1398 } 1399 1400 } ### done writing these entries 1401 } 1402 1403 ### write the end markers ### 1404 print $handle TAR_END x 2 or 1405 return $self->_error( qq[Could not write tar end markers] ); 1406 1407 ### did you want it written to a file, or returned as a string? ### 1408 my $rv = length($file) ? 1 1409 : $HAS_PERLIO ? $dummy 1410 : do { seek $handle, 0, 0; local $/; <$handle> }; 1411 1412 ### make sure to close the handle if we created it 1413 if ( $file ne $handle ) { 1414 unless( close $handle ) { 1415 $self->_error( qq[Could not write tar] ); 1416 return; 1417 } 1418 } 1419 1420 return $rv; 1421} 1422 1423sub _format_tar_entry { 1424 my $self = shift; 1425 my $entry = shift or return; 1426 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix; 1427 my $no_prefix = shift || 0; 1428 1429 my $file = $entry->name; 1430 my $prefix = $entry->prefix; $prefix = '' unless defined $prefix; 1431 1432 ### remove the prefix from the file name 1433 ### not sure if this is still needed --kane 1434 ### no it's not -- Archive::Tar::File->_new_from_file will take care of 1435 ### this for us. Even worse, this would break if we tried to add a file 1436 ### like x/x. 1437 #if( length $prefix ) { 1438 # $file =~ s/^$match//; 1439 #} 1440 1441 $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix) 1442 if length $ext_prefix; 1443 1444 ### not sure why this is... ### 1445 my $l = PREFIX_LENGTH; # is ambiguous otherwise... 1446 substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH; 1447 1448 my $f1 = "%06o"; my $f2 = $ZERO_PAD_NUMBERS ? "%011o" : "%11o"; 1449 1450 ### this might be optimizable with a 'changed' flag in the file objects ### 1451 my $tar = pack ( 1452 PACK, 1453 $file, 1454 1455 (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]), 1456 (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]), 1457 1458 "", # checksum field - space padded a bit down 1459 1460 (map { $entry->$_() } qw[type linkname magic]), 1461 1462 $entry->version || TAR_VERSION, 1463 1464 (map { $entry->$_() } qw[uname gname]), 1465 (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]), 1466 1467 ($no_prefix ? '' : $prefix) 1468 ); 1469 1470 ### add the checksum ### 1471 my $checksum_fmt = $ZERO_PAD_NUMBERS ? "%06o\0" : "%06o\0"; 1472 substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar)); 1473 1474 return $tar; 1475} 1476 1477=head2 $tar->add_files( @filenamelist ) 1478 1479Takes a list of filenames and adds them to the in-memory archive. 1480 1481The path to the file is automatically converted to a Unix like 1482equivalent for use in the archive, and, if on MacOS, the file's 1483modification time is converted from the MacOS epoch to the Unix epoch. 1484So tar archives created on MacOS with B<Archive::Tar> can be read 1485both with I<tar> on Unix and applications like I<suntar> or 1486I<Stuffit Expander> on MacOS. 1487 1488Be aware that the file's type/creator and resource fork will be lost, 1489which is usually what you want in cross-platform archives. 1490 1491Instead of a filename, you can also pass it an existing C<Archive::Tar::File> 1492object from, for example, another archive. The object will be clone, and 1493effectively be a copy of the original, not an alias. 1494 1495Returns a list of C<Archive::Tar::File> objects that were just added. 1496 1497=cut 1498 1499sub add_files { 1500 my $self = shift; 1501 my @files = @_ or return; 1502 1503 my @rv; 1504 for my $file ( @files ) { 1505 1506 ### you passed an Archive::Tar::File object 1507 ### clone it so we don't accidentally have a reference to 1508 ### an object from another archive 1509 if( UNIVERSAL::isa( $file,'Archive::Tar::File' ) ) { 1510 push @rv, $file->clone; 1511 next; 1512 } 1513 1514 eval { 1515 if( utf8::is_utf8( $file )) { 1516 utf8::encode( $file ); 1517 } 1518 }; 1519 1520 unless( -e $file || -l $file ) { 1521 $self->_error( qq[No such file: '$file'] ); 1522 next; 1523 } 1524 1525 my $obj = Archive::Tar::File->new( file => $file ); 1526 unless( $obj ) { 1527 $self->_error( qq[Unable to add file: '$file'] ); 1528 next; 1529 } 1530 1531 push @rv, $obj; 1532 } 1533 1534 push @{$self->{_data}}, @rv; 1535 1536 return @rv; 1537} 1538 1539=head2 $tar->add_data ( $filename, $data, [$opthashref] ) 1540 1541Takes a filename, a scalar full of data and optionally a reference to 1542a hash with specific options. 1543 1544Will add a file to the in-memory archive, with name C<$filename> and 1545content C<$data>. Specific properties can be set using C<$opthashref>. 1546The following list of properties is supported: name, size, mtime 1547(last modified date), mode, uid, gid, linkname, uname, gname, 1548devmajor, devminor, prefix, type. (On MacOS, the file's path and 1549modification times are converted to Unix equivalents.) 1550 1551Valid values for the file type are the following constants defined by 1552Archive::Tar::Constant: 1553 1554=over 4 1555 1556=item FILE 1557 1558Regular file. 1559 1560=item HARDLINK 1561 1562=item SYMLINK 1563 1564Hard and symbolic ("soft") links; linkname should specify target. 1565 1566=item CHARDEV 1567 1568=item BLOCKDEV 1569 1570Character and block devices. devmajor and devminor should specify the major 1571and minor device numbers. 1572 1573=item DIR 1574 1575Directory. 1576 1577=item FIFO 1578 1579FIFO (named pipe). 1580 1581=item SOCKET 1582 1583Socket. 1584 1585=back 1586 1587Returns the C<Archive::Tar::File> object that was just added, or 1588C<undef> on failure. 1589 1590=cut 1591 1592sub add_data { 1593 my $self = shift; 1594 my ($file, $data, $opt) = @_; 1595 1596 my $obj = Archive::Tar::File->new( data => $file, $data, $opt ); 1597 unless( $obj ) { 1598 $self->_error( qq[Unable to add file: '$file'] ); 1599 return; 1600 } 1601 1602 push @{$self->{_data}}, $obj; 1603 1604 return $obj; 1605} 1606 1607=head2 $tar->error( [$BOOL] ) 1608 1609Returns the current error string (usually, the last error reported). 1610If a true value was specified, it will give the C<Carp::longmess> 1611equivalent of the error, in effect giving you a stacktrace. 1612 1613For backwards compatibility, this error is also available as 1614C<$Archive::Tar::error> although it is much recommended you use the 1615method call instead. 1616 1617=cut 1618 1619{ 1620 $error = ''; 1621 my $longmess; 1622 1623 sub _error { 1624 my $self = shift; 1625 my $msg = $error = shift; 1626 $longmess = Carp::longmess($error); 1627 if (ref $self) { 1628 $self->{_error} = $error; 1629 $self->{_longmess} = $longmess; 1630 } 1631 1632 ### set Archive::Tar::WARN to 0 to disable printing 1633 ### of errors 1634 if( $WARN ) { 1635 carp $DEBUG ? $longmess : $msg; 1636 } 1637 1638 return; 1639 } 1640 1641 sub error { 1642 my $self = shift; 1643 if (ref $self) { 1644 return shift() ? $self->{_longmess} : $self->{_error}; 1645 } else { 1646 return shift() ? $longmess : $error; 1647 } 1648 } 1649} 1650 1651=head2 $tar->setcwd( $cwd ); 1652 1653C<Archive::Tar> needs to know the current directory, and it will run 1654C<Cwd::cwd()> I<every> time it extracts a I<relative> entry from the 1655tarfile and saves it in the file system. (As of version 1.30, however, 1656C<Archive::Tar> will use the speed optimization described below 1657automatically, so it's only relevant if you're using C<extract_file()>). 1658 1659Since C<Archive::Tar> doesn't change the current directory internally 1660while it is extracting the items in a tarball, all calls to C<Cwd::cwd()> 1661can be avoided if we can guarantee that the current directory doesn't 1662get changed externally. 1663 1664To use this performance boost, set the current directory via 1665 1666 use Cwd; 1667 $tar->setcwd( cwd() ); 1668 1669once before calling a function like C<extract_file> and 1670C<Archive::Tar> will use the current directory setting from then on 1671and won't call C<Cwd::cwd()> internally. 1672 1673To switch back to the default behaviour, use 1674 1675 $tar->setcwd( undef ); 1676 1677and C<Archive::Tar> will call C<Cwd::cwd()> internally again. 1678 1679If you're using C<Archive::Tar>'s C<extract()> method, C<setcwd()> will 1680be called for you. 1681 1682=cut 1683 1684sub setcwd { 1685 my $self = shift; 1686 my $cwd = shift; 1687 1688 $self->{cwd} = $cwd; 1689} 1690 1691=head1 Class Methods 1692 1693=head2 Archive::Tar->create_archive($file, $compressed, @filelist) 1694 1695Creates a tar file from the list of files provided. The first 1696argument can either be the name of the tar file to create or a 1697reference to an open file handle (e.g. a GLOB reference). 1698 1699The second argument is used to indicate compression. You can either 1700compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed 1701to be the C<gzip> compression level (between 1 and 9), but the use of 1702constants is preferred: 1703 1704 # write a gzip compressed file 1705 Archive::Tar->create_archive( 'out.tgz', COMPRESS_GZIP, @filelist ); 1706 1707 # write a bzip compressed file 1708 Archive::Tar->create_archive( 'out.tbz', COMPRESS_BZIP, @filelist ); 1709 1710Note that when you pass in a filehandle, the compression argument 1711is ignored, as all files are printed verbatim to your filehandle. 1712If you wish to enable compression with filehandles, use an 1713C<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead. 1714 1715The remaining arguments list the files to be included in the tar file. 1716These files must all exist. Any files which don't exist or can't be 1717read are silently ignored. 1718 1719If the archive creation fails for any reason, C<create_archive> will 1720return false. Please use the C<error> method to find the cause of the 1721failure. 1722 1723Note that this method does not write C<on the fly> as it were; it 1724still reads all the files into memory before writing out the archive. 1725Consult the FAQ below if this is a problem. 1726 1727=cut 1728 1729sub create_archive { 1730 my $class = shift; 1731 1732 my $file = shift; return unless defined $file; 1733 my $gzip = shift || 0; 1734 my @files = @_; 1735 1736 unless( @files ) { 1737 return $class->_error( qq[Cowardly refusing to create empty archive!] ); 1738 } 1739 1740 my $tar = $class->new; 1741 $tar->add_files( @files ); 1742 return $tar->write( $file, $gzip ); 1743} 1744 1745=head2 Archive::Tar->iter( $filename, [ $compressed, {opt => $val} ] ) 1746 1747Returns an iterator function that reads the tar file without loading 1748it all in memory. Each time the function is called it will return the 1749next file in the tarball. The files are returned as 1750C<Archive::Tar::File> objects. The iterator function returns the 1751empty list once it has exhausted the files contained. 1752 1753The second argument can be a hash reference with options, which are 1754identical to the arguments passed to C<read()>. 1755 1756Example usage: 1757 1758 my $next = Archive::Tar->iter( "example.tar.gz", 1, {filter => qr/\.pm$/} ); 1759 1760 while( my $f = $next->() ) { 1761 print $f->name, "\n"; 1762 1763 $f->extract or warn "Extraction failed"; 1764 1765 # .... 1766 } 1767 1768=cut 1769 1770 1771sub iter { 1772 my $class = shift; 1773 my $filename = shift; 1774 return unless defined $filename; 1775 my $compressed = shift || 0; 1776 my $opts = shift || {}; 1777 1778 ### get a handle to read from. 1779 my $handle = $class->_get_handle( 1780 $filename, 1781 $compressed, 1782 READ_ONLY->( ZLIB ) 1783 ) or return; 1784 1785 my @data; 1786 my $CONSTRUCT_ARGS = [ $filename, $compressed, $opts ]; 1787 return sub { 1788 return shift(@data) if @data; # more than one file returned? 1789 return unless $handle; # handle exhausted? 1790 1791 ### read data, should only return file 1792 my $tarfile = $class->_read_tar($handle, { %$opts, limit => 1 }); 1793 @data = @$tarfile if ref $tarfile && ref $tarfile eq 'ARRAY'; 1794 if($Archive::Tar::RESOLVE_SYMLINK!~/none/){ 1795 foreach(@data){ 1796 #may refine this heuristic for ON_UNIX? 1797 if($_->linkname){ 1798 #is there a better slot to store/share it ? 1799 $_->{'_archive'} = $CONSTRUCT_ARGS; 1800 } 1801 } 1802 } 1803 1804 ### return one piece of data 1805 return shift(@data) if @data; 1806 1807 ### data is exhausted, free the filehandle 1808 undef $handle; 1809 if(@$CONSTRUCT_ARGS == 4){ 1810 #free archive in memory 1811 undef $CONSTRUCT_ARGS->[-1]; 1812 } 1813 return; 1814 }; 1815} 1816 1817=head2 Archive::Tar->list_archive($file, $compressed, [\@properties]) 1818 1819Returns a list of the names of all the files in the archive. The 1820first argument can either be the name of the tar file to list or a 1821reference to an open file handle (e.g. a GLOB reference). 1822 1823If C<list_archive()> is passed an array reference as its third 1824argument it returns a list of hash references containing the requested 1825properties of each file. The following list of properties is 1826supported: full_path, name, size, mtime (last modified date), mode, 1827uid, gid, linkname, uname, gname, devmajor, devminor, prefix, type. 1828 1829See C<Archive::Tar::File> for details about supported properties. 1830 1831Passing an array reference containing only one element, 'name', is 1832special cased to return a list of names rather than a list of hash 1833references. 1834 1835=cut 1836 1837sub list_archive { 1838 my $class = shift; 1839 my $file = shift; return unless defined $file; 1840 my $gzip = shift || 0; 1841 1842 my $tar = $class->new($file, $gzip); 1843 return unless $tar; 1844 1845 return $tar->list_files( @_ ); 1846} 1847 1848=head2 Archive::Tar->extract_archive($file, $compressed) 1849 1850Extracts the contents of the tar file. The first argument can either 1851be the name of the tar file to create or a reference to an open file 1852handle (e.g. a GLOB reference). All relative paths in the tar file will 1853be created underneath the current working directory. 1854 1855C<extract_archive> will return a list of files it extracted. 1856If the archive extraction fails for any reason, C<extract_archive> 1857will return false. Please use the C<error> method to find the cause 1858of the failure. 1859 1860=cut 1861 1862sub extract_archive { 1863 my $class = shift; 1864 my $file = shift; return unless defined $file; 1865 my $gzip = shift || 0; 1866 1867 my $tar = $class->new( ) or return; 1868 1869 return $tar->read( $file, $gzip, { extract => 1 } ); 1870} 1871 1872=head2 $bool = Archive::Tar->has_io_string 1873 1874Returns true if we currently have C<IO::String> support loaded. 1875 1876Either C<IO::String> or C<perlio> support is needed to support writing 1877stringified archives. Currently, C<perlio> is the preferred method, if 1878available. 1879 1880See the C<GLOBAL VARIABLES> section to see how to change this preference. 1881 1882=cut 1883 1884sub has_io_string { return $HAS_IO_STRING; } 1885 1886=head2 $bool = Archive::Tar->has_perlio 1887 1888Returns true if we currently have C<perlio> support loaded. 1889 1890This requires C<perl-5.8> or higher, compiled with C<perlio> 1891 1892Either C<IO::String> or C<perlio> support is needed to support writing 1893stringified archives. Currently, C<perlio> is the preferred method, if 1894available. 1895 1896See the C<GLOBAL VARIABLES> section to see how to change this preference. 1897 1898=cut 1899 1900sub has_perlio { return $HAS_PERLIO; } 1901 1902=head2 $bool = Archive::Tar->has_zlib_support 1903 1904Returns true if C<Archive::Tar> can extract C<zlib> compressed archives 1905 1906=cut 1907 1908sub has_zlib_support { return ZLIB } 1909 1910=head2 $bool = Archive::Tar->has_bzip2_support 1911 1912Returns true if C<Archive::Tar> can extract C<bzip2> compressed archives 1913 1914=cut 1915 1916sub has_bzip2_support { return BZIP } 1917 1918=head2 Archive::Tar->can_handle_compressed_files 1919 1920A simple checking routine, which will return true if C<Archive::Tar> 1921is able to uncompress compressed archives on the fly with C<IO::Zlib> 1922and C<IO::Compress::Bzip2> or false if not both are installed. 1923 1924You can use this as a shortcut to determine whether C<Archive::Tar> 1925will do what you think before passing compressed archives to its 1926C<read> method. 1927 1928=cut 1929 1930sub can_handle_compressed_files { return ZLIB && BZIP ? 1 : 0 } 1931 1932sub no_string_support { 1933 croak("You have to install IO::String to support writing archives to strings"); 1934} 1935 1936sub _symlinks_resolver{ 1937 my ($src, $trg) = @_; 1938 my @src = split /[\/\\]/, $src; 1939 my @trg = split /[\/\\]/, $trg; 1940 pop @src; #strip out current object name 1941 if(@trg and $trg[0] eq ''){ 1942 shift @trg; 1943 #restart path from scratch 1944 @src = ( ); 1945 } 1946 foreach my $part ( @trg ){ 1947 next if $part eq '.'; #ignore current 1948 if($part eq '..'){ 1949 #got to parent 1950 pop @src; 1951 } 1952 else{ 1953 #append it 1954 push @src, $part; 1955 } 1956 } 1957 my $path = join('/', @src); 1958 warn "_symlinks_resolver('$src','$trg') = $path" if $DEBUG; 1959 return $path; 1960} 1961 19621; 1963 1964__END__ 1965 1966=head1 GLOBAL VARIABLES 1967 1968=head2 $Archive::Tar::FOLLOW_SYMLINK 1969 1970Set this variable to C<1> to make C<Archive::Tar> effectively make a 1971copy of the file when extracting. Default is C<0>, which 1972means the symlink stays intact. Of course, you will have to pack the 1973file linked to as well. 1974 1975This option is checked when you write out the tarfile using C<write> 1976or C<create_archive>. 1977 1978This works just like C</bin/tar>'s C<-h> option. 1979 1980=head2 $Archive::Tar::CHOWN 1981 1982By default, C<Archive::Tar> will try to C<chown> your files if it is 1983able to. In some cases, this may not be desired. In that case, set 1984this variable to C<0> to disable C<chown>-ing, even if it were 1985possible. 1986 1987The default is C<1>. 1988 1989=head2 $Archive::Tar::CHMOD 1990 1991By default, C<Archive::Tar> will try to C<chmod> your files to 1992whatever mode was specified for the particular file in the archive. 1993In some cases, this may not be desired. In that case, set this 1994variable to C<0> to disable C<chmod>-ing. 1995 1996The default is C<1>. 1997 1998=head2 $Archive::Tar::SAME_PERMISSIONS 1999 2000When, C<$Archive::Tar::CHMOD> is enabled, this setting controls whether 2001the permissions on files from the archive are used without modification 2002of if they are filtered by removing any setid bits and applying the 2003current umask. 2004 2005The default is C<1> for the root user and C<0> for normal users. 2006 2007=head2 $Archive::Tar::DO_NOT_USE_PREFIX 2008 2009By default, C<Archive::Tar> will try to put paths that are over 2010100 characters in the C<prefix> field of your tar header, as 2011defined per POSIX-standard. However, some (older) tar programs 2012do not implement this spec. To retain compatibility with these older 2013or non-POSIX compliant versions, you can set the C<$DO_NOT_USE_PREFIX> 2014variable to a true value, and C<Archive::Tar> will use an alternate 2015way of dealing with paths over 100 characters by using the 2016C<GNU Extended Header> feature. 2017 2018Note that clients who do not support the C<GNU Extended Header> 2019feature will not be able to read these archives. Such clients include 2020tars on C<Solaris>, C<Irix> and C<AIX>. 2021 2022The default is C<0>. 2023 2024=head2 $Archive::Tar::DEBUG 2025 2026Set this variable to C<1> to always get the C<Carp::longmess> output 2027of the warnings, instead of the regular C<carp>. This is the same 2028message you would get by doing: 2029 2030 $tar->error(1); 2031 2032Defaults to C<0>. 2033 2034=head2 $Archive::Tar::WARN 2035 2036Set this variable to C<0> if you do not want any warnings printed. 2037Personally I recommend against doing this, but people asked for the 2038option. Also, be advised that this is of course not threadsafe. 2039 2040Defaults to C<1>. 2041 2042=head2 $Archive::Tar::error 2043 2044Holds the last reported error. Kept for historical reasons, but its 2045use is very much discouraged. Use the C<error()> method instead: 2046 2047 warn $tar->error unless $tar->extract; 2048 2049Note that in older versions of this module, the C<error()> method 2050would return an effectively global value even when called an instance 2051method as above. This has since been fixed, and multiple instances of 2052C<Archive::Tar> now have separate error strings. 2053 2054=head2 $Archive::Tar::INSECURE_EXTRACT_MODE 2055 2056This variable indicates whether C<Archive::Tar> should allow 2057files to be extracted outside their current working directory. 2058 2059Allowing this could have security implications, as a malicious 2060tar archive could alter or replace any file the extracting user 2061has permissions to. Therefor, the default is to not allow 2062insecure extractions. 2063 2064If you trust the archive, or have other reasons to allow the 2065archive to write files outside your current working directory, 2066set this variable to C<true>. 2067 2068Note that this is a backwards incompatible change from version 2069C<1.36> and before. 2070 2071=head2 $Archive::Tar::HAS_PERLIO 2072 2073This variable holds a boolean indicating if we currently have 2074C<perlio> support loaded. This will be enabled for any perl 2075greater than C<5.8> compiled with C<perlio>. 2076 2077If you feel strongly about disabling it, set this variable to 2078C<false>. Note that you will then need C<IO::String> installed 2079to support writing stringified archives. 2080 2081Don't change this variable unless you B<really> know what you're 2082doing. 2083 2084=head2 $Archive::Tar::HAS_IO_STRING 2085 2086This variable holds a boolean indicating if we currently have 2087C<IO::String> support loaded. This will be enabled for any perl 2088that has a loadable C<IO::String> module. 2089 2090If you feel strongly about disabling it, set this variable to 2091C<false>. Note that you will then need C<perlio> support from 2092your perl to be able to write stringified archives. 2093 2094Don't change this variable unless you B<really> know what you're 2095doing. 2096 2097=head2 $Archive::Tar::ZERO_PAD_NUMBERS 2098 2099This variable holds a boolean indicating if we will create 2100zero padded numbers for C<size>, C<mtime> and C<checksum>. 2101The default is C<0>, indicating that we will create space padded 2102numbers. Added for compatibility with C<busybox> implementations. 2103 2104=head2 Tuning the way RESOLVE_SYMLINK will works 2105 2106 You can tune the behaviour by setting the $Archive::Tar::RESOLVE_SYMLINK variable, 2107 or $ENV{PERL5_AT_RESOLVE_SYMLINK} before loading the module Archive::Tar. 2108 2109 Values can be one of the following: 2110 2111 none 2112 Disable this mechanism and failed as it was in previous version (<1.88) 2113 2114 speed (default) 2115 If you prefer speed 2116 this will read again the whole archive using read() so all entries 2117 will be available 2118 2119 memory 2120 If you prefer memory 2121 2122 Limitation 2123 2124 It won't work for terminal, pipe or sockets or every non seekable source. 2125 2126=cut 2127 2128=head1 FAQ 2129 2130=over 4 2131 2132=item What's the minimum perl version required to run Archive::Tar? 2133 2134You will need perl version 5.005_03 or newer. 2135 2136=item Isn't Archive::Tar slow? 2137 2138Yes it is. It's pure perl, so it's a lot slower then your C</bin/tar> 2139However, it's very portable. If speed is an issue, consider using 2140C</bin/tar> instead. 2141 2142=item Isn't Archive::Tar heavier on memory than /bin/tar? 2143 2144Yes it is, see previous answer. Since C<Compress::Zlib> and therefore 2145C<IO::Zlib> doesn't support C<seek> on their filehandles, there is little 2146choice but to read the archive into memory. 2147This is ok if you want to do in-memory manipulation of the archive. 2148 2149If you just want to extract, use the C<extract_archive> class method 2150instead. It will optimize and write to disk immediately. 2151 2152Another option is to use the C<iter> class method to iterate over 2153the files in the tarball without reading them all in memory at once. 2154 2155=item Can you lazy-load data instead? 2156 2157In some cases, yes. You can use the C<iter> class method to iterate 2158over the files in the tarball without reading them all in memory at once. 2159 2160=item How much memory will an X kb tar file need? 2161 2162Probably more than X kb, since it will all be read into memory. If 2163this is a problem, and you don't need to do in memory manipulation 2164of the archive, consider using the C<iter> class method, or C</bin/tar> 2165instead. 2166 2167=item What do you do with unsupported filetypes in an archive? 2168 2169C<Unix> has a few filetypes that aren't supported on other platforms, 2170like C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just 2171try to make a copy of the original file, rather than throwing an error. 2172 2173This does require you to read the entire archive in to memory first, 2174since otherwise we wouldn't know what data to fill the copy with. 2175(This means that you cannot use the class methods, including C<iter> 2176on archives that have incompatible filetypes and still expect things 2177to work). 2178 2179For other filetypes, like C<chardevs> and C<blockdevs> we'll warn that 2180the extraction of this particular item didn't work. 2181 2182=item I'm using WinZip, or some other non-POSIX client, and files are not being extracted properly! 2183 2184By default, C<Archive::Tar> is in a completely POSIX-compatible 2185mode, which uses the POSIX-specification of C<tar> to store files. 2186For paths greater than 100 characters, this is done using the 2187C<POSIX header prefix>. Non-POSIX-compatible clients may not support 2188this part of the specification, and may only support the C<GNU Extended 2189Header> functionality. To facilitate those clients, you can set the 2190C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. See the 2191C<GLOBAL VARIABLES> section for details on this variable. 2192 2193Note that GNU tar earlier than version 1.14 does not cope well with 2194the C<POSIX header prefix>. If you use such a version, consider setting 2195the C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. 2196 2197=item How do I extract only files that have property X from an archive? 2198 2199Sometimes, you might not wish to extract a complete archive, just 2200the files that are relevant to you, based on some criteria. 2201 2202You can do this by filtering a list of C<Archive::Tar::File> objects 2203based on your criteria. For example, to extract only files that have 2204the string C<foo> in their title, you would use: 2205 2206 $tar->extract( 2207 grep { $_->full_path =~ /foo/ } $tar->get_files 2208 ); 2209 2210This way, you can filter on any attribute of the files in the archive. 2211Consult the C<Archive::Tar::File> documentation on how to use these 2212objects. 2213 2214=item How do I access .tar.Z files? 2215 2216The C<Archive::Tar> module can optionally use C<Compress::Zlib> (via 2217the C<IO::Zlib> module) to access tar files that have been compressed 2218with C<gzip>. Unfortunately tar files compressed with the Unix C<compress> 2219utility cannot be read by C<Compress::Zlib> and so cannot be directly 2220accesses by C<Archive::Tar>. 2221 2222If the C<uncompress> or C<gunzip> programs are available, you can use 2223one of these workarounds to read C<.tar.Z> files from C<Archive::Tar> 2224 2225Firstly with C<uncompress> 2226 2227 use Archive::Tar; 2228 2229 open F, "uncompress -c $filename |"; 2230 my $tar = Archive::Tar->new(*F); 2231 ... 2232 2233and this with C<gunzip> 2234 2235 use Archive::Tar; 2236 2237 open F, "gunzip -c $filename |"; 2238 my $tar = Archive::Tar->new(*F); 2239 ... 2240 2241Similarly, if the C<compress> program is available, you can use this to 2242write a C<.tar.Z> file 2243 2244 use Archive::Tar; 2245 use IO::File; 2246 2247 my $fh = new IO::File "| compress -c >$filename"; 2248 my $tar = Archive::Tar->new(); 2249 ... 2250 $tar->write($fh); 2251 $fh->close ; 2252 2253=item How do I handle Unicode strings? 2254 2255C<Archive::Tar> uses byte semantics for any files it reads from or writes 2256to disk. This is not a problem if you only deal with files and never 2257look at their content or work solely with byte strings. But if you use 2258Unicode strings with character semantics, some additional steps need 2259to be taken. 2260 2261For example, if you add a Unicode string like 2262 2263 # Problem 2264 $tar->add_data('file.txt', "Euro: \x{20AC}"); 2265 2266then there will be a problem later when the tarfile gets written out 2267to disk via C<< $tar->write() >>: 2268 2269 Wide character in print at .../Archive/Tar.pm line 1014. 2270 2271The data was added as a Unicode string and when writing it out to disk, 2272the C<:utf8> line discipline wasn't set by C<Archive::Tar>, so Perl 2273tried to convert the string to ISO-8859 and failed. The written file 2274now contains garbage. 2275 2276For this reason, Unicode strings need to be converted to UTF-8-encoded 2277bytestrings before they are handed off to C<add_data()>: 2278 2279 use Encode; 2280 my $data = "Accented character: \x{20AC}"; 2281 $data = encode('utf8', $data); 2282 2283 $tar->add_data('file.txt', $data); 2284 2285A opposite problem occurs if you extract a UTF8-encoded file from a 2286tarball. Using C<get_content()> on the C<Archive::Tar::File> object 2287will return its content as a bytestring, not as a Unicode string. 2288 2289If you want it to be a Unicode string (because you want character 2290semantics with operations like regular expression matching), you need 2291to decode the UTF8-encoded content and have Perl convert it into 2292a Unicode string: 2293 2294 use Encode; 2295 my $data = $tar->get_content(); 2296 2297 # Make it a Unicode string 2298 $data = decode('utf8', $data); 2299 2300There is no easy way to provide this functionality in C<Archive::Tar>, 2301because a tarball can contain many files, and each of which could be 2302encoded in a different way. 2303 2304=back 2305 2306=head1 CAVEATS 2307 2308The AIX tar does not fill all unused space in the tar archive with 0x00. 2309This sometimes leads to warning messages from C<Archive::Tar>. 2310 2311 Invalid header block at offset nnn 2312 2313A fix for that problem is scheduled to be released in the following levels 2314of AIX, all of which should be coming out in the 4th quarter of 2009: 2315 2316 AIX 5.3 TL7 SP10 2317 AIX 5.3 TL8 SP8 2318 AIX 5.3 TL9 SP5 2319 AIX 5.3 TL10 SP2 2320 2321 AIX 6.1 TL0 SP11 2322 AIX 6.1 TL1 SP7 2323 AIX 6.1 TL2 SP6 2324 AIX 6.1 TL3 SP3 2325 2326The IBM APAR number for this problem is IZ50240 (Reported component ID: 23275765G0300 / AIX 5.3). It is possible to get an ifix for that problem. 2328If you need an ifix please contact your local IBM AIX support. 2329 2330=head1 TODO 2331 2332=over 4 2333 2334=item Check if passed in handles are open for read/write 2335 2336Currently I don't know of any portable pure perl way to do this. 2337Suggestions welcome. 2338 2339=item Allow archives to be passed in as string 2340 2341Currently, we only allow opened filehandles or filenames, but 2342not strings. The internals would need some reworking to facilitate 2343stringified archives. 2344 2345=item Facilitate processing an opened filehandle of a compressed archive 2346 2347Currently, we only support this if the filehandle is an IO::Zlib object. 2348Environments, like apache, will present you with an opened filehandle 2349to an uploaded file, which might be a compressed archive. 2350 2351=back 2352 2353=head1 SEE ALSO 2354 2355=over 4 2356 2357=item The GNU tar specification 2358 2359C<http://www.gnu.org/software/tar/manual/tar.html> 2360 2361=item The PAX format specification 2362 2363The specification which tar derives from; C< http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html> 2364 2365=item A comparison of GNU and POSIX tar standards; C<http://www.delorie.com/gnu/docs/tar/tar_114.html> 2366 2367=item GNU tar intends to switch to POSIX compatibility 2368 2369GNU Tar authors have expressed their intention to become completely 2370POSIX-compatible; C<http://www.gnu.org/software/tar/manual/html_node/Formats.html> 2371 2372=item A Comparison between various tar implementations 2373 2374Lists known issues and incompatibilities; C<http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs> 2375 2376=back 2377 2378=head1 AUTHOR 2379 2380This module by Jos Boumans E<lt>kane@cpan.orgE<gt>. 2381 2382Please reports bugs to E<lt>bug-archive-tar@rt.cpan.orgE<gt>. 2383 2384=head1 ACKNOWLEDGEMENTS 2385 2386Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney, Gisle Aas, 2387Rainer Tammer and especially Andrew Savige for their help and suggestions. 2388 2389=head1 COPYRIGHT 2390 2391This module is copyright (c) 2002 - 2009 Jos Boumans 2392E<lt>kane@cpan.orgE<gt>. All rights reserved. 2393 2394This library is free software; you may redistribute and/or modify 2395it under the same terms as Perl itself. 2396 2397=cut 2398