1### the gnu tar specification: 2### http://www.gnu.org/software/tar/manual/tar.html 3### 4### and the pax format spec, which tar derives from: 5### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html 6 7package Archive::Tar; 8require 5.005_03; 9 10use Cwd; 11use IO::Zlib; 12use IO::File; 13use Carp qw(carp croak); 14use File::Spec (); 15use File::Spec::Unix (); 16use File::Path (); 17 18use Archive::Tar::File; 19use Archive::Tar::Constant; 20 21require Exporter; 22 23use strict; 24use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD 25 $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING $SAME_PERMISSIONS 26 $INSECURE_EXTRACT_MODE $ZERO_PAD_NUMBERS @ISA @EXPORT $RESOLVE_SYMLINK 27 ]; 28 29@ISA = qw[Exporter]; 30@EXPORT = qw[ COMPRESS_GZIP COMPRESS_BZIP COMPRESS_XZ ]; 31$DEBUG = 0; 32$WARN = 1; 33$FOLLOW_SYMLINK = 0; 34$VERSION = "2.40"; 35$CHOWN = 1; 36$CHMOD = 1; 37$SAME_PERMISSIONS = $> == 0 ? 1 : 0; 38$DO_NOT_USE_PREFIX = 0; 39$INSECURE_EXTRACT_MODE = 0; 40$ZERO_PAD_NUMBERS = 0; 41$RESOLVE_SYMLINK = $ENV{'PERL5_AT_RESOLVE_SYMLINK'} || 'speed'; 42 43BEGIN { 44 use Config; 45 $HAS_PERLIO = $Config::Config{useperlio}; 46 47 ### try and load IO::String anyway, so you can dynamically 48 ### switch between perlio and IO::String 49 $HAS_IO_STRING = eval { 50 require IO::String; 51 IO::String->import; 52 1; 53 } || 0; 54} 55 56=head1 NAME 57 58Archive::Tar - module for manipulations of tar archives 59 60=head1 SYNOPSIS 61 62 use Archive::Tar; 63 my $tar = Archive::Tar->new; 64 65 $tar->read('origin.tgz'); 66 $tar->extract(); 67 68 $tar->add_files('file/foo.pl', 'docs/README'); 69 $tar->add_data('file/baz.txt', 'This is the contents now'); 70 71 $tar->rename('oldname', 'new/file/name'); 72 $tar->chown('/', 'root'); 73 $tar->chown('/', 'root:root'); 74 $tar->chmod('/tmp', '1777'); 75 76 $tar->write('files.tar'); # plain tar 77 $tar->write('files.tgz', COMPRESS_GZIP); # gzip compressed 78 $tar->write('files.tbz', COMPRESS_BZIP); # bzip2 compressed 79 $tar->write('files.txz', COMPRESS_XZ); # xz compressed 80 81=head1 DESCRIPTION 82 83Archive::Tar provides an object oriented mechanism for handling tar 84files. It provides class methods for quick and easy files handling 85while also allowing for the creation of tar file objects for custom 86manipulation. If you have the IO::Zlib module installed, 87Archive::Tar will also support compressed or gzipped tar files. 88 89An object of class Archive::Tar represents a .tar(.gz) archive full 90of files and things. 91 92=head1 Object Methods 93 94=head2 Archive::Tar->new( [$file, $compressed] ) 95 96Returns a new Tar object. If given any arguments, C<new()> calls the 97C<read()> method automatically, passing on the arguments provided to 98the C<read()> method. 99 100If C<new()> is invoked with arguments and the C<read()> method fails 101for any reason, C<new()> returns undef. 102 103=cut 104 105my $tmpl = { 106 _data => [ ], 107 _file => 'Unknown', 108}; 109 110### install get/set accessors for this object. 111for my $key ( keys %$tmpl ) { 112 no strict 'refs'; 113 *{__PACKAGE__."::$key"} = sub { 114 my $self = shift; 115 $self->{$key} = $_[0] if @_; 116 return $self->{$key}; 117 } 118} 119 120sub new { 121 my $class = shift; 122 $class = ref $class if ref $class; 123 124 ### copying $tmpl here since a shallow copy makes it use the 125 ### same aref, causing for files to remain in memory always. 126 my $obj = bless { _data => [ ], _file => 'Unknown', _error => '' }, $class; 127 128 if (@_) { 129 unless ( $obj->read( @_ ) ) { 130 $obj->_error(qq[No data could be read from file]); 131 return; 132 } 133 } 134 135 return $obj; 136} 137 138=head2 $tar->read ( $filename|$handle, [$compressed, {opt => 'val'}] ) 139 140Read the given tar file into memory. 141The first argument can either be the name of a file or a reference to 142an already open filehandle (or an IO::Zlib object if it's compressed) 143 144The C<read> will I<replace> any previous content in C<$tar>! 145 146The second argument may be considered optional, but remains for 147backwards compatibility. Archive::Tar now looks at the file 148magic to determine what class should be used to open the file 149and will transparently Do The Right Thing. 150 151Archive::Tar will warn if you try to pass a bzip2 / xz compressed file and the 152IO::Uncompress::Bunzip2 / IO::Uncompress::UnXz are not available and simply return. 153 154Note that you can currently B<not> pass a C<gzip> compressed 155filehandle, which is not opened with C<IO::Zlib>, a C<bzip2> compressed 156filehandle, which is not opened with C<IO::Uncompress::Bunzip2>, a C<xz> compressed 157filehandle, which is not opened with C<IO::Uncompress::UnXz>, nor a string 158containing the full archive information (either compressed or 159uncompressed). These are worth while features, but not currently 160implemented. See the C<TODO> section. 161 162The third argument can be a hash reference with options. Note that 163all options are case-sensitive. 164 165=over 4 166 167=item limit 168 169Do not read more than C<limit> files. This is useful if you have 170very big archives, and are only interested in the first few files. 171 172=item filter 173 174Can be set to a regular expression. Only files with names that match 175the expression will be read. 176 177=item md5 178 179Set to 1 and the md5sum of files will be returned (instead of file data) 180 my $iter = Archive::Tar->iter( $file, 1, {md5 => 1} ); 181 while( my $f = $iter->() ) { 182 print $f->data . "\t" . $f->full_path . $/; 183 } 184 185=item extract 186 187If set to true, immediately extract entries when reading them. This 188gives you the same memory break as the C<extract_archive> function. 189Note however that entries will not be read into memory, but written 190straight to disk. This means no C<Archive::Tar::File> objects are 191created for you to inspect. 192 193=back 194 195All files are stored internally as C<Archive::Tar::File> objects. 196Please consult the L<Archive::Tar::File> documentation for details. 197 198Returns the number of files read in scalar context, and a list of 199C<Archive::Tar::File> objects in list context. 200 201=cut 202 203sub read { 204 my $self = shift; 205 my $file = shift; 206 my $gzip = shift || 0; 207 my $opts = shift || {}; 208 209 unless( defined $file ) { 210 $self->_error( qq[No file to read from!] ); 211 return; 212 } else { 213 $self->_file( $file ); 214 } 215 216 my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) ) 217 or return; 218 219 my $data = $self->_read_tar( $handle, $opts ) or return; 220 221 $self->_data( $data ); 222 223 return wantarray ? @$data : scalar @$data; 224} 225 226sub _get_handle { 227 my $self = shift; 228 my $file = shift; return unless defined $file; 229 my $compress = shift || 0; 230 my $mode = shift || READ_ONLY->( ZLIB ); # default to read only 231 232 ### Check if file is a file handle or IO glob 233 if ( ref $file ) { 234 return $file if eval{ *$file{IO} }; 235 return $file if eval{ $file->isa(q{IO::Handle}) }; 236 $file = q{}.$file; 237 } 238 239 ### get a FH opened to the right class, so we can use it transparently 240 ### throughout the program 241 my $fh; 242 { ### reading magic only makes sense if we're opening a file for 243 ### reading. otherwise, just use what the user requested. 244 my $magic = ''; 245 if( MODE_READ->($mode) ) { 246 open my $tmp, $file or do { 247 $self->_error( qq[Could not open '$file' for reading: $!] ); 248 return; 249 }; 250 251 ### read the first 6 bytes of the file to figure out which class to 252 ### use to open the file. 253 sysread( $tmp, $magic, 6 ); 254 close $tmp; 255 } 256 257 ### is it xz? 258 ### if you asked specifically for xz compression, or if we're in 259 ### read mode and the magic numbers add up, use xz 260 if( XZ and ( 261 ($compress eq COMPRESS_XZ) or 262 ( MODE_READ->($mode) and $magic =~ XZ_MAGIC_NUM ) 263 ) 264 ) { 265 if( MODE_READ->($mode) ) { 266 $fh = IO::Uncompress::UnXz->new( $file ) or do { 267 $self->_error( qq[Could not read '$file': ] . 268 $IO::Uncompress::UnXz::UnXzError 269 ); 270 return; 271 }; 272 } else { 273 $fh = IO::Compress::Xz->new( $file ) or do { 274 $self->_error( qq[Could not write to '$file': ] . 275 $IO::Compress::Xz::XzError 276 ); 277 return; 278 }; 279 } 280 281 ### is it bzip? 282 ### if you asked specifically for bzip compression, or if we're in 283 ### read mode and the magic numbers add up, use bzip 284 } elsif( BZIP and ( 285 ($compress eq COMPRESS_BZIP) or 286 ( MODE_READ->($mode) and $magic =~ BZIP_MAGIC_NUM ) 287 ) 288 ) { 289 290 ### different reader/writer modules, different error vars... sigh 291 if( MODE_READ->($mode) ) { 292 $fh = IO::Uncompress::Bunzip2->new( $file, MultiStream => 1 ) or do { 293 $self->_error( qq[Could not read '$file': ] . 294 $IO::Uncompress::Bunzip2::Bunzip2Error 295 ); 296 return; 297 }; 298 299 } else { 300 $fh = IO::Compress::Bzip2->new( $file ) or do { 301 $self->_error( qq[Could not write to '$file': ] . 302 $IO::Compress::Bzip2::Bzip2Error 303 ); 304 return; 305 }; 306 } 307 308 ### is it gzip? 309 ### if you asked for compression, if you wanted to read or the gzip 310 ### magic number is present (redundant with read) 311 } elsif( ZLIB and ( 312 $compress or MODE_READ->($mode) or $magic =~ GZIP_MAGIC_NUM 313 ) 314 ) { 315 $fh = IO::Zlib->new; 316 317 unless( $fh->open( $file, $mode ) ) { 318 $self->_error(qq[Could not create filehandle for '$file': $!]); 319 return; 320 } 321 322 ### is it plain tar? 323 } else { 324 $fh = IO::File->new; 325 326 unless( $fh->open( $file, $mode ) ) { 327 $self->_error(qq[Could not create filehandle for '$file': $!]); 328 return; 329 } 330 331 ### enable bin mode on tar archives 332 binmode $fh; 333 } 334 } 335 336 return $fh; 337} 338 339 340sub _read_tar { 341 my $self = shift; 342 my $handle = shift or return; 343 my $opts = shift || {}; 344 345 my $count = $opts->{limit} || 0; 346 my $filter = $opts->{filter}; 347 my $md5 = $opts->{md5} || 0; # cdrake 348 my $filter_cb = $opts->{filter_cb}; 349 my $extract = $opts->{extract} || 0; 350 351 ### set a cap on the amount of files to extract ### 352 my $limit = 0; 353 $limit = 1 if $count > 0; 354 355 my $tarfile = [ ]; 356 my $chunk; 357 my $read = 0; 358 my $real_name; # to set the name of a file when 359 # we're encountering @longlink 360 my $data; 361 362 LOOP: 363 while( $handle->read( $chunk, HEAD ) ) { 364 ### IO::Zlib doesn't support this yet 365 my $offset; 366 if ( ref($handle) ne 'IO::Zlib' ) { 367 local $@; 368 $offset = eval { tell $handle } || 'unknown'; 369 $@ = ''; 370 } 371 else { 372 $offset = 'unknown'; 373 } 374 375 unless( $read++ ) { 376 my $gzip = GZIP_MAGIC_NUM; 377 if( $chunk =~ /$gzip/ ) { 378 $self->_error( qq[Cannot read compressed format in tar-mode] ); 379 return; 380 } 381 382 ### size is < HEAD, which means a corrupted file, as the minimum 383 ### length is _at least_ HEAD 384 if (length $chunk != HEAD) { 385 $self->_error( qq[Cannot read enough bytes from the tarfile] ); 386 return; 387 } 388 } 389 390 ### if we can't read in all bytes... ### 391 last if length $chunk != HEAD; 392 393 ### Apparently this should really be two blocks of 512 zeroes, 394 ### but GNU tar sometimes gets it wrong. See comment in the 395 ### source code (tar.c) to GNU cpio. 396 next if $chunk eq TAR_END; 397 398 ### according to the posix spec, the last 12 bytes of the header are 399 ### null bytes, to pad it to a 512 byte block. That means if these 400 ### bytes are NOT null bytes, it's a corrupt header. See: 401 ### www.koders.com/c/fidCE473AD3D9F835D690259D60AD5654591D91D5BA.aspx 402 ### line 111 403 { my $nulls = join '', "\0" x 12; 404 unless( $nulls eq substr( $chunk, 500, 12 ) ) { 405 $self->_error( qq[Invalid header block at offset $offset] ); 406 next LOOP; 407 } 408 } 409 410 ### pass the realname, so we can set it 'proper' right away 411 ### some of the heuristics are done on the name, so important 412 ### to set it ASAP 413 my $entry; 414 { my %extra_args = (); 415 $extra_args{'name'} = $$real_name if defined $real_name; 416 417 unless( $entry = Archive::Tar::File->new( chunk => $chunk, 418 %extra_args ) 419 ) { 420 $self->_error( qq[Couldn't read chunk at offset $offset] ); 421 next LOOP; 422 } 423 } 424 425 ### ignore labels: 426 ### http://www.gnu.org/software/tar/manual/html_chapter/Media.html#SEC159 427 next if $entry->is_label; 428 429 if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) { 430 431 if ( $entry->is_file && !$entry->validate ) { 432 ### sometimes the chunk is rather fux0r3d and a whole 512 433 ### bytes ends up in the ->name area. 434 ### clean it up, if need be 435 my $name = $entry->name; 436 $name = substr($name, 0, 100) if length $name > 100; 437 $name =~ s/\n/ /g; 438 439 $self->_error( $name . qq[: checksum error] ); 440 next LOOP; 441 } 442 443 my $block = BLOCK_SIZE->( $entry->size ); 444 445 $data = $entry->get_content_by_ref; 446 447 my $skip = 0; 448 my $ctx; # cdrake 449 ### skip this entry if we're filtering 450 451 if($md5) { # cdrake 452 $ctx = Digest::MD5->new; # cdrake 453 $skip=5; # cdrake 454 455 } elsif ($filter && $entry->name !~ $filter) { 456 $skip = 1; 457 458 } elsif ($filter_cb && ! $filter_cb->($entry)) { 459 $skip = 2; 460 461 ### skip this entry if it's a pax header. This is a special file added 462 ### by, among others, git-generated tarballs. It holds comments and is 463 ### not meant for extracting. See #38932: pax_global_header extracted 464 } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) { 465 $skip = 3; 466 } 467 468 if ($skip) { 469 # 470 # Since we're skipping, do not allocate memory for the 471 # whole file. Read it 64 BLOCKS at a time. Do not 472 # complete the skip yet because maybe what we read is a 473 # longlink and it won't get skipped after all 474 # 475 my $amt = $block; 476 my $fsz=$entry->size; # cdrake 477 while ($amt > 0) { 478 $$data = ''; 479 my $this = 64 * BLOCK; 480 $this = $amt if $this > $amt; 481 if( $handle->read( $$data, $this ) < $this ) { 482 $self->_error( qq[Read error on tarfile (missing data) ']. 483 $entry->full_path ."' at offset $offset" ); 484 next LOOP; 485 } 486 $amt -= $this; 487 $fsz -= $this; # cdrake 488 substr ($$data, $fsz) = "" if ($fsz<0); # remove external junk prior to md5 # cdrake 489 $ctx->add($$data) if($skip==5); # cdrake 490 } 491 $$data = $ctx->hexdigest if($skip==5 && !$entry->is_longlink && !$entry->is_unknown && !$entry->is_label ) ; # cdrake 492 } else { 493 494 ### just read everything into memory 495 ### can't do lazy loading since IO::Zlib doesn't support 'seek' 496 ### this is because Compress::Zlib doesn't support it =/ 497 ### this reads in the whole data in one read() call. 498 if ( $handle->read( $$data, $block ) < $block ) { 499 $self->_error( qq[Read error on tarfile (missing data) ']. 500 $entry->full_path ."' at offset $offset" ); 501 next LOOP; 502 } 503 ### throw away trailing garbage ### 504 substr ($$data, $entry->size) = "" if defined $$data; 505 } 506 507 ### part II of the @LongLink munging -- need to do /after/ 508 ### the checksum check. 509 if( $entry->is_longlink ) { 510 ### weird thing in tarfiles -- if the file is actually a 511 ### @LongLink, the data part seems to have a trailing ^@ 512 ### (unprintable) char. to display, pipe output through less. 513 ### but that doesn't *always* happen.. so check if the last 514 ### character is a control character, and if so remove it 515 ### at any rate, we better remove that character here, or tests 516 ### like 'eq' and hash lookups based on names will SO not work 517 ### remove it by calculating the proper size, and then 518 ### tossing out everything that's longer than that size. 519 520 ### count number of nulls 521 my $nulls = $$data =~ tr/\0/\0/; 522 523 ### cut data + size by that many bytes 524 $entry->size( $entry->size - $nulls ); 525 substr ($$data, $entry->size) = ""; 526 } 527 } 528 529 ### clean up of the entries.. posix tar /apparently/ has some 530 ### weird 'feature' that allows for filenames > 255 characters 531 ### they'll put a header in with as name '././@LongLink' and the 532 ### contents will be the name of the /next/ file in the archive 533 ### pretty crappy and kludgy if you ask me 534 535 ### set the name for the next entry if this is a @LongLink; 536 ### this is one ugly hack =/ but needed for direct extraction 537 if( $entry->is_longlink ) { 538 $real_name = $data; 539 next LOOP; 540 } elsif ( defined $real_name ) { 541 $entry->name( $$real_name ); 542 $entry->prefix(''); 543 undef $real_name; 544 } 545 546 if ($filter && $entry->name !~ $filter) { 547 next LOOP; 548 549 } elsif ($filter_cb && ! $filter_cb->($entry)) { 550 next LOOP; 551 552 ### skip this entry if it's a pax header. This is a special file added 553 ### by, among others, git-generated tarballs. It holds comments and is 554 ### not meant for extracting. See #38932: pax_global_header extracted 555 } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) { 556 next LOOP; 557 } 558 559 if ( $extract && !$entry->is_longlink 560 && !$entry->is_unknown 561 && !$entry->is_label ) { 562 $self->_extract_file( $entry ) or return; 563 } 564 565 ### Guard against tarfiles with garbage at the end 566 last LOOP if $entry->name eq ''; 567 568 ### push only the name on the rv if we're extracting 569 ### -- for extract_archive 570 push @$tarfile, ($extract ? $entry->name : $entry); 571 572 if( $limit ) { 573 $count-- unless $entry->is_longlink || $entry->is_dir; 574 last LOOP unless $count; 575 } 576 } continue { 577 undef $data; 578 } 579 580 return $tarfile; 581} 582 583=head2 $tar->contains_file( $filename ) 584 585Check if the archive contains a certain file. 586It will return true if the file is in the archive, false otherwise. 587 588Note however, that this function does an exact match using C<eq> 589on the full path. So it cannot compensate for case-insensitive file- 590systems or compare 2 paths to see if they would point to the same 591underlying file. 592 593=cut 594 595sub contains_file { 596 my $self = shift; 597 my $full = shift; 598 599 return unless defined $full; 600 601 ### don't warn if the entry isn't there.. that's what this function 602 ### is for after all. 603 local $WARN = 0; 604 return 1 if $self->_find_entry($full); 605 return; 606} 607 608=head2 $tar->extract( [@filenames] ) 609 610Write files whose names are equivalent to any of the names in 611C<@filenames> to disk, creating subdirectories as necessary. This 612might not work too well under VMS. 613Under MacPerl, the file's modification time will be converted to the 614MacOS zero of time, and appropriate conversions will be done to the 615path. However, the length of each element of the path is not 616inspected to see whether it's longer than MacOS currently allows (32 617characters). 618 619If C<extract> is called without a list of file names, the entire 620contents of the archive are extracted. 621 622Returns a list of filenames extracted. 623 624=cut 625 626sub extract { 627 my $self = shift; 628 my @args = @_; 629 my @files; 630 my $hashmap; 631 632 # use the speed optimization for all extracted files 633 local($self->{cwd}) = cwd() unless $self->{cwd}; 634 635 ### you requested the extraction of only certain files 636 if( @args ) { 637 for my $file ( @args ) { 638 639 ### it's already an object? 640 if( UNIVERSAL::isa( $file, 'Archive::Tar::File' ) ) { 641 push @files, $file; 642 next; 643 644 ### go find it then 645 } else { 646 647 # create hash-map once to speed up lookup 648 $hashmap = $hashmap || { 649 map { $_->full_path, $_ } @{$self->_data} 650 }; 651 652 if (exists $hashmap->{$file}) { 653 ### we found the file you're looking for 654 push @files, $hashmap->{$file}; 655 } else { 656 return $self->_error( 657 qq[Could not find '$file' in archive] ); 658 } 659 } 660 } 661 662 ### just grab all the file items 663 } else { 664 @files = $self->get_files; 665 } 666 667 ### nothing found? that's an error 668 unless( scalar @files ) { 669 $self->_error( qq[No files found for ] . $self->_file ); 670 return; 671 } 672 673 ### now extract them 674 for my $entry ( @files ) { 675 unless( $self->_extract_file( $entry ) ) { 676 $self->_error(q[Could not extract ']. $entry->full_path .q['] ); 677 return; 678 } 679 } 680 681 return @files; 682} 683 684=head2 $tar->extract_file( $file, [$extract_path] ) 685 686Write an entry, whose name is equivalent to the file name provided to 687disk. Optionally takes a second parameter, which is the full native 688path (including filename) the entry will be written to. 689 690For example: 691 692 $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' ); 693 694 $tar->extract_file( $at_file_object, 'name/i/want/to/give/it' ); 695 696Returns true on success, false on failure. 697 698=cut 699 700sub extract_file { 701 my $self = shift; 702 my $file = shift; return unless defined $file; 703 my $alt = shift; 704 705 my $entry = $self->_find_entry( $file ) 706 or $self->_error( qq[Could not find an entry for '$file'] ), return; 707 708 return $self->_extract_file( $entry, $alt ); 709} 710 711sub _extract_file { 712 my $self = shift; 713 my $entry = shift or return; 714 my $alt = shift; 715 716 ### you wanted an alternate extraction location ### 717 my $name = defined $alt ? $alt : $entry->full_path; 718 719 ### splitpath takes a bool at the end to indicate 720 ### that it's splitting a dir 721 my ($vol,$dirs,$file); 722 if ( defined $alt ) { # It's a local-OS path 723 ($vol,$dirs,$file) = File::Spec->splitpath( $alt, 724 $entry->is_dir ); 725 } else { 726 ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name, 727 $entry->is_dir ); 728 } 729 730 my $dir; 731 ### is $name an absolute path? ### 732 if( $vol || File::Spec->file_name_is_absolute( $dirs ) ) { 733 734 ### absolute names are not allowed to be in tarballs under 735 ### strict mode, so only allow it if a user tells us to do it 736 if( not defined $alt and not $INSECURE_EXTRACT_MODE ) { 737 $self->_error( 738 q[Entry ']. $entry->full_path .q[' is an absolute path. ]. 739 q[Not extracting absolute paths under SECURE EXTRACT MODE] 740 ); 741 return; 742 } 743 744 ### user asked us to, it's fine. 745 $dir = File::Spec->catpath( $vol, $dirs, "" ); 746 747 ### it's a relative path ### 748 } else { 749 my $cwd = (ref $self and defined $self->{cwd}) 750 ? $self->{cwd} 751 : cwd(); 752 753 my @dirs = defined $alt 754 ? File::Spec->splitdir( $dirs ) # It's a local-OS path 755 : File::Spec::Unix->splitdir( $dirs ); # it's UNIX-style, likely 756 # straight from the tarball 757 758 if( not defined $alt and 759 not $INSECURE_EXTRACT_MODE 760 ) { 761 762 ### paths that leave the current directory are not allowed under 763 ### strict mode, so only allow it if a user tells us to do this. 764 if( grep { $_ eq '..' } @dirs ) { 765 766 $self->_error( 767 q[Entry ']. $entry->full_path .q[' is attempting to leave ]. 768 q[the current working directory. Not extracting under ]. 769 q[SECURE EXTRACT MODE] 770 ); 771 return; 772 } 773 774 ### the archive may be asking us to extract into a symlink. This 775 ### is not sane and a possible security issue, as outlined here: 776 ### https://rt.cpan.org/Ticket/Display.html?id=30380 777 ### https://bugzilla.redhat.com/show_bug.cgi?id=295021 778 ### https://issues.rpath.com/browse/RPL-1716 779 my $full_path = $cwd; 780 for my $d ( @dirs ) { 781 $full_path = File::Spec->catdir( $full_path, $d ); 782 783 ### we've already checked this one, and it's safe. Move on. 784 next if ref $self and $self->{_link_cache}->{$full_path}; 785 786 if( -l $full_path ) { 787 my $to = readlink $full_path; 788 my $diag = "symlinked directory ($full_path => $to)"; 789 790 $self->_error( 791 q[Entry ']. $entry->full_path .q[' is attempting to ]. 792 qq[extract to a $diag. This is considered a security ]. 793 q[vulnerability and not allowed under SECURE EXTRACT ]. 794 q[MODE] 795 ); 796 return; 797 } 798 799 ### XXX keep a cache if possible, so the stats become cheaper: 800 $self->{_link_cache}->{$full_path} = 1 if ref $self; 801 } 802 } 803 804 ### '.' is the directory delimiter on VMS, which has to be escaped 805 ### or changed to '_' on vms. vmsify is used, because older versions 806 ### of vmspath do not handle this properly. 807 ### Must not add a '/' to an empty directory though. 808 map { length() ? VMS::Filespec::vmsify($_.'/') : $_ } @dirs if ON_VMS; 809 810 my ($cwd_vol,$cwd_dir,$cwd_file) 811 = File::Spec->splitpath( $cwd ); 812 my @cwd = File::Spec->splitdir( $cwd_dir ); 813 push @cwd, $cwd_file if length $cwd_file; 814 815 ### We need to pass '' as the last element to catpath. Craig Berry 816 ### explains why (msgid <p0624083dc311ae541393@[172.16.52.1]>): 817 ### The root problem is that splitpath on UNIX always returns the 818 ### final path element as a file even if it is a directory, and of 819 ### course there is no way it can know the difference without checking 820 ### against the filesystem, which it is documented as not doing. When 821 ### you turn around and call catpath, on VMS you have to know which bits 822 ### are directory bits and which bits are file bits. In this case we 823 ### know the result should be a directory. I had thought you could omit 824 ### the file argument to catpath in such a case, but apparently on UNIX 825 ### you can't. 826 $dir = File::Spec->catpath( 827 $cwd_vol, File::Spec->catdir( @cwd, @dirs ), '' 828 ); 829 830 ### catdir() returns undef if the path is longer than 255 chars on 831 ### older VMS systems. 832 unless ( defined $dir ) { 833 $^W && $self->_error( qq[Could not compose a path for '$dirs'\n] ); 834 return; 835 } 836 837 } 838 839 if( -e $dir && !-d _ ) { 840 $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] ); 841 return; 842 } 843 844 unless ( -d _ ) { 845 eval { File::Path::mkpath( $dir, 0, 0777 ) }; 846 if( $@ ) { 847 my $fp = $entry->full_path; 848 $self->_error(qq[Could not create directory '$dir' for '$fp': $@]); 849 return; 850 } 851 852 ### XXX chown here? that might not be the same as in the archive 853 ### as we're only chown'ing to the owner of the file we're extracting 854 ### not to the owner of the directory itself, which may or may not 855 ### be another entry in the archive 856 ### Answer: no, gnu tar doesn't do it either, it'd be the wrong 857 ### way to go. 858 #if( $CHOWN && CAN_CHOWN ) { 859 # chown $entry->uid, $entry->gid, $dir or 860 # $self->_error( qq[Could not set uid/gid on '$dir'] ); 861 #} 862 } 863 864 ### we're done if we just needed to create a dir ### 865 return 1 if $entry->is_dir; 866 867 my $full = File::Spec->catfile( $dir, $file ); 868 869 if( $entry->is_unknown ) { 870 $self->_error( qq[Unknown file type for file '$full'] ); 871 return; 872 } 873 874 ### If a file system already contains a block device with the same name as 875 ### the being extracted regular file, we would write the file's content 876 ### to the block device. So remove the existing file (block device) now. 877 ### If an archive contains multiple same-named entries, the last one 878 ### should replace the previous ones. So remove the old file now. 879 ### If the old entry is a symlink to a file outside of the CWD, the new 880 ### entry would create a file there. This is CVE-2018-12015 881 ### <https://rt.cpan.org/Ticket/Display.html?id=125523>. 882 if (-l $full || -e _) { 883 if (!unlink $full) { 884 $self->_error( qq[Could not remove old file '$full': $!] ); 885 return; 886 } 887 } 888 if( length $entry->type && $entry->is_file ) { 889 my $fh = IO::File->new; 890 $fh->open( $full, '>' ) or ( 891 $self->_error( qq[Could not open file '$full': $!] ), 892 return 893 ); 894 895 if( $entry->size ) { 896 binmode $fh; 897 syswrite $fh, $entry->data or ( 898 $self->_error( qq[Could not write data to '$full'] ), 899 return 900 ); 901 } 902 903 close $fh or ( 904 $self->_error( qq[Could not close file '$full'] ), 905 return 906 ); 907 908 } else { 909 $self->_make_special_file( $entry, $full ) or return; 910 } 911 912 ### only update the timestamp if it's not a symlink; that will change the 913 ### timestamp of the original. This addresses bug #33669: Could not update 914 ### timestamp warning on symlinks 915 if( not -l $full ) { 916 utime time, $entry->mtime - TIME_OFFSET, $full or 917 $self->_error( qq[Could not update timestamp] ); 918 } 919 920 if( $CHOWN && CAN_CHOWN->() and not -l $full ) { 921 CORE::chown( $entry->uid, $entry->gid, $full ) or 922 $self->_error( qq[Could not set uid/gid on '$full'] ); 923 } 924 925 ### only chmod if we're allowed to, but never chmod symlinks, since they'll 926 ### change the perms on the file they're linking too... 927 if( $CHMOD and not -l $full ) { 928 my $mode = $entry->mode; 929 unless ($SAME_PERMISSIONS) { 930 $mode &= ~(oct(7000) | umask); 931 } 932 CORE::chmod( $mode, $full ) or 933 $self->_error( qq[Could not chown '$full' to ] . $entry->mode ); 934 } 935 936 return 1; 937} 938 939sub _make_special_file { 940 my $self = shift; 941 my $entry = shift or return; 942 my $file = shift; return unless defined $file; 943 944 my $err; 945 946 if( $entry->is_symlink ) { 947 my $fail; 948 if( ON_UNIX ) { 949 symlink( $entry->linkname, $file ) or $fail++; 950 951 } else { 952 $self->_extract_special_file_as_plain_file( $entry, $file ) 953 or $fail++; 954 } 955 956 $err = qq[Making symbolic link '$file' to '] . 957 $entry->linkname .q[' failed] if $fail; 958 959 } elsif ( $entry->is_hardlink ) { 960 my $fail; 961 if( ON_UNIX ) { 962 link( $entry->linkname, $file ) or $fail++; 963 964 } else { 965 $self->_extract_special_file_as_plain_file( $entry, $file ) 966 or $fail++; 967 } 968 969 $err = qq[Making hard link from '] . $entry->linkname . 970 qq[' to '$file' failed] if $fail; 971 972 } elsif ( $entry->is_fifo ) { 973 ON_UNIX && !system('mknod', $file, 'p') or 974 $err = qq[Making fifo ']. $entry->name .qq[' failed]; 975 976 } elsif ( $entry->is_blockdev or $entry->is_chardev ) { 977 my $mode = $entry->is_blockdev ? 'b' : 'c'; 978 979 ON_UNIX && !system('mknod', $file, $mode, 980 $entry->devmajor, $entry->devminor) or 981 $err = qq[Making block device ']. $entry->name .qq[' (maj=] . 982 $entry->devmajor . qq[ min=] . $entry->devminor . 983 qq[) failed.]; 984 985 } elsif ( $entry->is_socket ) { 986 ### the original doesn't do anything special for sockets.... ### 987 1; 988 } 989 990 return $err ? $self->_error( $err ) : 1; 991} 992 993### don't know how to make symlinks, let's just extract the file as 994### a plain file 995sub _extract_special_file_as_plain_file { 996 my $self = shift; 997 my $entry = shift or return; 998 my $file = shift; return unless defined $file; 999 1000 my $err; 1001 TRY: { 1002 my $orig = $self->_find_entry( $entry->linkname, $entry ); 1003 1004 unless( $orig ) { 1005 $err = qq[Could not find file '] . $entry->linkname . 1006 qq[' in memory.]; 1007 last TRY; 1008 } 1009 1010 ### clone the entry, make it appear as a normal file ### 1011 my $clone = $orig->clone; 1012 $clone->_downgrade_to_plainfile; 1013 $self->_extract_file( $clone, $file ) or last TRY; 1014 1015 return 1; 1016 } 1017 1018 return $self->_error($err); 1019} 1020 1021=head2 $tar->list_files( [\@properties] ) 1022 1023Returns a list of the names of all the files in the archive. 1024 1025If C<list_files()> is passed an array reference as its first argument 1026it returns a list of hash references containing the requested 1027properties of each file. The following list of properties is 1028supported: name, size, mtime (last modified date), mode, uid, gid, 1029linkname, uname, gname, devmajor, devminor, prefix. 1030 1031Passing an array reference containing only one element, 'name', is 1032special cased to return a list of names rather than a list of hash 1033references, making it equivalent to calling C<list_files> without 1034arguments. 1035 1036=cut 1037 1038sub list_files { 1039 my $self = shift; 1040 my $aref = shift || [ ]; 1041 1042 unless( $self->_data ) { 1043 $self->read() or return; 1044 } 1045 1046 if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) { 1047 return map { $_->full_path } @{$self->_data}; 1048 } else { 1049 1050 #my @rv; 1051 #for my $obj ( @{$self->_data} ) { 1052 # push @rv, { map { $_ => $obj->$_() } @$aref }; 1053 #} 1054 #return @rv; 1055 1056 ### this does the same as the above.. just needs a +{ } 1057 ### to make sure perl doesn't confuse it for a block 1058 return map { my $o=$_; 1059 +{ map { $_ => $o->$_() } @$aref } 1060 } @{$self->_data}; 1061 } 1062} 1063 1064sub _find_entry { 1065 my $self = shift; 1066 my $file = shift; 1067 1068 unless( defined $file ) { 1069 $self->_error( qq[No file specified] ); 1070 return; 1071 } 1072 1073 ### it's an object already 1074 return $file if UNIVERSAL::isa( $file, 'Archive::Tar::File' ); 1075 1076seach_entry: 1077 if($self->_data){ 1078 for my $entry ( @{$self->_data} ) { 1079 my $path = $entry->full_path; 1080 return $entry if $path eq $file; 1081 } 1082 } 1083 1084 if($Archive::Tar::RESOLVE_SYMLINK!~/none/){ 1085 if(my $link_entry = shift()){#fallback mode when symlinks are using relative notations ( ../a/./b/text.bin ) 1086 $file = _symlinks_resolver( $link_entry->name, $file ); 1087 goto seach_entry if $self->_data; 1088 1089 #this will be slower than never, but won't failed! 1090 1091 my $iterargs = $link_entry->{'_archive'}; 1092 if($Archive::Tar::RESOLVE_SYMLINK=~/speed/ && @$iterargs==3){ 1093 #faster but whole archive will be read in memory 1094 #read whole archive and share data 1095 my $archive = Archive::Tar->new; 1096 $archive->read( @$iterargs ); 1097 push @$iterargs, $archive; #take a trace for destruction 1098 if($archive->_data){ 1099 $self->_data( $archive->_data ); 1100 goto seach_entry; 1101 } 1102 }#faster 1103 1104 {#slower but lower memory usage 1105 # $iterargs = [$filename, $compressed, $opts]; 1106 my $next = Archive::Tar->iter( @$iterargs ); 1107 while(my $e = $next->()){ 1108 if($e->full_path eq $file){ 1109 undef $next; 1110 return $e; 1111 } 1112 } 1113 }#slower 1114 } 1115 } 1116 1117 $self->_error( qq[No such file in archive: '$file'] ); 1118 return; 1119} 1120 1121=head2 $tar->get_files( [@filenames] ) 1122 1123Returns the C<Archive::Tar::File> objects matching the filenames 1124provided. If no filename list was passed, all C<Archive::Tar::File> 1125objects in the current Tar object are returned. 1126 1127Please refer to the C<Archive::Tar::File> documentation on how to 1128handle these objects. 1129 1130=cut 1131 1132sub get_files { 1133 my $self = shift; 1134 1135 return @{ $self->_data } unless @_; 1136 1137 my @list; 1138 for my $file ( @_ ) { 1139 push @list, grep { defined } $self->_find_entry( $file ); 1140 } 1141 1142 return @list; 1143} 1144 1145=head2 $tar->get_content( $file ) 1146 1147Return the content of the named file. 1148 1149=cut 1150 1151sub get_content { 1152 my $self = shift; 1153 my $entry = $self->_find_entry( shift ) or return; 1154 1155 return $entry->data; 1156} 1157 1158=head2 $tar->replace_content( $file, $content ) 1159 1160Make the string $content be the content for the file named $file. 1161 1162=cut 1163 1164sub replace_content { 1165 my $self = shift; 1166 my $entry = $self->_find_entry( shift ) or return; 1167 1168 return $entry->replace_content( shift ); 1169} 1170 1171=head2 $tar->rename( $file, $new_name ) 1172 1173Rename the file of the in-memory archive to $new_name. 1174 1175Note that you must specify a Unix path for $new_name, since per tar 1176standard, all files in the archive must be Unix paths. 1177 1178Returns true on success and false on failure. 1179 1180=cut 1181 1182sub rename { 1183 my $self = shift; 1184 my $file = shift; return unless defined $file; 1185 my $new = shift; return unless defined $new; 1186 1187 my $entry = $self->_find_entry( $file ) or return; 1188 1189 return $entry->rename( $new ); 1190} 1191 1192=head2 $tar->chmod( $file, $mode ) 1193 1194Change mode of $file to $mode. 1195 1196Returns true on success and false on failure. 1197 1198=cut 1199 1200sub chmod { 1201 my $self = shift; 1202 my $file = shift; return unless defined $file; 1203 my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/; 1204 my @args = ("$mode"); 1205 1206 my $entry = $self->_find_entry( $file ) or return; 1207 my $x = $entry->chmod( @args ); 1208 return $x; 1209} 1210 1211=head2 $tar->chown( $file, $uname [, $gname] ) 1212 1213Change owner $file to $uname and $gname. 1214 1215Returns true on success and false on failure. 1216 1217=cut 1218 1219sub chown { 1220 my $self = shift; 1221 my $file = shift; return unless defined $file; 1222 my $uname = shift; return unless defined $uname; 1223 my @args = ($uname); 1224 push(@args, shift); 1225 1226 my $entry = $self->_find_entry( $file ) or return; 1227 my $x = $entry->chown( @args ); 1228 return $x; 1229} 1230 1231=head2 $tar->remove (@filenamelist) 1232 1233Removes any entries with names matching any of the given filenames 1234from the in-memory archive. Returns a list of C<Archive::Tar::File> 1235objects that remain. 1236 1237=cut 1238 1239sub remove { 1240 my $self = shift; 1241 my @list = @_; 1242 1243 my %seen = map { $_->full_path => $_ } @{$self->_data}; 1244 delete $seen{ $_ } for @list; 1245 1246 $self->_data( [values %seen] ); 1247 1248 return values %seen; 1249} 1250 1251=head2 $tar->clear 1252 1253C<clear> clears the current in-memory archive. This effectively gives 1254you a 'blank' object, ready to be filled again. Note that C<clear> 1255only has effect on the object, not the underlying tarfile. 1256 1257=cut 1258 1259sub clear { 1260 my $self = shift or return; 1261 1262 $self->_data( [] ); 1263 $self->_file( '' ); 1264 1265 return 1; 1266} 1267 1268 1269=head2 $tar->write ( [$file, $compressed, $prefix] ) 1270 1271Write the in-memory archive to disk. The first argument can either 1272be the name of a file or a reference to an already open filehandle (a 1273GLOB reference). 1274 1275The second argument is used to indicate compression. You can 1276compress using C<gzip>, C<bzip2> or C<xz>. If you pass a digit, it's assumed 1277to be the C<gzip> compression level (between 1 and 9), but the use of 1278constants is preferred: 1279 1280 # write a gzip compressed file 1281 $tar->write( 'out.tgz', COMPRESS_GZIP ); 1282 1283 # write a bzip compressed file 1284 $tar->write( 'out.tbz', COMPRESS_BZIP ); 1285 1286 # write a xz compressed file 1287 $tar->write( 'out.txz', COMPRESS_XZ ); 1288 1289Note that when you pass in a filehandle, the compression argument 1290is ignored, as all files are printed verbatim to your filehandle. 1291If you wish to enable compression with filehandles, use an 1292C<IO::Zlib>, C<IO::Compress::Bzip2> or C<IO::Compress::Xz> filehandle instead. 1293 1294The third argument is an optional prefix. All files will be tucked 1295away in the directory you specify as prefix. So if you have files 1296'a' and 'b' in your archive, and you specify 'foo' as prefix, they 1297will be written to the archive as 'foo/a' and 'foo/b'. 1298 1299If no arguments are given, C<write> returns the entire formatted 1300archive as a string, which could be useful if you'd like to stuff the 1301archive into a socket or a pipe to gzip or something. 1302 1303 1304=cut 1305 1306sub write { 1307 my $self = shift; 1308 my $file = shift; $file = '' unless defined $file; 1309 my $gzip = shift || 0; 1310 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix; 1311 my $dummy = ''; 1312 1313 ### only need a handle if we have a file to print to ### 1314 my $handle = length($file) 1315 ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) ) 1316 or return ) 1317 : $HAS_PERLIO ? do { open my $h, '>', \$dummy; $h } 1318 : $HAS_IO_STRING ? IO::String->new 1319 : __PACKAGE__->no_string_support(); 1320 1321 ### Addresses: #41798: Nonempty $\ when writing a TAR file produces a 1322 ### corrupt TAR file. Must clear out $\ to make sure no garbage is 1323 ### printed to the archive 1324 local $\; 1325 1326 for my $entry ( @{$self->_data} ) { 1327 ### entries to be written to the tarfile ### 1328 my @write_me; 1329 1330 ### only now will we change the object to reflect the current state 1331 ### of the name and prefix fields -- this needs to be limited to 1332 ### write() only! 1333 my $clone = $entry->clone; 1334 1335 1336 ### so, if you don't want use to use the prefix, we'll stuff 1337 ### everything in the name field instead 1338 if( $DO_NOT_USE_PREFIX ) { 1339 1340 ### you might have an extended prefix, if so, set it in the clone 1341 ### XXX is ::Unix right? 1342 $clone->name( length $ext_prefix 1343 ? File::Spec::Unix->catdir( $ext_prefix, 1344 $clone->full_path) 1345 : $clone->full_path ); 1346 $clone->prefix( '' ); 1347 1348 ### otherwise, we'll have to set it properly -- prefix part in the 1349 ### prefix and name part in the name field. 1350 } else { 1351 1352 ### split them here, not before! 1353 my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path ); 1354 1355 ### you might have an extended prefix, if so, set it in the clone 1356 ### XXX is ::Unix right? 1357 $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix ) 1358 if length $ext_prefix; 1359 1360 $clone->prefix( $prefix ); 1361 $clone->name( $name ); 1362 } 1363 1364 ### names are too long, and will get truncated if we don't add a 1365 ### '@LongLink' file... 1366 my $make_longlink = ( length($clone->name) > NAME_LENGTH or 1367 length($clone->prefix) > PREFIX_LENGTH 1368 ) || 0; 1369 1370 ### perhaps we need to make a longlink file? 1371 if( $make_longlink ) { 1372 my $longlink = Archive::Tar::File->new( 1373 data => LONGLINK_NAME, 1374 $clone->full_path, 1375 { type => LONGLINK } 1376 ); 1377 1378 unless( $longlink ) { 1379 $self->_error( qq[Could not create 'LongLink' entry for ] . 1380 qq[oversize file '] . $clone->full_path ."'" ); 1381 return; 1382 }; 1383 1384 push @write_me, $longlink; 1385 } 1386 1387 push @write_me, $clone; 1388 1389 ### write the one, optionally 2 a::t::file objects to the handle 1390 for my $clone (@write_me) { 1391 1392 ### if the file is a symlink, there are 2 options: 1393 ### either we leave the symlink intact, but then we don't write any 1394 ### data OR we follow the symlink, which means we actually make a 1395 ### copy. if we do the latter, we have to change the TYPE of the 1396 ### clone to 'FILE' 1397 my $link_ok = $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK; 1398 my $data_ok = !$clone->is_symlink && $clone->has_content; 1399 1400 ### downgrade to a 'normal' file if it's a symlink we're going to 1401 ### treat as a regular file 1402 $clone->_downgrade_to_plainfile if $link_ok; 1403 1404 ### get the header for this block 1405 my $header = $self->_format_tar_entry( $clone ); 1406 unless( $header ) { 1407 $self->_error(q[Could not format header for: ] . 1408 $clone->full_path ); 1409 return; 1410 } 1411 1412 unless( print $handle $header ) { 1413 $self->_error(q[Could not write header for: ] . 1414 $clone->full_path); 1415 return; 1416 } 1417 1418 if( $link_ok or $data_ok ) { 1419 unless( print $handle $clone->data ) { 1420 $self->_error(q[Could not write data for: ] . 1421 $clone->full_path); 1422 return; 1423 } 1424 1425 ### pad the end of the clone if required ### 1426 print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK 1427 } 1428 1429 } ### done writing these entries 1430 } 1431 1432 ### write the end markers ### 1433 print $handle TAR_END x 2 or 1434 return $self->_error( qq[Could not write tar end markers] ); 1435 1436 ### did you want it written to a file, or returned as a string? ### 1437 my $rv = length($file) ? 1 1438 : $HAS_PERLIO ? $dummy 1439 : do { seek $handle, 0, 0; local $/; <$handle> }; 1440 1441 ### make sure to close the handle if we created it 1442 if ( $file ne $handle ) { 1443 unless( close $handle ) { 1444 $self->_error( qq[Could not write tar] ); 1445 return; 1446 } 1447 } 1448 1449 return $rv; 1450} 1451 1452sub _format_tar_entry { 1453 my $self = shift; 1454 my $entry = shift or return; 1455 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix; 1456 my $no_prefix = shift || 0; 1457 1458 my $file = $entry->name; 1459 my $prefix = $entry->prefix; $prefix = '' unless defined $prefix; 1460 1461 ### remove the prefix from the file name 1462 ### not sure if this is still needed --kane 1463 ### no it's not -- Archive::Tar::File->_new_from_file will take care of 1464 ### this for us. Even worse, this would break if we tried to add a file 1465 ### like x/x. 1466 #if( length $prefix ) { 1467 # $file =~ s/^$match//; 1468 #} 1469 1470 $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix) 1471 if length $ext_prefix; 1472 1473 ### not sure why this is... ### 1474 my $l = PREFIX_LENGTH; # is ambiguous otherwise... 1475 substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH; 1476 1477 my $f1 = "%06o"; my $f2 = $ZERO_PAD_NUMBERS ? "%011o" : "%11o"; 1478 1479 ### this might be optimizable with a 'changed' flag in the file objects ### 1480 my $tar = pack ( 1481 PACK, 1482 $file, 1483 1484 (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]), 1485 (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]), 1486 1487 "", # checksum field - space padded a bit down 1488 1489 (map { $entry->$_() } qw[type linkname magic]), 1490 1491 $entry->version || TAR_VERSION, 1492 1493 (map { $entry->$_() } qw[uname gname]), 1494 (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]), 1495 1496 ($no_prefix ? '' : $prefix) 1497 ); 1498 1499 ### add the checksum ### 1500 my $checksum_fmt = $ZERO_PAD_NUMBERS ? "%06o\0" : "%06o\0"; 1501 substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar)); 1502 1503 return $tar; 1504} 1505 1506=head2 $tar->add_files( @filenamelist ) 1507 1508Takes a list of filenames and adds them to the in-memory archive. 1509 1510The path to the file is automatically converted to a Unix like 1511equivalent for use in the archive, and, if on MacOS, the file's 1512modification time is converted from the MacOS epoch to the Unix epoch. 1513So tar archives created on MacOS with B<Archive::Tar> can be read 1514both with I<tar> on Unix and applications like I<suntar> or 1515I<Stuffit Expander> on MacOS. 1516 1517Be aware that the file's type/creator and resource fork will be lost, 1518which is usually what you want in cross-platform archives. 1519 1520Instead of a filename, you can also pass it an existing C<Archive::Tar::File> 1521object from, for example, another archive. The object will be clone, and 1522effectively be a copy of the original, not an alias. 1523 1524Returns a list of C<Archive::Tar::File> objects that were just added. 1525 1526=cut 1527 1528sub add_files { 1529 my $self = shift; 1530 my @files = @_ or return; 1531 1532 my @rv; 1533 for my $file ( @files ) { 1534 1535 ### you passed an Archive::Tar::File object 1536 ### clone it so we don't accidentally have a reference to 1537 ### an object from another archive 1538 if( UNIVERSAL::isa( $file,'Archive::Tar::File' ) ) { 1539 push @rv, $file->clone; 1540 next; 1541 } 1542 1543 eval { 1544 if( utf8::is_utf8( $file )) { 1545 utf8::encode( $file ); 1546 } 1547 }; 1548 1549 unless( -e $file || -l $file ) { 1550 $self->_error( qq[No such file: '$file'] ); 1551 next; 1552 } 1553 1554 my $obj = Archive::Tar::File->new( file => $file ); 1555 unless( $obj ) { 1556 $self->_error( qq[Unable to add file: '$file'] ); 1557 next; 1558 } 1559 1560 push @rv, $obj; 1561 } 1562 1563 push @{$self->{_data}}, @rv; 1564 1565 return @rv; 1566} 1567 1568=head2 $tar->add_data ( $filename, $data, [$opthashref] ) 1569 1570Takes a filename, a scalar full of data and optionally a reference to 1571a hash with specific options. 1572 1573Will add a file to the in-memory archive, with name C<$filename> and 1574content C<$data>. Specific properties can be set using C<$opthashref>. 1575The following list of properties is supported: name, size, mtime 1576(last modified date), mode, uid, gid, linkname, uname, gname, 1577devmajor, devminor, prefix, type. (On MacOS, the file's path and 1578modification times are converted to Unix equivalents.) 1579 1580Valid values for the file type are the following constants defined by 1581Archive::Tar::Constant: 1582 1583=over 4 1584 1585=item FILE 1586 1587Regular file. 1588 1589=item HARDLINK 1590 1591=item SYMLINK 1592 1593Hard and symbolic ("soft") links; linkname should specify target. 1594 1595=item CHARDEV 1596 1597=item BLOCKDEV 1598 1599Character and block devices. devmajor and devminor should specify the major 1600and minor device numbers. 1601 1602=item DIR 1603 1604Directory. 1605 1606=item FIFO 1607 1608FIFO (named pipe). 1609 1610=item SOCKET 1611 1612Socket. 1613 1614=back 1615 1616Returns the C<Archive::Tar::File> object that was just added, or 1617C<undef> on failure. 1618 1619=cut 1620 1621sub add_data { 1622 my $self = shift; 1623 my ($file, $data, $opt) = @_; 1624 1625 my $obj = Archive::Tar::File->new( data => $file, $data, $opt ); 1626 unless( $obj ) { 1627 $self->_error( qq[Unable to add file: '$file'] ); 1628 return; 1629 } 1630 1631 push @{$self->{_data}}, $obj; 1632 1633 return $obj; 1634} 1635 1636=head2 $tar->error( [$BOOL] ) 1637 1638Returns the current error string (usually, the last error reported). 1639If a true value was specified, it will give the C<Carp::longmess> 1640equivalent of the error, in effect giving you a stacktrace. 1641 1642For backwards compatibility, this error is also available as 1643C<$Archive::Tar::error> although it is much recommended you use the 1644method call instead. 1645 1646=cut 1647 1648{ 1649 $error = ''; 1650 my $longmess; 1651 1652 sub _error { 1653 my $self = shift; 1654 my $msg = $error = shift; 1655 $longmess = Carp::longmess($error); 1656 if (ref $self) { 1657 $self->{_error} = $error; 1658 $self->{_longmess} = $longmess; 1659 } 1660 1661 ### set Archive::Tar::WARN to 0 to disable printing 1662 ### of errors 1663 if( $WARN ) { 1664 carp $DEBUG ? $longmess : $msg; 1665 } 1666 1667 return; 1668 } 1669 1670 sub error { 1671 my $self = shift; 1672 if (ref $self) { 1673 return shift() ? $self->{_longmess} : $self->{_error}; 1674 } else { 1675 return shift() ? $longmess : $error; 1676 } 1677 } 1678} 1679 1680=head2 $tar->setcwd( $cwd ); 1681 1682C<Archive::Tar> needs to know the current directory, and it will run 1683C<Cwd::cwd()> I<every> time it extracts a I<relative> entry from the 1684tarfile and saves it in the file system. (As of version 1.30, however, 1685C<Archive::Tar> will use the speed optimization described below 1686automatically, so it's only relevant if you're using C<extract_file()>). 1687 1688Since C<Archive::Tar> doesn't change the current directory internally 1689while it is extracting the items in a tarball, all calls to C<Cwd::cwd()> 1690can be avoided if we can guarantee that the current directory doesn't 1691get changed externally. 1692 1693To use this performance boost, set the current directory via 1694 1695 use Cwd; 1696 $tar->setcwd( cwd() ); 1697 1698once before calling a function like C<extract_file> and 1699C<Archive::Tar> will use the current directory setting from then on 1700and won't call C<Cwd::cwd()> internally. 1701 1702To switch back to the default behaviour, use 1703 1704 $tar->setcwd( undef ); 1705 1706and C<Archive::Tar> will call C<Cwd::cwd()> internally again. 1707 1708If you're using C<Archive::Tar>'s C<extract()> method, C<setcwd()> will 1709be called for you. 1710 1711=cut 1712 1713sub setcwd { 1714 my $self = shift; 1715 my $cwd = shift; 1716 1717 $self->{cwd} = $cwd; 1718} 1719 1720=head1 Class Methods 1721 1722=head2 Archive::Tar->create_archive($file, $compressed, @filelist) 1723 1724Creates a tar file from the list of files provided. The first 1725argument can either be the name of the tar file to create or a 1726reference to an open file handle (e.g. a GLOB reference). 1727 1728The second argument is used to indicate compression. You can 1729compress using C<gzip>, C<bzip2> or C<xz>. If you pass a digit, it's assumed 1730to be the C<gzip> compression level (between 1 and 9), but the use of 1731constants is preferred: 1732 1733 # write a gzip compressed file 1734 Archive::Tar->create_archive( 'out.tgz', COMPRESS_GZIP, @filelist ); 1735 1736 # write a bzip compressed file 1737 Archive::Tar->create_archive( 'out.tbz', COMPRESS_BZIP, @filelist ); 1738 1739 # write a xz compressed file 1740 Archive::Tar->create_archive( 'out.txz', COMPRESS_XZ, @filelist ); 1741 1742Note that when you pass in a filehandle, the compression argument 1743is ignored, as all files are printed verbatim to your filehandle. 1744If you wish to enable compression with filehandles, use an 1745C<IO::Zlib>, C<IO::Compress::Bzip2> or C<IO::Compress::Xz> filehandle instead. 1746 1747The remaining arguments list the files to be included in the tar file. 1748These files must all exist. Any files which don't exist or can't be 1749read are silently ignored. 1750 1751If the archive creation fails for any reason, C<create_archive> will 1752return false. Please use the C<error> method to find the cause of the 1753failure. 1754 1755Note that this method does not write C<on the fly> as it were; it 1756still reads all the files into memory before writing out the archive. 1757Consult the FAQ below if this is a problem. 1758 1759=cut 1760 1761sub create_archive { 1762 my $class = shift; 1763 1764 my $file = shift; return unless defined $file; 1765 my $gzip = shift || 0; 1766 my @files = @_; 1767 1768 unless( @files ) { 1769 return $class->_error( qq[Cowardly refusing to create empty archive!] ); 1770 } 1771 1772 my $tar = $class->new; 1773 $tar->add_files( @files ); 1774 return $tar->write( $file, $gzip ); 1775} 1776 1777=head2 Archive::Tar->iter( $filename, [ $compressed, {opt => $val} ] ) 1778 1779Returns an iterator function that reads the tar file without loading 1780it all in memory. Each time the function is called it will return the 1781next file in the tarball. The files are returned as 1782C<Archive::Tar::File> objects. The iterator function returns the 1783empty list once it has exhausted the files contained. 1784 1785The second argument can be a hash reference with options, which are 1786identical to the arguments passed to C<read()>. 1787 1788Example usage: 1789 1790 my $next = Archive::Tar->iter( "example.tar.gz", 1, {filter => qr/\.pm$/} ); 1791 1792 while( my $f = $next->() ) { 1793 print $f->name, "\n"; 1794 1795 $f->extract or warn "Extraction failed"; 1796 1797 # .... 1798 } 1799 1800=cut 1801 1802 1803sub iter { 1804 my $class = shift; 1805 my $filename = shift; 1806 return unless defined $filename; 1807 my $compressed = shift || 0; 1808 my $opts = shift || {}; 1809 1810 ### get a handle to read from. 1811 my $handle = $class->_get_handle( 1812 $filename, 1813 $compressed, 1814 READ_ONLY->( ZLIB ) 1815 ) or return; 1816 1817 my @data; 1818 my $CONSTRUCT_ARGS = [ $filename, $compressed, $opts ]; 1819 return sub { 1820 return shift(@data) if @data; # more than one file returned? 1821 return unless $handle; # handle exhausted? 1822 1823 ### read data, should only return file 1824 my $tarfile = $class->_read_tar($handle, { %$opts, limit => 1 }); 1825 @data = @$tarfile if ref $tarfile && ref $tarfile eq 'ARRAY'; 1826 if($Archive::Tar::RESOLVE_SYMLINK!~/none/){ 1827 foreach(@data){ 1828 #may refine this heuristic for ON_UNIX? 1829 if($_->linkname){ 1830 #is there a better slot to store/share it ? 1831 $_->{'_archive'} = $CONSTRUCT_ARGS; 1832 } 1833 } 1834 } 1835 1836 ### return one piece of data 1837 return shift(@data) if @data; 1838 1839 ### data is exhausted, free the filehandle 1840 undef $handle; 1841 if(@$CONSTRUCT_ARGS == 4){ 1842 #free archive in memory 1843 undef $CONSTRUCT_ARGS->[-1]; 1844 } 1845 return; 1846 }; 1847} 1848 1849=head2 Archive::Tar->list_archive($file, $compressed, [\@properties]) 1850 1851Returns a list of the names of all the files in the archive. The 1852first argument can either be the name of the tar file to list or a 1853reference to an open file handle (e.g. a GLOB reference). 1854 1855If C<list_archive()> is passed an array reference as its third 1856argument it returns a list of hash references containing the requested 1857properties of each file. The following list of properties is 1858supported: full_path, name, size, mtime (last modified date), mode, 1859uid, gid, linkname, uname, gname, devmajor, devminor, prefix, type. 1860 1861See C<Archive::Tar::File> for details about supported properties. 1862 1863Passing an array reference containing only one element, 'name', is 1864special cased to return a list of names rather than a list of hash 1865references. 1866 1867=cut 1868 1869sub list_archive { 1870 my $class = shift; 1871 my $file = shift; return unless defined $file; 1872 my $gzip = shift || 0; 1873 1874 my $tar = $class->new($file, $gzip); 1875 return unless $tar; 1876 1877 return $tar->list_files( @_ ); 1878} 1879 1880=head2 Archive::Tar->extract_archive($file, $compressed) 1881 1882Extracts the contents of the tar file. The first argument can either 1883be the name of the tar file to create or a reference to an open file 1884handle (e.g. a GLOB reference). All relative paths in the tar file will 1885be created underneath the current working directory. 1886 1887C<extract_archive> will return a list of files it extracted. 1888If the archive extraction fails for any reason, C<extract_archive> 1889will return false. Please use the C<error> method to find the cause 1890of the failure. 1891 1892=cut 1893 1894sub extract_archive { 1895 my $class = shift; 1896 my $file = shift; return unless defined $file; 1897 my $gzip = shift || 0; 1898 1899 my $tar = $class->new( ) or return; 1900 1901 return $tar->read( $file, $gzip, { extract => 1 } ); 1902} 1903 1904=head2 $bool = Archive::Tar->has_io_string 1905 1906Returns true if we currently have C<IO::String> support loaded. 1907 1908Either C<IO::String> or C<perlio> support is needed to support writing 1909stringified archives. Currently, C<perlio> is the preferred method, if 1910available. 1911 1912See the C<GLOBAL VARIABLES> section to see how to change this preference. 1913 1914=cut 1915 1916sub has_io_string { return $HAS_IO_STRING; } 1917 1918=head2 $bool = Archive::Tar->has_perlio 1919 1920Returns true if we currently have C<perlio> support loaded. 1921 1922This requires C<perl-5.8> or higher, compiled with C<perlio> 1923 1924Either C<IO::String> or C<perlio> support is needed to support writing 1925stringified archives. Currently, C<perlio> is the preferred method, if 1926available. 1927 1928See the C<GLOBAL VARIABLES> section to see how to change this preference. 1929 1930=cut 1931 1932sub has_perlio { return $HAS_PERLIO; } 1933 1934=head2 $bool = Archive::Tar->has_zlib_support 1935 1936Returns true if C<Archive::Tar> can extract C<zlib> compressed archives 1937 1938=cut 1939 1940sub has_zlib_support { return ZLIB } 1941 1942=head2 $bool = Archive::Tar->has_bzip2_support 1943 1944Returns true if C<Archive::Tar> can extract C<bzip2> compressed archives 1945 1946=cut 1947 1948sub has_bzip2_support { return BZIP } 1949 1950=head2 $bool = Archive::Tar->has_xz_support 1951 1952Returns true if C<Archive::Tar> can extract C<xz> compressed archives 1953 1954=cut 1955 1956sub has_xz_support { return XZ } 1957 1958=head2 Archive::Tar->can_handle_compressed_files 1959 1960A simple checking routine, which will return true if C<Archive::Tar> 1961is able to uncompress compressed archives on the fly with C<IO::Zlib>, 1962C<IO::Compress::Bzip2> and C<IO::Compress::Xz> or false if not both are installed. 1963 1964You can use this as a shortcut to determine whether C<Archive::Tar> 1965will do what you think before passing compressed archives to its 1966C<read> method. 1967 1968=cut 1969 1970sub can_handle_compressed_files { return ZLIB && BZIP ? 1 : 0 } 1971 1972sub no_string_support { 1973 croak("You have to install IO::String to support writing archives to strings"); 1974} 1975 1976sub _symlinks_resolver{ 1977 my ($src, $trg) = @_; 1978 my @src = split /[\/\\]/, $src; 1979 my @trg = split /[\/\\]/, $trg; 1980 pop @src; #strip out current object name 1981 if(@trg and $trg[0] eq ''){ 1982 shift @trg; 1983 #restart path from scratch 1984 @src = ( ); 1985 } 1986 foreach my $part ( @trg ){ 1987 next if $part eq '.'; #ignore current 1988 if($part eq '..'){ 1989 #got to parent 1990 pop @src; 1991 } 1992 else{ 1993 #append it 1994 push @src, $part; 1995 } 1996 } 1997 my $path = join('/', @src); 1998 warn "_symlinks_resolver('$src','$trg') = $path" if $DEBUG; 1999 return $path; 2000} 2001 20021; 2003 2004__END__ 2005 2006=head1 GLOBAL VARIABLES 2007 2008=head2 $Archive::Tar::FOLLOW_SYMLINK 2009 2010Set this variable to C<1> to make C<Archive::Tar> effectively make a 2011copy of the file when extracting. Default is C<0>, which 2012means the symlink stays intact. Of course, you will have to pack the 2013file linked to as well. 2014 2015This option is checked when you write out the tarfile using C<write> 2016or C<create_archive>. 2017 2018This works just like C</bin/tar>'s C<-h> option. 2019 2020=head2 $Archive::Tar::CHOWN 2021 2022By default, C<Archive::Tar> will try to C<chown> your files if it is 2023able to. In some cases, this may not be desired. In that case, set 2024this variable to C<0> to disable C<chown>-ing, even if it were 2025possible. 2026 2027The default is C<1>. 2028 2029=head2 $Archive::Tar::CHMOD 2030 2031By default, C<Archive::Tar> will try to C<chmod> your files to 2032whatever mode was specified for the particular file in the archive. 2033In some cases, this may not be desired. In that case, set this 2034variable to C<0> to disable C<chmod>-ing. 2035 2036The default is C<1>. 2037 2038=head2 $Archive::Tar::SAME_PERMISSIONS 2039 2040When, C<$Archive::Tar::CHMOD> is enabled, this setting controls whether 2041the permissions on files from the archive are used without modification 2042of if they are filtered by removing any setid bits and applying the 2043current umask. 2044 2045The default is C<1> for the root user and C<0> for normal users. 2046 2047=head2 $Archive::Tar::DO_NOT_USE_PREFIX 2048 2049By default, C<Archive::Tar> will try to put paths that are over 2050100 characters in the C<prefix> field of your tar header, as 2051defined per POSIX-standard. However, some (older) tar programs 2052do not implement this spec. To retain compatibility with these older 2053or non-POSIX compliant versions, you can set the C<$DO_NOT_USE_PREFIX> 2054variable to a true value, and C<Archive::Tar> will use an alternate 2055way of dealing with paths over 100 characters by using the 2056C<GNU Extended Header> feature. 2057 2058Note that clients who do not support the C<GNU Extended Header> 2059feature will not be able to read these archives. Such clients include 2060tars on C<Solaris>, C<Irix> and C<AIX>. 2061 2062The default is C<0>. 2063 2064=head2 $Archive::Tar::DEBUG 2065 2066Set this variable to C<1> to always get the C<Carp::longmess> output 2067of the warnings, instead of the regular C<carp>. This is the same 2068message you would get by doing: 2069 2070 $tar->error(1); 2071 2072Defaults to C<0>. 2073 2074=head2 $Archive::Tar::WARN 2075 2076Set this variable to C<0> if you do not want any warnings printed. 2077Personally I recommend against doing this, but people asked for the 2078option. Also, be advised that this is of course not threadsafe. 2079 2080Defaults to C<1>. 2081 2082=head2 $Archive::Tar::error 2083 2084Holds the last reported error. Kept for historical reasons, but its 2085use is very much discouraged. Use the C<error()> method instead: 2086 2087 warn $tar->error unless $tar->extract; 2088 2089Note that in older versions of this module, the C<error()> method 2090would return an effectively global value even when called an instance 2091method as above. This has since been fixed, and multiple instances of 2092C<Archive::Tar> now have separate error strings. 2093 2094=head2 $Archive::Tar::INSECURE_EXTRACT_MODE 2095 2096This variable indicates whether C<Archive::Tar> should allow 2097files to be extracted outside their current working directory. 2098 2099Allowing this could have security implications, as a malicious 2100tar archive could alter or replace any file the extracting user 2101has permissions to. Therefor, the default is to not allow 2102insecure extractions. 2103 2104If you trust the archive, or have other reasons to allow the 2105archive to write files outside your current working directory, 2106set this variable to C<true>. 2107 2108Note that this is a backwards incompatible change from version 2109C<1.36> and before. 2110 2111=head2 $Archive::Tar::HAS_PERLIO 2112 2113This variable holds a boolean indicating if we currently have 2114C<perlio> support loaded. This will be enabled for any perl 2115greater than C<5.8> compiled with C<perlio>. 2116 2117If you feel strongly about disabling it, set this variable to 2118C<false>. Note that you will then need C<IO::String> installed 2119to support writing stringified archives. 2120 2121Don't change this variable unless you B<really> know what you're 2122doing. 2123 2124=head2 $Archive::Tar::HAS_IO_STRING 2125 2126This variable holds a boolean indicating if we currently have 2127C<IO::String> support loaded. This will be enabled for any perl 2128that has a loadable C<IO::String> module. 2129 2130If you feel strongly about disabling it, set this variable to 2131C<false>. Note that you will then need C<perlio> support from 2132your perl to be able to write stringified archives. 2133 2134Don't change this variable unless you B<really> know what you're 2135doing. 2136 2137=head2 $Archive::Tar::ZERO_PAD_NUMBERS 2138 2139This variable holds a boolean indicating if we will create 2140zero padded numbers for C<size>, C<mtime> and C<checksum>. 2141The default is C<0>, indicating that we will create space padded 2142numbers. Added for compatibility with C<busybox> implementations. 2143 2144=head2 Tuning the way RESOLVE_SYMLINK will works 2145 2146 You can tune the behaviour by setting the $Archive::Tar::RESOLVE_SYMLINK variable, 2147 or $ENV{PERL5_AT_RESOLVE_SYMLINK} before loading the module Archive::Tar. 2148 2149 Values can be one of the following: 2150 2151 none 2152 Disable this mechanism and failed as it was in previous version (<1.88) 2153 2154 speed (default) 2155 If you prefer speed 2156 this will read again the whole archive using read() so all entries 2157 will be available 2158 2159 memory 2160 If you prefer memory 2161 2162 Limitation 2163 2164 It won't work for terminal, pipe or sockets or every non seekable source. 2165 2166=cut 2167 2168=head1 FAQ 2169 2170=over 4 2171 2172=item What's the minimum perl version required to run Archive::Tar? 2173 2174You will need perl version 5.005_03 or newer. 2175 2176=item Isn't Archive::Tar slow? 2177 2178Yes it is. It's pure perl, so it's a lot slower then your C</bin/tar> 2179However, it's very portable. If speed is an issue, consider using 2180C</bin/tar> instead. 2181 2182=item Isn't Archive::Tar heavier on memory than /bin/tar? 2183 2184Yes it is, see previous answer. Since C<Compress::Zlib> and therefore 2185C<IO::Zlib> doesn't support C<seek> on their filehandles, there is little 2186choice but to read the archive into memory. 2187This is ok if you want to do in-memory manipulation of the archive. 2188 2189If you just want to extract, use the C<extract_archive> class method 2190instead. It will optimize and write to disk immediately. 2191 2192Another option is to use the C<iter> class method to iterate over 2193the files in the tarball without reading them all in memory at once. 2194 2195=item Can you lazy-load data instead? 2196 2197In some cases, yes. You can use the C<iter> class method to iterate 2198over the files in the tarball without reading them all in memory at once. 2199 2200=item How much memory will an X kb tar file need? 2201 2202Probably more than X kb, since it will all be read into memory. If 2203this is a problem, and you don't need to do in memory manipulation 2204of the archive, consider using the C<iter> class method, or C</bin/tar> 2205instead. 2206 2207=item What do you do with unsupported filetypes in an archive? 2208 2209C<Unix> has a few filetypes that aren't supported on other platforms, 2210like C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just 2211try to make a copy of the original file, rather than throwing an error. 2212 2213This does require you to read the entire archive in to memory first, 2214since otherwise we wouldn't know what data to fill the copy with. 2215(This means that you cannot use the class methods, including C<iter> 2216on archives that have incompatible filetypes and still expect things 2217to work). 2218 2219For other filetypes, like C<chardevs> and C<blockdevs> we'll warn that 2220the extraction of this particular item didn't work. 2221 2222=item I'm using WinZip, or some other non-POSIX client, and files are not being extracted properly! 2223 2224By default, C<Archive::Tar> is in a completely POSIX-compatible 2225mode, which uses the POSIX-specification of C<tar> to store files. 2226For paths greater than 100 characters, this is done using the 2227C<POSIX header prefix>. Non-POSIX-compatible clients may not support 2228this part of the specification, and may only support the C<GNU Extended 2229Header> functionality. To facilitate those clients, you can set the 2230C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. See the 2231C<GLOBAL VARIABLES> section for details on this variable. 2232 2233Note that GNU tar earlier than version 1.14 does not cope well with 2234the C<POSIX header prefix>. If you use such a version, consider setting 2235the C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. 2236 2237=item How do I extract only files that have property X from an archive? 2238 2239Sometimes, you might not wish to extract a complete archive, just 2240the files that are relevant to you, based on some criteria. 2241 2242You can do this by filtering a list of C<Archive::Tar::File> objects 2243based on your criteria. For example, to extract only files that have 2244the string C<foo> in their title, you would use: 2245 2246 $tar->extract( 2247 grep { $_->full_path =~ /foo/ } $tar->get_files 2248 ); 2249 2250This way, you can filter on any attribute of the files in the archive. 2251Consult the C<Archive::Tar::File> documentation on how to use these 2252objects. 2253 2254=item How do I access .tar.Z files? 2255 2256The C<Archive::Tar> module can optionally use C<Compress::Zlib> (via 2257the C<IO::Zlib> module) to access tar files that have been compressed 2258with C<gzip>. Unfortunately tar files compressed with the Unix C<compress> 2259utility cannot be read by C<Compress::Zlib> and so cannot be directly 2260accesses by C<Archive::Tar>. 2261 2262If the C<uncompress> or C<gunzip> programs are available, you can use 2263one of these workarounds to read C<.tar.Z> files from C<Archive::Tar> 2264 2265Firstly with C<uncompress> 2266 2267 use Archive::Tar; 2268 2269 open F, "uncompress -c $filename |"; 2270 my $tar = Archive::Tar->new(*F); 2271 ... 2272 2273and this with C<gunzip> 2274 2275 use Archive::Tar; 2276 2277 open F, "gunzip -c $filename |"; 2278 my $tar = Archive::Tar->new(*F); 2279 ... 2280 2281Similarly, if the C<compress> program is available, you can use this to 2282write a C<.tar.Z> file 2283 2284 use Archive::Tar; 2285 use IO::File; 2286 2287 my $fh = IO::File->new( "| compress -c >$filename" ); 2288 my $tar = Archive::Tar->new(); 2289 ... 2290 $tar->write($fh); 2291 $fh->close ; 2292 2293=item How do I handle Unicode strings? 2294 2295C<Archive::Tar> uses byte semantics for any files it reads from or writes 2296to disk. This is not a problem if you only deal with files and never 2297look at their content or work solely with byte strings. But if you use 2298Unicode strings with character semantics, some additional steps need 2299to be taken. 2300 2301For example, if you add a Unicode string like 2302 2303 # Problem 2304 $tar->add_data('file.txt', "Euro: \x{20AC}"); 2305 2306then there will be a problem later when the tarfile gets written out 2307to disk via C<< $tar->write() >>: 2308 2309 Wide character in print at .../Archive/Tar.pm line 1014. 2310 2311The data was added as a Unicode string and when writing it out to disk, 2312the C<:utf8> line discipline wasn't set by C<Archive::Tar>, so Perl 2313tried to convert the string to ISO-8859 and failed. The written file 2314now contains garbage. 2315 2316For this reason, Unicode strings need to be converted to UTF-8-encoded 2317bytestrings before they are handed off to C<add_data()>: 2318 2319 use Encode; 2320 my $data = "Accented character: \x{20AC}"; 2321 $data = encode('utf8', $data); 2322 2323 $tar->add_data('file.txt', $data); 2324 2325A opposite problem occurs if you extract a UTF8-encoded file from a 2326tarball. Using C<get_content()> on the C<Archive::Tar::File> object 2327will return its content as a bytestring, not as a Unicode string. 2328 2329If you want it to be a Unicode string (because you want character 2330semantics with operations like regular expression matching), you need 2331to decode the UTF8-encoded content and have Perl convert it into 2332a Unicode string: 2333 2334 use Encode; 2335 my $data = $tar->get_content(); 2336 2337 # Make it a Unicode string 2338 $data = decode('utf8', $data); 2339 2340There is no easy way to provide this functionality in C<Archive::Tar>, 2341because a tarball can contain many files, and each of which could be 2342encoded in a different way. 2343 2344=back 2345 2346=head1 CAVEATS 2347 2348The AIX tar does not fill all unused space in the tar archive with 0x00. 2349This sometimes leads to warning messages from C<Archive::Tar>. 2350 2351 Invalid header block at offset nnn 2352 2353A fix for that problem is scheduled to be released in the following levels 2354of AIX, all of which should be coming out in the 4th quarter of 2009: 2355 2356 AIX 5.3 TL7 SP10 2357 AIX 5.3 TL8 SP8 2358 AIX 5.3 TL9 SP5 2359 AIX 5.3 TL10 SP2 2360 2361 AIX 6.1 TL0 SP11 2362 AIX 6.1 TL1 SP7 2363 AIX 6.1 TL2 SP6 2364 AIX 6.1 TL3 SP3 2365 2366The IBM APAR number for this problem is IZ50240 (Reported component ID: 23675765G0300 / AIX 5.3). It is possible to get an ifix for that problem. 2368If you need an ifix please contact your local IBM AIX support. 2369 2370=head1 TODO 2371 2372=over 4 2373 2374=item Check if passed in handles are open for read/write 2375 2376Currently I don't know of any portable pure perl way to do this. 2377Suggestions welcome. 2378 2379=item Allow archives to be passed in as string 2380 2381Currently, we only allow opened filehandles or filenames, but 2382not strings. The internals would need some reworking to facilitate 2383stringified archives. 2384 2385=item Facilitate processing an opened filehandle of a compressed archive 2386 2387Currently, we only support this if the filehandle is an IO::Zlib object. 2388Environments, like apache, will present you with an opened filehandle 2389to an uploaded file, which might be a compressed archive. 2390 2391=back 2392 2393=head1 SEE ALSO 2394 2395=over 4 2396 2397=item The GNU tar specification 2398 2399C<http://www.gnu.org/software/tar/manual/tar.html> 2400 2401=item The PAX format specification 2402 2403The specification which tar derives from; C< http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html> 2404 2405=item A comparison of GNU and POSIX tar standards; C<http://www.delorie.com/gnu/docs/tar/tar_114.html> 2406 2407=item GNU tar intends to switch to POSIX compatibility 2408 2409GNU Tar authors have expressed their intention to become completely 2410POSIX-compatible; C<http://www.gnu.org/software/tar/manual/html_node/Formats.html> 2411 2412=item A Comparison between various tar implementations 2413 2414Lists known issues and incompatibilities; C<http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs> 2415 2416=back 2417 2418=head1 AUTHOR 2419 2420This module by Jos Boumans E<lt>kane@cpan.orgE<gt>. 2421 2422Please reports bugs to E<lt>bug-archive-tar@rt.cpan.orgE<gt>. 2423 2424=head1 ACKNOWLEDGEMENTS 2425 2426Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney, Gisle Aas, 2427Rainer Tammer and especially Andrew Savige for their help and suggestions. 2428 2429=head1 COPYRIGHT 2430 2431This module is copyright (c) 2002 - 2009 Jos Boumans 2432E<lt>kane@cpan.orgE<gt>. All rights reserved. 2433 2434This library is free software; you may redistribute and/or modify 2435it under the same terms as Perl itself. 2436 2437=cut 2438