1### the gnu tar specification: 2### http://www.gnu.org/software/tar/manual/tar.html 3### 4### and the pax format spec, which tar derives from: 5### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html 6 7package Archive::Tar; 8require 5.005_03; 9 10use Cwd; 11use IO::Zlib; 12use IO::File; 13use Carp qw(carp croak); 14use File::Spec (); 15use File::Spec::Unix (); 16use File::Path (); 17 18use Archive::Tar::File; 19use Archive::Tar::Constant; 20 21require Exporter; 22 23use strict; 24use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD 25 $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING $SAME_PERMISSIONS 26 $INSECURE_EXTRACT_MODE $ZERO_PAD_NUMBERS @ISA @EXPORT $RESOLVE_SYMLINK 27 ]; 28 29@ISA = qw[Exporter]; 30@EXPORT = qw[ COMPRESS_GZIP COMPRESS_BZIP ]; 31$DEBUG = 0; 32$WARN = 1; 33$FOLLOW_SYMLINK = 0; 34$VERSION = "1.96_01"; 35$CHOWN = 1; 36$CHMOD = 1; 37$SAME_PERMISSIONS = $> == 0 ? 1 : 0; 38$DO_NOT_USE_PREFIX = 0; 39$INSECURE_EXTRACT_MODE = 0; 40$ZERO_PAD_NUMBERS = 0; 41$RESOLVE_SYMLINK = $ENV{'PERL5_AT_RESOLVE_SYMLINK'} || 'speed'; 42 43BEGIN { 44 use Config; 45 $HAS_PERLIO = $Config::Config{useperlio}; 46 47 ### try and load IO::String anyway, so you can dynamically 48 ### switch between perlio and IO::String 49 $HAS_IO_STRING = eval { 50 require IO::String; 51 import IO::String; 52 1; 53 } || 0; 54} 55 56=head1 NAME 57 58Archive::Tar - module for manipulations of tar archives 59 60=head1 SYNOPSIS 61 62 use Archive::Tar; 63 my $tar = Archive::Tar->new; 64 65 $tar->read('origin.tgz'); 66 $tar->extract(); 67 68 $tar->add_files('file/foo.pl', 'docs/README'); 69 $tar->add_data('file/baz.txt', 'This is the contents now'); 70 71 $tar->rename('oldname', 'new/file/name'); 72 $tar->chown('/', 'root'); 73 $tar->chown('/', 'root:root'); 74 $tar->chmod('/tmp', '1777'); 75 76 $tar->write('files.tar'); # plain tar 77 $tar->write('files.tgz', COMPRESS_GZIP); # gzip compressed 78 $tar->write('files.tbz', COMPRESS_BZIP); # bzip2 compressed 79 80=head1 DESCRIPTION 81 82Archive::Tar provides an object oriented mechanism for handling tar 83files. It provides class methods for quick and easy files handling 84while also allowing for the creation of tar file objects for custom 85manipulation. If you have the IO::Zlib module installed, 86Archive::Tar will also support compressed or gzipped tar files. 87 88An object of class Archive::Tar represents a .tar(.gz) archive full 89of files and things. 90 91=head1 Object Methods 92 93=head2 Archive::Tar->new( [$file, $compressed] ) 94 95Returns a new Tar object. If given any arguments, C<new()> calls the 96C<read()> method automatically, passing on the arguments provided to 97the C<read()> method. 98 99If C<new()> is invoked with arguments and the C<read()> method fails 100for any reason, C<new()> returns undef. 101 102=cut 103 104my $tmpl = { 105 _data => [ ], 106 _file => 'Unknown', 107}; 108 109### install get/set accessors for this object. 110for my $key ( keys %$tmpl ) { 111 no strict 'refs'; 112 *{__PACKAGE__."::$key"} = sub { 113 my $self = shift; 114 $self->{$key} = $_[0] if @_; 115 return $self->{$key}; 116 } 117} 118 119sub new { 120 my $class = shift; 121 $class = ref $class if ref $class; 122 123 ### copying $tmpl here since a shallow copy makes it use the 124 ### same aref, causing for files to remain in memory always. 125 my $obj = bless { _data => [ ], _file => 'Unknown', _error => '' }, $class; 126 127 if (@_) { 128 unless ( $obj->read( @_ ) ) { 129 $obj->_error(qq[No data could be read from file]); 130 return; 131 } 132 } 133 134 return $obj; 135} 136 137=head2 $tar->read ( $filename|$handle, [$compressed, {opt => 'val'}] ) 138 139Read the given tar file into memory. 140The first argument can either be the name of a file or a reference to 141an already open filehandle (or an IO::Zlib object if it's compressed) 142 143The C<read> will I<replace> any previous content in C<$tar>! 144 145The second argument may be considered optional, but remains for 146backwards compatibility. Archive::Tar now looks at the file 147magic to determine what class should be used to open the file 148and will transparently Do The Right Thing. 149 150Archive::Tar will warn if you try to pass a bzip2 compressed file and the 151IO::Zlib / IO::Uncompress::Bunzip2 modules are not available and simply return. 152 153Note that you can currently B<not> pass a C<gzip> compressed 154filehandle, which is not opened with C<IO::Zlib>, a C<bzip2> compressed 155filehandle, which is not opened with C<IO::Uncompress::Bunzip2>, nor a string 156containing the full archive information (either compressed or 157uncompressed). These are worth while features, but not currently 158implemented. See the C<TODO> section. 159 160The third argument can be a hash reference with options. Note that 161all options are case-sensitive. 162 163=over 4 164 165=item limit 166 167Do not read more than C<limit> files. This is useful if you have 168very big archives, and are only interested in the first few files. 169 170=item filter 171 172Can be set to a regular expression. Only files with names that match 173the expression will be read. 174 175=item md5 176 177Set to 1 and the md5sum of files will be returned (instead of file data) 178 my $iter = Archive::Tar->iter( $file, 1, {md5 => 1} ); 179 while( my $f = $iter->() ) { 180 print $f->data . "\t" . $f->full_path . $/; 181 } 182 183=item extract 184 185If set to true, immediately extract entries when reading them. This 186gives you the same memory break as the C<extract_archive> function. 187Note however that entries will not be read into memory, but written 188straight to disk. This means no C<Archive::Tar::File> objects are 189created for you to inspect. 190 191=back 192 193All files are stored internally as C<Archive::Tar::File> objects. 194Please consult the L<Archive::Tar::File> documentation for details. 195 196Returns the number of files read in scalar context, and a list of 197C<Archive::Tar::File> objects in list context. 198 199=cut 200 201sub read { 202 my $self = shift; 203 my $file = shift; 204 my $gzip = shift || 0; 205 my $opts = shift || {}; 206 207 unless( defined $file ) { 208 $self->_error( qq[No file to read from!] ); 209 return; 210 } else { 211 $self->_file( $file ); 212 } 213 214 my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) ) 215 or return; 216 217 my $data = $self->_read_tar( $handle, $opts ) or return; 218 219 $self->_data( $data ); 220 221 return wantarray ? @$data : scalar @$data; 222} 223 224sub _get_handle { 225 my $self = shift; 226 my $file = shift; return unless defined $file; 227 my $compress = shift || 0; 228 my $mode = shift || READ_ONLY->( ZLIB ); # default to read only 229 230 ### Check if file is a file handle or IO glob 231 if ( ref $file ) { 232 return $file if eval{ *$file{IO} }; 233 return $file if eval{ $file->isa(q{IO::Handle}) }; 234 $file = q{}.$file; 235 } 236 237 ### get a FH opened to the right class, so we can use it transparently 238 ### throughout the program 239 my $fh; 240 { ### reading magic only makes sense if we're opening a file for 241 ### reading. otherwise, just use what the user requested. 242 my $magic = ''; 243 if( MODE_READ->($mode) ) { 244 open my $tmp, $file or do { 245 $self->_error( qq[Could not open '$file' for reading: $!] ); 246 return; 247 }; 248 249 ### read the first 4 bites of the file to figure out which class to 250 ### use to open the file. 251 sysread( $tmp, $magic, 4 ); 252 close $tmp; 253 } 254 255 ### is it bzip? 256 ### if you asked specifically for bzip compression, or if we're in 257 ### read mode and the magic numbers add up, use bzip 258 if( BZIP and ( 259 ($compress eq COMPRESS_BZIP) or 260 ( MODE_READ->($mode) and $magic =~ BZIP_MAGIC_NUM ) 261 ) 262 ) { 263 264 ### different reader/writer modules, different error vars... sigh 265 if( MODE_READ->($mode) ) { 266 $fh = IO::Uncompress::Bunzip2->new( $file ) or do { 267 $self->_error( qq[Could not read '$file': ] . 268 $IO::Uncompress::Bunzip2::Bunzip2Error 269 ); 270 return; 271 }; 272 273 } else { 274 $fh = IO::Compress::Bzip2->new( $file ) or do { 275 $self->_error( qq[Could not write to '$file': ] . 276 $IO::Compress::Bzip2::Bzip2Error 277 ); 278 return; 279 }; 280 } 281 282 ### is it gzip? 283 ### if you asked for compression, if you wanted to read or the gzip 284 ### magic number is present (redundant with read) 285 } elsif( ZLIB and ( 286 $compress or MODE_READ->($mode) or $magic =~ GZIP_MAGIC_NUM 287 ) 288 ) { 289 $fh = IO::Zlib->new; 290 291 unless( $fh->open( $file, $mode ) ) { 292 $self->_error(qq[Could not create filehandle for '$file': $!]); 293 return; 294 } 295 296 ### is it plain tar? 297 } else { 298 $fh = IO::File->new; 299 300 unless( $fh->open( $file, $mode ) ) { 301 $self->_error(qq[Could not create filehandle for '$file': $!]); 302 return; 303 } 304 305 ### enable bin mode on tar archives 306 binmode $fh; 307 } 308 } 309 310 return $fh; 311} 312 313 314sub _read_tar { 315 my $self = shift; 316 my $handle = shift or return; 317 my $opts = shift || {}; 318 319 my $count = $opts->{limit} || 0; 320 my $filter = $opts->{filter}; 321 my $md5 = $opts->{md5} || 0; # cdrake 322 my $filter_cb = $opts->{filter_cb}; 323 my $extract = $opts->{extract} || 0; 324 325 ### set a cap on the amount of files to extract ### 326 my $limit = 0; 327 $limit = 1 if $count > 0; 328 329 my $tarfile = [ ]; 330 my $chunk; 331 my $read = 0; 332 my $real_name; # to set the name of a file when 333 # we're encountering @longlink 334 my $data; 335 336 LOOP: 337 while( $handle->read( $chunk, HEAD ) ) { 338 ### IO::Zlib doesn't support this yet 339 my $offset; 340 if ( ref($handle) ne 'IO::Zlib' ) { 341 local $@; 342 $offset = eval { tell $handle } || 'unknown'; 343 $@ = ''; 344 } 345 else { 346 $offset = 'unknown'; 347 } 348 349 unless( $read++ ) { 350 my $gzip = GZIP_MAGIC_NUM; 351 if( $chunk =~ /$gzip/ ) { 352 $self->_error( qq[Cannot read compressed format in tar-mode] ); 353 return; 354 } 355 356 ### size is < HEAD, which means a corrupted file, as the minimum 357 ### length is _at least_ HEAD 358 if (length $chunk != HEAD) { 359 $self->_error( qq[Cannot read enough bytes from the tarfile] ); 360 return; 361 } 362 } 363 364 ### if we can't read in all bytes... ### 365 last if length $chunk != HEAD; 366 367 ### Apparently this should really be two blocks of 512 zeroes, 368 ### but GNU tar sometimes gets it wrong. See comment in the 369 ### source code (tar.c) to GNU cpio. 370 next if $chunk eq TAR_END; 371 372 ### according to the posix spec, the last 12 bytes of the header are 373 ### null bytes, to pad it to a 512 byte block. That means if these 374 ### bytes are NOT null bytes, it's a corrupt header. See: 375 ### www.koders.com/c/fidCE473AD3D9F835D690259D60AD5654591D91D5BA.aspx 376 ### line 111 377 { my $nulls = join '', "\0" x 12; 378 unless( $nulls eq substr( $chunk, 500, 12 ) ) { 379 $self->_error( qq[Invalid header block at offset $offset] ); 380 next LOOP; 381 } 382 } 383 384 ### pass the realname, so we can set it 'proper' right away 385 ### some of the heuristics are done on the name, so important 386 ### to set it ASAP 387 my $entry; 388 { my %extra_args = (); 389 $extra_args{'name'} = $$real_name if defined $real_name; 390 391 unless( $entry = Archive::Tar::File->new( chunk => $chunk, 392 %extra_args ) 393 ) { 394 $self->_error( qq[Couldn't read chunk at offset $offset] ); 395 next LOOP; 396 } 397 } 398 399 ### ignore labels: 400 ### http://www.gnu.org/software/tar/manual/html_chapter/Media.html#SEC159 401 next if $entry->is_label; 402 403 if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) { 404 405 if ( $entry->is_file && !$entry->validate ) { 406 ### sometimes the chunk is rather fux0r3d and a whole 512 407 ### bytes ends up in the ->name area. 408 ### clean it up, if need be 409 my $name = $entry->name; 410 $name = substr($name, 0, 100) if length $name > 100; 411 $name =~ s/\n/ /g; 412 413 $self->_error( $name . qq[: checksum error] ); 414 next LOOP; 415 } 416 417 my $block = BLOCK_SIZE->( $entry->size ); 418 419 $data = $entry->get_content_by_ref; 420 421 my $skip = 0; 422 my $ctx; # cdrake 423 ### skip this entry if we're filtering 424 425 if($md5) { # cdrake 426 $ctx = Digest::MD5->new; # cdrake 427 $skip=5; # cdrake 428 429 } elsif ($filter && $entry->name !~ $filter) { 430 $skip = 1; 431 432 ### skip this entry if it's a pax header. This is a special file added 433 ### by, among others, git-generated tarballs. It holds comments and is 434 ### not meant for extracting. See #38932: pax_global_header extracted 435 } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) { 436 $skip = 2; 437 } elsif ($filter_cb && ! $filter_cb->($entry)) { 438 $skip = 3; 439 } 440 441 if ($skip) { 442 # 443 # Since we're skipping, do not allocate memory for the 444 # whole file. Read it 64 BLOCKS at a time. Do not 445 # complete the skip yet because maybe what we read is a 446 # longlink and it won't get skipped after all 447 # 448 my $amt = $block; 449 my $fsz=$entry->size; # cdrake 450 while ($amt > 0) { 451 $$data = ''; 452 my $this = 64 * BLOCK; 453 $this = $amt if $this > $amt; 454 if( $handle->read( $$data, $this ) < $this ) { 455 $self->_error( qq[Read error on tarfile (missing data) ']. 456 $entry->full_path ."' at offset $offset" ); 457 next LOOP; 458 } 459 $amt -= $this; 460 $fsz -= $this; # cdrake 461 substr ($$data, $fsz) = "" if ($fsz<0); # remove external junk prior to md5 # cdrake 462 $ctx->add($$data) if($skip==5); # cdrake 463 } 464 $$data = $ctx->hexdigest if($skip==5 && !$entry->is_longlink && !$entry->is_unknown && !$entry->is_label ) ; # cdrake 465 } else { 466 467 ### just read everything into memory 468 ### can't do lazy loading since IO::Zlib doesn't support 'seek' 469 ### this is because Compress::Zlib doesn't support it =/ 470 ### this reads in the whole data in one read() call. 471 if ( $handle->read( $$data, $block ) < $block ) { 472 $self->_error( qq[Read error on tarfile (missing data) ']. 473 $entry->full_path ."' at offset $offset" ); 474 next LOOP; 475 } 476 ### throw away trailing garbage ### 477 substr ($$data, $entry->size) = "" if defined $$data; 478 } 479 480 ### part II of the @LongLink munging -- need to do /after/ 481 ### the checksum check. 482 if( $entry->is_longlink ) { 483 ### weird thing in tarfiles -- if the file is actually a 484 ### @LongLink, the data part seems to have a trailing ^@ 485 ### (unprintable) char. to display, pipe output through less. 486 ### but that doesn't *always* happen.. so check if the last 487 ### character is a control character, and if so remove it 488 ### at any rate, we better remove that character here, or tests 489 ### like 'eq' and hash lookups based on names will SO not work 490 ### remove it by calculating the proper size, and then 491 ### tossing out everything that's longer than that size. 492 493 ### count number of nulls 494 my $nulls = $$data =~ tr/\0/\0/; 495 496 ### cut data + size by that many bytes 497 $entry->size( $entry->size - $nulls ); 498 substr ($$data, $entry->size) = ""; 499 } 500 } 501 502 ### clean up of the entries.. posix tar /apparently/ has some 503 ### weird 'feature' that allows for filenames > 255 characters 504 ### they'll put a header in with as name '././@LongLink' and the 505 ### contents will be the name of the /next/ file in the archive 506 ### pretty crappy and kludgy if you ask me 507 508 ### set the name for the next entry if this is a @LongLink; 509 ### this is one ugly hack =/ but needed for direct extraction 510 if( $entry->is_longlink ) { 511 $real_name = $data; 512 next LOOP; 513 } elsif ( defined $real_name ) { 514 $entry->name( $$real_name ); 515 $entry->prefix(''); 516 undef $real_name; 517 } 518 519 if ($filter && $entry->name !~ $filter) { 520 next LOOP; 521 522 ### skip this entry if it's a pax header. This is a special file added 523 ### by, among others, git-generated tarballs. It holds comments and is 524 ### not meant for extracting. See #38932: pax_global_header extracted 525 } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) { 526 next LOOP; 527 } elsif ($filter_cb && ! $filter_cb->($entry)) { 528 next LOOP; 529 } 530 531 if ( $extract && !$entry->is_longlink 532 && !$entry->is_unknown 533 && !$entry->is_label ) { 534 $self->_extract_file( $entry ) or return; 535 } 536 537 ### Guard against tarfiles with garbage at the end 538 last LOOP if $entry->name eq ''; 539 540 ### push only the name on the rv if we're extracting 541 ### -- for extract_archive 542 push @$tarfile, ($extract ? $entry->name : $entry); 543 544 if( $limit ) { 545 $count-- unless $entry->is_longlink || $entry->is_dir; 546 last LOOP unless $count; 547 } 548 } continue { 549 undef $data; 550 } 551 552 return $tarfile; 553} 554 555=head2 $tar->contains_file( $filename ) 556 557Check if the archive contains a certain file. 558It will return true if the file is in the archive, false otherwise. 559 560Note however, that this function does an exact match using C<eq> 561on the full path. So it cannot compensate for case-insensitive file- 562systems or compare 2 paths to see if they would point to the same 563underlying file. 564 565=cut 566 567sub contains_file { 568 my $self = shift; 569 my $full = shift; 570 571 return unless defined $full; 572 573 ### don't warn if the entry isn't there.. that's what this function 574 ### is for after all. 575 local $WARN = 0; 576 return 1 if $self->_find_entry($full); 577 return; 578} 579 580=head2 $tar->extract( [@filenames] ) 581 582Write files whose names are equivalent to any of the names in 583C<@filenames> to disk, creating subdirectories as necessary. This 584might not work too well under VMS. 585Under MacPerl, the file's modification time will be converted to the 586MacOS zero of time, and appropriate conversions will be done to the 587path. However, the length of each element of the path is not 588inspected to see whether it's longer than MacOS currently allows (32 589characters). 590 591If C<extract> is called without a list of file names, the entire 592contents of the archive are extracted. 593 594Returns a list of filenames extracted. 595 596=cut 597 598sub extract { 599 my $self = shift; 600 my @args = @_; 601 my @files; 602 603 # use the speed optimization for all extracted files 604 local($self->{cwd}) = cwd() unless $self->{cwd}; 605 606 ### you requested the extraction of only certain files 607 if( @args ) { 608 for my $file ( @args ) { 609 610 ### it's already an object? 611 if( UNIVERSAL::isa( $file, 'Archive::Tar::File' ) ) { 612 push @files, $file; 613 next; 614 615 ### go find it then 616 } else { 617 618 my $found; 619 for my $entry ( @{$self->_data} ) { 620 next unless $file eq $entry->full_path; 621 622 ### we found the file you're looking for 623 push @files, $entry; 624 $found++; 625 } 626 627 unless( $found ) { 628 return $self->_error( 629 qq[Could not find '$file' in archive] ); 630 } 631 } 632 } 633 634 ### just grab all the file items 635 } else { 636 @files = $self->get_files; 637 } 638 639 ### nothing found? that's an error 640 unless( scalar @files ) { 641 $self->_error( qq[No files found for ] . $self->_file ); 642 return; 643 } 644 645 ### now extract them 646 for my $entry ( @files ) { 647 unless( $self->_extract_file( $entry ) ) { 648 $self->_error(q[Could not extract ']. $entry->full_path .q['] ); 649 return; 650 } 651 } 652 653 return @files; 654} 655 656=head2 $tar->extract_file( $file, [$extract_path] ) 657 658Write an entry, whose name is equivalent to the file name provided to 659disk. Optionally takes a second parameter, which is the full native 660path (including filename) the entry will be written to. 661 662For example: 663 664 $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' ); 665 666 $tar->extract_file( $at_file_object, 'name/i/want/to/give/it' ); 667 668Returns true on success, false on failure. 669 670=cut 671 672sub extract_file { 673 my $self = shift; 674 my $file = shift; return unless defined $file; 675 my $alt = shift; 676 677 my $entry = $self->_find_entry( $file ) 678 or $self->_error( qq[Could not find an entry for '$file'] ), return; 679 680 return $self->_extract_file( $entry, $alt ); 681} 682 683sub _extract_file { 684 my $self = shift; 685 my $entry = shift or return; 686 my $alt = shift; 687 688 ### you wanted an alternate extraction location ### 689 my $name = defined $alt ? $alt : $entry->full_path; 690 691 ### splitpath takes a bool at the end to indicate 692 ### that it's splitting a dir 693 my ($vol,$dirs,$file); 694 if ( defined $alt ) { # It's a local-OS path 695 ($vol,$dirs,$file) = File::Spec->splitpath( $alt, 696 $entry->is_dir ); 697 } else { 698 ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name, 699 $entry->is_dir ); 700 } 701 702 my $dir; 703 ### is $name an absolute path? ### 704 if( $vol || File::Spec->file_name_is_absolute( $dirs ) ) { 705 706 ### absolute names are not allowed to be in tarballs under 707 ### strict mode, so only allow it if a user tells us to do it 708 if( not defined $alt and not $INSECURE_EXTRACT_MODE ) { 709 $self->_error( 710 q[Entry ']. $entry->full_path .q[' is an absolute path. ]. 711 q[Not extracting absolute paths under SECURE EXTRACT MODE] 712 ); 713 return; 714 } 715 716 ### user asked us to, it's fine. 717 $dir = File::Spec->catpath( $vol, $dirs, "" ); 718 719 ### it's a relative path ### 720 } else { 721 my $cwd = (ref $self and defined $self->{cwd}) 722 ? $self->{cwd} 723 : cwd(); 724 725 my @dirs = defined $alt 726 ? File::Spec->splitdir( $dirs ) # It's a local-OS path 727 : File::Spec::Unix->splitdir( $dirs ); # it's UNIX-style, likely 728 # straight from the tarball 729 730 if( not defined $alt and 731 not $INSECURE_EXTRACT_MODE 732 ) { 733 734 ### paths that leave the current directory are not allowed under 735 ### strict mode, so only allow it if a user tells us to do this. 736 if( grep { $_ eq '..' } @dirs ) { 737 738 $self->_error( 739 q[Entry ']. $entry->full_path .q[' is attempting to leave ]. 740 q[the current working directory. Not extracting under ]. 741 q[SECURE EXTRACT MODE] 742 ); 743 return; 744 } 745 746 ### the archive may be asking us to extract into a symlink. This 747 ### is not sane and a possible security issue, as outlined here: 748 ### https://rt.cpan.org/Ticket/Display.html?id=30380 749 ### https://bugzilla.redhat.com/show_bug.cgi?id=295021 750 ### https://issues.rpath.com/browse/RPL-1716 751 my $full_path = $cwd; 752 for my $d ( @dirs ) { 753 $full_path = File::Spec->catdir( $full_path, $d ); 754 755 ### we've already checked this one, and it's safe. Move on. 756 next if ref $self and $self->{_link_cache}->{$full_path}; 757 758 if( -l $full_path ) { 759 my $to = readlink $full_path; 760 my $diag = "symlinked directory ($full_path => $to)"; 761 762 $self->_error( 763 q[Entry ']. $entry->full_path .q[' is attempting to ]. 764 qq[extract to a $diag. This is considered a security ]. 765 q[vulnerability and not allowed under SECURE EXTRACT ]. 766 q[MODE] 767 ); 768 return; 769 } 770 771 ### XXX keep a cache if possible, so the stats become cheaper: 772 $self->{_link_cache}->{$full_path} = 1 if ref $self; 773 } 774 } 775 776 ### '.' is the directory delimiter on VMS, which has to be escaped 777 ### or changed to '_' on vms. vmsify is used, because older versions 778 ### of vmspath do not handle this properly. 779 ### Must not add a '/' to an empty directory though. 780 map { length() ? VMS::Filespec::vmsify($_.'/') : $_ } @dirs if ON_VMS; 781 782 my ($cwd_vol,$cwd_dir,$cwd_file) 783 = File::Spec->splitpath( $cwd ); 784 my @cwd = File::Spec->splitdir( $cwd_dir ); 785 push @cwd, $cwd_file if length $cwd_file; 786 787 ### We need to pass '' as the last element to catpath. Craig Berry 788 ### explains why (msgid <p0624083dc311ae541393@[172.16.52.1]>): 789 ### The root problem is that splitpath on UNIX always returns the 790 ### final path element as a file even if it is a directory, and of 791 ### course there is no way it can know the difference without checking 792 ### against the filesystem, which it is documented as not doing. When 793 ### you turn around and call catpath, on VMS you have to know which bits 794 ### are directory bits and which bits are file bits. In this case we 795 ### know the result should be a directory. I had thought you could omit 796 ### the file argument to catpath in such a case, but apparently on UNIX 797 ### you can't. 798 $dir = File::Spec->catpath( 799 $cwd_vol, File::Spec->catdir( @cwd, @dirs ), '' 800 ); 801 802 ### catdir() returns undef if the path is longer than 255 chars on 803 ### older VMS systems. 804 unless ( defined $dir ) { 805 $^W && $self->_error( qq[Could not compose a path for '$dirs'\n] ); 806 return; 807 } 808 809 } 810 811 if( -e $dir && !-d _ ) { 812 $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] ); 813 return; 814 } 815 816 unless ( -d _ ) { 817 eval { File::Path::mkpath( $dir, 0, 0777 ) }; 818 if( $@ ) { 819 my $fp = $entry->full_path; 820 $self->_error(qq[Could not create directory '$dir' for '$fp': $@]); 821 return; 822 } 823 824 ### XXX chown here? that might not be the same as in the archive 825 ### as we're only chown'ing to the owner of the file we're extracting 826 ### not to the owner of the directory itself, which may or may not 827 ### be another entry in the archive 828 ### Answer: no, gnu tar doesn't do it either, it'd be the wrong 829 ### way to go. 830 #if( $CHOWN && CAN_CHOWN ) { 831 # chown $entry->uid, $entry->gid, $dir or 832 # $self->_error( qq[Could not set uid/gid on '$dir'] ); 833 #} 834 } 835 836 ### we're done if we just needed to create a dir ### 837 return 1 if $entry->is_dir; 838 839 my $full = File::Spec->catfile( $dir, $file ); 840 841 if( $entry->is_unknown ) { 842 $self->_error( qq[Unknown file type for file '$full'] ); 843 return; 844 } 845 846 if( length $entry->type && $entry->is_file ) { 847 my $fh = IO::File->new; 848 $fh->open( '>' . $full ) or ( 849 $self->_error( qq[Could not open file '$full': $!] ), 850 return 851 ); 852 853 if( $entry->size ) { 854 binmode $fh; 855 syswrite $fh, $entry->data or ( 856 $self->_error( qq[Could not write data to '$full'] ), 857 return 858 ); 859 } 860 861 close $fh or ( 862 $self->_error( qq[Could not close file '$full'] ), 863 return 864 ); 865 866 } else { 867 $self->_make_special_file( $entry, $full ) or return; 868 } 869 870 ### only update the timestamp if it's not a symlink; that will change the 871 ### timestamp of the original. This addresses bug #33669: Could not update 872 ### timestamp warning on symlinks 873 if( not -l $full ) { 874 utime time, $entry->mtime - TIME_OFFSET, $full or 875 $self->_error( qq[Could not update timestamp] ); 876 } 877 878 if( $CHOWN && CAN_CHOWN->() and not -l $full ) { 879 chown $entry->uid, $entry->gid, $full or 880 $self->_error( qq[Could not set uid/gid on '$full'] ); 881 } 882 883 ### only chmod if we're allowed to, but never chmod symlinks, since they'll 884 ### change the perms on the file they're linking too... 885 if( $CHMOD and not -l $full ) { 886 my $mode = $entry->mode; 887 unless ($SAME_PERMISSIONS) { 888 $mode &= ~(oct(7000) | umask); 889 } 890 chmod $mode, $full or 891 $self->_error( qq[Could not chown '$full' to ] . $entry->mode ); 892 } 893 894 return 1; 895} 896 897sub _make_special_file { 898 my $self = shift; 899 my $entry = shift or return; 900 my $file = shift; return unless defined $file; 901 902 my $err; 903 904 if( $entry->is_symlink ) { 905 my $fail; 906 if( ON_UNIX ) { 907 symlink( $entry->linkname, $file ) or $fail++; 908 909 } else { 910 $self->_extract_special_file_as_plain_file( $entry, $file ) 911 or $fail++; 912 } 913 914 $err = qq[Making symbolic link '$file' to '] . 915 $entry->linkname .q[' failed] if $fail; 916 917 } elsif ( $entry->is_hardlink ) { 918 my $fail; 919 if( ON_UNIX ) { 920 link( $entry->linkname, $file ) or $fail++; 921 922 } else { 923 $self->_extract_special_file_as_plain_file( $entry, $file ) 924 or $fail++; 925 } 926 927 $err = qq[Making hard link from '] . $entry->linkname . 928 qq[' to '$file' failed] if $fail; 929 930 } elsif ( $entry->is_fifo ) { 931 ON_UNIX && !system('mknod', $file, 'p') or 932 $err = qq[Making fifo ']. $entry->name .qq[' failed]; 933 934 } elsif ( $entry->is_blockdev or $entry->is_chardev ) { 935 my $mode = $entry->is_blockdev ? 'b' : 'c'; 936 937 ON_UNIX && !system('mknod', $file, $mode, 938 $entry->devmajor, $entry->devminor) or 939 $err = qq[Making block device ']. $entry->name .qq[' (maj=] . 940 $entry->devmajor . qq[ min=] . $entry->devminor . 941 qq[) failed.]; 942 943 } elsif ( $entry->is_socket ) { 944 ### the original doesn't do anything special for sockets.... ### 945 1; 946 } 947 948 return $err ? $self->_error( $err ) : 1; 949} 950 951### don't know how to make symlinks, let's just extract the file as 952### a plain file 953sub _extract_special_file_as_plain_file { 954 my $self = shift; 955 my $entry = shift or return; 956 my $file = shift; return unless defined $file; 957 958 my $err; 959 TRY: { 960 my $orig = $self->_find_entry( $entry->linkname, $entry ); 961 962 unless( $orig ) { 963 $err = qq[Could not find file '] . $entry->linkname . 964 qq[' in memory.]; 965 last TRY; 966 } 967 968 ### clone the entry, make it appear as a normal file ### 969 my $clone = $orig->clone; 970 $clone->_downgrade_to_plainfile; 971 $self->_extract_file( $clone, $file ) or last TRY; 972 973 return 1; 974 } 975 976 return $self->_error($err); 977} 978 979=head2 $tar->list_files( [\@properties] ) 980 981Returns a list of the names of all the files in the archive. 982 983If C<list_files()> is passed an array reference as its first argument 984it returns a list of hash references containing the requested 985properties of each file. The following list of properties is 986supported: name, size, mtime (last modified date), mode, uid, gid, 987linkname, uname, gname, devmajor, devminor, prefix. 988 989Passing an array reference containing only one element, 'name', is 990special cased to return a list of names rather than a list of hash 991references, making it equivalent to calling C<list_files> without 992arguments. 993 994=cut 995 996sub list_files { 997 my $self = shift; 998 my $aref = shift || [ ]; 999 1000 unless( $self->_data ) { 1001 $self->read() or return; 1002 } 1003 1004 if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) { 1005 return map { $_->full_path } @{$self->_data}; 1006 } else { 1007 1008 #my @rv; 1009 #for my $obj ( @{$self->_data} ) { 1010 # push @rv, { map { $_ => $obj->$_() } @$aref }; 1011 #} 1012 #return @rv; 1013 1014 ### this does the same as the above.. just needs a +{ } 1015 ### to make sure perl doesn't confuse it for a block 1016 return map { my $o=$_; 1017 +{ map { $_ => $o->$_() } @$aref } 1018 } @{$self->_data}; 1019 } 1020} 1021 1022sub _find_entry { 1023 my $self = shift; 1024 my $file = shift; 1025 1026 unless( defined $file ) { 1027 $self->_error( qq[No file specified] ); 1028 return; 1029 } 1030 1031 ### it's an object already 1032 return $file if UNIVERSAL::isa( $file, 'Archive::Tar::File' ); 1033 1034seach_entry: 1035 if($self->_data){ 1036 for my $entry ( @{$self->_data} ) { 1037 my $path = $entry->full_path; 1038 return $entry if $path eq $file; 1039 } 1040 } 1041 1042 if($Archive::Tar::RESOLVE_SYMLINK!~/none/){ 1043 if(my $link_entry = shift()){#fallback mode when symlinks are using relative notations ( ../a/./b/text.bin ) 1044 $file = _symlinks_resolver( $link_entry->name, $file ); 1045 goto seach_entry if $self->_data; 1046 1047 #this will be slower than never, but won't failed! 1048 1049 my $iterargs = $link_entry->{'_archive'}; 1050 if($Archive::Tar::RESOLVE_SYMLINK=~/speed/ && @$iterargs==3){ 1051 #faster but whole archive will be read in memory 1052 #read whole archive and share data 1053 my $archive = Archive::Tar->new; 1054 $archive->read( @$iterargs ); 1055 push @$iterargs, $archive; #take a trace for destruction 1056 if($archive->_data){ 1057 $self->_data( $archive->_data ); 1058 goto seach_entry; 1059 } 1060 }#faster 1061 1062 {#slower but lower memory usage 1063 # $iterargs = [$filename, $compressed, $opts]; 1064 my $next = Archive::Tar->iter( @$iterargs ); 1065 while(my $e = $next->()){ 1066 if($e->full_path eq $file){ 1067 undef $next; 1068 return $e; 1069 } 1070 } 1071 }#slower 1072 } 1073 } 1074 1075 $self->_error( qq[No such file in archive: '$file'] ); 1076 return; 1077} 1078 1079=head2 $tar->get_files( [@filenames] ) 1080 1081Returns the C<Archive::Tar::File> objects matching the filenames 1082provided. If no filename list was passed, all C<Archive::Tar::File> 1083objects in the current Tar object are returned. 1084 1085Please refer to the C<Archive::Tar::File> documentation on how to 1086handle these objects. 1087 1088=cut 1089 1090sub get_files { 1091 my $self = shift; 1092 1093 return @{ $self->_data } unless @_; 1094 1095 my @list; 1096 for my $file ( @_ ) { 1097 push @list, grep { defined } $self->_find_entry( $file ); 1098 } 1099 1100 return @list; 1101} 1102 1103=head2 $tar->get_content( $file ) 1104 1105Return the content of the named file. 1106 1107=cut 1108 1109sub get_content { 1110 my $self = shift; 1111 my $entry = $self->_find_entry( shift ) or return; 1112 1113 return $entry->data; 1114} 1115 1116=head2 $tar->replace_content( $file, $content ) 1117 1118Make the string $content be the content for the file named $file. 1119 1120=cut 1121 1122sub replace_content { 1123 my $self = shift; 1124 my $entry = $self->_find_entry( shift ) or return; 1125 1126 return $entry->replace_content( shift ); 1127} 1128 1129=head2 $tar->rename( $file, $new_name ) 1130 1131Rename the file of the in-memory archive to $new_name. 1132 1133Note that you must specify a Unix path for $new_name, since per tar 1134standard, all files in the archive must be Unix paths. 1135 1136Returns true on success and false on failure. 1137 1138=cut 1139 1140sub rename { 1141 my $self = shift; 1142 my $file = shift; return unless defined $file; 1143 my $new = shift; return unless defined $new; 1144 1145 my $entry = $self->_find_entry( $file ) or return; 1146 1147 return $entry->rename( $new ); 1148} 1149 1150=head2 $tar->chmod( $file, $mode ) 1151 1152Change mode of $file to $mode. 1153 1154Returns true on success and false on failure. 1155 1156=cut 1157 1158sub chmod { 1159 my $self = shift; 1160 my $file = shift; return unless defined $file; 1161 my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/; 1162 my @args = ("$mode"); 1163 1164 my $entry = $self->_find_entry( $file ) or return; 1165 my $x = $entry->chmod( @args ); 1166 return $x; 1167} 1168 1169=head2 $tar->chown( $file, $uname [, $gname] ) 1170 1171Change owner $file to $uname and $gname. 1172 1173Returns true on success and false on failure. 1174 1175=cut 1176 1177sub chown { 1178 my $self = shift; 1179 my $file = shift; return unless defined $file; 1180 my $uname = shift; return unless defined $uname; 1181 my @args = ($uname); 1182 push(@args, shift); 1183 1184 my $entry = $self->_find_entry( $file ) or return; 1185 my $x = $entry->chown( @args ); 1186 return $x; 1187} 1188 1189=head2 $tar->remove (@filenamelist) 1190 1191Removes any entries with names matching any of the given filenames 1192from the in-memory archive. Returns a list of C<Archive::Tar::File> 1193objects that remain. 1194 1195=cut 1196 1197sub remove { 1198 my $self = shift; 1199 my @list = @_; 1200 1201 my %seen = map { $_->full_path => $_ } @{$self->_data}; 1202 delete $seen{ $_ } for @list; 1203 1204 $self->_data( [values %seen] ); 1205 1206 return values %seen; 1207} 1208 1209=head2 $tar->clear 1210 1211C<clear> clears the current in-memory archive. This effectively gives 1212you a 'blank' object, ready to be filled again. Note that C<clear> 1213only has effect on the object, not the underlying tarfile. 1214 1215=cut 1216 1217sub clear { 1218 my $self = shift or return; 1219 1220 $self->_data( [] ); 1221 $self->_file( '' ); 1222 1223 return 1; 1224} 1225 1226 1227=head2 $tar->write ( [$file, $compressed, $prefix] ) 1228 1229Write the in-memory archive to disk. The first argument can either 1230be the name of a file or a reference to an already open filehandle (a 1231GLOB reference). 1232 1233The second argument is used to indicate compression. You can either 1234compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed 1235to be the C<gzip> compression level (between 1 and 9), but the use of 1236constants is preferred: 1237 1238 # write a gzip compressed file 1239 $tar->write( 'out.tgz', COMPRESS_GZIP ); 1240 1241 # write a bzip compressed file 1242 $tar->write( 'out.tbz', COMPRESS_BZIP ); 1243 1244Note that when you pass in a filehandle, the compression argument 1245is ignored, as all files are printed verbatim to your filehandle. 1246If you wish to enable compression with filehandles, use an 1247C<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead. 1248 1249The third argument is an optional prefix. All files will be tucked 1250away in the directory you specify as prefix. So if you have files 1251'a' and 'b' in your archive, and you specify 'foo' as prefix, they 1252will be written to the archive as 'foo/a' and 'foo/b'. 1253 1254If no arguments are given, C<write> returns the entire formatted 1255archive as a string, which could be useful if you'd like to stuff the 1256archive into a socket or a pipe to gzip or something. 1257 1258 1259=cut 1260 1261sub write { 1262 my $self = shift; 1263 my $file = shift; $file = '' unless defined $file; 1264 my $gzip = shift || 0; 1265 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix; 1266 my $dummy = ''; 1267 1268 ### only need a handle if we have a file to print to ### 1269 my $handle = length($file) 1270 ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) ) 1271 or return ) 1272 : $HAS_PERLIO ? do { open my $h, '>', \$dummy; $h } 1273 : $HAS_IO_STRING ? IO::String->new 1274 : __PACKAGE__->no_string_support(); 1275 1276 ### Addresses: #41798: Nonempty $\ when writing a TAR file produces a 1277 ### corrupt TAR file. Must clear out $\ to make sure no garbage is 1278 ### printed to the archive 1279 local $\; 1280 1281 for my $entry ( @{$self->_data} ) { 1282 ### entries to be written to the tarfile ### 1283 my @write_me; 1284 1285 ### only now will we change the object to reflect the current state 1286 ### of the name and prefix fields -- this needs to be limited to 1287 ### write() only! 1288 my $clone = $entry->clone; 1289 1290 1291 ### so, if you don't want use to use the prefix, we'll stuff 1292 ### everything in the name field instead 1293 if( $DO_NOT_USE_PREFIX ) { 1294 1295 ### you might have an extended prefix, if so, set it in the clone 1296 ### XXX is ::Unix right? 1297 $clone->name( length $ext_prefix 1298 ? File::Spec::Unix->catdir( $ext_prefix, 1299 $clone->full_path) 1300 : $clone->full_path ); 1301 $clone->prefix( '' ); 1302 1303 ### otherwise, we'll have to set it properly -- prefix part in the 1304 ### prefix and name part in the name field. 1305 } else { 1306 1307 ### split them here, not before! 1308 my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path ); 1309 1310 ### you might have an extended prefix, if so, set it in the clone 1311 ### XXX is ::Unix right? 1312 $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix ) 1313 if length $ext_prefix; 1314 1315 $clone->prefix( $prefix ); 1316 $clone->name( $name ); 1317 } 1318 1319 ### names are too long, and will get truncated if we don't add a 1320 ### '@LongLink' file... 1321 my $make_longlink = ( length($clone->name) > NAME_LENGTH or 1322 length($clone->prefix) > PREFIX_LENGTH 1323 ) || 0; 1324 1325 ### perhaps we need to make a longlink file? 1326 if( $make_longlink ) { 1327 my $longlink = Archive::Tar::File->new( 1328 data => LONGLINK_NAME, 1329 $clone->full_path, 1330 { type => LONGLINK } 1331 ); 1332 1333 unless( $longlink ) { 1334 $self->_error( qq[Could not create 'LongLink' entry for ] . 1335 qq[oversize file '] . $clone->full_path ."'" ); 1336 return; 1337 }; 1338 1339 push @write_me, $longlink; 1340 } 1341 1342 push @write_me, $clone; 1343 1344 ### write the one, optionally 2 a::t::file objects to the handle 1345 for my $clone (@write_me) { 1346 1347 ### if the file is a symlink, there are 2 options: 1348 ### either we leave the symlink intact, but then we don't write any 1349 ### data OR we follow the symlink, which means we actually make a 1350 ### copy. if we do the latter, we have to change the TYPE of the 1351 ### clone to 'FILE' 1352 my $link_ok = $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK; 1353 my $data_ok = !$clone->is_symlink && $clone->has_content; 1354 1355 ### downgrade to a 'normal' file if it's a symlink we're going to 1356 ### treat as a regular file 1357 $clone->_downgrade_to_plainfile if $link_ok; 1358 1359 ### get the header for this block 1360 my $header = $self->_format_tar_entry( $clone ); 1361 unless( $header ) { 1362 $self->_error(q[Could not format header for: ] . 1363 $clone->full_path ); 1364 return; 1365 } 1366 1367 unless( print $handle $header ) { 1368 $self->_error(q[Could not write header for: ] . 1369 $clone->full_path); 1370 return; 1371 } 1372 1373 if( $link_ok or $data_ok ) { 1374 unless( print $handle $clone->data ) { 1375 $self->_error(q[Could not write data for: ] . 1376 $clone->full_path); 1377 return; 1378 } 1379 1380 ### pad the end of the clone if required ### 1381 print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK 1382 } 1383 1384 } ### done writing these entries 1385 } 1386 1387 ### write the end markers ### 1388 print $handle TAR_END x 2 or 1389 return $self->_error( qq[Could not write tar end markers] ); 1390 1391 ### did you want it written to a file, or returned as a string? ### 1392 my $rv = length($file) ? 1 1393 : $HAS_PERLIO ? $dummy 1394 : do { seek $handle, 0, 0; local $/; <$handle> }; 1395 1396 ### make sure to close the handle if we created it 1397 if ( $file ne $handle ) { 1398 unless( close $handle ) { 1399 $self->_error( qq[Could not write tar] ); 1400 return; 1401 } 1402 } 1403 1404 return $rv; 1405} 1406 1407sub _format_tar_entry { 1408 my $self = shift; 1409 my $entry = shift or return; 1410 my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix; 1411 my $no_prefix = shift || 0; 1412 1413 my $file = $entry->name; 1414 my $prefix = $entry->prefix; $prefix = '' unless defined $prefix; 1415 1416 ### remove the prefix from the file name 1417 ### not sure if this is still needed --kane 1418 ### no it's not -- Archive::Tar::File->_new_from_file will take care of 1419 ### this for us. Even worse, this would break if we tried to add a file 1420 ### like x/x. 1421 #if( length $prefix ) { 1422 # $file =~ s/^$match//; 1423 #} 1424 1425 $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix) 1426 if length $ext_prefix; 1427 1428 ### not sure why this is... ### 1429 my $l = PREFIX_LENGTH; # is ambiguous otherwise... 1430 substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH; 1431 1432 my $f1 = "%06o"; my $f2 = $ZERO_PAD_NUMBERS ? "%011o" : "%11o"; 1433 1434 ### this might be optimizable with a 'changed' flag in the file objects ### 1435 my $tar = pack ( 1436 PACK, 1437 $file, 1438 1439 (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]), 1440 (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]), 1441 1442 "", # checksum field - space padded a bit down 1443 1444 (map { $entry->$_() } qw[type linkname magic]), 1445 1446 $entry->version || TAR_VERSION, 1447 1448 (map { $entry->$_() } qw[uname gname]), 1449 (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]), 1450 1451 ($no_prefix ? '' : $prefix) 1452 ); 1453 1454 ### add the checksum ### 1455 my $checksum_fmt = $ZERO_PAD_NUMBERS ? "%06o\0" : "%06o\0"; 1456 substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar)); 1457 1458 return $tar; 1459} 1460 1461=head2 $tar->add_files( @filenamelist ) 1462 1463Takes a list of filenames and adds them to the in-memory archive. 1464 1465The path to the file is automatically converted to a Unix like 1466equivalent for use in the archive, and, if on MacOS, the file's 1467modification time is converted from the MacOS epoch to the Unix epoch. 1468So tar archives created on MacOS with B<Archive::Tar> can be read 1469both with I<tar> on Unix and applications like I<suntar> or 1470I<Stuffit Expander> on MacOS. 1471 1472Be aware that the file's type/creator and resource fork will be lost, 1473which is usually what you want in cross-platform archives. 1474 1475Instead of a filename, you can also pass it an existing C<Archive::Tar::File> 1476object from, for example, another archive. The object will be clone, and 1477effectively be a copy of the original, not an alias. 1478 1479Returns a list of C<Archive::Tar::File> objects that were just added. 1480 1481=cut 1482 1483sub add_files { 1484 my $self = shift; 1485 my @files = @_ or return; 1486 1487 my @rv; 1488 for my $file ( @files ) { 1489 1490 ### you passed an Archive::Tar::File object 1491 ### clone it so we don't accidentally have a reference to 1492 ### an object from another archive 1493 if( UNIVERSAL::isa( $file,'Archive::Tar::File' ) ) { 1494 push @rv, $file->clone; 1495 next; 1496 } 1497 1498 eval { 1499 if( utf8::is_utf8( $file )) { 1500 utf8::encode( $file ); 1501 } 1502 }; 1503 1504 unless( -e $file || -l $file ) { 1505 $self->_error( qq[No such file: '$file'] ); 1506 next; 1507 } 1508 1509 my $obj = Archive::Tar::File->new( file => $file ); 1510 unless( $obj ) { 1511 $self->_error( qq[Unable to add file: '$file'] ); 1512 next; 1513 } 1514 1515 push @rv, $obj; 1516 } 1517 1518 push @{$self->{_data}}, @rv; 1519 1520 return @rv; 1521} 1522 1523=head2 $tar->add_data ( $filename, $data, [$opthashref] ) 1524 1525Takes a filename, a scalar full of data and optionally a reference to 1526a hash with specific options. 1527 1528Will add a file to the in-memory archive, with name C<$filename> and 1529content C<$data>. Specific properties can be set using C<$opthashref>. 1530The following list of properties is supported: name, size, mtime 1531(last modified date), mode, uid, gid, linkname, uname, gname, 1532devmajor, devminor, prefix, type. (On MacOS, the file's path and 1533modification times are converted to Unix equivalents.) 1534 1535Valid values for the file type are the following constants defined by 1536Archive::Tar::Constant: 1537 1538=over 4 1539 1540=item FILE 1541 1542Regular file. 1543 1544=item HARDLINK 1545 1546=item SYMLINK 1547 1548Hard and symbolic ("soft") links; linkname should specify target. 1549 1550=item CHARDEV 1551 1552=item BLOCKDEV 1553 1554Character and block devices. devmajor and devminor should specify the major 1555and minor device numbers. 1556 1557=item DIR 1558 1559Directory. 1560 1561=item FIFO 1562 1563FIFO (named pipe). 1564 1565=item SOCKET 1566 1567Socket. 1568 1569=back 1570 1571Returns the C<Archive::Tar::File> object that was just added, or 1572C<undef> on failure. 1573 1574=cut 1575 1576sub add_data { 1577 my $self = shift; 1578 my ($file, $data, $opt) = @_; 1579 1580 my $obj = Archive::Tar::File->new( data => $file, $data, $opt ); 1581 unless( $obj ) { 1582 $self->_error( qq[Unable to add file: '$file'] ); 1583 return; 1584 } 1585 1586 push @{$self->{_data}}, $obj; 1587 1588 return $obj; 1589} 1590 1591=head2 $tar->error( [$BOOL] ) 1592 1593Returns the current error string (usually, the last error reported). 1594If a true value was specified, it will give the C<Carp::longmess> 1595equivalent of the error, in effect giving you a stacktrace. 1596 1597For backwards compatibility, this error is also available as 1598C<$Archive::Tar::error> although it is much recommended you use the 1599method call instead. 1600 1601=cut 1602 1603{ 1604 $error = ''; 1605 my $longmess; 1606 1607 sub _error { 1608 my $self = shift; 1609 my $msg = $error = shift; 1610 $longmess = Carp::longmess($error); 1611 if (ref $self) { 1612 $self->{_error} = $error; 1613 $self->{_longmess} = $longmess; 1614 } 1615 1616 ### set Archive::Tar::WARN to 0 to disable printing 1617 ### of errors 1618 if( $WARN ) { 1619 carp $DEBUG ? $longmess : $msg; 1620 } 1621 1622 return; 1623 } 1624 1625 sub error { 1626 my $self = shift; 1627 if (ref $self) { 1628 return shift() ? $self->{_longmess} : $self->{_error}; 1629 } else { 1630 return shift() ? $longmess : $error; 1631 } 1632 } 1633} 1634 1635=head2 $tar->setcwd( $cwd ); 1636 1637C<Archive::Tar> needs to know the current directory, and it will run 1638C<Cwd::cwd()> I<every> time it extracts a I<relative> entry from the 1639tarfile and saves it in the file system. (As of version 1.30, however, 1640C<Archive::Tar> will use the speed optimization described below 1641automatically, so it's only relevant if you're using C<extract_file()>). 1642 1643Since C<Archive::Tar> doesn't change the current directory internally 1644while it is extracting the items in a tarball, all calls to C<Cwd::cwd()> 1645can be avoided if we can guarantee that the current directory doesn't 1646get changed externally. 1647 1648To use this performance boost, set the current directory via 1649 1650 use Cwd; 1651 $tar->setcwd( cwd() ); 1652 1653once before calling a function like C<extract_file> and 1654C<Archive::Tar> will use the current directory setting from then on 1655and won't call C<Cwd::cwd()> internally. 1656 1657To switch back to the default behaviour, use 1658 1659 $tar->setcwd( undef ); 1660 1661and C<Archive::Tar> will call C<Cwd::cwd()> internally again. 1662 1663If you're using C<Archive::Tar>'s C<extract()> method, C<setcwd()> will 1664be called for you. 1665 1666=cut 1667 1668sub setcwd { 1669 my $self = shift; 1670 my $cwd = shift; 1671 1672 $self->{cwd} = $cwd; 1673} 1674 1675=head1 Class Methods 1676 1677=head2 Archive::Tar->create_archive($file, $compressed, @filelist) 1678 1679Creates a tar file from the list of files provided. The first 1680argument can either be the name of the tar file to create or a 1681reference to an open file handle (e.g. a GLOB reference). 1682 1683The second argument is used to indicate compression. You can either 1684compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed 1685to be the C<gzip> compression level (between 1 and 9), but the use of 1686constants is preferred: 1687 1688 # write a gzip compressed file 1689 Archive::Tar->create_archive( 'out.tgz', COMPRESS_GZIP, @filelist ); 1690 1691 # write a bzip compressed file 1692 Archive::Tar->create_archive( 'out.tbz', COMPRESS_BZIP, @filelist ); 1693 1694Note that when you pass in a filehandle, the compression argument 1695is ignored, as all files are printed verbatim to your filehandle. 1696If you wish to enable compression with filehandles, use an 1697C<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead. 1698 1699The remaining arguments list the files to be included in the tar file. 1700These files must all exist. Any files which don't exist or can't be 1701read are silently ignored. 1702 1703If the archive creation fails for any reason, C<create_archive> will 1704return false. Please use the C<error> method to find the cause of the 1705failure. 1706 1707Note that this method does not write C<on the fly> as it were; it 1708still reads all the files into memory before writing out the archive. 1709Consult the FAQ below if this is a problem. 1710 1711=cut 1712 1713sub create_archive { 1714 my $class = shift; 1715 1716 my $file = shift; return unless defined $file; 1717 my $gzip = shift || 0; 1718 my @files = @_; 1719 1720 unless( @files ) { 1721 return $class->_error( qq[Cowardly refusing to create empty archive!] ); 1722 } 1723 1724 my $tar = $class->new; 1725 $tar->add_files( @files ); 1726 return $tar->write( $file, $gzip ); 1727} 1728 1729=head2 Archive::Tar->iter( $filename, [ $compressed, {opt => $val} ] ) 1730 1731Returns an iterator function that reads the tar file without loading 1732it all in memory. Each time the function is called it will return the 1733next file in the tarball. The files are returned as 1734C<Archive::Tar::File> objects. The iterator function returns the 1735empty list once it has exhausted the files contained. 1736 1737The second argument can be a hash reference with options, which are 1738identical to the arguments passed to C<read()>. 1739 1740Example usage: 1741 1742 my $next = Archive::Tar->iter( "example.tar.gz", 1, {filter => qr/\.pm$/} ); 1743 1744 while( my $f = $next->() ) { 1745 print $f->name, "\n"; 1746 1747 $f->extract or warn "Extraction failed"; 1748 1749 # .... 1750 } 1751 1752=cut 1753 1754 1755sub iter { 1756 my $class = shift; 1757 my $filename = shift or return; 1758 my $compressed = shift || 0; 1759 my $opts = shift || {}; 1760 1761 ### get a handle to read from. 1762 my $handle = $class->_get_handle( 1763 $filename, 1764 $compressed, 1765 READ_ONLY->( ZLIB ) 1766 ) or return; 1767 1768 my @data; 1769 my $CONSTRUCT_ARGS = [ $filename, $compressed, $opts ]; 1770 return sub { 1771 return shift(@data) if @data; # more than one file returned? 1772 return unless $handle; # handle exhausted? 1773 1774 ### read data, should only return file 1775 my $tarfile = $class->_read_tar($handle, { %$opts, limit => 1 }); 1776 @data = @$tarfile if ref $tarfile && ref $tarfile eq 'ARRAY'; 1777 if($Archive::Tar::RESOLVE_SYMLINK!~/none/){ 1778 foreach(@data){ 1779 #may refine this heuristic for ON_UNIX? 1780 if($_->linkname){ 1781 #is there a better slot to store/share it ? 1782 $_->{'_archive'} = $CONSTRUCT_ARGS; 1783 } 1784 } 1785 } 1786 1787 ### return one piece of data 1788 return shift(@data) if @data; 1789 1790 ### data is exhausted, free the filehandle 1791 undef $handle; 1792 if(@$CONSTRUCT_ARGS == 4){ 1793 #free archive in memory 1794 undef $CONSTRUCT_ARGS->[-1]; 1795 } 1796 return; 1797 }; 1798} 1799 1800=head2 Archive::Tar->list_archive($file, $compressed, [\@properties]) 1801 1802Returns a list of the names of all the files in the archive. The 1803first argument can either be the name of the tar file to list or a 1804reference to an open file handle (e.g. a GLOB reference). 1805 1806If C<list_archive()> is passed an array reference as its third 1807argument it returns a list of hash references containing the requested 1808properties of each file. The following list of properties is 1809supported: full_path, name, size, mtime (last modified date), mode, 1810uid, gid, linkname, uname, gname, devmajor, devminor, prefix, type. 1811 1812See C<Archive::Tar::File> for details about supported properties. 1813 1814Passing an array reference containing only one element, 'name', is 1815special cased to return a list of names rather than a list of hash 1816references. 1817 1818=cut 1819 1820sub list_archive { 1821 my $class = shift; 1822 my $file = shift; return unless defined $file; 1823 my $gzip = shift || 0; 1824 1825 my $tar = $class->new($file, $gzip); 1826 return unless $tar; 1827 1828 return $tar->list_files( @_ ); 1829} 1830 1831=head2 Archive::Tar->extract_archive($file, $compressed) 1832 1833Extracts the contents of the tar file. The first argument can either 1834be the name of the tar file to create or a reference to an open file 1835handle (e.g. a GLOB reference). All relative paths in the tar file will 1836be created underneath the current working directory. 1837 1838C<extract_archive> will return a list of files it extracted. 1839If the archive extraction fails for any reason, C<extract_archive> 1840will return false. Please use the C<error> method to find the cause 1841of the failure. 1842 1843=cut 1844 1845sub extract_archive { 1846 my $class = shift; 1847 my $file = shift; return unless defined $file; 1848 my $gzip = shift || 0; 1849 1850 my $tar = $class->new( ) or return; 1851 1852 return $tar->read( $file, $gzip, { extract => 1 } ); 1853} 1854 1855=head2 $bool = Archive::Tar->has_io_string 1856 1857Returns true if we currently have C<IO::String> support loaded. 1858 1859Either C<IO::String> or C<perlio> support is needed to support writing 1860stringified archives. Currently, C<perlio> is the preferred method, if 1861available. 1862 1863See the C<GLOBAL VARIABLES> section to see how to change this preference. 1864 1865=cut 1866 1867sub has_io_string { return $HAS_IO_STRING; } 1868 1869=head2 $bool = Archive::Tar->has_perlio 1870 1871Returns true if we currently have C<perlio> support loaded. 1872 1873This requires C<perl-5.8> or higher, compiled with C<perlio> 1874 1875Either C<IO::String> or C<perlio> support is needed to support writing 1876stringified archives. Currently, C<perlio> is the preferred method, if 1877available. 1878 1879See the C<GLOBAL VARIABLES> section to see how to change this preference. 1880 1881=cut 1882 1883sub has_perlio { return $HAS_PERLIO; } 1884 1885=head2 $bool = Archive::Tar->has_zlib_support 1886 1887Returns true if C<Archive::Tar> can extract C<zlib> compressed archives 1888 1889=cut 1890 1891sub has_zlib_support { return ZLIB } 1892 1893=head2 $bool = Archive::Tar->has_bzip2_support 1894 1895Returns true if C<Archive::Tar> can extract C<bzip2> compressed archives 1896 1897=cut 1898 1899sub has_bzip2_support { return BZIP } 1900 1901=head2 Archive::Tar->can_handle_compressed_files 1902 1903A simple checking routine, which will return true if C<Archive::Tar> 1904is able to uncompress compressed archives on the fly with C<IO::Zlib> 1905and C<IO::Compress::Bzip2> or false if not both are installed. 1906 1907You can use this as a shortcut to determine whether C<Archive::Tar> 1908will do what you think before passing compressed archives to its 1909C<read> method. 1910 1911=cut 1912 1913sub can_handle_compressed_files { return ZLIB && BZIP ? 1 : 0 } 1914 1915sub no_string_support { 1916 croak("You have to install IO::String to support writing archives to strings"); 1917} 1918 1919sub _symlinks_resolver{ 1920 my ($src, $trg) = @_; 1921 my @src = split /[\/\\]/, $src; 1922 my @trg = split /[\/\\]/, $trg; 1923 pop @src; #strip out current object name 1924 if(@trg and $trg[0] eq ''){ 1925 shift @trg; 1926 #restart path from scratch 1927 @src = ( ); 1928 } 1929 foreach my $part ( @trg ){ 1930 next if $part eq '.'; #ignore current 1931 if($part eq '..'){ 1932 #got to parent 1933 pop @src; 1934 } 1935 else{ 1936 #append it 1937 push @src, $part; 1938 } 1939 } 1940 my $path = join('/', @src); 1941 warn "_symlinks_resolver('$src','$trg') = $path" if $DEBUG; 1942 return $path; 1943} 1944 19451; 1946 1947__END__ 1948 1949=head1 GLOBAL VARIABLES 1950 1951=head2 $Archive::Tar::FOLLOW_SYMLINK 1952 1953Set this variable to C<1> to make C<Archive::Tar> effectively make a 1954copy of the file when extracting. Default is C<0>, which 1955means the symlink stays intact. Of course, you will have to pack the 1956file linked to as well. 1957 1958This option is checked when you write out the tarfile using C<write> 1959or C<create_archive>. 1960 1961This works just like C</bin/tar>'s C<-h> option. 1962 1963=head2 $Archive::Tar::CHOWN 1964 1965By default, C<Archive::Tar> will try to C<chown> your files if it is 1966able to. In some cases, this may not be desired. In that case, set 1967this variable to C<0> to disable C<chown>-ing, even if it were 1968possible. 1969 1970The default is C<1>. 1971 1972=head2 $Archive::Tar::CHMOD 1973 1974By default, C<Archive::Tar> will try to C<chmod> your files to 1975whatever mode was specified for the particular file in the archive. 1976In some cases, this may not be desired. In that case, set this 1977variable to C<0> to disable C<chmod>-ing. 1978 1979The default is C<1>. 1980 1981=head2 $Archive::Tar::SAME_PERMISSIONS 1982 1983When, C<$Archive::Tar::CHMOD> is enabled, this setting controls whether 1984the permissions on files from the archive are used without modification 1985of if they are filtered by removing any setid bits and applying the 1986current umask. 1987 1988The default is C<1> for the root user and C<0> for normal users. 1989 1990=head2 $Archive::Tar::DO_NOT_USE_PREFIX 1991 1992By default, C<Archive::Tar> will try to put paths that are over 1993100 characters in the C<prefix> field of your tar header, as 1994defined per POSIX-standard. However, some (older) tar programs 1995do not implement this spec. To retain compatibility with these older 1996or non-POSIX compliant versions, you can set the C<$DO_NOT_USE_PREFIX> 1997variable to a true value, and C<Archive::Tar> will use an alternate 1998way of dealing with paths over 100 characters by using the 1999C<GNU Extended Header> feature. 2000 2001Note that clients who do not support the C<GNU Extended Header> 2002feature will not be able to read these archives. Such clients include 2003tars on C<Solaris>, C<Irix> and C<AIX>. 2004 2005The default is C<0>. 2006 2007=head2 $Archive::Tar::DEBUG 2008 2009Set this variable to C<1> to always get the C<Carp::longmess> output 2010of the warnings, instead of the regular C<carp>. This is the same 2011message you would get by doing: 2012 2013 $tar->error(1); 2014 2015Defaults to C<0>. 2016 2017=head2 $Archive::Tar::WARN 2018 2019Set this variable to C<0> if you do not want any warnings printed. 2020Personally I recommend against doing this, but people asked for the 2021option. Also, be advised that this is of course not threadsafe. 2022 2023Defaults to C<1>. 2024 2025=head2 $Archive::Tar::error 2026 2027Holds the last reported error. Kept for historical reasons, but its 2028use is very much discouraged. Use the C<error()> method instead: 2029 2030 warn $tar->error unless $tar->extract; 2031 2032Note that in older versions of this module, the C<error()> method 2033would return an effectively global value even when called an instance 2034method as above. This has since been fixed, and multiple instances of 2035C<Archive::Tar> now have separate error strings. 2036 2037=head2 $Archive::Tar::INSECURE_EXTRACT_MODE 2038 2039This variable indicates whether C<Archive::Tar> should allow 2040files to be extracted outside their current working directory. 2041 2042Allowing this could have security implications, as a malicious 2043tar archive could alter or replace any file the extracting user 2044has permissions to. Therefor, the default is to not allow 2045insecure extractions. 2046 2047If you trust the archive, or have other reasons to allow the 2048archive to write files outside your current working directory, 2049set this variable to C<true>. 2050 2051Note that this is a backwards incompatible change from version 2052C<1.36> and before. 2053 2054=head2 $Archive::Tar::HAS_PERLIO 2055 2056This variable holds a boolean indicating if we currently have 2057C<perlio> support loaded. This will be enabled for any perl 2058greater than C<5.8> compiled with C<perlio>. 2059 2060If you feel strongly about disabling it, set this variable to 2061C<false>. Note that you will then need C<IO::String> installed 2062to support writing stringified archives. 2063 2064Don't change this variable unless you B<really> know what you're 2065doing. 2066 2067=head2 $Archive::Tar::HAS_IO_STRING 2068 2069This variable holds a boolean indicating if we currently have 2070C<IO::String> support loaded. This will be enabled for any perl 2071that has a loadable C<IO::String> module. 2072 2073If you feel strongly about disabling it, set this variable to 2074C<false>. Note that you will then need C<perlio> support from 2075your perl to be able to write stringified archives. 2076 2077Don't change this variable unless you B<really> know what you're 2078doing. 2079 2080=head2 $Archive::Tar::ZERO_PAD_NUMBERS 2081 2082This variable holds a boolean indicating if we will create 2083zero padded numbers for C<size>, C<mtime> and C<checksum>. 2084The default is C<0>, indicating that we will create space padded 2085numbers. Added for compatibility with C<busybox> implementations. 2086 2087=head2 Tuning the way RESOLVE_SYMLINK will works 2088 2089 You can tune the behaviour by setting the $Archive::Tar::RESOLVE_SYMLINK variable, 2090 or $ENV{PERL5_AT_RESOLVE_SYMLINK} before loading the module Archive::Tar. 2091 2092 Values can be one of the following: 2093 2094 none 2095 Disable this mechanism and failed as it was in previous version (<1.88) 2096 2097 speed (default) 2098 If you prefer speed 2099 this will read again the whole archive using read() so all entries 2100 will be available 2101 2102 memory 2103 If you prefer memory 2104 2105 Limitation 2106 2107 It won't work for terminal, pipe or sockets or every non seekable source. 2108 2109=cut 2110 2111=head1 FAQ 2112 2113=over 4 2114 2115=item What's the minimum perl version required to run Archive::Tar? 2116 2117You will need perl version 5.005_03 or newer. 2118 2119=item Isn't Archive::Tar slow? 2120 2121Yes it is. It's pure perl, so it's a lot slower then your C</bin/tar> 2122However, it's very portable. If speed is an issue, consider using 2123C</bin/tar> instead. 2124 2125=item Isn't Archive::Tar heavier on memory than /bin/tar? 2126 2127Yes it is, see previous answer. Since C<Compress::Zlib> and therefore 2128C<IO::Zlib> doesn't support C<seek> on their filehandles, there is little 2129choice but to read the archive into memory. 2130This is ok if you want to do in-memory manipulation of the archive. 2131 2132If you just want to extract, use the C<extract_archive> class method 2133instead. It will optimize and write to disk immediately. 2134 2135Another option is to use the C<iter> class method to iterate over 2136the files in the tarball without reading them all in memory at once. 2137 2138=item Can you lazy-load data instead? 2139 2140In some cases, yes. You can use the C<iter> class method to iterate 2141over the files in the tarball without reading them all in memory at once. 2142 2143=item How much memory will an X kb tar file need? 2144 2145Probably more than X kb, since it will all be read into memory. If 2146this is a problem, and you don't need to do in memory manipulation 2147of the archive, consider using the C<iter> class method, or C</bin/tar> 2148instead. 2149 2150=item What do you do with unsupported filetypes in an archive? 2151 2152C<Unix> has a few filetypes that aren't supported on other platforms, 2153like C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just 2154try to make a copy of the original file, rather than throwing an error. 2155 2156This does require you to read the entire archive in to memory first, 2157since otherwise we wouldn't know what data to fill the copy with. 2158(This means that you cannot use the class methods, including C<iter> 2159on archives that have incompatible filetypes and still expect things 2160to work). 2161 2162For other filetypes, like C<chardevs> and C<blockdevs> we'll warn that 2163the extraction of this particular item didn't work. 2164 2165=item I'm using WinZip, or some other non-POSIX client, and files are not being extracted properly! 2166 2167By default, C<Archive::Tar> is in a completely POSIX-compatible 2168mode, which uses the POSIX-specification of C<tar> to store files. 2169For paths greater than 100 characters, this is done using the 2170C<POSIX header prefix>. Non-POSIX-compatible clients may not support 2171this part of the specification, and may only support the C<GNU Extended 2172Header> functionality. To facilitate those clients, you can set the 2173C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. See the 2174C<GLOBAL VARIABLES> section for details on this variable. 2175 2176Note that GNU tar earlier than version 1.14 does not cope well with 2177the C<POSIX header prefix>. If you use such a version, consider setting 2178the C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. 2179 2180=item How do I extract only files that have property X from an archive? 2181 2182Sometimes, you might not wish to extract a complete archive, just 2183the files that are relevant to you, based on some criteria. 2184 2185You can do this by filtering a list of C<Archive::Tar::File> objects 2186based on your criteria. For example, to extract only files that have 2187the string C<foo> in their title, you would use: 2188 2189 $tar->extract( 2190 grep { $_->full_path =~ /foo/ } $tar->get_files 2191 ); 2192 2193This way, you can filter on any attribute of the files in the archive. 2194Consult the C<Archive::Tar::File> documentation on how to use these 2195objects. 2196 2197=item How do I access .tar.Z files? 2198 2199The C<Archive::Tar> module can optionally use C<Compress::Zlib> (via 2200the C<IO::Zlib> module) to access tar files that have been compressed 2201with C<gzip>. Unfortunately tar files compressed with the Unix C<compress> 2202utility cannot be read by C<Compress::Zlib> and so cannot be directly 2203accesses by C<Archive::Tar>. 2204 2205If the C<uncompress> or C<gunzip> programs are available, you can use 2206one of these workarounds to read C<.tar.Z> files from C<Archive::Tar> 2207 2208Firstly with C<uncompress> 2209 2210 use Archive::Tar; 2211 2212 open F, "uncompress -c $filename |"; 2213 my $tar = Archive::Tar->new(*F); 2214 ... 2215 2216and this with C<gunzip> 2217 2218 use Archive::Tar; 2219 2220 open F, "gunzip -c $filename |"; 2221 my $tar = Archive::Tar->new(*F); 2222 ... 2223 2224Similarly, if the C<compress> program is available, you can use this to 2225write a C<.tar.Z> file 2226 2227 use Archive::Tar; 2228 use IO::File; 2229 2230 my $fh = new IO::File "| compress -c >$filename"; 2231 my $tar = Archive::Tar->new(); 2232 ... 2233 $tar->write($fh); 2234 $fh->close ; 2235 2236=item How do I handle Unicode strings? 2237 2238C<Archive::Tar> uses byte semantics for any files it reads from or writes 2239to disk. This is not a problem if you only deal with files and never 2240look at their content or work solely with byte strings. But if you use 2241Unicode strings with character semantics, some additional steps need 2242to be taken. 2243 2244For example, if you add a Unicode string like 2245 2246 # Problem 2247 $tar->add_data('file.txt', "Euro: \x{20AC}"); 2248 2249then there will be a problem later when the tarfile gets written out 2250to disk via C<$tar->write()>: 2251 2252 Wide character in print at .../Archive/Tar.pm line 1014. 2253 2254The data was added as a Unicode string and when writing it out to disk, 2255the C<:utf8> line discipline wasn't set by C<Archive::Tar>, so Perl 2256tried to convert the string to ISO-8859 and failed. The written file 2257now contains garbage. 2258 2259For this reason, Unicode strings need to be converted to UTF-8-encoded 2260bytestrings before they are handed off to C<add_data()>: 2261 2262 use Encode; 2263 my $data = "Accented character: \x{20AC}"; 2264 $data = encode('utf8', $data); 2265 2266 $tar->add_data('file.txt', $data); 2267 2268A opposite problem occurs if you extract a UTF8-encoded file from a 2269tarball. Using C<get_content()> on the C<Archive::Tar::File> object 2270will return its content as a bytestring, not as a Unicode string. 2271 2272If you want it to be a Unicode string (because you want character 2273semantics with operations like regular expression matching), you need 2274to decode the UTF8-encoded content and have Perl convert it into 2275a Unicode string: 2276 2277 use Encode; 2278 my $data = $tar->get_content(); 2279 2280 # Make it a Unicode string 2281 $data = decode('utf8', $data); 2282 2283There is no easy way to provide this functionality in C<Archive::Tar>, 2284because a tarball can contain many files, and each of which could be 2285encoded in a different way. 2286 2287=back 2288 2289=head1 CAVEATS 2290 2291The AIX tar does not fill all unused space in the tar archive with 0x00. 2292This sometimes leads to warning messages from C<Archive::Tar>. 2293 2294 Invalid header block at offset nnn 2295 2296A fix for that problem is scheduled to be released in the following levels 2297of AIX, all of which should be coming out in the 4th quarter of 2009: 2298 2299 AIX 5.3 TL7 SP10 2300 AIX 5.3 TL8 SP8 2301 AIX 5.3 TL9 SP5 2302 AIX 5.3 TL10 SP2 2303 2304 AIX 6.1 TL0 SP11 2305 AIX 6.1 TL1 SP7 2306 AIX 6.1 TL2 SP6 2307 AIX 6.1 TL3 SP3 2308 2309The IBM APAR number for this problem is IZ50240 (Reported component ID: 23105765G0300 / AIX 5.3). It is possible to get an ifix for that problem. 2311If you need an ifix please contact your local IBM AIX support. 2312 2313=head1 TODO 2314 2315=over 4 2316 2317=item Check if passed in handles are open for read/write 2318 2319Currently I don't know of any portable pure perl way to do this. 2320Suggestions welcome. 2321 2322=item Allow archives to be passed in as string 2323 2324Currently, we only allow opened filehandles or filenames, but 2325not strings. The internals would need some reworking to facilitate 2326stringified archives. 2327 2328=item Facilitate processing an opened filehandle of a compressed archive 2329 2330Currently, we only support this if the filehandle is an IO::Zlib object. 2331Environments, like apache, will present you with an opened filehandle 2332to an uploaded file, which might be a compressed archive. 2333 2334=back 2335 2336=head1 SEE ALSO 2337 2338=over 4 2339 2340=item The GNU tar specification 2341 2342C<http://www.gnu.org/software/tar/manual/tar.html> 2343 2344=item The PAX format specification 2345 2346The specification which tar derives from; C< http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html> 2347 2348=item A comparison of GNU and POSIX tar standards; C<http://www.delorie.com/gnu/docs/tar/tar_114.html> 2349 2350=item GNU tar intends to switch to POSIX compatibility 2351 2352GNU Tar authors have expressed their intention to become completely 2353POSIX-compatible; C<http://www.gnu.org/software/tar/manual/html_node/Formats.html> 2354 2355=item A Comparison between various tar implementations 2356 2357Lists known issues and incompatibilities; C<http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs> 2358 2359=back 2360 2361=head1 AUTHOR 2362 2363This module by Jos Boumans E<lt>kane@cpan.orgE<gt>. 2364 2365Please reports bugs to E<lt>bug-archive-tar@rt.cpan.orgE<gt>. 2366 2367=head1 ACKNOWLEDGEMENTS 2368 2369Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney, Gisle Aas, 2370Rainer Tammer and especially Andrew Savige for their help and suggestions. 2371 2372=head1 COPYRIGHT 2373 2374This module is copyright (c) 2002 - 2009 Jos Boumans 2375E<lt>kane@cpan.orgE<gt>. All rights reserved. 2376 2377This library is free software; you may redistribute and/or modify 2378it under the same terms as Perl itself. 2379 2380=cut 2381