1b39c5158Smillert### the gnu tar specification: 2b39c5158Smillert### http://www.gnu.org/software/tar/manual/tar.html 3b39c5158Smillert### 4b39c5158Smillert### and the pax format spec, which tar derives from: 5b39c5158Smillert### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html 6b39c5158Smillert 7b39c5158Smillertpackage Archive::Tar; 8b39c5158Smillertrequire 5.005_03; 9b39c5158Smillert 10b39c5158Smillertuse Cwd; 11b39c5158Smillertuse IO::Zlib; 12b39c5158Smillertuse IO::File; 13b39c5158Smillertuse Carp qw(carp croak); 14b39c5158Smillertuse File::Spec (); 15b39c5158Smillertuse File::Spec::Unix (); 16b39c5158Smillertuse File::Path (); 17b39c5158Smillert 18b39c5158Smillertuse Archive::Tar::File; 19b39c5158Smillertuse Archive::Tar::Constant; 20b39c5158Smillert 21b39c5158Smillertrequire Exporter; 22b39c5158Smillert 23b39c5158Smillertuse strict; 24b39c5158Smillertuse vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD 25b39c5158Smillert $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING $SAME_PERMISSIONS 266fb12b70Safresh1 $INSECURE_EXTRACT_MODE $ZERO_PAD_NUMBERS @ISA @EXPORT $RESOLVE_SYMLINK 27b39c5158Smillert ]; 28b39c5158Smillert 29b39c5158Smillert@ISA = qw[Exporter]; 3056d68f1eSafresh1@EXPORT = qw[ COMPRESS_GZIP COMPRESS_BZIP COMPRESS_XZ ]; 31b39c5158Smillert$DEBUG = 0; 32b39c5158Smillert$WARN = 1; 33b39c5158Smillert$FOLLOW_SYMLINK = 0; 34*eac174f2Safresh1$VERSION = "2.40"; 35b39c5158Smillert$CHOWN = 1; 36b39c5158Smillert$CHMOD = 1; 37b39c5158Smillert$SAME_PERMISSIONS = $> == 0 ? 1 : 0; 38b39c5158Smillert$DO_NOT_USE_PREFIX = 0; 39b39c5158Smillert$INSECURE_EXTRACT_MODE = 0; 40898184e3Ssthen$ZERO_PAD_NUMBERS = 0; 416fb12b70Safresh1$RESOLVE_SYMLINK = $ENV{'PERL5_AT_RESOLVE_SYMLINK'} || 'speed'; 42b39c5158Smillert 43b39c5158SmillertBEGIN { 44b39c5158Smillert use Config; 45b39c5158Smillert $HAS_PERLIO = $Config::Config{useperlio}; 46b39c5158Smillert 47b39c5158Smillert ### try and load IO::String anyway, so you can dynamically 48b39c5158Smillert ### switch between perlio and IO::String 49b39c5158Smillert $HAS_IO_STRING = eval { 50b39c5158Smillert require IO::String; 51*eac174f2Safresh1 IO::String->import; 52b39c5158Smillert 1; 53b39c5158Smillert } || 0; 54b39c5158Smillert} 55b39c5158Smillert 56b39c5158Smillert=head1 NAME 57b39c5158Smillert 58b39c5158SmillertArchive::Tar - module for manipulations of tar archives 59b39c5158Smillert 60b39c5158Smillert=head1 SYNOPSIS 61b39c5158Smillert 62b39c5158Smillert use Archive::Tar; 63b39c5158Smillert my $tar = Archive::Tar->new; 64b39c5158Smillert 65b39c5158Smillert $tar->read('origin.tgz'); 66b39c5158Smillert $tar->extract(); 67b39c5158Smillert 68b39c5158Smillert $tar->add_files('file/foo.pl', 'docs/README'); 69b39c5158Smillert $tar->add_data('file/baz.txt', 'This is the contents now'); 70b39c5158Smillert 71b39c5158Smillert $tar->rename('oldname', 'new/file/name'); 72898184e3Ssthen $tar->chown('/', 'root'); 73898184e3Ssthen $tar->chown('/', 'root:root'); 74898184e3Ssthen $tar->chmod('/tmp', '1777'); 75b39c5158Smillert 76b39c5158Smillert $tar->write('files.tar'); # plain tar 77b39c5158Smillert $tar->write('files.tgz', COMPRESS_GZIP); # gzip compressed 78b39c5158Smillert $tar->write('files.tbz', COMPRESS_BZIP); # bzip2 compressed 7956d68f1eSafresh1 $tar->write('files.txz', COMPRESS_XZ); # xz compressed 80b39c5158Smillert 81b39c5158Smillert=head1 DESCRIPTION 82b39c5158Smillert 83b39c5158SmillertArchive::Tar provides an object oriented mechanism for handling tar 84b39c5158Smillertfiles. It provides class methods for quick and easy files handling 85b39c5158Smillertwhile also allowing for the creation of tar file objects for custom 86b39c5158Smillertmanipulation. If you have the IO::Zlib module installed, 87b39c5158SmillertArchive::Tar will also support compressed or gzipped tar files. 88b39c5158Smillert 89b39c5158SmillertAn object of class Archive::Tar represents a .tar(.gz) archive full 90b39c5158Smillertof files and things. 91b39c5158Smillert 92b39c5158Smillert=head1 Object Methods 93b39c5158Smillert 94b39c5158Smillert=head2 Archive::Tar->new( [$file, $compressed] ) 95b39c5158Smillert 96b39c5158SmillertReturns a new Tar object. If given any arguments, C<new()> calls the 97b39c5158SmillertC<read()> method automatically, passing on the arguments provided to 98b39c5158Smillertthe C<read()> method. 99b39c5158Smillert 100b39c5158SmillertIf C<new()> is invoked with arguments and the C<read()> method fails 101b39c5158Smillertfor any reason, C<new()> returns undef. 102b39c5158Smillert 103b39c5158Smillert=cut 104b39c5158Smillert 105b39c5158Smillertmy $tmpl = { 106b39c5158Smillert _data => [ ], 107b39c5158Smillert _file => 'Unknown', 108b39c5158Smillert}; 109b39c5158Smillert 110b39c5158Smillert### install get/set accessors for this object. 111b39c5158Smillertfor my $key ( keys %$tmpl ) { 112b39c5158Smillert no strict 'refs'; 113b39c5158Smillert *{__PACKAGE__."::$key"} = sub { 114b39c5158Smillert my $self = shift; 115b39c5158Smillert $self->{$key} = $_[0] if @_; 116b39c5158Smillert return $self->{$key}; 117b39c5158Smillert } 118b39c5158Smillert} 119b39c5158Smillert 120b39c5158Smillertsub new { 121b39c5158Smillert my $class = shift; 122b39c5158Smillert $class = ref $class if ref $class; 123b39c5158Smillert 124b39c5158Smillert ### copying $tmpl here since a shallow copy makes it use the 125b39c5158Smillert ### same aref, causing for files to remain in memory always. 126b39c5158Smillert my $obj = bless { _data => [ ], _file => 'Unknown', _error => '' }, $class; 127b39c5158Smillert 128b39c5158Smillert if (@_) { 129b39c5158Smillert unless ( $obj->read( @_ ) ) { 130b39c5158Smillert $obj->_error(qq[No data could be read from file]); 131b39c5158Smillert return; 132b39c5158Smillert } 133b39c5158Smillert } 134b39c5158Smillert 135b39c5158Smillert return $obj; 136b39c5158Smillert} 137b39c5158Smillert 138b39c5158Smillert=head2 $tar->read ( $filename|$handle, [$compressed, {opt => 'val'}] ) 139b39c5158Smillert 140b39c5158SmillertRead the given tar file into memory. 141b39c5158SmillertThe first argument can either be the name of a file or a reference to 142b39c5158Smillertan already open filehandle (or an IO::Zlib object if it's compressed) 143b39c5158Smillert 144b39c5158SmillertThe C<read> will I<replace> any previous content in C<$tar>! 145b39c5158Smillert 146b39c5158SmillertThe second argument may be considered optional, but remains for 147b39c5158Smillertbackwards compatibility. Archive::Tar now looks at the file 148b39c5158Smillertmagic to determine what class should be used to open the file 149b39c5158Smillertand will transparently Do The Right Thing. 150b39c5158Smillert 15156d68f1eSafresh1Archive::Tar will warn if you try to pass a bzip2 / xz compressed file and the 15256d68f1eSafresh1IO::Uncompress::Bunzip2 / IO::Uncompress::UnXz are not available and simply return. 153b39c5158Smillert 154b39c5158SmillertNote that you can currently B<not> pass a C<gzip> compressed 155b39c5158Smillertfilehandle, which is not opened with C<IO::Zlib>, a C<bzip2> compressed 15656d68f1eSafresh1filehandle, which is not opened with C<IO::Uncompress::Bunzip2>, a C<xz> compressed 15756d68f1eSafresh1filehandle, which is not opened with C<IO::Uncompress::UnXz>, nor a string 158b39c5158Smillertcontaining the full archive information (either compressed or 159b39c5158Smillertuncompressed). These are worth while features, but not currently 160b39c5158Smillertimplemented. See the C<TODO> section. 161b39c5158Smillert 162b39c5158SmillertThe third argument can be a hash reference with options. Note that 163b39c5158Smillertall options are case-sensitive. 164b39c5158Smillert 165b39c5158Smillert=over 4 166b39c5158Smillert 167b39c5158Smillert=item limit 168b39c5158Smillert 169b39c5158SmillertDo not read more than C<limit> files. This is useful if you have 170b39c5158Smillertvery big archives, and are only interested in the first few files. 171b39c5158Smillert 172b39c5158Smillert=item filter 173b39c5158Smillert 174b39c5158SmillertCan be set to a regular expression. Only files with names that match 175b39c5158Smillertthe expression will be read. 176b39c5158Smillert 177898184e3Ssthen=item md5 178898184e3Ssthen 179898184e3SsthenSet to 1 and the md5sum of files will be returned (instead of file data) 180898184e3Ssthen my $iter = Archive::Tar->iter( $file, 1, {md5 => 1} ); 181898184e3Ssthen while( my $f = $iter->() ) { 182898184e3Ssthen print $f->data . "\t" . $f->full_path . $/; 183898184e3Ssthen } 184898184e3Ssthen 185b39c5158Smillert=item extract 186b39c5158Smillert 187b39c5158SmillertIf set to true, immediately extract entries when reading them. This 188b39c5158Smillertgives you the same memory break as the C<extract_archive> function. 189b39c5158SmillertNote however that entries will not be read into memory, but written 190b39c5158Smillertstraight to disk. This means no C<Archive::Tar::File> objects are 191b39c5158Smillertcreated for you to inspect. 192b39c5158Smillert 193b39c5158Smillert=back 194b39c5158Smillert 195b39c5158SmillertAll files are stored internally as C<Archive::Tar::File> objects. 196b39c5158SmillertPlease consult the L<Archive::Tar::File> documentation for details. 197b39c5158Smillert 198b39c5158SmillertReturns the number of files read in scalar context, and a list of 199b39c5158SmillertC<Archive::Tar::File> objects in list context. 200b39c5158Smillert 201b39c5158Smillert=cut 202b39c5158Smillert 203b39c5158Smillertsub read { 204b39c5158Smillert my $self = shift; 205b39c5158Smillert my $file = shift; 206b39c5158Smillert my $gzip = shift || 0; 207b39c5158Smillert my $opts = shift || {}; 208b39c5158Smillert 209b39c5158Smillert unless( defined $file ) { 210b39c5158Smillert $self->_error( qq[No file to read from!] ); 211b39c5158Smillert return; 212b39c5158Smillert } else { 213b39c5158Smillert $self->_file( $file ); 214b39c5158Smillert } 215b39c5158Smillert 216b39c5158Smillert my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) ) 217b39c5158Smillert or return; 218b39c5158Smillert 219b39c5158Smillert my $data = $self->_read_tar( $handle, $opts ) or return; 220b39c5158Smillert 221b39c5158Smillert $self->_data( $data ); 222b39c5158Smillert 223b39c5158Smillert return wantarray ? @$data : scalar @$data; 224b39c5158Smillert} 225b39c5158Smillert 226b39c5158Smillertsub _get_handle { 227b39c5158Smillert my $self = shift; 228b39c5158Smillert my $file = shift; return unless defined $file; 229b39c5158Smillert my $compress = shift || 0; 230b39c5158Smillert my $mode = shift || READ_ONLY->( ZLIB ); # default to read only 231b39c5158Smillert 232898184e3Ssthen ### Check if file is a file handle or IO glob 233898184e3Ssthen if ( ref $file ) { 234898184e3Ssthen return $file if eval{ *$file{IO} }; 235898184e3Ssthen return $file if eval{ $file->isa(q{IO::Handle}) }; 236898184e3Ssthen $file = q{}.$file; 237898184e3Ssthen } 238b39c5158Smillert 239b39c5158Smillert ### get a FH opened to the right class, so we can use it transparently 240b39c5158Smillert ### throughout the program 241b39c5158Smillert my $fh; 242b39c5158Smillert { ### reading magic only makes sense if we're opening a file for 243b39c5158Smillert ### reading. otherwise, just use what the user requested. 244b39c5158Smillert my $magic = ''; 245b39c5158Smillert if( MODE_READ->($mode) ) { 246b39c5158Smillert open my $tmp, $file or do { 247b39c5158Smillert $self->_error( qq[Could not open '$file' for reading: $!] ); 248b39c5158Smillert return; 249b39c5158Smillert }; 250b39c5158Smillert 25156d68f1eSafresh1 ### read the first 6 bytes of the file to figure out which class to 252b39c5158Smillert ### use to open the file. 25356d68f1eSafresh1 sysread( $tmp, $magic, 6 ); 254b39c5158Smillert close $tmp; 255b39c5158Smillert } 256b39c5158Smillert 25756d68f1eSafresh1 ### is it xz? 25856d68f1eSafresh1 ### if you asked specifically for xz compression, or if we're in 25956d68f1eSafresh1 ### read mode and the magic numbers add up, use xz 26056d68f1eSafresh1 if( XZ and ( 26156d68f1eSafresh1 ($compress eq COMPRESS_XZ) or 26256d68f1eSafresh1 ( MODE_READ->($mode) and $magic =~ XZ_MAGIC_NUM ) 26356d68f1eSafresh1 ) 26456d68f1eSafresh1 ) { 26556d68f1eSafresh1 if( MODE_READ->($mode) ) { 26656d68f1eSafresh1 $fh = IO::Uncompress::UnXz->new( $file ) or do { 26756d68f1eSafresh1 $self->_error( qq[Could not read '$file': ] . 26856d68f1eSafresh1 $IO::Uncompress::UnXz::UnXzError 26956d68f1eSafresh1 ); 27056d68f1eSafresh1 return; 27156d68f1eSafresh1 }; 27256d68f1eSafresh1 } else { 27356d68f1eSafresh1 $fh = IO::Compress::Xz->new( $file ) or do { 27456d68f1eSafresh1 $self->_error( qq[Could not write to '$file': ] . 27556d68f1eSafresh1 $IO::Compress::Xz::XzError 27656d68f1eSafresh1 ); 27756d68f1eSafresh1 return; 27856d68f1eSafresh1 }; 27956d68f1eSafresh1 } 28056d68f1eSafresh1 281b39c5158Smillert ### is it bzip? 282b39c5158Smillert ### if you asked specifically for bzip compression, or if we're in 283b39c5158Smillert ### read mode and the magic numbers add up, use bzip 28456d68f1eSafresh1 } elsif( BZIP and ( 285b39c5158Smillert ($compress eq COMPRESS_BZIP) or 286b39c5158Smillert ( MODE_READ->($mode) and $magic =~ BZIP_MAGIC_NUM ) 287b39c5158Smillert ) 288b39c5158Smillert ) { 289b39c5158Smillert 290b39c5158Smillert ### different reader/writer modules, different error vars... sigh 291b39c5158Smillert if( MODE_READ->($mode) ) { 2929f11ffb7Safresh1 $fh = IO::Uncompress::Bunzip2->new( $file, MultiStream => 1 ) or do { 293b39c5158Smillert $self->_error( qq[Could not read '$file': ] . 294b39c5158Smillert $IO::Uncompress::Bunzip2::Bunzip2Error 295b39c5158Smillert ); 296b39c5158Smillert return; 297b39c5158Smillert }; 298b39c5158Smillert 299b39c5158Smillert } else { 300b39c5158Smillert $fh = IO::Compress::Bzip2->new( $file ) or do { 301b39c5158Smillert $self->_error( qq[Could not write to '$file': ] . 302b39c5158Smillert $IO::Compress::Bzip2::Bzip2Error 303b39c5158Smillert ); 304b39c5158Smillert return; 305b39c5158Smillert }; 306b39c5158Smillert } 307b39c5158Smillert 308b39c5158Smillert ### is it gzip? 309b39c5158Smillert ### if you asked for compression, if you wanted to read or the gzip 310b39c5158Smillert ### magic number is present (redundant with read) 311b39c5158Smillert } elsif( ZLIB and ( 312b39c5158Smillert $compress or MODE_READ->($mode) or $magic =~ GZIP_MAGIC_NUM 313b39c5158Smillert ) 314b39c5158Smillert ) { 315b39c5158Smillert $fh = IO::Zlib->new; 316b39c5158Smillert 317b39c5158Smillert unless( $fh->open( $file, $mode ) ) { 318b39c5158Smillert $self->_error(qq[Could not create filehandle for '$file': $!]); 319b39c5158Smillert return; 320b39c5158Smillert } 321b39c5158Smillert 322b39c5158Smillert ### is it plain tar? 323b39c5158Smillert } else { 324b39c5158Smillert $fh = IO::File->new; 325b39c5158Smillert 326b39c5158Smillert unless( $fh->open( $file, $mode ) ) { 327b39c5158Smillert $self->_error(qq[Could not create filehandle for '$file': $!]); 328b39c5158Smillert return; 329b39c5158Smillert } 330b39c5158Smillert 331b39c5158Smillert ### enable bin mode on tar archives 332b39c5158Smillert binmode $fh; 333b39c5158Smillert } 334b39c5158Smillert } 335b39c5158Smillert 336b39c5158Smillert return $fh; 337b39c5158Smillert} 338b39c5158Smillert 339b39c5158Smillert 340b39c5158Smillertsub _read_tar { 341b39c5158Smillert my $self = shift; 342b39c5158Smillert my $handle = shift or return; 343b39c5158Smillert my $opts = shift || {}; 344b39c5158Smillert 345b39c5158Smillert my $count = $opts->{limit} || 0; 346b39c5158Smillert my $filter = $opts->{filter}; 347898184e3Ssthen my $md5 = $opts->{md5} || 0; # cdrake 348898184e3Ssthen my $filter_cb = $opts->{filter_cb}; 349b39c5158Smillert my $extract = $opts->{extract} || 0; 350b39c5158Smillert 351b39c5158Smillert ### set a cap on the amount of files to extract ### 352b39c5158Smillert my $limit = 0; 353b39c5158Smillert $limit = 1 if $count > 0; 354b39c5158Smillert 355b39c5158Smillert my $tarfile = [ ]; 356b39c5158Smillert my $chunk; 357b39c5158Smillert my $read = 0; 358b39c5158Smillert my $real_name; # to set the name of a file when 359b39c5158Smillert # we're encountering @longlink 360b39c5158Smillert my $data; 361b39c5158Smillert 362b39c5158Smillert LOOP: 363b39c5158Smillert while( $handle->read( $chunk, HEAD ) ) { 364b39c5158Smillert ### IO::Zlib doesn't support this yet 36591f110e0Safresh1 my $offset; 36691f110e0Safresh1 if ( ref($handle) ne 'IO::Zlib' ) { 36791f110e0Safresh1 local $@; 36891f110e0Safresh1 $offset = eval { tell $handle } || 'unknown'; 369898184e3Ssthen $@ = ''; 37091f110e0Safresh1 } 37191f110e0Safresh1 else { 37291f110e0Safresh1 $offset = 'unknown'; 37391f110e0Safresh1 } 374b39c5158Smillert 375b39c5158Smillert unless( $read++ ) { 376b39c5158Smillert my $gzip = GZIP_MAGIC_NUM; 377b39c5158Smillert if( $chunk =~ /$gzip/ ) { 378b39c5158Smillert $self->_error( qq[Cannot read compressed format in tar-mode] ); 379b39c5158Smillert return; 380b39c5158Smillert } 381b39c5158Smillert 382b39c5158Smillert ### size is < HEAD, which means a corrupted file, as the minimum 383b39c5158Smillert ### length is _at least_ HEAD 384b39c5158Smillert if (length $chunk != HEAD) { 385b39c5158Smillert $self->_error( qq[Cannot read enough bytes from the tarfile] ); 386b39c5158Smillert return; 387b39c5158Smillert } 388b39c5158Smillert } 389b39c5158Smillert 390b39c5158Smillert ### if we can't read in all bytes... ### 391b39c5158Smillert last if length $chunk != HEAD; 392b39c5158Smillert 393b39c5158Smillert ### Apparently this should really be two blocks of 512 zeroes, 394b39c5158Smillert ### but GNU tar sometimes gets it wrong. See comment in the 395b39c5158Smillert ### source code (tar.c) to GNU cpio. 396b39c5158Smillert next if $chunk eq TAR_END; 397b39c5158Smillert 398b39c5158Smillert ### according to the posix spec, the last 12 bytes of the header are 399b39c5158Smillert ### null bytes, to pad it to a 512 byte block. That means if these 400898184e3Ssthen ### bytes are NOT null bytes, it's a corrupt header. See: 401b39c5158Smillert ### www.koders.com/c/fidCE473AD3D9F835D690259D60AD5654591D91D5BA.aspx 402b39c5158Smillert ### line 111 403b39c5158Smillert { my $nulls = join '', "\0" x 12; 404b39c5158Smillert unless( $nulls eq substr( $chunk, 500, 12 ) ) { 405b39c5158Smillert $self->_error( qq[Invalid header block at offset $offset] ); 406b39c5158Smillert next LOOP; 407b39c5158Smillert } 408b39c5158Smillert } 409b39c5158Smillert 410b39c5158Smillert ### pass the realname, so we can set it 'proper' right away 411b39c5158Smillert ### some of the heuristics are done on the name, so important 412b39c5158Smillert ### to set it ASAP 413b39c5158Smillert my $entry; 414b39c5158Smillert { my %extra_args = (); 415b39c5158Smillert $extra_args{'name'} = $$real_name if defined $real_name; 416b39c5158Smillert 417b39c5158Smillert unless( $entry = Archive::Tar::File->new( chunk => $chunk, 418b39c5158Smillert %extra_args ) 419b39c5158Smillert ) { 420b39c5158Smillert $self->_error( qq[Couldn't read chunk at offset $offset] ); 421b39c5158Smillert next LOOP; 422b39c5158Smillert } 423b39c5158Smillert } 424b39c5158Smillert 425b39c5158Smillert ### ignore labels: 426898184e3Ssthen ### http://www.gnu.org/software/tar/manual/html_chapter/Media.html#SEC159 427b39c5158Smillert next if $entry->is_label; 428b39c5158Smillert 429b39c5158Smillert if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) { 430b39c5158Smillert 431b39c5158Smillert if ( $entry->is_file && !$entry->validate ) { 432b39c5158Smillert ### sometimes the chunk is rather fux0r3d and a whole 512 433b39c5158Smillert ### bytes ends up in the ->name area. 434b39c5158Smillert ### clean it up, if need be 435b39c5158Smillert my $name = $entry->name; 436b39c5158Smillert $name = substr($name, 0, 100) if length $name > 100; 437b39c5158Smillert $name =~ s/\n/ /g; 438b39c5158Smillert 439b39c5158Smillert $self->_error( $name . qq[: checksum error] ); 440b39c5158Smillert next LOOP; 441b39c5158Smillert } 442b39c5158Smillert 443b39c5158Smillert my $block = BLOCK_SIZE->( $entry->size ); 444b39c5158Smillert 445b39c5158Smillert $data = $entry->get_content_by_ref; 446b39c5158Smillert 447898184e3Ssthen my $skip = 0; 448898184e3Ssthen my $ctx; # cdrake 449898184e3Ssthen ### skip this entry if we're filtering 450898184e3Ssthen 451898184e3Ssthen if($md5) { # cdrake 452898184e3Ssthen $ctx = Digest::MD5->new; # cdrake 453898184e3Ssthen $skip=5; # cdrake 454898184e3Ssthen 455898184e3Ssthen } elsif ($filter && $entry->name !~ $filter) { 456898184e3Ssthen $skip = 1; 457898184e3Ssthen 458b8851fccSafresh1 } elsif ($filter_cb && ! $filter_cb->($entry)) { 459b8851fccSafresh1 $skip = 2; 460b8851fccSafresh1 461898184e3Ssthen ### skip this entry if it's a pax header. This is a special file added 462898184e3Ssthen ### by, among others, git-generated tarballs. It holds comments and is 463898184e3Ssthen ### not meant for extracting. See #38932: pax_global_header extracted 464898184e3Ssthen } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) { 465898184e3Ssthen $skip = 3; 466898184e3Ssthen } 467898184e3Ssthen 468898184e3Ssthen if ($skip) { 469898184e3Ssthen # 470898184e3Ssthen # Since we're skipping, do not allocate memory for the 471898184e3Ssthen # whole file. Read it 64 BLOCKS at a time. Do not 472898184e3Ssthen # complete the skip yet because maybe what we read is a 473898184e3Ssthen # longlink and it won't get skipped after all 474898184e3Ssthen # 475898184e3Ssthen my $amt = $block; 476898184e3Ssthen my $fsz=$entry->size; # cdrake 477898184e3Ssthen while ($amt > 0) { 478898184e3Ssthen $$data = ''; 479898184e3Ssthen my $this = 64 * BLOCK; 480898184e3Ssthen $this = $amt if $this > $amt; 481898184e3Ssthen if( $handle->read( $$data, $this ) < $this ) { 482898184e3Ssthen $self->_error( qq[Read error on tarfile (missing data) ']. 483898184e3Ssthen $entry->full_path ."' at offset $offset" ); 484898184e3Ssthen next LOOP; 485898184e3Ssthen } 486898184e3Ssthen $amt -= $this; 487898184e3Ssthen $fsz -= $this; # cdrake 488898184e3Ssthen substr ($$data, $fsz) = "" if ($fsz<0); # remove external junk prior to md5 # cdrake 489898184e3Ssthen $ctx->add($$data) if($skip==5); # cdrake 490898184e3Ssthen } 491898184e3Ssthen $$data = $ctx->hexdigest if($skip==5 && !$entry->is_longlink && !$entry->is_unknown && !$entry->is_label ) ; # cdrake 492898184e3Ssthen } else { 493898184e3Ssthen 494b39c5158Smillert ### just read everything into memory 495b39c5158Smillert ### can't do lazy loading since IO::Zlib doesn't support 'seek' 496b39c5158Smillert ### this is because Compress::Zlib doesn't support it =/ 497b39c5158Smillert ### this reads in the whole data in one read() call. 498b39c5158Smillert if ( $handle->read( $$data, $block ) < $block ) { 499b39c5158Smillert $self->_error( qq[Read error on tarfile (missing data) ']. 500b39c5158Smillert $entry->full_path ."' at offset $offset" ); 501b39c5158Smillert next LOOP; 502b39c5158Smillert } 503b39c5158Smillert ### throw away trailing garbage ### 504b39c5158Smillert substr ($$data, $entry->size) = "" if defined $$data; 505898184e3Ssthen } 506b39c5158Smillert 507b39c5158Smillert ### part II of the @LongLink munging -- need to do /after/ 508b39c5158Smillert ### the checksum check. 509b39c5158Smillert if( $entry->is_longlink ) { 510b39c5158Smillert ### weird thing in tarfiles -- if the file is actually a 511b39c5158Smillert ### @LongLink, the data part seems to have a trailing ^@ 512b39c5158Smillert ### (unprintable) char. to display, pipe output through less. 513b39c5158Smillert ### but that doesn't *always* happen.. so check if the last 514b39c5158Smillert ### character is a control character, and if so remove it 515b39c5158Smillert ### at any rate, we better remove that character here, or tests 516b39c5158Smillert ### like 'eq' and hash lookups based on names will SO not work 517b39c5158Smillert ### remove it by calculating the proper size, and then 518b39c5158Smillert ### tossing out everything that's longer than that size. 519b39c5158Smillert 520b39c5158Smillert ### count number of nulls 521b39c5158Smillert my $nulls = $$data =~ tr/\0/\0/; 522b39c5158Smillert 523b39c5158Smillert ### cut data + size by that many bytes 524b39c5158Smillert $entry->size( $entry->size - $nulls ); 525b39c5158Smillert substr ($$data, $entry->size) = ""; 526b39c5158Smillert } 527b39c5158Smillert } 528b39c5158Smillert 529b39c5158Smillert ### clean up of the entries.. posix tar /apparently/ has some 530b39c5158Smillert ### weird 'feature' that allows for filenames > 255 characters 531b39c5158Smillert ### they'll put a header in with as name '././@LongLink' and the 532b39c5158Smillert ### contents will be the name of the /next/ file in the archive 533b39c5158Smillert ### pretty crappy and kludgy if you ask me 534b39c5158Smillert 535b39c5158Smillert ### set the name for the next entry if this is a @LongLink; 536b39c5158Smillert ### this is one ugly hack =/ but needed for direct extraction 537b39c5158Smillert if( $entry->is_longlink ) { 538b39c5158Smillert $real_name = $data; 539b39c5158Smillert next LOOP; 540b39c5158Smillert } elsif ( defined $real_name ) { 541b39c5158Smillert $entry->name( $$real_name ); 542b39c5158Smillert $entry->prefix(''); 543b39c5158Smillert undef $real_name; 544b39c5158Smillert } 545b39c5158Smillert 546b39c5158Smillert if ($filter && $entry->name !~ $filter) { 547b39c5158Smillert next LOOP; 548b39c5158Smillert 549b8851fccSafresh1 } elsif ($filter_cb && ! $filter_cb->($entry)) { 550b8851fccSafresh1 next LOOP; 551b8851fccSafresh1 552b39c5158Smillert ### skip this entry if it's a pax header. This is a special file added 553b39c5158Smillert ### by, among others, git-generated tarballs. It holds comments and is 554b39c5158Smillert ### not meant for extracting. See #38932: pax_global_header extracted 555898184e3Ssthen } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) { 556898184e3Ssthen next LOOP; 557b39c5158Smillert } 558b39c5158Smillert 559898184e3Ssthen if ( $extract && !$entry->is_longlink 560b39c5158Smillert && !$entry->is_unknown 561898184e3Ssthen && !$entry->is_label ) { 562898184e3Ssthen $self->_extract_file( $entry ) or return; 563898184e3Ssthen } 564b39c5158Smillert 565b39c5158Smillert ### Guard against tarfiles with garbage at the end 566b39c5158Smillert last LOOP if $entry->name eq ''; 567b39c5158Smillert 568b39c5158Smillert ### push only the name on the rv if we're extracting 569b39c5158Smillert ### -- for extract_archive 570b39c5158Smillert push @$tarfile, ($extract ? $entry->name : $entry); 571b39c5158Smillert 572b39c5158Smillert if( $limit ) { 573b39c5158Smillert $count-- unless $entry->is_longlink || $entry->is_dir; 574b39c5158Smillert last LOOP unless $count; 575b39c5158Smillert } 576b39c5158Smillert } continue { 577b39c5158Smillert undef $data; 578b39c5158Smillert } 579b39c5158Smillert 580b39c5158Smillert return $tarfile; 581b39c5158Smillert} 582b39c5158Smillert 583b39c5158Smillert=head2 $tar->contains_file( $filename ) 584b39c5158Smillert 585b39c5158SmillertCheck if the archive contains a certain file. 586b39c5158SmillertIt will return true if the file is in the archive, false otherwise. 587b39c5158Smillert 588b39c5158SmillertNote however, that this function does an exact match using C<eq> 589b39c5158Smillerton the full path. So it cannot compensate for case-insensitive file- 590b39c5158Smillertsystems or compare 2 paths to see if they would point to the same 591b39c5158Smillertunderlying file. 592b39c5158Smillert 593b39c5158Smillert=cut 594b39c5158Smillert 595b39c5158Smillertsub contains_file { 596b39c5158Smillert my $self = shift; 597b39c5158Smillert my $full = shift; 598b39c5158Smillert 599b39c5158Smillert return unless defined $full; 600b39c5158Smillert 601b39c5158Smillert ### don't warn if the entry isn't there.. that's what this function 602b39c5158Smillert ### is for after all. 603b39c5158Smillert local $WARN = 0; 604b39c5158Smillert return 1 if $self->_find_entry($full); 605b39c5158Smillert return; 606b39c5158Smillert} 607b39c5158Smillert 608b39c5158Smillert=head2 $tar->extract( [@filenames] ) 609b39c5158Smillert 610b39c5158SmillertWrite files whose names are equivalent to any of the names in 611b39c5158SmillertC<@filenames> to disk, creating subdirectories as necessary. This 612b39c5158Smillertmight not work too well under VMS. 613b39c5158SmillertUnder MacPerl, the file's modification time will be converted to the 614b39c5158SmillertMacOS zero of time, and appropriate conversions will be done to the 615b39c5158Smillertpath. However, the length of each element of the path is not 616b39c5158Smillertinspected to see whether it's longer than MacOS currently allows (32 617b39c5158Smillertcharacters). 618b39c5158Smillert 619b39c5158SmillertIf C<extract> is called without a list of file names, the entire 620b39c5158Smillertcontents of the archive are extracted. 621b39c5158Smillert 622b39c5158SmillertReturns a list of filenames extracted. 623b39c5158Smillert 624b39c5158Smillert=cut 625b39c5158Smillert 626b39c5158Smillertsub extract { 627b39c5158Smillert my $self = shift; 628b39c5158Smillert my @args = @_; 629b39c5158Smillert my @files; 6309f11ffb7Safresh1 my $hashmap; 631b39c5158Smillert 632b39c5158Smillert # use the speed optimization for all extracted files 633b39c5158Smillert local($self->{cwd}) = cwd() unless $self->{cwd}; 634b39c5158Smillert 635898184e3Ssthen ### you requested the extraction of only certain files 636b39c5158Smillert if( @args ) { 637b39c5158Smillert for my $file ( @args ) { 638b39c5158Smillert 639b39c5158Smillert ### it's already an object? 640b39c5158Smillert if( UNIVERSAL::isa( $file, 'Archive::Tar::File' ) ) { 641b39c5158Smillert push @files, $file; 642b39c5158Smillert next; 643b39c5158Smillert 644b39c5158Smillert ### go find it then 645b39c5158Smillert } else { 646b39c5158Smillert 6479f11ffb7Safresh1 # create hash-map once to speed up lookup 6489f11ffb7Safresh1 $hashmap = $hashmap || { 6499f11ffb7Safresh1 map { $_->full_path, $_ } @{$self->_data} 6509f11ffb7Safresh1 }; 651b39c5158Smillert 6529f11ffb7Safresh1 if (exists $hashmap->{$file}) { 653b39c5158Smillert ### we found the file you're looking for 6549f11ffb7Safresh1 push @files, $hashmap->{$file}; 6559f11ffb7Safresh1 } else { 656b39c5158Smillert return $self->_error( 657b39c5158Smillert qq[Could not find '$file' in archive] ); 658b39c5158Smillert } 659b39c5158Smillert } 660b39c5158Smillert } 661b39c5158Smillert 662b39c5158Smillert ### just grab all the file items 663b39c5158Smillert } else { 664b39c5158Smillert @files = $self->get_files; 665b39c5158Smillert } 666b39c5158Smillert 667b39c5158Smillert ### nothing found? that's an error 668b39c5158Smillert unless( scalar @files ) { 669b39c5158Smillert $self->_error( qq[No files found for ] . $self->_file ); 670b39c5158Smillert return; 671b39c5158Smillert } 672b39c5158Smillert 673b39c5158Smillert ### now extract them 674b39c5158Smillert for my $entry ( @files ) { 675b39c5158Smillert unless( $self->_extract_file( $entry ) ) { 676b39c5158Smillert $self->_error(q[Could not extract ']. $entry->full_path .q['] ); 677b39c5158Smillert return; 678b39c5158Smillert } 679b39c5158Smillert } 680b39c5158Smillert 681b39c5158Smillert return @files; 682b39c5158Smillert} 683b39c5158Smillert 684b39c5158Smillert=head2 $tar->extract_file( $file, [$extract_path] ) 685b39c5158Smillert 686b39c5158SmillertWrite an entry, whose name is equivalent to the file name provided to 687b39c5158Smillertdisk. Optionally takes a second parameter, which is the full native 688b39c5158Smillertpath (including filename) the entry will be written to. 689b39c5158Smillert 690b39c5158SmillertFor example: 691b39c5158Smillert 692b39c5158Smillert $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' ); 693b39c5158Smillert 694b39c5158Smillert $tar->extract_file( $at_file_object, 'name/i/want/to/give/it' ); 695b39c5158Smillert 696b39c5158SmillertReturns true on success, false on failure. 697b39c5158Smillert 698b39c5158Smillert=cut 699b39c5158Smillert 700b39c5158Smillertsub extract_file { 701b39c5158Smillert my $self = shift; 702b39c5158Smillert my $file = shift; return unless defined $file; 703b39c5158Smillert my $alt = shift; 704b39c5158Smillert 705b39c5158Smillert my $entry = $self->_find_entry( $file ) 706b39c5158Smillert or $self->_error( qq[Could not find an entry for '$file'] ), return; 707b39c5158Smillert 708b39c5158Smillert return $self->_extract_file( $entry, $alt ); 709b39c5158Smillert} 710b39c5158Smillert 711b39c5158Smillertsub _extract_file { 712b39c5158Smillert my $self = shift; 713b39c5158Smillert my $entry = shift or return; 714b39c5158Smillert my $alt = shift; 715b39c5158Smillert 716b39c5158Smillert ### you wanted an alternate extraction location ### 717b39c5158Smillert my $name = defined $alt ? $alt : $entry->full_path; 718b39c5158Smillert 719b39c5158Smillert ### splitpath takes a bool at the end to indicate 720b39c5158Smillert ### that it's splitting a dir 721b39c5158Smillert my ($vol,$dirs,$file); 722b39c5158Smillert if ( defined $alt ) { # It's a local-OS path 723b39c5158Smillert ($vol,$dirs,$file) = File::Spec->splitpath( $alt, 724b39c5158Smillert $entry->is_dir ); 725b39c5158Smillert } else { 726b39c5158Smillert ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name, 727b39c5158Smillert $entry->is_dir ); 728b39c5158Smillert } 729b39c5158Smillert 730b39c5158Smillert my $dir; 731b39c5158Smillert ### is $name an absolute path? ### 732b39c5158Smillert if( $vol || File::Spec->file_name_is_absolute( $dirs ) ) { 733b39c5158Smillert 734b39c5158Smillert ### absolute names are not allowed to be in tarballs under 735b39c5158Smillert ### strict mode, so only allow it if a user tells us to do it 736b39c5158Smillert if( not defined $alt and not $INSECURE_EXTRACT_MODE ) { 737b39c5158Smillert $self->_error( 738b39c5158Smillert q[Entry ']. $entry->full_path .q[' is an absolute path. ]. 739b39c5158Smillert q[Not extracting absolute paths under SECURE EXTRACT MODE] 740b39c5158Smillert ); 741b39c5158Smillert return; 742b39c5158Smillert } 743b39c5158Smillert 744b39c5158Smillert ### user asked us to, it's fine. 745b39c5158Smillert $dir = File::Spec->catpath( $vol, $dirs, "" ); 746b39c5158Smillert 747b39c5158Smillert ### it's a relative path ### 748b39c5158Smillert } else { 749b39c5158Smillert my $cwd = (ref $self and defined $self->{cwd}) 750b39c5158Smillert ? $self->{cwd} 751b39c5158Smillert : cwd(); 752b39c5158Smillert 753b39c5158Smillert my @dirs = defined $alt 754b39c5158Smillert ? File::Spec->splitdir( $dirs ) # It's a local-OS path 755b39c5158Smillert : File::Spec::Unix->splitdir( $dirs ); # it's UNIX-style, likely 756b39c5158Smillert # straight from the tarball 757b39c5158Smillert 758b39c5158Smillert if( not defined $alt and 759b39c5158Smillert not $INSECURE_EXTRACT_MODE 760b39c5158Smillert ) { 761b39c5158Smillert 762b39c5158Smillert ### paths that leave the current directory are not allowed under 763b39c5158Smillert ### strict mode, so only allow it if a user tells us to do this. 764b39c5158Smillert if( grep { $_ eq '..' } @dirs ) { 765b39c5158Smillert 766b39c5158Smillert $self->_error( 767b39c5158Smillert q[Entry ']. $entry->full_path .q[' is attempting to leave ]. 768b39c5158Smillert q[the current working directory. Not extracting under ]. 769b39c5158Smillert q[SECURE EXTRACT MODE] 770b39c5158Smillert ); 771b39c5158Smillert return; 772b39c5158Smillert } 773b39c5158Smillert 774b39c5158Smillert ### the archive may be asking us to extract into a symlink. This 775b39c5158Smillert ### is not sane and a possible security issue, as outlined here: 776b39c5158Smillert ### https://rt.cpan.org/Ticket/Display.html?id=30380 777b39c5158Smillert ### https://bugzilla.redhat.com/show_bug.cgi?id=295021 778b39c5158Smillert ### https://issues.rpath.com/browse/RPL-1716 779b39c5158Smillert my $full_path = $cwd; 780b39c5158Smillert for my $d ( @dirs ) { 781b39c5158Smillert $full_path = File::Spec->catdir( $full_path, $d ); 782b39c5158Smillert 783b39c5158Smillert ### we've already checked this one, and it's safe. Move on. 784b39c5158Smillert next if ref $self and $self->{_link_cache}->{$full_path}; 785b39c5158Smillert 786b39c5158Smillert if( -l $full_path ) { 787b39c5158Smillert my $to = readlink $full_path; 788b39c5158Smillert my $diag = "symlinked directory ($full_path => $to)"; 789b39c5158Smillert 790b39c5158Smillert $self->_error( 791b39c5158Smillert q[Entry ']. $entry->full_path .q[' is attempting to ]. 792b39c5158Smillert qq[extract to a $diag. This is considered a security ]. 793b39c5158Smillert q[vulnerability and not allowed under SECURE EXTRACT ]. 794b39c5158Smillert q[MODE] 795b39c5158Smillert ); 796b39c5158Smillert return; 797b39c5158Smillert } 798b39c5158Smillert 799b39c5158Smillert ### XXX keep a cache if possible, so the stats become cheaper: 800b39c5158Smillert $self->{_link_cache}->{$full_path} = 1 if ref $self; 801b39c5158Smillert } 802b39c5158Smillert } 803b39c5158Smillert 804b39c5158Smillert ### '.' is the directory delimiter on VMS, which has to be escaped 805b39c5158Smillert ### or changed to '_' on vms. vmsify is used, because older versions 806b39c5158Smillert ### of vmspath do not handle this properly. 807b39c5158Smillert ### Must not add a '/' to an empty directory though. 808b39c5158Smillert map { length() ? VMS::Filespec::vmsify($_.'/') : $_ } @dirs if ON_VMS; 809b39c5158Smillert 810b39c5158Smillert my ($cwd_vol,$cwd_dir,$cwd_file) 811b39c5158Smillert = File::Spec->splitpath( $cwd ); 812b39c5158Smillert my @cwd = File::Spec->splitdir( $cwd_dir ); 813b39c5158Smillert push @cwd, $cwd_file if length $cwd_file; 814b39c5158Smillert 815898184e3Ssthen ### We need to pass '' as the last element to catpath. Craig Berry 816b39c5158Smillert ### explains why (msgid <p0624083dc311ae541393@[172.16.52.1]>): 817b39c5158Smillert ### The root problem is that splitpath on UNIX always returns the 818b39c5158Smillert ### final path element as a file even if it is a directory, and of 819b39c5158Smillert ### course there is no way it can know the difference without checking 820b39c5158Smillert ### against the filesystem, which it is documented as not doing. When 821b39c5158Smillert ### you turn around and call catpath, on VMS you have to know which bits 822b39c5158Smillert ### are directory bits and which bits are file bits. In this case we 823b39c5158Smillert ### know the result should be a directory. I had thought you could omit 824b39c5158Smillert ### the file argument to catpath in such a case, but apparently on UNIX 825b39c5158Smillert ### you can't. 826b39c5158Smillert $dir = File::Spec->catpath( 827b39c5158Smillert $cwd_vol, File::Spec->catdir( @cwd, @dirs ), '' 828b39c5158Smillert ); 829b39c5158Smillert 830b39c5158Smillert ### catdir() returns undef if the path is longer than 255 chars on 831b39c5158Smillert ### older VMS systems. 832b39c5158Smillert unless ( defined $dir ) { 833b39c5158Smillert $^W && $self->_error( qq[Could not compose a path for '$dirs'\n] ); 834b39c5158Smillert return; 835b39c5158Smillert } 836b39c5158Smillert 837b39c5158Smillert } 838b39c5158Smillert 839b39c5158Smillert if( -e $dir && !-d _ ) { 840b39c5158Smillert $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] ); 841b39c5158Smillert return; 842b39c5158Smillert } 843b39c5158Smillert 844b39c5158Smillert unless ( -d _ ) { 845b39c5158Smillert eval { File::Path::mkpath( $dir, 0, 0777 ) }; 846b39c5158Smillert if( $@ ) { 847b39c5158Smillert my $fp = $entry->full_path; 848b39c5158Smillert $self->_error(qq[Could not create directory '$dir' for '$fp': $@]); 849b39c5158Smillert return; 850b39c5158Smillert } 851b39c5158Smillert 852b39c5158Smillert ### XXX chown here? that might not be the same as in the archive 853b39c5158Smillert ### as we're only chown'ing to the owner of the file we're extracting 854b39c5158Smillert ### not to the owner of the directory itself, which may or may not 855b39c5158Smillert ### be another entry in the archive 856b39c5158Smillert ### Answer: no, gnu tar doesn't do it either, it'd be the wrong 857b39c5158Smillert ### way to go. 858b39c5158Smillert #if( $CHOWN && CAN_CHOWN ) { 859b39c5158Smillert # chown $entry->uid, $entry->gid, $dir or 860b39c5158Smillert # $self->_error( qq[Could not set uid/gid on '$dir'] ); 861b39c5158Smillert #} 862b39c5158Smillert } 863b39c5158Smillert 864b39c5158Smillert ### we're done if we just needed to create a dir ### 865b39c5158Smillert return 1 if $entry->is_dir; 866b39c5158Smillert 867b39c5158Smillert my $full = File::Spec->catfile( $dir, $file ); 868b39c5158Smillert 869b39c5158Smillert if( $entry->is_unknown ) { 870b39c5158Smillert $self->_error( qq[Unknown file type for file '$full'] ); 871b39c5158Smillert return; 872b39c5158Smillert } 873b39c5158Smillert 87426f75d2dSafresh1 ### If a file system already contains a block device with the same name as 87526f75d2dSafresh1 ### the being extracted regular file, we would write the file's content 87626f75d2dSafresh1 ### to the block device. So remove the existing file (block device) now. 87726f75d2dSafresh1 ### If an archive contains multiple same-named entries, the last one 87826f75d2dSafresh1 ### should replace the previous ones. So remove the old file now. 87926f75d2dSafresh1 ### If the old entry is a symlink to a file outside of the CWD, the new 88026f75d2dSafresh1 ### entry would create a file there. This is CVE-2018-12015 88126f75d2dSafresh1 ### <https://rt.cpan.org/Ticket/Display.html?id=125523>. 88226f75d2dSafresh1 if (-l $full || -e _) { 88326f75d2dSafresh1 if (!unlink $full) { 88426f75d2dSafresh1 $self->_error( qq[Could not remove old file '$full': $!] ); 88526f75d2dSafresh1 return; 88626f75d2dSafresh1 } 88726f75d2dSafresh1 } 888b39c5158Smillert if( length $entry->type && $entry->is_file ) { 889b39c5158Smillert my $fh = IO::File->new; 8909f11ffb7Safresh1 $fh->open( $full, '>' ) or ( 891b39c5158Smillert $self->_error( qq[Could not open file '$full': $!] ), 892b39c5158Smillert return 893b39c5158Smillert ); 894b39c5158Smillert 895b39c5158Smillert if( $entry->size ) { 896b39c5158Smillert binmode $fh; 897b39c5158Smillert syswrite $fh, $entry->data or ( 898b39c5158Smillert $self->_error( qq[Could not write data to '$full'] ), 899b39c5158Smillert return 900b39c5158Smillert ); 901b39c5158Smillert } 902b39c5158Smillert 903b39c5158Smillert close $fh or ( 904b39c5158Smillert $self->_error( qq[Could not close file '$full'] ), 905b39c5158Smillert return 906b39c5158Smillert ); 907b39c5158Smillert 908b39c5158Smillert } else { 909b39c5158Smillert $self->_make_special_file( $entry, $full ) or return; 910b39c5158Smillert } 911b39c5158Smillert 912b39c5158Smillert ### only update the timestamp if it's not a symlink; that will change the 913b39c5158Smillert ### timestamp of the original. This addresses bug #33669: Could not update 914b39c5158Smillert ### timestamp warning on symlinks 915b39c5158Smillert if( not -l $full ) { 916b39c5158Smillert utime time, $entry->mtime - TIME_OFFSET, $full or 917b39c5158Smillert $self->_error( qq[Could not update timestamp] ); 918b39c5158Smillert } 919b39c5158Smillert 92091f110e0Safresh1 if( $CHOWN && CAN_CHOWN->() and not -l $full ) { 921*eac174f2Safresh1 CORE::chown( $entry->uid, $entry->gid, $full ) or 922b39c5158Smillert $self->_error( qq[Could not set uid/gid on '$full'] ); 923b39c5158Smillert } 924b39c5158Smillert 925b39c5158Smillert ### only chmod if we're allowed to, but never chmod symlinks, since they'll 926b39c5158Smillert ### change the perms on the file they're linking too... 927b39c5158Smillert if( $CHMOD and not -l $full ) { 928b39c5158Smillert my $mode = $entry->mode; 929b39c5158Smillert unless ($SAME_PERMISSIONS) { 930b39c5158Smillert $mode &= ~(oct(7000) | umask); 931b39c5158Smillert } 932*eac174f2Safresh1 CORE::chmod( $mode, $full ) or 933b39c5158Smillert $self->_error( qq[Could not chown '$full' to ] . $entry->mode ); 934b39c5158Smillert } 935b39c5158Smillert 936b39c5158Smillert return 1; 937b39c5158Smillert} 938b39c5158Smillert 939b39c5158Smillertsub _make_special_file { 940b39c5158Smillert my $self = shift; 941b39c5158Smillert my $entry = shift or return; 942b39c5158Smillert my $file = shift; return unless defined $file; 943b39c5158Smillert 944b39c5158Smillert my $err; 945b39c5158Smillert 946b39c5158Smillert if( $entry->is_symlink ) { 947b39c5158Smillert my $fail; 948b39c5158Smillert if( ON_UNIX ) { 949b39c5158Smillert symlink( $entry->linkname, $file ) or $fail++; 950b39c5158Smillert 951b39c5158Smillert } else { 952b39c5158Smillert $self->_extract_special_file_as_plain_file( $entry, $file ) 953b39c5158Smillert or $fail++; 954b39c5158Smillert } 955b39c5158Smillert 956b39c5158Smillert $err = qq[Making symbolic link '$file' to '] . 957b39c5158Smillert $entry->linkname .q[' failed] if $fail; 958b39c5158Smillert 959b39c5158Smillert } elsif ( $entry->is_hardlink ) { 960b39c5158Smillert my $fail; 961b39c5158Smillert if( ON_UNIX ) { 962b39c5158Smillert link( $entry->linkname, $file ) or $fail++; 963b39c5158Smillert 964b39c5158Smillert } else { 965b39c5158Smillert $self->_extract_special_file_as_plain_file( $entry, $file ) 966b39c5158Smillert or $fail++; 967b39c5158Smillert } 968b39c5158Smillert 969b39c5158Smillert $err = qq[Making hard link from '] . $entry->linkname . 970b39c5158Smillert qq[' to '$file' failed] if $fail; 971b39c5158Smillert 972b39c5158Smillert } elsif ( $entry->is_fifo ) { 973b39c5158Smillert ON_UNIX && !system('mknod', $file, 'p') or 974b39c5158Smillert $err = qq[Making fifo ']. $entry->name .qq[' failed]; 975b39c5158Smillert 976b39c5158Smillert } elsif ( $entry->is_blockdev or $entry->is_chardev ) { 977b39c5158Smillert my $mode = $entry->is_blockdev ? 'b' : 'c'; 978b39c5158Smillert 979b39c5158Smillert ON_UNIX && !system('mknod', $file, $mode, 980b39c5158Smillert $entry->devmajor, $entry->devminor) or 981b39c5158Smillert $err = qq[Making block device ']. $entry->name .qq[' (maj=] . 982b39c5158Smillert $entry->devmajor . qq[ min=] . $entry->devminor . 983b39c5158Smillert qq[) failed.]; 984b39c5158Smillert 985b39c5158Smillert } elsif ( $entry->is_socket ) { 986b39c5158Smillert ### the original doesn't do anything special for sockets.... ### 987b39c5158Smillert 1; 988b39c5158Smillert } 989b39c5158Smillert 990b39c5158Smillert return $err ? $self->_error( $err ) : 1; 991b39c5158Smillert} 992b39c5158Smillert 993b39c5158Smillert### don't know how to make symlinks, let's just extract the file as 994b39c5158Smillert### a plain file 995b39c5158Smillertsub _extract_special_file_as_plain_file { 996b39c5158Smillert my $self = shift; 997b39c5158Smillert my $entry = shift or return; 998b39c5158Smillert my $file = shift; return unless defined $file; 999b39c5158Smillert 1000b39c5158Smillert my $err; 1001b39c5158Smillert TRY: { 10026fb12b70Safresh1 my $orig = $self->_find_entry( $entry->linkname, $entry ); 1003b39c5158Smillert 1004b39c5158Smillert unless( $orig ) { 1005b39c5158Smillert $err = qq[Could not find file '] . $entry->linkname . 1006b39c5158Smillert qq[' in memory.]; 1007b39c5158Smillert last TRY; 1008b39c5158Smillert } 1009b39c5158Smillert 1010b39c5158Smillert ### clone the entry, make it appear as a normal file ### 10116fb12b70Safresh1 my $clone = $orig->clone; 1012b39c5158Smillert $clone->_downgrade_to_plainfile; 1013b39c5158Smillert $self->_extract_file( $clone, $file ) or last TRY; 1014b39c5158Smillert 1015b39c5158Smillert return 1; 1016b39c5158Smillert } 1017b39c5158Smillert 1018b39c5158Smillert return $self->_error($err); 1019b39c5158Smillert} 1020b39c5158Smillert 1021b39c5158Smillert=head2 $tar->list_files( [\@properties] ) 1022b39c5158Smillert 1023b39c5158SmillertReturns a list of the names of all the files in the archive. 1024b39c5158Smillert 1025b39c5158SmillertIf C<list_files()> is passed an array reference as its first argument 1026b39c5158Smillertit returns a list of hash references containing the requested 1027b39c5158Smillertproperties of each file. The following list of properties is 1028b39c5158Smillertsupported: name, size, mtime (last modified date), mode, uid, gid, 1029b39c5158Smillertlinkname, uname, gname, devmajor, devminor, prefix. 1030b39c5158Smillert 1031b39c5158SmillertPassing an array reference containing only one element, 'name', is 1032b39c5158Smillertspecial cased to return a list of names rather than a list of hash 1033b39c5158Smillertreferences, making it equivalent to calling C<list_files> without 1034b39c5158Smillertarguments. 1035b39c5158Smillert 1036b39c5158Smillert=cut 1037b39c5158Smillert 1038b39c5158Smillertsub list_files { 1039b39c5158Smillert my $self = shift; 1040b39c5158Smillert my $aref = shift || [ ]; 1041b39c5158Smillert 1042b39c5158Smillert unless( $self->_data ) { 1043b39c5158Smillert $self->read() or return; 1044b39c5158Smillert } 1045b39c5158Smillert 1046b39c5158Smillert if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) { 1047b39c5158Smillert return map { $_->full_path } @{$self->_data}; 1048b39c5158Smillert } else { 1049b39c5158Smillert 1050b39c5158Smillert #my @rv; 1051b39c5158Smillert #for my $obj ( @{$self->_data} ) { 1052b39c5158Smillert # push @rv, { map { $_ => $obj->$_() } @$aref }; 1053b39c5158Smillert #} 1054b39c5158Smillert #return @rv; 1055b39c5158Smillert 1056b39c5158Smillert ### this does the same as the above.. just needs a +{ } 1057b39c5158Smillert ### to make sure perl doesn't confuse it for a block 1058b39c5158Smillert return map { my $o=$_; 1059b39c5158Smillert +{ map { $_ => $o->$_() } @$aref } 1060b39c5158Smillert } @{$self->_data}; 1061b39c5158Smillert } 1062b39c5158Smillert} 1063b39c5158Smillert 1064b39c5158Smillertsub _find_entry { 1065b39c5158Smillert my $self = shift; 1066b39c5158Smillert my $file = shift; 1067b39c5158Smillert 1068b39c5158Smillert unless( defined $file ) { 1069b39c5158Smillert $self->_error( qq[No file specified] ); 1070b39c5158Smillert return; 1071b39c5158Smillert } 1072b39c5158Smillert 1073b39c5158Smillert ### it's an object already 1074b39c5158Smillert return $file if UNIVERSAL::isa( $file, 'Archive::Tar::File' ); 1075b39c5158Smillert 10766fb12b70Safresh1seach_entry: 10776fb12b70Safresh1 if($self->_data){ 1078b39c5158Smillert for my $entry ( @{$self->_data} ) { 1079b39c5158Smillert my $path = $entry->full_path; 1080b39c5158Smillert return $entry if $path eq $file; 1081b39c5158Smillert } 10826fb12b70Safresh1 } 10836fb12b70Safresh1 10846fb12b70Safresh1 if($Archive::Tar::RESOLVE_SYMLINK!~/none/){ 10856fb12b70Safresh1 if(my $link_entry = shift()){#fallback mode when symlinks are using relative notations ( ../a/./b/text.bin ) 10866fb12b70Safresh1 $file = _symlinks_resolver( $link_entry->name, $file ); 10876fb12b70Safresh1 goto seach_entry if $self->_data; 10886fb12b70Safresh1 10896fb12b70Safresh1 #this will be slower than never, but won't failed! 10906fb12b70Safresh1 10916fb12b70Safresh1 my $iterargs = $link_entry->{'_archive'}; 10926fb12b70Safresh1 if($Archive::Tar::RESOLVE_SYMLINK=~/speed/ && @$iterargs==3){ 10936fb12b70Safresh1 #faster but whole archive will be read in memory 10946fb12b70Safresh1 #read whole archive and share data 10956fb12b70Safresh1 my $archive = Archive::Tar->new; 10966fb12b70Safresh1 $archive->read( @$iterargs ); 10976fb12b70Safresh1 push @$iterargs, $archive; #take a trace for destruction 10986fb12b70Safresh1 if($archive->_data){ 10996fb12b70Safresh1 $self->_data( $archive->_data ); 11006fb12b70Safresh1 goto seach_entry; 11016fb12b70Safresh1 } 11026fb12b70Safresh1 }#faster 11036fb12b70Safresh1 11046fb12b70Safresh1 {#slower but lower memory usage 11056fb12b70Safresh1 # $iterargs = [$filename, $compressed, $opts]; 11066fb12b70Safresh1 my $next = Archive::Tar->iter( @$iterargs ); 11076fb12b70Safresh1 while(my $e = $next->()){ 11086fb12b70Safresh1 if($e->full_path eq $file){ 11096fb12b70Safresh1 undef $next; 11106fb12b70Safresh1 return $e; 11116fb12b70Safresh1 } 11126fb12b70Safresh1 } 11136fb12b70Safresh1 }#slower 11146fb12b70Safresh1 } 11156fb12b70Safresh1 } 1116b39c5158Smillert 1117b39c5158Smillert $self->_error( qq[No such file in archive: '$file'] ); 1118b39c5158Smillert return; 1119b39c5158Smillert} 1120b39c5158Smillert 1121b39c5158Smillert=head2 $tar->get_files( [@filenames] ) 1122b39c5158Smillert 1123b39c5158SmillertReturns the C<Archive::Tar::File> objects matching the filenames 1124b39c5158Smillertprovided. If no filename list was passed, all C<Archive::Tar::File> 1125b39c5158Smillertobjects in the current Tar object are returned. 1126b39c5158Smillert 1127b39c5158SmillertPlease refer to the C<Archive::Tar::File> documentation on how to 1128b39c5158Smillerthandle these objects. 1129b39c5158Smillert 1130b39c5158Smillert=cut 1131b39c5158Smillert 1132b39c5158Smillertsub get_files { 1133b39c5158Smillert my $self = shift; 1134b39c5158Smillert 1135b39c5158Smillert return @{ $self->_data } unless @_; 1136b39c5158Smillert 1137b39c5158Smillert my @list; 1138b39c5158Smillert for my $file ( @_ ) { 1139b39c5158Smillert push @list, grep { defined } $self->_find_entry( $file ); 1140b39c5158Smillert } 1141b39c5158Smillert 1142b39c5158Smillert return @list; 1143b39c5158Smillert} 1144b39c5158Smillert 1145b39c5158Smillert=head2 $tar->get_content( $file ) 1146b39c5158Smillert 1147b39c5158SmillertReturn the content of the named file. 1148b39c5158Smillert 1149b39c5158Smillert=cut 1150b39c5158Smillert 1151b39c5158Smillertsub get_content { 1152b39c5158Smillert my $self = shift; 1153b39c5158Smillert my $entry = $self->_find_entry( shift ) or return; 1154b39c5158Smillert 1155b39c5158Smillert return $entry->data; 1156b39c5158Smillert} 1157b39c5158Smillert 1158b39c5158Smillert=head2 $tar->replace_content( $file, $content ) 1159b39c5158Smillert 1160b39c5158SmillertMake the string $content be the content for the file named $file. 1161b39c5158Smillert 1162b39c5158Smillert=cut 1163b39c5158Smillert 1164b39c5158Smillertsub replace_content { 1165b39c5158Smillert my $self = shift; 1166b39c5158Smillert my $entry = $self->_find_entry( shift ) or return; 1167b39c5158Smillert 1168b39c5158Smillert return $entry->replace_content( shift ); 1169b39c5158Smillert} 1170b39c5158Smillert 1171b39c5158Smillert=head2 $tar->rename( $file, $new_name ) 1172b39c5158Smillert 1173b39c5158SmillertRename the file of the in-memory archive to $new_name. 1174b39c5158Smillert 1175b39c5158SmillertNote that you must specify a Unix path for $new_name, since per tar 1176b39c5158Smillertstandard, all files in the archive must be Unix paths. 1177b39c5158Smillert 1178b39c5158SmillertReturns true on success and false on failure. 1179b39c5158Smillert 1180b39c5158Smillert=cut 1181b39c5158Smillert 1182b39c5158Smillertsub rename { 1183b39c5158Smillert my $self = shift; 1184b39c5158Smillert my $file = shift; return unless defined $file; 1185b39c5158Smillert my $new = shift; return unless defined $new; 1186b39c5158Smillert 1187b39c5158Smillert my $entry = $self->_find_entry( $file ) or return; 1188b39c5158Smillert 1189b39c5158Smillert return $entry->rename( $new ); 1190b39c5158Smillert} 1191b39c5158Smillert 1192898184e3Ssthen=head2 $tar->chmod( $file, $mode ) 1193898184e3Ssthen 1194898184e3SsthenChange mode of $file to $mode. 1195898184e3Ssthen 1196898184e3SsthenReturns true on success and false on failure. 1197898184e3Ssthen 1198898184e3Ssthen=cut 1199898184e3Ssthen 1200898184e3Ssthensub chmod { 1201898184e3Ssthen my $self = shift; 1202898184e3Ssthen my $file = shift; return unless defined $file; 1203898184e3Ssthen my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/; 1204898184e3Ssthen my @args = ("$mode"); 1205898184e3Ssthen 1206898184e3Ssthen my $entry = $self->_find_entry( $file ) or return; 1207898184e3Ssthen my $x = $entry->chmod( @args ); 1208898184e3Ssthen return $x; 1209898184e3Ssthen} 1210898184e3Ssthen 1211898184e3Ssthen=head2 $tar->chown( $file, $uname [, $gname] ) 1212898184e3Ssthen 1213898184e3SsthenChange owner $file to $uname and $gname. 1214898184e3Ssthen 1215898184e3SsthenReturns true on success and false on failure. 1216898184e3Ssthen 1217898184e3Ssthen=cut 1218898184e3Ssthen 1219898184e3Ssthensub chown { 1220898184e3Ssthen my $self = shift; 1221898184e3Ssthen my $file = shift; return unless defined $file; 1222898184e3Ssthen my $uname = shift; return unless defined $uname; 1223898184e3Ssthen my @args = ($uname); 1224898184e3Ssthen push(@args, shift); 1225898184e3Ssthen 1226898184e3Ssthen my $entry = $self->_find_entry( $file ) or return; 1227898184e3Ssthen my $x = $entry->chown( @args ); 1228898184e3Ssthen return $x; 1229898184e3Ssthen} 1230898184e3Ssthen 1231b39c5158Smillert=head2 $tar->remove (@filenamelist) 1232b39c5158Smillert 1233b39c5158SmillertRemoves any entries with names matching any of the given filenames 1234b39c5158Smillertfrom the in-memory archive. Returns a list of C<Archive::Tar::File> 1235b39c5158Smillertobjects that remain. 1236b39c5158Smillert 1237b39c5158Smillert=cut 1238b39c5158Smillert 1239b39c5158Smillertsub remove { 1240b39c5158Smillert my $self = shift; 1241b39c5158Smillert my @list = @_; 1242b39c5158Smillert 1243b39c5158Smillert my %seen = map { $_->full_path => $_ } @{$self->_data}; 1244b39c5158Smillert delete $seen{ $_ } for @list; 1245b39c5158Smillert 1246b39c5158Smillert $self->_data( [values %seen] ); 1247b39c5158Smillert 1248b39c5158Smillert return values %seen; 1249b39c5158Smillert} 1250b39c5158Smillert 1251b39c5158Smillert=head2 $tar->clear 1252b39c5158Smillert 1253b39c5158SmillertC<clear> clears the current in-memory archive. This effectively gives 1254b39c5158Smillertyou a 'blank' object, ready to be filled again. Note that C<clear> 1255b39c5158Smillertonly has effect on the object, not the underlying tarfile. 1256b39c5158Smillert 1257b39c5158Smillert=cut 1258b39c5158Smillert 1259b39c5158Smillertsub clear { 1260b39c5158Smillert my $self = shift or return; 1261b39c5158Smillert 1262b39c5158Smillert $self->_data( [] ); 1263b39c5158Smillert $self->_file( '' ); 1264b39c5158Smillert 1265b39c5158Smillert return 1; 1266b39c5158Smillert} 1267b39c5158Smillert 1268b39c5158Smillert 1269b39c5158Smillert=head2 $tar->write ( [$file, $compressed, $prefix] ) 1270b39c5158Smillert 1271b39c5158SmillertWrite the in-memory archive to disk. The first argument can either 1272b39c5158Smillertbe the name of a file or a reference to an already open filehandle (a 1273b39c5158SmillertGLOB reference). 1274b39c5158Smillert 127556d68f1eSafresh1The second argument is used to indicate compression. You can 127656d68f1eSafresh1compress using C<gzip>, C<bzip2> or C<xz>. If you pass a digit, it's assumed 1277b39c5158Smillertto be the C<gzip> compression level (between 1 and 9), but the use of 1278898184e3Ssthenconstants is preferred: 1279b39c5158Smillert 1280b39c5158Smillert # write a gzip compressed file 1281b39c5158Smillert $tar->write( 'out.tgz', COMPRESS_GZIP ); 1282b39c5158Smillert 1283b39c5158Smillert # write a bzip compressed file 1284b39c5158Smillert $tar->write( 'out.tbz', COMPRESS_BZIP ); 1285b39c5158Smillert 128656d68f1eSafresh1 # write a xz compressed file 128756d68f1eSafresh1 $tar->write( 'out.txz', COMPRESS_XZ ); 128856d68f1eSafresh1 1289b39c5158SmillertNote that when you pass in a filehandle, the compression argument 1290b39c5158Smillertis ignored, as all files are printed verbatim to your filehandle. 1291b39c5158SmillertIf you wish to enable compression with filehandles, use an 129256d68f1eSafresh1C<IO::Zlib>, C<IO::Compress::Bzip2> or C<IO::Compress::Xz> filehandle instead. 1293b39c5158Smillert 1294b39c5158SmillertThe third argument is an optional prefix. All files will be tucked 1295b39c5158Smillertaway in the directory you specify as prefix. So if you have files 1296b39c5158Smillert'a' and 'b' in your archive, and you specify 'foo' as prefix, they 1297b39c5158Smillertwill be written to the archive as 'foo/a' and 'foo/b'. 1298b39c5158Smillert 1299b39c5158SmillertIf no arguments are given, C<write> returns the entire formatted 1300b39c5158Smillertarchive as a string, which could be useful if you'd like to stuff the 1301b39c5158Smillertarchive into a socket or a pipe to gzip or something. 1302b39c5158Smillert 1303b39c5158Smillert 1304b39c5158Smillert=cut 1305b39c5158Smillert 1306b39c5158Smillertsub write { 1307b39c5158Smillert my $self = shift; 1308b39c5158Smillert my $file = shift; $file = '' unless defined $file; 1309b39c5158Smillert my $gzip = shift || 0; 1310b39c5158Smillert my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix; 1311b39c5158Smillert my $dummy = ''; 1312b39c5158Smillert 1313b39c5158Smillert ### only need a handle if we have a file to print to ### 1314b39c5158Smillert my $handle = length($file) 1315b39c5158Smillert ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) ) 1316b39c5158Smillert or return ) 1317b39c5158Smillert : $HAS_PERLIO ? do { open my $h, '>', \$dummy; $h } 1318b39c5158Smillert : $HAS_IO_STRING ? IO::String->new 1319b39c5158Smillert : __PACKAGE__->no_string_support(); 1320b39c5158Smillert 1321b39c5158Smillert ### Addresses: #41798: Nonempty $\ when writing a TAR file produces a 1322b39c5158Smillert ### corrupt TAR file. Must clear out $\ to make sure no garbage is 1323b39c5158Smillert ### printed to the archive 1324b39c5158Smillert local $\; 1325b39c5158Smillert 1326b39c5158Smillert for my $entry ( @{$self->_data} ) { 1327b39c5158Smillert ### entries to be written to the tarfile ### 1328b39c5158Smillert my @write_me; 1329b39c5158Smillert 1330b39c5158Smillert ### only now will we change the object to reflect the current state 1331b39c5158Smillert ### of the name and prefix fields -- this needs to be limited to 1332b39c5158Smillert ### write() only! 1333b39c5158Smillert my $clone = $entry->clone; 1334b39c5158Smillert 1335b39c5158Smillert 1336b39c5158Smillert ### so, if you don't want use to use the prefix, we'll stuff 1337b39c5158Smillert ### everything in the name field instead 1338b39c5158Smillert if( $DO_NOT_USE_PREFIX ) { 1339b39c5158Smillert 1340b39c5158Smillert ### you might have an extended prefix, if so, set it in the clone 1341b39c5158Smillert ### XXX is ::Unix right? 1342b39c5158Smillert $clone->name( length $ext_prefix 1343b39c5158Smillert ? File::Spec::Unix->catdir( $ext_prefix, 1344b39c5158Smillert $clone->full_path) 1345b39c5158Smillert : $clone->full_path ); 1346b39c5158Smillert $clone->prefix( '' ); 1347b39c5158Smillert 1348b39c5158Smillert ### otherwise, we'll have to set it properly -- prefix part in the 1349b39c5158Smillert ### prefix and name part in the name field. 1350b39c5158Smillert } else { 1351b39c5158Smillert 1352b39c5158Smillert ### split them here, not before! 1353b39c5158Smillert my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path ); 1354b39c5158Smillert 1355b39c5158Smillert ### you might have an extended prefix, if so, set it in the clone 1356b39c5158Smillert ### XXX is ::Unix right? 1357b39c5158Smillert $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix ) 1358b39c5158Smillert if length $ext_prefix; 1359b39c5158Smillert 1360b39c5158Smillert $clone->prefix( $prefix ); 1361b39c5158Smillert $clone->name( $name ); 1362b39c5158Smillert } 1363b39c5158Smillert 1364b39c5158Smillert ### names are too long, and will get truncated if we don't add a 1365b39c5158Smillert ### '@LongLink' file... 1366b39c5158Smillert my $make_longlink = ( length($clone->name) > NAME_LENGTH or 1367b39c5158Smillert length($clone->prefix) > PREFIX_LENGTH 1368b39c5158Smillert ) || 0; 1369b39c5158Smillert 1370b39c5158Smillert ### perhaps we need to make a longlink file? 1371b39c5158Smillert if( $make_longlink ) { 1372b39c5158Smillert my $longlink = Archive::Tar::File->new( 1373b39c5158Smillert data => LONGLINK_NAME, 1374b39c5158Smillert $clone->full_path, 1375b39c5158Smillert { type => LONGLINK } 1376b39c5158Smillert ); 1377b39c5158Smillert 1378b39c5158Smillert unless( $longlink ) { 1379b39c5158Smillert $self->_error( qq[Could not create 'LongLink' entry for ] . 1380b39c5158Smillert qq[oversize file '] . $clone->full_path ."'" ); 1381b39c5158Smillert return; 1382b39c5158Smillert }; 1383b39c5158Smillert 1384b39c5158Smillert push @write_me, $longlink; 1385b39c5158Smillert } 1386b39c5158Smillert 1387b39c5158Smillert push @write_me, $clone; 1388b39c5158Smillert 1389b39c5158Smillert ### write the one, optionally 2 a::t::file objects to the handle 1390b39c5158Smillert for my $clone (@write_me) { 1391b39c5158Smillert 1392b39c5158Smillert ### if the file is a symlink, there are 2 options: 1393b39c5158Smillert ### either we leave the symlink intact, but then we don't write any 1394b39c5158Smillert ### data OR we follow the symlink, which means we actually make a 1395b39c5158Smillert ### copy. if we do the latter, we have to change the TYPE of the 1396b39c5158Smillert ### clone to 'FILE' 1397b39c5158Smillert my $link_ok = $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK; 1398b39c5158Smillert my $data_ok = !$clone->is_symlink && $clone->has_content; 1399b39c5158Smillert 1400b39c5158Smillert ### downgrade to a 'normal' file if it's a symlink we're going to 1401b39c5158Smillert ### treat as a regular file 1402b39c5158Smillert $clone->_downgrade_to_plainfile if $link_ok; 1403b39c5158Smillert 1404b39c5158Smillert ### get the header for this block 1405b39c5158Smillert my $header = $self->_format_tar_entry( $clone ); 1406b39c5158Smillert unless( $header ) { 1407b39c5158Smillert $self->_error(q[Could not format header for: ] . 1408b39c5158Smillert $clone->full_path ); 1409b39c5158Smillert return; 1410b39c5158Smillert } 1411b39c5158Smillert 1412b39c5158Smillert unless( print $handle $header ) { 1413b39c5158Smillert $self->_error(q[Could not write header for: ] . 1414b39c5158Smillert $clone->full_path); 1415b39c5158Smillert return; 1416b39c5158Smillert } 1417b39c5158Smillert 1418b39c5158Smillert if( $link_ok or $data_ok ) { 1419b39c5158Smillert unless( print $handle $clone->data ) { 1420b39c5158Smillert $self->_error(q[Could not write data for: ] . 1421b39c5158Smillert $clone->full_path); 1422b39c5158Smillert return; 1423b39c5158Smillert } 1424b39c5158Smillert 1425b39c5158Smillert ### pad the end of the clone if required ### 1426b39c5158Smillert print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK 1427b39c5158Smillert } 1428b39c5158Smillert 1429b39c5158Smillert } ### done writing these entries 1430b39c5158Smillert } 1431b39c5158Smillert 1432b39c5158Smillert ### write the end markers ### 1433b39c5158Smillert print $handle TAR_END x 2 or 1434b39c5158Smillert return $self->_error( qq[Could not write tar end markers] ); 1435b39c5158Smillert 1436b39c5158Smillert ### did you want it written to a file, or returned as a string? ### 1437b39c5158Smillert my $rv = length($file) ? 1 1438b39c5158Smillert : $HAS_PERLIO ? $dummy 1439b39c5158Smillert : do { seek $handle, 0, 0; local $/; <$handle> }; 1440b39c5158Smillert 1441898184e3Ssthen ### make sure to close the handle if we created it 1442898184e3Ssthen if ( $file ne $handle ) { 1443898184e3Ssthen unless( close $handle ) { 1444898184e3Ssthen $self->_error( qq[Could not write tar] ); 1445898184e3Ssthen return; 1446898184e3Ssthen } 1447898184e3Ssthen } 1448b39c5158Smillert 1449b39c5158Smillert return $rv; 1450b39c5158Smillert} 1451b39c5158Smillert 1452b39c5158Smillertsub _format_tar_entry { 1453b39c5158Smillert my $self = shift; 1454b39c5158Smillert my $entry = shift or return; 1455b39c5158Smillert my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix; 1456b39c5158Smillert my $no_prefix = shift || 0; 1457b39c5158Smillert 1458b39c5158Smillert my $file = $entry->name; 1459b39c5158Smillert my $prefix = $entry->prefix; $prefix = '' unless defined $prefix; 1460b39c5158Smillert 1461b39c5158Smillert ### remove the prefix from the file name 1462898184e3Ssthen ### not sure if this is still needed --kane 1463b39c5158Smillert ### no it's not -- Archive::Tar::File->_new_from_file will take care of 1464b39c5158Smillert ### this for us. Even worse, this would break if we tried to add a file 1465b39c5158Smillert ### like x/x. 1466b39c5158Smillert #if( length $prefix ) { 1467b39c5158Smillert # $file =~ s/^$match//; 1468b39c5158Smillert #} 1469b39c5158Smillert 1470b39c5158Smillert $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix) 1471b39c5158Smillert if length $ext_prefix; 1472b39c5158Smillert 1473b39c5158Smillert ### not sure why this is... ### 1474b39c5158Smillert my $l = PREFIX_LENGTH; # is ambiguous otherwise... 1475b39c5158Smillert substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH; 1476b39c5158Smillert 1477898184e3Ssthen my $f1 = "%06o"; my $f2 = $ZERO_PAD_NUMBERS ? "%011o" : "%11o"; 1478b39c5158Smillert 1479b39c5158Smillert ### this might be optimizable with a 'changed' flag in the file objects ### 1480b39c5158Smillert my $tar = pack ( 1481b39c5158Smillert PACK, 1482b39c5158Smillert $file, 1483b39c5158Smillert 1484b39c5158Smillert (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]), 1485b39c5158Smillert (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]), 1486b39c5158Smillert 1487b39c5158Smillert "", # checksum field - space padded a bit down 1488b39c5158Smillert 1489b39c5158Smillert (map { $entry->$_() } qw[type linkname magic]), 1490b39c5158Smillert 1491b39c5158Smillert $entry->version || TAR_VERSION, 1492b39c5158Smillert 1493b39c5158Smillert (map { $entry->$_() } qw[uname gname]), 1494b39c5158Smillert (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]), 1495b39c5158Smillert 1496b39c5158Smillert ($no_prefix ? '' : $prefix) 1497b39c5158Smillert ); 1498b39c5158Smillert 1499b39c5158Smillert ### add the checksum ### 1500898184e3Ssthen my $checksum_fmt = $ZERO_PAD_NUMBERS ? "%06o\0" : "%06o\0"; 1501b39c5158Smillert substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar)); 1502b39c5158Smillert 1503b39c5158Smillert return $tar; 1504b39c5158Smillert} 1505b39c5158Smillert 1506b39c5158Smillert=head2 $tar->add_files( @filenamelist ) 1507b39c5158Smillert 1508b39c5158SmillertTakes a list of filenames and adds them to the in-memory archive. 1509b39c5158Smillert 1510b39c5158SmillertThe path to the file is automatically converted to a Unix like 1511b39c5158Smillertequivalent for use in the archive, and, if on MacOS, the file's 1512b39c5158Smillertmodification time is converted from the MacOS epoch to the Unix epoch. 1513b39c5158SmillertSo tar archives created on MacOS with B<Archive::Tar> can be read 1514b39c5158Smillertboth with I<tar> on Unix and applications like I<suntar> or 1515b39c5158SmillertI<Stuffit Expander> on MacOS. 1516b39c5158Smillert 1517b39c5158SmillertBe aware that the file's type/creator and resource fork will be lost, 1518b39c5158Smillertwhich is usually what you want in cross-platform archives. 1519b39c5158Smillert 1520b39c5158SmillertInstead of a filename, you can also pass it an existing C<Archive::Tar::File> 1521b39c5158Smillertobject from, for example, another archive. The object will be clone, and 1522b39c5158Smillerteffectively be a copy of the original, not an alias. 1523b39c5158Smillert 1524b39c5158SmillertReturns a list of C<Archive::Tar::File> objects that were just added. 1525b39c5158Smillert 1526b39c5158Smillert=cut 1527b39c5158Smillert 1528b39c5158Smillertsub add_files { 1529b39c5158Smillert my $self = shift; 1530b39c5158Smillert my @files = @_ or return; 1531b39c5158Smillert 1532b39c5158Smillert my @rv; 1533b39c5158Smillert for my $file ( @files ) { 1534b39c5158Smillert 1535b39c5158Smillert ### you passed an Archive::Tar::File object 1536b39c5158Smillert ### clone it so we don't accidentally have a reference to 1537b39c5158Smillert ### an object from another archive 1538b39c5158Smillert if( UNIVERSAL::isa( $file,'Archive::Tar::File' ) ) { 1539b39c5158Smillert push @rv, $file->clone; 1540b39c5158Smillert next; 1541b39c5158Smillert } 1542b39c5158Smillert 154391f110e0Safresh1 eval { 154491f110e0Safresh1 if( utf8::is_utf8( $file )) { 154591f110e0Safresh1 utf8::encode( $file ); 154691f110e0Safresh1 } 154791f110e0Safresh1 }; 154891f110e0Safresh1 1549b39c5158Smillert unless( -e $file || -l $file ) { 1550b39c5158Smillert $self->_error( qq[No such file: '$file'] ); 1551b39c5158Smillert next; 1552b39c5158Smillert } 1553b39c5158Smillert 1554b39c5158Smillert my $obj = Archive::Tar::File->new( file => $file ); 1555b39c5158Smillert unless( $obj ) { 1556b39c5158Smillert $self->_error( qq[Unable to add file: '$file'] ); 1557b39c5158Smillert next; 1558b39c5158Smillert } 1559b39c5158Smillert 1560b39c5158Smillert push @rv, $obj; 1561b39c5158Smillert } 1562b39c5158Smillert 1563b39c5158Smillert push @{$self->{_data}}, @rv; 1564b39c5158Smillert 1565b39c5158Smillert return @rv; 1566b39c5158Smillert} 1567b39c5158Smillert 1568b39c5158Smillert=head2 $tar->add_data ( $filename, $data, [$opthashref] ) 1569b39c5158Smillert 1570b39c5158SmillertTakes a filename, a scalar full of data and optionally a reference to 1571b39c5158Smillerta hash with specific options. 1572b39c5158Smillert 1573b39c5158SmillertWill add a file to the in-memory archive, with name C<$filename> and 1574b39c5158Smillertcontent C<$data>. Specific properties can be set using C<$opthashref>. 1575b39c5158SmillertThe following list of properties is supported: name, size, mtime 1576b39c5158Smillert(last modified date), mode, uid, gid, linkname, uname, gname, 1577b39c5158Smillertdevmajor, devminor, prefix, type. (On MacOS, the file's path and 1578b39c5158Smillertmodification times are converted to Unix equivalents.) 1579b39c5158Smillert 158091f110e0Safresh1Valid values for the file type are the following constants defined by 158191f110e0Safresh1Archive::Tar::Constant: 1582b39c5158Smillert 1583b39c5158Smillert=over 4 1584b39c5158Smillert 1585b39c5158Smillert=item FILE 1586b39c5158Smillert 1587b39c5158SmillertRegular file. 1588b39c5158Smillert 1589b39c5158Smillert=item HARDLINK 1590b39c5158Smillert 1591b39c5158Smillert=item SYMLINK 1592b39c5158Smillert 1593b39c5158SmillertHard and symbolic ("soft") links; linkname should specify target. 1594b39c5158Smillert 1595b39c5158Smillert=item CHARDEV 1596b39c5158Smillert 1597b39c5158Smillert=item BLOCKDEV 1598b39c5158Smillert 1599b39c5158SmillertCharacter and block devices. devmajor and devminor should specify the major 1600b39c5158Smillertand minor device numbers. 1601b39c5158Smillert 1602b39c5158Smillert=item DIR 1603b39c5158Smillert 1604b39c5158SmillertDirectory. 1605b39c5158Smillert 1606b39c5158Smillert=item FIFO 1607b39c5158Smillert 1608b39c5158SmillertFIFO (named pipe). 1609b39c5158Smillert 1610b39c5158Smillert=item SOCKET 1611b39c5158Smillert 1612b39c5158SmillertSocket. 1613b39c5158Smillert 1614b39c5158Smillert=back 1615b39c5158Smillert 1616b39c5158SmillertReturns the C<Archive::Tar::File> object that was just added, or 1617b39c5158SmillertC<undef> on failure. 1618b39c5158Smillert 1619b39c5158Smillert=cut 1620b39c5158Smillert 1621b39c5158Smillertsub add_data { 1622b39c5158Smillert my $self = shift; 1623b39c5158Smillert my ($file, $data, $opt) = @_; 1624b39c5158Smillert 1625b39c5158Smillert my $obj = Archive::Tar::File->new( data => $file, $data, $opt ); 1626b39c5158Smillert unless( $obj ) { 1627b39c5158Smillert $self->_error( qq[Unable to add file: '$file'] ); 1628b39c5158Smillert return; 1629b39c5158Smillert } 1630b39c5158Smillert 1631b39c5158Smillert push @{$self->{_data}}, $obj; 1632b39c5158Smillert 1633b39c5158Smillert return $obj; 1634b39c5158Smillert} 1635b39c5158Smillert 1636b39c5158Smillert=head2 $tar->error( [$BOOL] ) 1637b39c5158Smillert 1638b39c5158SmillertReturns the current error string (usually, the last error reported). 1639b39c5158SmillertIf a true value was specified, it will give the C<Carp::longmess> 1640b39c5158Smillertequivalent of the error, in effect giving you a stacktrace. 1641b39c5158Smillert 1642b39c5158SmillertFor backwards compatibility, this error is also available as 1643b39c5158SmillertC<$Archive::Tar::error> although it is much recommended you use the 1644b39c5158Smillertmethod call instead. 1645b39c5158Smillert 1646b39c5158Smillert=cut 1647b39c5158Smillert 1648b39c5158Smillert{ 1649b39c5158Smillert $error = ''; 1650b39c5158Smillert my $longmess; 1651b39c5158Smillert 1652b39c5158Smillert sub _error { 1653b39c5158Smillert my $self = shift; 1654b39c5158Smillert my $msg = $error = shift; 1655b39c5158Smillert $longmess = Carp::longmess($error); 1656b39c5158Smillert if (ref $self) { 1657b39c5158Smillert $self->{_error} = $error; 1658b39c5158Smillert $self->{_longmess} = $longmess; 1659b39c5158Smillert } 1660b39c5158Smillert 1661b39c5158Smillert ### set Archive::Tar::WARN to 0 to disable printing 1662b39c5158Smillert ### of errors 1663b39c5158Smillert if( $WARN ) { 1664b39c5158Smillert carp $DEBUG ? $longmess : $msg; 1665b39c5158Smillert } 1666b39c5158Smillert 1667b39c5158Smillert return; 1668b39c5158Smillert } 1669b39c5158Smillert 1670b39c5158Smillert sub error { 1671b39c5158Smillert my $self = shift; 1672b39c5158Smillert if (ref $self) { 1673b39c5158Smillert return shift() ? $self->{_longmess} : $self->{_error}; 1674b39c5158Smillert } else { 1675b39c5158Smillert return shift() ? $longmess : $error; 1676b39c5158Smillert } 1677b39c5158Smillert } 1678b39c5158Smillert} 1679b39c5158Smillert 1680b39c5158Smillert=head2 $tar->setcwd( $cwd ); 1681b39c5158Smillert 1682b39c5158SmillertC<Archive::Tar> needs to know the current directory, and it will run 1683b39c5158SmillertC<Cwd::cwd()> I<every> time it extracts a I<relative> entry from the 1684b39c5158Smillerttarfile and saves it in the file system. (As of version 1.30, however, 1685b39c5158SmillertC<Archive::Tar> will use the speed optimization described below 1686b39c5158Smillertautomatically, so it's only relevant if you're using C<extract_file()>). 1687b39c5158Smillert 1688b39c5158SmillertSince C<Archive::Tar> doesn't change the current directory internally 1689b39c5158Smillertwhile it is extracting the items in a tarball, all calls to C<Cwd::cwd()> 1690b39c5158Smillertcan be avoided if we can guarantee that the current directory doesn't 1691b39c5158Smillertget changed externally. 1692b39c5158Smillert 1693b39c5158SmillertTo use this performance boost, set the current directory via 1694b39c5158Smillert 1695b39c5158Smillert use Cwd; 1696b39c5158Smillert $tar->setcwd( cwd() ); 1697b39c5158Smillert 1698b39c5158Smillertonce before calling a function like C<extract_file> and 1699b39c5158SmillertC<Archive::Tar> will use the current directory setting from then on 1700b39c5158Smillertand won't call C<Cwd::cwd()> internally. 1701b39c5158Smillert 1702b39c5158SmillertTo switch back to the default behaviour, use 1703b39c5158Smillert 1704b39c5158Smillert $tar->setcwd( undef ); 1705b39c5158Smillert 1706b39c5158Smillertand C<Archive::Tar> will call C<Cwd::cwd()> internally again. 1707b39c5158Smillert 1708898184e3SsthenIf you're using C<Archive::Tar>'s C<extract()> method, C<setcwd()> will 1709b39c5158Smillertbe called for you. 1710b39c5158Smillert 1711b39c5158Smillert=cut 1712b39c5158Smillert 1713b39c5158Smillertsub setcwd { 1714b39c5158Smillert my $self = shift; 1715b39c5158Smillert my $cwd = shift; 1716b39c5158Smillert 1717b39c5158Smillert $self->{cwd} = $cwd; 1718b39c5158Smillert} 1719b39c5158Smillert 1720b39c5158Smillert=head1 Class Methods 1721b39c5158Smillert 1722b39c5158Smillert=head2 Archive::Tar->create_archive($file, $compressed, @filelist) 1723b39c5158Smillert 1724b39c5158SmillertCreates a tar file from the list of files provided. The first 1725b39c5158Smillertargument can either be the name of the tar file to create or a 1726b39c5158Smillertreference to an open file handle (e.g. a GLOB reference). 1727b39c5158Smillert 172856d68f1eSafresh1The second argument is used to indicate compression. You can 172956d68f1eSafresh1compress using C<gzip>, C<bzip2> or C<xz>. If you pass a digit, it's assumed 1730b39c5158Smillertto be the C<gzip> compression level (between 1 and 9), but the use of 1731898184e3Ssthenconstants is preferred: 1732b39c5158Smillert 1733b39c5158Smillert # write a gzip compressed file 1734b39c5158Smillert Archive::Tar->create_archive( 'out.tgz', COMPRESS_GZIP, @filelist ); 1735b39c5158Smillert 1736b39c5158Smillert # write a bzip compressed file 1737b39c5158Smillert Archive::Tar->create_archive( 'out.tbz', COMPRESS_BZIP, @filelist ); 1738b39c5158Smillert 173956d68f1eSafresh1 # write a xz compressed file 174056d68f1eSafresh1 Archive::Tar->create_archive( 'out.txz', COMPRESS_XZ, @filelist ); 174156d68f1eSafresh1 1742b39c5158SmillertNote that when you pass in a filehandle, the compression argument 1743b39c5158Smillertis ignored, as all files are printed verbatim to your filehandle. 1744b39c5158SmillertIf you wish to enable compression with filehandles, use an 174556d68f1eSafresh1C<IO::Zlib>, C<IO::Compress::Bzip2> or C<IO::Compress::Xz> filehandle instead. 1746b39c5158Smillert 1747b39c5158SmillertThe remaining arguments list the files to be included in the tar file. 1748b39c5158SmillertThese files must all exist. Any files which don't exist or can't be 1749b39c5158Smillertread are silently ignored. 1750b39c5158Smillert 1751b39c5158SmillertIf the archive creation fails for any reason, C<create_archive> will 1752b39c5158Smillertreturn false. Please use the C<error> method to find the cause of the 1753b39c5158Smillertfailure. 1754b39c5158Smillert 1755b39c5158SmillertNote that this method does not write C<on the fly> as it were; it 1756b39c5158Smillertstill reads all the files into memory before writing out the archive. 1757b39c5158SmillertConsult the FAQ below if this is a problem. 1758b39c5158Smillert 1759b39c5158Smillert=cut 1760b39c5158Smillert 1761b39c5158Smillertsub create_archive { 1762b39c5158Smillert my $class = shift; 1763b39c5158Smillert 1764b39c5158Smillert my $file = shift; return unless defined $file; 1765b39c5158Smillert my $gzip = shift || 0; 1766b39c5158Smillert my @files = @_; 1767b39c5158Smillert 1768b39c5158Smillert unless( @files ) { 1769b39c5158Smillert return $class->_error( qq[Cowardly refusing to create empty archive!] ); 1770b39c5158Smillert } 1771b39c5158Smillert 1772b39c5158Smillert my $tar = $class->new; 1773b39c5158Smillert $tar->add_files( @files ); 1774b39c5158Smillert return $tar->write( $file, $gzip ); 1775b39c5158Smillert} 1776b39c5158Smillert 1777b39c5158Smillert=head2 Archive::Tar->iter( $filename, [ $compressed, {opt => $val} ] ) 1778b39c5158Smillert 1779b39c5158SmillertReturns an iterator function that reads the tar file without loading 1780b39c5158Smillertit all in memory. Each time the function is called it will return the 1781b39c5158Smillertnext file in the tarball. The files are returned as 1782b39c5158SmillertC<Archive::Tar::File> objects. The iterator function returns the 1783b39c5158Smillertempty list once it has exhausted the files contained. 1784b39c5158Smillert 1785b39c5158SmillertThe second argument can be a hash reference with options, which are 1786b39c5158Smillertidentical to the arguments passed to C<read()>. 1787b39c5158Smillert 1788b39c5158SmillertExample usage: 1789b39c5158Smillert 1790b39c5158Smillert my $next = Archive::Tar->iter( "example.tar.gz", 1, {filter => qr/\.pm$/} ); 1791b39c5158Smillert 1792b39c5158Smillert while( my $f = $next->() ) { 1793b39c5158Smillert print $f->name, "\n"; 1794b39c5158Smillert 1795b39c5158Smillert $f->extract or warn "Extraction failed"; 1796b39c5158Smillert 1797b39c5158Smillert # .... 1798b39c5158Smillert } 1799b39c5158Smillert 1800b39c5158Smillert=cut 1801b39c5158Smillert 1802b39c5158Smillert 1803b39c5158Smillertsub iter { 1804b39c5158Smillert my $class = shift; 18059f11ffb7Safresh1 my $filename = shift; 18069f11ffb7Safresh1 return unless defined $filename; 1807898184e3Ssthen my $compressed = shift || 0; 1808b39c5158Smillert my $opts = shift || {}; 1809b39c5158Smillert 1810b39c5158Smillert ### get a handle to read from. 1811b39c5158Smillert my $handle = $class->_get_handle( 1812b39c5158Smillert $filename, 1813b39c5158Smillert $compressed, 1814b39c5158Smillert READ_ONLY->( ZLIB ) 1815b39c5158Smillert ) or return; 1816b39c5158Smillert 1817b39c5158Smillert my @data; 18186fb12b70Safresh1 my $CONSTRUCT_ARGS = [ $filename, $compressed, $opts ]; 1819b39c5158Smillert return sub { 1820b39c5158Smillert return shift(@data) if @data; # more than one file returned? 1821b39c5158Smillert return unless $handle; # handle exhausted? 1822b39c5158Smillert 1823b39c5158Smillert ### read data, should only return file 1824b39c5158Smillert my $tarfile = $class->_read_tar($handle, { %$opts, limit => 1 }); 1825b39c5158Smillert @data = @$tarfile if ref $tarfile && ref $tarfile eq 'ARRAY'; 18266fb12b70Safresh1 if($Archive::Tar::RESOLVE_SYMLINK!~/none/){ 18276fb12b70Safresh1 foreach(@data){ 18286fb12b70Safresh1 #may refine this heuristic for ON_UNIX? 18296fb12b70Safresh1 if($_->linkname){ 18306fb12b70Safresh1 #is there a better slot to store/share it ? 18316fb12b70Safresh1 $_->{'_archive'} = $CONSTRUCT_ARGS; 18326fb12b70Safresh1 } 18336fb12b70Safresh1 } 18346fb12b70Safresh1 } 1835b39c5158Smillert 1836b39c5158Smillert ### return one piece of data 1837b39c5158Smillert return shift(@data) if @data; 1838b39c5158Smillert 1839b39c5158Smillert ### data is exhausted, free the filehandle 1840b39c5158Smillert undef $handle; 18416fb12b70Safresh1 if(@$CONSTRUCT_ARGS == 4){ 18426fb12b70Safresh1 #free archive in memory 18436fb12b70Safresh1 undef $CONSTRUCT_ARGS->[-1]; 18446fb12b70Safresh1 } 1845b39c5158Smillert return; 1846b39c5158Smillert }; 1847b39c5158Smillert} 1848b39c5158Smillert 1849b39c5158Smillert=head2 Archive::Tar->list_archive($file, $compressed, [\@properties]) 1850b39c5158Smillert 1851b39c5158SmillertReturns a list of the names of all the files in the archive. The 1852b39c5158Smillertfirst argument can either be the name of the tar file to list or a 1853b39c5158Smillertreference to an open file handle (e.g. a GLOB reference). 1854b39c5158Smillert 1855b39c5158SmillertIf C<list_archive()> is passed an array reference as its third 1856b39c5158Smillertargument it returns a list of hash references containing the requested 1857b39c5158Smillertproperties of each file. The following list of properties is 1858b39c5158Smillertsupported: full_path, name, size, mtime (last modified date), mode, 185991f110e0Safresh1uid, gid, linkname, uname, gname, devmajor, devminor, prefix, type. 1860b39c5158Smillert 1861b39c5158SmillertSee C<Archive::Tar::File> for details about supported properties. 1862b39c5158Smillert 1863b39c5158SmillertPassing an array reference containing only one element, 'name', is 1864b39c5158Smillertspecial cased to return a list of names rather than a list of hash 1865b39c5158Smillertreferences. 1866b39c5158Smillert 1867b39c5158Smillert=cut 1868b39c5158Smillert 1869b39c5158Smillertsub list_archive { 1870b39c5158Smillert my $class = shift; 1871b39c5158Smillert my $file = shift; return unless defined $file; 1872b39c5158Smillert my $gzip = shift || 0; 1873b39c5158Smillert 1874b39c5158Smillert my $tar = $class->new($file, $gzip); 1875b39c5158Smillert return unless $tar; 1876b39c5158Smillert 1877b39c5158Smillert return $tar->list_files( @_ ); 1878b39c5158Smillert} 1879b39c5158Smillert 1880b39c5158Smillert=head2 Archive::Tar->extract_archive($file, $compressed) 1881b39c5158Smillert 1882b39c5158SmillertExtracts the contents of the tar file. The first argument can either 1883b39c5158Smillertbe the name of the tar file to create or a reference to an open file 1884b39c5158Smillerthandle (e.g. a GLOB reference). All relative paths in the tar file will 1885b39c5158Smillertbe created underneath the current working directory. 1886b39c5158Smillert 1887b39c5158SmillertC<extract_archive> will return a list of files it extracted. 1888b39c5158SmillertIf the archive extraction fails for any reason, C<extract_archive> 1889b39c5158Smillertwill return false. Please use the C<error> method to find the cause 1890b39c5158Smillertof the failure. 1891b39c5158Smillert 1892b39c5158Smillert=cut 1893b39c5158Smillert 1894b39c5158Smillertsub extract_archive { 1895b39c5158Smillert my $class = shift; 1896b39c5158Smillert my $file = shift; return unless defined $file; 1897b39c5158Smillert my $gzip = shift || 0; 1898b39c5158Smillert 1899b39c5158Smillert my $tar = $class->new( ) or return; 1900b39c5158Smillert 1901b39c5158Smillert return $tar->read( $file, $gzip, { extract => 1 } ); 1902b39c5158Smillert} 1903b39c5158Smillert 1904b39c5158Smillert=head2 $bool = Archive::Tar->has_io_string 1905b39c5158Smillert 1906b39c5158SmillertReturns true if we currently have C<IO::String> support loaded. 1907b39c5158Smillert 1908b39c5158SmillertEither C<IO::String> or C<perlio> support is needed to support writing 1909b39c5158Smillertstringified archives. Currently, C<perlio> is the preferred method, if 1910b39c5158Smillertavailable. 1911b39c5158Smillert 1912b39c5158SmillertSee the C<GLOBAL VARIABLES> section to see how to change this preference. 1913b39c5158Smillert 1914b39c5158Smillert=cut 1915b39c5158Smillert 1916b39c5158Smillertsub has_io_string { return $HAS_IO_STRING; } 1917b39c5158Smillert 1918b39c5158Smillert=head2 $bool = Archive::Tar->has_perlio 1919b39c5158Smillert 1920b39c5158SmillertReturns true if we currently have C<perlio> support loaded. 1921b39c5158Smillert 1922b39c5158SmillertThis requires C<perl-5.8> or higher, compiled with C<perlio> 1923b39c5158Smillert 1924b39c5158SmillertEither C<IO::String> or C<perlio> support is needed to support writing 1925b39c5158Smillertstringified archives. Currently, C<perlio> is the preferred method, if 1926b39c5158Smillertavailable. 1927b39c5158Smillert 1928b39c5158SmillertSee the C<GLOBAL VARIABLES> section to see how to change this preference. 1929b39c5158Smillert 1930b39c5158Smillert=cut 1931b39c5158Smillert 1932b39c5158Smillertsub has_perlio { return $HAS_PERLIO; } 1933b39c5158Smillert 1934b39c5158Smillert=head2 $bool = Archive::Tar->has_zlib_support 1935b39c5158Smillert 1936b39c5158SmillertReturns true if C<Archive::Tar> can extract C<zlib> compressed archives 1937b39c5158Smillert 1938b39c5158Smillert=cut 1939b39c5158Smillert 1940b39c5158Smillertsub has_zlib_support { return ZLIB } 1941b39c5158Smillert 1942b39c5158Smillert=head2 $bool = Archive::Tar->has_bzip2_support 1943b39c5158Smillert 1944b39c5158SmillertReturns true if C<Archive::Tar> can extract C<bzip2> compressed archives 1945b39c5158Smillert 1946b39c5158Smillert=cut 1947b39c5158Smillert 1948b39c5158Smillertsub has_bzip2_support { return BZIP } 1949b39c5158Smillert 195056d68f1eSafresh1=head2 $bool = Archive::Tar->has_xz_support 195156d68f1eSafresh1 195256d68f1eSafresh1Returns true if C<Archive::Tar> can extract C<xz> compressed archives 195356d68f1eSafresh1 195456d68f1eSafresh1=cut 195556d68f1eSafresh1 195656d68f1eSafresh1sub has_xz_support { return XZ } 195756d68f1eSafresh1 1958b39c5158Smillert=head2 Archive::Tar->can_handle_compressed_files 1959b39c5158Smillert 1960b39c5158SmillertA simple checking routine, which will return true if C<Archive::Tar> 196156d68f1eSafresh1is able to uncompress compressed archives on the fly with C<IO::Zlib>, 196256d68f1eSafresh1C<IO::Compress::Bzip2> and C<IO::Compress::Xz> or false if not both are installed. 1963b39c5158Smillert 1964b39c5158SmillertYou can use this as a shortcut to determine whether C<Archive::Tar> 1965b39c5158Smillertwill do what you think before passing compressed archives to its 1966b39c5158SmillertC<read> method. 1967b39c5158Smillert 1968b39c5158Smillert=cut 1969b39c5158Smillert 1970b39c5158Smillertsub can_handle_compressed_files { return ZLIB && BZIP ? 1 : 0 } 1971b39c5158Smillert 1972b39c5158Smillertsub no_string_support { 1973b39c5158Smillert croak("You have to install IO::String to support writing archives to strings"); 1974b39c5158Smillert} 1975b39c5158Smillert 19766fb12b70Safresh1sub _symlinks_resolver{ 19776fb12b70Safresh1 my ($src, $trg) = @_; 19786fb12b70Safresh1 my @src = split /[\/\\]/, $src; 19796fb12b70Safresh1 my @trg = split /[\/\\]/, $trg; 19806fb12b70Safresh1 pop @src; #strip out current object name 19816fb12b70Safresh1 if(@trg and $trg[0] eq ''){ 19826fb12b70Safresh1 shift @trg; 19836fb12b70Safresh1 #restart path from scratch 19846fb12b70Safresh1 @src = ( ); 19856fb12b70Safresh1 } 19866fb12b70Safresh1 foreach my $part ( @trg ){ 19876fb12b70Safresh1 next if $part eq '.'; #ignore current 19886fb12b70Safresh1 if($part eq '..'){ 19896fb12b70Safresh1 #got to parent 19906fb12b70Safresh1 pop @src; 19916fb12b70Safresh1 } 19926fb12b70Safresh1 else{ 19936fb12b70Safresh1 #append it 19946fb12b70Safresh1 push @src, $part; 19956fb12b70Safresh1 } 19966fb12b70Safresh1 } 19976fb12b70Safresh1 my $path = join('/', @src); 19986fb12b70Safresh1 warn "_symlinks_resolver('$src','$trg') = $path" if $DEBUG; 19996fb12b70Safresh1 return $path; 20006fb12b70Safresh1} 20016fb12b70Safresh1 2002b39c5158Smillert1; 2003b39c5158Smillert 2004b39c5158Smillert__END__ 2005b39c5158Smillert 2006b39c5158Smillert=head1 GLOBAL VARIABLES 2007b39c5158Smillert 2008b39c5158Smillert=head2 $Archive::Tar::FOLLOW_SYMLINK 2009b39c5158Smillert 2010b39c5158SmillertSet this variable to C<1> to make C<Archive::Tar> effectively make a 2011b39c5158Smillertcopy of the file when extracting. Default is C<0>, which 2012b39c5158Smillertmeans the symlink stays intact. Of course, you will have to pack the 2013b39c5158Smillertfile linked to as well. 2014b39c5158Smillert 2015b39c5158SmillertThis option is checked when you write out the tarfile using C<write> 2016b39c5158Smillertor C<create_archive>. 2017b39c5158Smillert 2018b39c5158SmillertThis works just like C</bin/tar>'s C<-h> option. 2019b39c5158Smillert 2020b39c5158Smillert=head2 $Archive::Tar::CHOWN 2021b39c5158Smillert 2022b39c5158SmillertBy default, C<Archive::Tar> will try to C<chown> your files if it is 2023b39c5158Smillertable to. In some cases, this may not be desired. In that case, set 2024b39c5158Smillertthis variable to C<0> to disable C<chown>-ing, even if it were 2025b39c5158Smillertpossible. 2026b39c5158Smillert 2027b39c5158SmillertThe default is C<1>. 2028b39c5158Smillert 2029b39c5158Smillert=head2 $Archive::Tar::CHMOD 2030b39c5158Smillert 2031b39c5158SmillertBy default, C<Archive::Tar> will try to C<chmod> your files to 2032b39c5158Smillertwhatever mode was specified for the particular file in the archive. 2033b39c5158SmillertIn some cases, this may not be desired. In that case, set this 2034b39c5158Smillertvariable to C<0> to disable C<chmod>-ing. 2035b39c5158Smillert 2036b39c5158SmillertThe default is C<1>. 2037b39c5158Smillert 2038b39c5158Smillert=head2 $Archive::Tar::SAME_PERMISSIONS 2039b39c5158Smillert 2040b39c5158SmillertWhen, C<$Archive::Tar::CHMOD> is enabled, this setting controls whether 2041b39c5158Smillertthe permissions on files from the archive are used without modification 2042b39c5158Smillertof if they are filtered by removing any setid bits and applying the 2043b39c5158Smillertcurrent umask. 2044b39c5158Smillert 2045b39c5158SmillertThe default is C<1> for the root user and C<0> for normal users. 2046b39c5158Smillert 2047b39c5158Smillert=head2 $Archive::Tar::DO_NOT_USE_PREFIX 2048b39c5158Smillert 2049b39c5158SmillertBy default, C<Archive::Tar> will try to put paths that are over 2050b39c5158Smillert100 characters in the C<prefix> field of your tar header, as 2051b39c5158Smillertdefined per POSIX-standard. However, some (older) tar programs 2052b39c5158Smillertdo not implement this spec. To retain compatibility with these older 2053b39c5158Smillertor non-POSIX compliant versions, you can set the C<$DO_NOT_USE_PREFIX> 2054b39c5158Smillertvariable to a true value, and C<Archive::Tar> will use an alternate 2055b39c5158Smillertway of dealing with paths over 100 characters by using the 2056b39c5158SmillertC<GNU Extended Header> feature. 2057b39c5158Smillert 2058b39c5158SmillertNote that clients who do not support the C<GNU Extended Header> 2059b39c5158Smillertfeature will not be able to read these archives. Such clients include 2060b39c5158Smillerttars on C<Solaris>, C<Irix> and C<AIX>. 2061b39c5158Smillert 2062b39c5158SmillertThe default is C<0>. 2063b39c5158Smillert 2064b39c5158Smillert=head2 $Archive::Tar::DEBUG 2065b39c5158Smillert 2066b39c5158SmillertSet this variable to C<1> to always get the C<Carp::longmess> output 2067b39c5158Smillertof the warnings, instead of the regular C<carp>. This is the same 2068b39c5158Smillertmessage you would get by doing: 2069b39c5158Smillert 2070b39c5158Smillert $tar->error(1); 2071b39c5158Smillert 2072b39c5158SmillertDefaults to C<0>. 2073b39c5158Smillert 2074b39c5158Smillert=head2 $Archive::Tar::WARN 2075b39c5158Smillert 2076b39c5158SmillertSet this variable to C<0> if you do not want any warnings printed. 2077b39c5158SmillertPersonally I recommend against doing this, but people asked for the 2078b39c5158Smillertoption. Also, be advised that this is of course not threadsafe. 2079b39c5158Smillert 2080b39c5158SmillertDefaults to C<1>. 2081b39c5158Smillert 2082b39c5158Smillert=head2 $Archive::Tar::error 2083b39c5158Smillert 2084b39c5158SmillertHolds the last reported error. Kept for historical reasons, but its 2085b39c5158Smillertuse is very much discouraged. Use the C<error()> method instead: 2086b39c5158Smillert 2087b39c5158Smillert warn $tar->error unless $tar->extract; 2088b39c5158Smillert 2089b39c5158SmillertNote that in older versions of this module, the C<error()> method 2090b39c5158Smillertwould return an effectively global value even when called an instance 2091b39c5158Smillertmethod as above. This has since been fixed, and multiple instances of 2092b39c5158SmillertC<Archive::Tar> now have separate error strings. 2093b39c5158Smillert 2094b39c5158Smillert=head2 $Archive::Tar::INSECURE_EXTRACT_MODE 2095b39c5158Smillert 2096b39c5158SmillertThis variable indicates whether C<Archive::Tar> should allow 2097b39c5158Smillertfiles to be extracted outside their current working directory. 2098b39c5158Smillert 2099b39c5158SmillertAllowing this could have security implications, as a malicious 2100b39c5158Smillerttar archive could alter or replace any file the extracting user 2101b39c5158Smillerthas permissions to. Therefor, the default is to not allow 2102b39c5158Smillertinsecure extractions. 2103b39c5158Smillert 2104b39c5158SmillertIf you trust the archive, or have other reasons to allow the 2105b39c5158Smillertarchive to write files outside your current working directory, 2106b39c5158Smillertset this variable to C<true>. 2107b39c5158Smillert 2108b39c5158SmillertNote that this is a backwards incompatible change from version 2109b39c5158SmillertC<1.36> and before. 2110b39c5158Smillert 2111b39c5158Smillert=head2 $Archive::Tar::HAS_PERLIO 2112b39c5158Smillert 2113b39c5158SmillertThis variable holds a boolean indicating if we currently have 2114b39c5158SmillertC<perlio> support loaded. This will be enabled for any perl 2115b39c5158Smillertgreater than C<5.8> compiled with C<perlio>. 2116b39c5158Smillert 2117b39c5158SmillertIf you feel strongly about disabling it, set this variable to 2118b39c5158SmillertC<false>. Note that you will then need C<IO::String> installed 2119b39c5158Smillertto support writing stringified archives. 2120b39c5158Smillert 2121b39c5158SmillertDon't change this variable unless you B<really> know what you're 2122b39c5158Smillertdoing. 2123b39c5158Smillert 2124b39c5158Smillert=head2 $Archive::Tar::HAS_IO_STRING 2125b39c5158Smillert 2126b39c5158SmillertThis variable holds a boolean indicating if we currently have 2127b39c5158SmillertC<IO::String> support loaded. This will be enabled for any perl 2128b39c5158Smillertthat has a loadable C<IO::String> module. 2129b39c5158Smillert 2130b39c5158SmillertIf you feel strongly about disabling it, set this variable to 2131b39c5158SmillertC<false>. Note that you will then need C<perlio> support from 2132b39c5158Smillertyour perl to be able to write stringified archives. 2133b39c5158Smillert 2134b39c5158SmillertDon't change this variable unless you B<really> know what you're 2135b39c5158Smillertdoing. 2136b39c5158Smillert 2137898184e3Ssthen=head2 $Archive::Tar::ZERO_PAD_NUMBERS 2138898184e3Ssthen 2139898184e3SsthenThis variable holds a boolean indicating if we will create 2140898184e3Ssthenzero padded numbers for C<size>, C<mtime> and C<checksum>. 2141898184e3SsthenThe default is C<0>, indicating that we will create space padded 2142898184e3Ssthennumbers. Added for compatibility with C<busybox> implementations. 2143898184e3Ssthen 21446fb12b70Safresh1=head2 Tuning the way RESOLVE_SYMLINK will works 21456fb12b70Safresh1 21466fb12b70Safresh1 You can tune the behaviour by setting the $Archive::Tar::RESOLVE_SYMLINK variable, 21476fb12b70Safresh1 or $ENV{PERL5_AT_RESOLVE_SYMLINK} before loading the module Archive::Tar. 21486fb12b70Safresh1 21496fb12b70Safresh1 Values can be one of the following: 21506fb12b70Safresh1 21516fb12b70Safresh1 none 21526fb12b70Safresh1 Disable this mechanism and failed as it was in previous version (<1.88) 21536fb12b70Safresh1 21546fb12b70Safresh1 speed (default) 21556fb12b70Safresh1 If you prefer speed 21566fb12b70Safresh1 this will read again the whole archive using read() so all entries 21576fb12b70Safresh1 will be available 21586fb12b70Safresh1 21596fb12b70Safresh1 memory 21606fb12b70Safresh1 If you prefer memory 21616fb12b70Safresh1 21626fb12b70Safresh1 Limitation 21636fb12b70Safresh1 21646fb12b70Safresh1 It won't work for terminal, pipe or sockets or every non seekable source. 21656fb12b70Safresh1 21666fb12b70Safresh1=cut 21676fb12b70Safresh1 2168b39c5158Smillert=head1 FAQ 2169b39c5158Smillert 2170b39c5158Smillert=over 4 2171b39c5158Smillert 2172b39c5158Smillert=item What's the minimum perl version required to run Archive::Tar? 2173b39c5158Smillert 2174b39c5158SmillertYou will need perl version 5.005_03 or newer. 2175b39c5158Smillert 2176b39c5158Smillert=item Isn't Archive::Tar slow? 2177b39c5158Smillert 2178b39c5158SmillertYes it is. It's pure perl, so it's a lot slower then your C</bin/tar> 2179b39c5158SmillertHowever, it's very portable. If speed is an issue, consider using 2180b39c5158SmillertC</bin/tar> instead. 2181b39c5158Smillert 2182b39c5158Smillert=item Isn't Archive::Tar heavier on memory than /bin/tar? 2183b39c5158Smillert 2184b39c5158SmillertYes it is, see previous answer. Since C<Compress::Zlib> and therefore 2185b39c5158SmillertC<IO::Zlib> doesn't support C<seek> on their filehandles, there is little 2186b39c5158Smillertchoice but to read the archive into memory. 2187b39c5158SmillertThis is ok if you want to do in-memory manipulation of the archive. 2188b39c5158Smillert 2189b39c5158SmillertIf you just want to extract, use the C<extract_archive> class method 2190b39c5158Smillertinstead. It will optimize and write to disk immediately. 2191b39c5158Smillert 2192b39c5158SmillertAnother option is to use the C<iter> class method to iterate over 2193b39c5158Smillertthe files in the tarball without reading them all in memory at once. 2194b39c5158Smillert 2195b39c5158Smillert=item Can you lazy-load data instead? 2196b39c5158Smillert 2197b39c5158SmillertIn some cases, yes. You can use the C<iter> class method to iterate 2198b39c5158Smillertover the files in the tarball without reading them all in memory at once. 2199b39c5158Smillert 2200b39c5158Smillert=item How much memory will an X kb tar file need? 2201b39c5158Smillert 2202b39c5158SmillertProbably more than X kb, since it will all be read into memory. If 2203b39c5158Smillertthis is a problem, and you don't need to do in memory manipulation 2204b39c5158Smillertof the archive, consider using the C<iter> class method, or C</bin/tar> 2205b39c5158Smillertinstead. 2206b39c5158Smillert 2207b39c5158Smillert=item What do you do with unsupported filetypes in an archive? 2208b39c5158Smillert 2209b39c5158SmillertC<Unix> has a few filetypes that aren't supported on other platforms, 2210b39c5158Smillertlike C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just 2211b39c5158Smillerttry to make a copy of the original file, rather than throwing an error. 2212b39c5158Smillert 2213b39c5158SmillertThis does require you to read the entire archive in to memory first, 2214b39c5158Smillertsince otherwise we wouldn't know what data to fill the copy with. 2215b39c5158Smillert(This means that you cannot use the class methods, including C<iter> 2216b39c5158Smillerton archives that have incompatible filetypes and still expect things 2217b39c5158Smillertto work). 2218b39c5158Smillert 2219b39c5158SmillertFor other filetypes, like C<chardevs> and C<blockdevs> we'll warn that 2220b39c5158Smillertthe extraction of this particular item didn't work. 2221b39c5158Smillert 2222b39c5158Smillert=item I'm using WinZip, or some other non-POSIX client, and files are not being extracted properly! 2223b39c5158Smillert 2224b39c5158SmillertBy default, C<Archive::Tar> is in a completely POSIX-compatible 2225b39c5158Smillertmode, which uses the POSIX-specification of C<tar> to store files. 2226898184e3SsthenFor paths greater than 100 characters, this is done using the 2227b39c5158SmillertC<POSIX header prefix>. Non-POSIX-compatible clients may not support 2228b39c5158Smillertthis part of the specification, and may only support the C<GNU Extended 2229b39c5158SmillertHeader> functionality. To facilitate those clients, you can set the 2230b39c5158SmillertC<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. See the 2231b39c5158SmillertC<GLOBAL VARIABLES> section for details on this variable. 2232b39c5158Smillert 2233b39c5158SmillertNote that GNU tar earlier than version 1.14 does not cope well with 2234b39c5158Smillertthe C<POSIX header prefix>. If you use such a version, consider setting 2235b39c5158Smillertthe C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. 2236b39c5158Smillert 2237b39c5158Smillert=item How do I extract only files that have property X from an archive? 2238b39c5158Smillert 2239b39c5158SmillertSometimes, you might not wish to extract a complete archive, just 2240b39c5158Smillertthe files that are relevant to you, based on some criteria. 2241b39c5158Smillert 2242b39c5158SmillertYou can do this by filtering a list of C<Archive::Tar::File> objects 2243b39c5158Smillertbased on your criteria. For example, to extract only files that have 2244b39c5158Smillertthe string C<foo> in their title, you would use: 2245b39c5158Smillert 2246b39c5158Smillert $tar->extract( 2247b39c5158Smillert grep { $_->full_path =~ /foo/ } $tar->get_files 2248b39c5158Smillert ); 2249b39c5158Smillert 2250b39c5158SmillertThis way, you can filter on any attribute of the files in the archive. 2251b39c5158SmillertConsult the C<Archive::Tar::File> documentation on how to use these 2252b39c5158Smillertobjects. 2253b39c5158Smillert 2254b39c5158Smillert=item How do I access .tar.Z files? 2255b39c5158Smillert 2256b39c5158SmillertThe C<Archive::Tar> module can optionally use C<Compress::Zlib> (via 2257b39c5158Smillertthe C<IO::Zlib> module) to access tar files that have been compressed 2258b39c5158Smillertwith C<gzip>. Unfortunately tar files compressed with the Unix C<compress> 2259b39c5158Smillertutility cannot be read by C<Compress::Zlib> and so cannot be directly 2260b39c5158Smillertaccesses by C<Archive::Tar>. 2261b39c5158Smillert 2262b39c5158SmillertIf the C<uncompress> or C<gunzip> programs are available, you can use 2263b39c5158Smillertone of these workarounds to read C<.tar.Z> files from C<Archive::Tar> 2264b39c5158Smillert 2265b39c5158SmillertFirstly with C<uncompress> 2266b39c5158Smillert 2267b39c5158Smillert use Archive::Tar; 2268b39c5158Smillert 2269b39c5158Smillert open F, "uncompress -c $filename |"; 2270b39c5158Smillert my $tar = Archive::Tar->new(*F); 2271b39c5158Smillert ... 2272b39c5158Smillert 2273b39c5158Smillertand this with C<gunzip> 2274b39c5158Smillert 2275b39c5158Smillert use Archive::Tar; 2276b39c5158Smillert 2277b39c5158Smillert open F, "gunzip -c $filename |"; 2278b39c5158Smillert my $tar = Archive::Tar->new(*F); 2279b39c5158Smillert ... 2280b39c5158Smillert 2281b39c5158SmillertSimilarly, if the C<compress> program is available, you can use this to 2282b39c5158Smillertwrite a C<.tar.Z> file 2283b39c5158Smillert 2284b39c5158Smillert use Archive::Tar; 2285b39c5158Smillert use IO::File; 2286b39c5158Smillert 2287*eac174f2Safresh1 my $fh = IO::File->new( "| compress -c >$filename" ); 2288b39c5158Smillert my $tar = Archive::Tar->new(); 2289b39c5158Smillert ... 2290b39c5158Smillert $tar->write($fh); 2291b39c5158Smillert $fh->close ; 2292b39c5158Smillert 2293b39c5158Smillert=item How do I handle Unicode strings? 2294b39c5158Smillert 2295b39c5158SmillertC<Archive::Tar> uses byte semantics for any files it reads from or writes 2296b39c5158Smillertto disk. This is not a problem if you only deal with files and never 2297b39c5158Smillertlook at their content or work solely with byte strings. But if you use 2298b39c5158SmillertUnicode strings with character semantics, some additional steps need 2299b39c5158Smillertto be taken. 2300b39c5158Smillert 2301b39c5158SmillertFor example, if you add a Unicode string like 2302b39c5158Smillert 2303b39c5158Smillert # Problem 2304b39c5158Smillert $tar->add_data('file.txt', "Euro: \x{20AC}"); 2305b39c5158Smillert 2306b39c5158Smillertthen there will be a problem later when the tarfile gets written out 23079f11ffb7Safresh1to disk via C<< $tar->write() >>: 2308b39c5158Smillert 2309b39c5158Smillert Wide character in print at .../Archive/Tar.pm line 1014. 2310b39c5158Smillert 2311b39c5158SmillertThe data was added as a Unicode string and when writing it out to disk, 2312b39c5158Smillertthe C<:utf8> line discipline wasn't set by C<Archive::Tar>, so Perl 2313b39c5158Smillerttried to convert the string to ISO-8859 and failed. The written file 2314b39c5158Smillertnow contains garbage. 2315b39c5158Smillert 2316b39c5158SmillertFor this reason, Unicode strings need to be converted to UTF-8-encoded 2317b39c5158Smillertbytestrings before they are handed off to C<add_data()>: 2318b39c5158Smillert 2319b39c5158Smillert use Encode; 2320b39c5158Smillert my $data = "Accented character: \x{20AC}"; 2321b39c5158Smillert $data = encode('utf8', $data); 2322b39c5158Smillert 2323b39c5158Smillert $tar->add_data('file.txt', $data); 2324b39c5158Smillert 2325b39c5158SmillertA opposite problem occurs if you extract a UTF8-encoded file from a 2326b39c5158Smillerttarball. Using C<get_content()> on the C<Archive::Tar::File> object 2327b39c5158Smillertwill return its content as a bytestring, not as a Unicode string. 2328b39c5158Smillert 2329b39c5158SmillertIf you want it to be a Unicode string (because you want character 2330b39c5158Smillertsemantics with operations like regular expression matching), you need 2331b39c5158Smillertto decode the UTF8-encoded content and have Perl convert it into 2332b39c5158Smillerta Unicode string: 2333b39c5158Smillert 2334b39c5158Smillert use Encode; 2335b39c5158Smillert my $data = $tar->get_content(); 2336b39c5158Smillert 2337b39c5158Smillert # Make it a Unicode string 2338b39c5158Smillert $data = decode('utf8', $data); 2339b39c5158Smillert 2340b39c5158SmillertThere is no easy way to provide this functionality in C<Archive::Tar>, 2341b39c5158Smillertbecause a tarball can contain many files, and each of which could be 2342b39c5158Smillertencoded in a different way. 2343b39c5158Smillert 2344b39c5158Smillert=back 2345b39c5158Smillert 2346b39c5158Smillert=head1 CAVEATS 2347b39c5158Smillert 2348b39c5158SmillertThe AIX tar does not fill all unused space in the tar archive with 0x00. 2349b39c5158SmillertThis sometimes leads to warning messages from C<Archive::Tar>. 2350b39c5158Smillert 2351b39c5158Smillert Invalid header block at offset nnn 2352b39c5158Smillert 2353b39c5158SmillertA fix for that problem is scheduled to be released in the following levels 2354b39c5158Smillertof AIX, all of which should be coming out in the 4th quarter of 2009: 2355b39c5158Smillert 2356b39c5158Smillert AIX 5.3 TL7 SP10 2357b39c5158Smillert AIX 5.3 TL8 SP8 2358b39c5158Smillert AIX 5.3 TL9 SP5 2359b39c5158Smillert AIX 5.3 TL10 SP2 2360b39c5158Smillert 2361b39c5158Smillert AIX 6.1 TL0 SP11 2362b39c5158Smillert AIX 6.1 TL1 SP7 2363b39c5158Smillert AIX 6.1 TL2 SP6 2364b39c5158Smillert AIX 6.1 TL3 SP3 2365b39c5158Smillert 2366b39c5158SmillertThe IBM APAR number for this problem is IZ50240 (Reported component ID: 2367b39c5158Smillert5765G0300 / AIX 5.3). It is possible to get an ifix for that problem. 2368b39c5158SmillertIf you need an ifix please contact your local IBM AIX support. 2369b39c5158Smillert 2370b39c5158Smillert=head1 TODO 2371b39c5158Smillert 2372b39c5158Smillert=over 4 2373b39c5158Smillert 2374b39c5158Smillert=item Check if passed in handles are open for read/write 2375b39c5158Smillert 2376b39c5158SmillertCurrently I don't know of any portable pure perl way to do this. 2377b39c5158SmillertSuggestions welcome. 2378b39c5158Smillert 2379b39c5158Smillert=item Allow archives to be passed in as string 2380b39c5158Smillert 2381b39c5158SmillertCurrently, we only allow opened filehandles or filenames, but 2382b39c5158Smillertnot strings. The internals would need some reworking to facilitate 2383b39c5158Smillertstringified archives. 2384b39c5158Smillert 2385b39c5158Smillert=item Facilitate processing an opened filehandle of a compressed archive 2386b39c5158Smillert 2387b39c5158SmillertCurrently, we only support this if the filehandle is an IO::Zlib object. 2388b39c5158SmillertEnvironments, like apache, will present you with an opened filehandle 2389b39c5158Smillertto an uploaded file, which might be a compressed archive. 2390b39c5158Smillert 2391b39c5158Smillert=back 2392b39c5158Smillert 2393b39c5158Smillert=head1 SEE ALSO 2394b39c5158Smillert 2395b39c5158Smillert=over 4 2396b39c5158Smillert 2397b39c5158Smillert=item The GNU tar specification 2398b39c5158Smillert 2399b39c5158SmillertC<http://www.gnu.org/software/tar/manual/tar.html> 2400b39c5158Smillert 2401898184e3Ssthen=item The PAX format specification 2402b39c5158Smillert 2403898184e3SsthenThe specification which tar derives from; C< http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html> 2404b39c5158Smillert 2405b39c5158Smillert=item A comparison of GNU and POSIX tar standards; C<http://www.delorie.com/gnu/docs/tar/tar_114.html> 2406b39c5158Smillert 2407b39c5158Smillert=item GNU tar intends to switch to POSIX compatibility 2408b39c5158Smillert 2409b39c5158SmillertGNU Tar authors have expressed their intention to become completely 2410b39c5158SmillertPOSIX-compatible; C<http://www.gnu.org/software/tar/manual/html_node/Formats.html> 2411b39c5158Smillert 2412b39c5158Smillert=item A Comparison between various tar implementations 2413b39c5158Smillert 2414b39c5158SmillertLists known issues and incompatibilities; C<http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs> 2415b39c5158Smillert 2416b39c5158Smillert=back 2417b39c5158Smillert 2418b39c5158Smillert=head1 AUTHOR 2419b39c5158Smillert 2420b39c5158SmillertThis module by Jos Boumans E<lt>kane@cpan.orgE<gt>. 2421b39c5158Smillert 2422b39c5158SmillertPlease reports bugs to E<lt>bug-archive-tar@rt.cpan.orgE<gt>. 2423b39c5158Smillert 2424b39c5158Smillert=head1 ACKNOWLEDGEMENTS 2425b39c5158Smillert 2426b39c5158SmillertThanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney, Gisle Aas, 2427b39c5158SmillertRainer Tammer and especially Andrew Savige for their help and suggestions. 2428b39c5158Smillert 2429b39c5158Smillert=head1 COPYRIGHT 2430b39c5158Smillert 2431b39c5158SmillertThis module is copyright (c) 2002 - 2009 Jos Boumans 2432b39c5158SmillertE<lt>kane@cpan.orgE<gt>. All rights reserved. 2433b39c5158Smillert 2434b39c5158SmillertThis library is free software; you may redistribute and/or modify 2435b39c5158Smillertit under the same terms as Perl itself. 2436b39c5158Smillert 2437b39c5158Smillert=cut 2438