1b39c5158Smillert### the gnu tar specification: 2b39c5158Smillert### http://www.gnu.org/software/tar/manual/tar.html 3b39c5158Smillert### 4b39c5158Smillert### and the pax format spec, which tar derives from: 5b39c5158Smillert### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html 6b39c5158Smillert 7b39c5158Smillertpackage Archive::Tar; 8b39c5158Smillertrequire 5.005_03; 9b39c5158Smillert 10b39c5158Smillertuse Cwd; 11b39c5158Smillertuse IO::Zlib; 12b39c5158Smillertuse IO::File; 13b39c5158Smillertuse Carp qw(carp croak); 14b39c5158Smillertuse File::Spec (); 15b39c5158Smillertuse File::Spec::Unix (); 16b39c5158Smillertuse File::Path (); 17b39c5158Smillert 18b39c5158Smillertuse Archive::Tar::File; 19b39c5158Smillertuse Archive::Tar::Constant; 20b39c5158Smillert 21b39c5158Smillertrequire Exporter; 22b39c5158Smillert 23b39c5158Smillertuse strict; 24b39c5158Smillertuse vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD 25b39c5158Smillert $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING $SAME_PERMISSIONS 266fb12b70Safresh1 $INSECURE_EXTRACT_MODE $ZERO_PAD_NUMBERS @ISA @EXPORT $RESOLVE_SYMLINK 27b39c5158Smillert ]; 28b39c5158Smillert 29b39c5158Smillert@ISA = qw[Exporter]; 30b39c5158Smillert@EXPORT = qw[ COMPRESS_GZIP COMPRESS_BZIP ]; 31b39c5158Smillert$DEBUG = 0; 32b39c5158Smillert$WARN = 1; 33b39c5158Smillert$FOLLOW_SYMLINK = 0; 34*9f11ffb7Safresh1$VERSION = "2.30"; 35b39c5158Smillert$CHOWN = 1; 36b39c5158Smillert$CHMOD = 1; 37b39c5158Smillert$SAME_PERMISSIONS = $> == 0 ? 1 : 0; 38b39c5158Smillert$DO_NOT_USE_PREFIX = 0; 39b39c5158Smillert$INSECURE_EXTRACT_MODE = 0; 40898184e3Ssthen$ZERO_PAD_NUMBERS = 0; 416fb12b70Safresh1$RESOLVE_SYMLINK = $ENV{'PERL5_AT_RESOLVE_SYMLINK'} || 'speed'; 42b39c5158Smillert 43b39c5158SmillertBEGIN { 44b39c5158Smillert use Config; 45b39c5158Smillert $HAS_PERLIO = $Config::Config{useperlio}; 46b39c5158Smillert 47b39c5158Smillert ### try and load IO::String anyway, so you can dynamically 48b39c5158Smillert ### switch between perlio and IO::String 49b39c5158Smillert $HAS_IO_STRING = eval { 50b39c5158Smillert require IO::String; 51b39c5158Smillert import IO::String; 52b39c5158Smillert 1; 53b39c5158Smillert } || 0; 54b39c5158Smillert} 55b39c5158Smillert 56b39c5158Smillert=head1 NAME 57b39c5158Smillert 58b39c5158SmillertArchive::Tar - module for manipulations of tar archives 59b39c5158Smillert 60b39c5158Smillert=head1 SYNOPSIS 61b39c5158Smillert 62b39c5158Smillert use Archive::Tar; 63b39c5158Smillert my $tar = Archive::Tar->new; 64b39c5158Smillert 65b39c5158Smillert $tar->read('origin.tgz'); 66b39c5158Smillert $tar->extract(); 67b39c5158Smillert 68b39c5158Smillert $tar->add_files('file/foo.pl', 'docs/README'); 69b39c5158Smillert $tar->add_data('file/baz.txt', 'This is the contents now'); 70b39c5158Smillert 71b39c5158Smillert $tar->rename('oldname', 'new/file/name'); 72898184e3Ssthen $tar->chown('/', 'root'); 73898184e3Ssthen $tar->chown('/', 'root:root'); 74898184e3Ssthen $tar->chmod('/tmp', '1777'); 75b39c5158Smillert 76b39c5158Smillert $tar->write('files.tar'); # plain tar 77b39c5158Smillert $tar->write('files.tgz', COMPRESS_GZIP); # gzip compressed 78b39c5158Smillert $tar->write('files.tbz', COMPRESS_BZIP); # bzip2 compressed 79b39c5158Smillert 80b39c5158Smillert=head1 DESCRIPTION 81b39c5158Smillert 82b39c5158SmillertArchive::Tar provides an object oriented mechanism for handling tar 83b39c5158Smillertfiles. It provides class methods for quick and easy files handling 84b39c5158Smillertwhile also allowing for the creation of tar file objects for custom 85b39c5158Smillertmanipulation. If you have the IO::Zlib module installed, 86b39c5158SmillertArchive::Tar will also support compressed or gzipped tar files. 87b39c5158Smillert 88b39c5158SmillertAn object of class Archive::Tar represents a .tar(.gz) archive full 89b39c5158Smillertof files and things. 90b39c5158Smillert 91b39c5158Smillert=head1 Object Methods 92b39c5158Smillert 93b39c5158Smillert=head2 Archive::Tar->new( [$file, $compressed] ) 94b39c5158Smillert 95b39c5158SmillertReturns a new Tar object. If given any arguments, C<new()> calls the 96b39c5158SmillertC<read()> method automatically, passing on the arguments provided to 97b39c5158Smillertthe C<read()> method. 98b39c5158Smillert 99b39c5158SmillertIf C<new()> is invoked with arguments and the C<read()> method fails 100b39c5158Smillertfor any reason, C<new()> returns undef. 101b39c5158Smillert 102b39c5158Smillert=cut 103b39c5158Smillert 104b39c5158Smillertmy $tmpl = { 105b39c5158Smillert _data => [ ], 106b39c5158Smillert _file => 'Unknown', 107b39c5158Smillert}; 108b39c5158Smillert 109b39c5158Smillert### install get/set accessors for this object. 110b39c5158Smillertfor my $key ( keys %$tmpl ) { 111b39c5158Smillert no strict 'refs'; 112b39c5158Smillert *{__PACKAGE__."::$key"} = sub { 113b39c5158Smillert my $self = shift; 114b39c5158Smillert $self->{$key} = $_[0] if @_; 115b39c5158Smillert return $self->{$key}; 116b39c5158Smillert } 117b39c5158Smillert} 118b39c5158Smillert 119b39c5158Smillertsub new { 120b39c5158Smillert my $class = shift; 121b39c5158Smillert $class = ref $class if ref $class; 122b39c5158Smillert 123b39c5158Smillert ### copying $tmpl here since a shallow copy makes it use the 124b39c5158Smillert ### same aref, causing for files to remain in memory always. 125b39c5158Smillert my $obj = bless { _data => [ ], _file => 'Unknown', _error => '' }, $class; 126b39c5158Smillert 127b39c5158Smillert if (@_) { 128b39c5158Smillert unless ( $obj->read( @_ ) ) { 129b39c5158Smillert $obj->_error(qq[No data could be read from file]); 130b39c5158Smillert return; 131b39c5158Smillert } 132b39c5158Smillert } 133b39c5158Smillert 134b39c5158Smillert return $obj; 135b39c5158Smillert} 136b39c5158Smillert 137b39c5158Smillert=head2 $tar->read ( $filename|$handle, [$compressed, {opt => 'val'}] ) 138b39c5158Smillert 139b39c5158SmillertRead the given tar file into memory. 140b39c5158SmillertThe first argument can either be the name of a file or a reference to 141b39c5158Smillertan already open filehandle (or an IO::Zlib object if it's compressed) 142b39c5158Smillert 143b39c5158SmillertThe C<read> will I<replace> any previous content in C<$tar>! 144b39c5158Smillert 145b39c5158SmillertThe second argument may be considered optional, but remains for 146b39c5158Smillertbackwards compatibility. Archive::Tar now looks at the file 147b39c5158Smillertmagic to determine what class should be used to open the file 148b39c5158Smillertand will transparently Do The Right Thing. 149b39c5158Smillert 150b39c5158SmillertArchive::Tar will warn if you try to pass a bzip2 compressed file and the 151b39c5158SmillertIO::Zlib / IO::Uncompress::Bunzip2 modules are not available and simply return. 152b39c5158Smillert 153b39c5158SmillertNote that you can currently B<not> pass a C<gzip> compressed 154b39c5158Smillertfilehandle, which is not opened with C<IO::Zlib>, a C<bzip2> compressed 155b39c5158Smillertfilehandle, which is not opened with C<IO::Uncompress::Bunzip2>, nor a string 156b39c5158Smillertcontaining the full archive information (either compressed or 157b39c5158Smillertuncompressed). These are worth while features, but not currently 158b39c5158Smillertimplemented. See the C<TODO> section. 159b39c5158Smillert 160b39c5158SmillertThe third argument can be a hash reference with options. Note that 161b39c5158Smillertall options are case-sensitive. 162b39c5158Smillert 163b39c5158Smillert=over 4 164b39c5158Smillert 165b39c5158Smillert=item limit 166b39c5158Smillert 167b39c5158SmillertDo not read more than C<limit> files. This is useful if you have 168b39c5158Smillertvery big archives, and are only interested in the first few files. 169b39c5158Smillert 170b39c5158Smillert=item filter 171b39c5158Smillert 172b39c5158SmillertCan be set to a regular expression. Only files with names that match 173b39c5158Smillertthe expression will be read. 174b39c5158Smillert 175898184e3Ssthen=item md5 176898184e3Ssthen 177898184e3SsthenSet to 1 and the md5sum of files will be returned (instead of file data) 178898184e3Ssthen my $iter = Archive::Tar->iter( $file, 1, {md5 => 1} ); 179898184e3Ssthen while( my $f = $iter->() ) { 180898184e3Ssthen print $f->data . "\t" . $f->full_path . $/; 181898184e3Ssthen } 182898184e3Ssthen 183b39c5158Smillert=item extract 184b39c5158Smillert 185b39c5158SmillertIf set to true, immediately extract entries when reading them. This 186b39c5158Smillertgives you the same memory break as the C<extract_archive> function. 187b39c5158SmillertNote however that entries will not be read into memory, but written 188b39c5158Smillertstraight to disk. This means no C<Archive::Tar::File> objects are 189b39c5158Smillertcreated for you to inspect. 190b39c5158Smillert 191b39c5158Smillert=back 192b39c5158Smillert 193b39c5158SmillertAll files are stored internally as C<Archive::Tar::File> objects. 194b39c5158SmillertPlease consult the L<Archive::Tar::File> documentation for details. 195b39c5158Smillert 196b39c5158SmillertReturns the number of files read in scalar context, and a list of 197b39c5158SmillertC<Archive::Tar::File> objects in list context. 198b39c5158Smillert 199b39c5158Smillert=cut 200b39c5158Smillert 201b39c5158Smillertsub read { 202b39c5158Smillert my $self = shift; 203b39c5158Smillert my $file = shift; 204b39c5158Smillert my $gzip = shift || 0; 205b39c5158Smillert my $opts = shift || {}; 206b39c5158Smillert 207b39c5158Smillert unless( defined $file ) { 208b39c5158Smillert $self->_error( qq[No file to read from!] ); 209b39c5158Smillert return; 210b39c5158Smillert } else { 211b39c5158Smillert $self->_file( $file ); 212b39c5158Smillert } 213b39c5158Smillert 214b39c5158Smillert my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) ) 215b39c5158Smillert or return; 216b39c5158Smillert 217b39c5158Smillert my $data = $self->_read_tar( $handle, $opts ) or return; 218b39c5158Smillert 219b39c5158Smillert $self->_data( $data ); 220b39c5158Smillert 221b39c5158Smillert return wantarray ? @$data : scalar @$data; 222b39c5158Smillert} 223b39c5158Smillert 224b39c5158Smillertsub _get_handle { 225b39c5158Smillert my $self = shift; 226b39c5158Smillert my $file = shift; return unless defined $file; 227b39c5158Smillert my $compress = shift || 0; 228b39c5158Smillert my $mode = shift || READ_ONLY->( ZLIB ); # default to read only 229b39c5158Smillert 230898184e3Ssthen ### Check if file is a file handle or IO glob 231898184e3Ssthen if ( ref $file ) { 232898184e3Ssthen return $file if eval{ *$file{IO} }; 233898184e3Ssthen return $file if eval{ $file->isa(q{IO::Handle}) }; 234898184e3Ssthen $file = q{}.$file; 235898184e3Ssthen } 236b39c5158Smillert 237b39c5158Smillert ### get a FH opened to the right class, so we can use it transparently 238b39c5158Smillert ### throughout the program 239b39c5158Smillert my $fh; 240b39c5158Smillert { ### reading magic only makes sense if we're opening a file for 241b39c5158Smillert ### reading. otherwise, just use what the user requested. 242b39c5158Smillert my $magic = ''; 243b39c5158Smillert if( MODE_READ->($mode) ) { 244b39c5158Smillert open my $tmp, $file or do { 245b39c5158Smillert $self->_error( qq[Could not open '$file' for reading: $!] ); 246b39c5158Smillert return; 247b39c5158Smillert }; 248b39c5158Smillert 249b39c5158Smillert ### read the first 4 bites of the file to figure out which class to 250b39c5158Smillert ### use to open the file. 251b39c5158Smillert sysread( $tmp, $magic, 4 ); 252b39c5158Smillert close $tmp; 253b39c5158Smillert } 254b39c5158Smillert 255b39c5158Smillert ### is it bzip? 256b39c5158Smillert ### if you asked specifically for bzip compression, or if we're in 257b39c5158Smillert ### read mode and the magic numbers add up, use bzip 258b39c5158Smillert if( BZIP and ( 259b39c5158Smillert ($compress eq COMPRESS_BZIP) or 260b39c5158Smillert ( MODE_READ->($mode) and $magic =~ BZIP_MAGIC_NUM ) 261b39c5158Smillert ) 262b39c5158Smillert ) { 263b39c5158Smillert 264b39c5158Smillert ### different reader/writer modules, different error vars... sigh 265b39c5158Smillert if( MODE_READ->($mode) ) { 266*9f11ffb7Safresh1 $fh = IO::Uncompress::Bunzip2->new( $file, MultiStream => 1 ) or do { 267b39c5158Smillert $self->_error( qq[Could not read '$file': ] . 268b39c5158Smillert $IO::Uncompress::Bunzip2::Bunzip2Error 269b39c5158Smillert ); 270b39c5158Smillert return; 271b39c5158Smillert }; 272b39c5158Smillert 273b39c5158Smillert } else { 274b39c5158Smillert $fh = IO::Compress::Bzip2->new( $file ) or do { 275b39c5158Smillert $self->_error( qq[Could not write to '$file': ] . 276b39c5158Smillert $IO::Compress::Bzip2::Bzip2Error 277b39c5158Smillert ); 278b39c5158Smillert return; 279b39c5158Smillert }; 280b39c5158Smillert } 281b39c5158Smillert 282b39c5158Smillert ### is it gzip? 283b39c5158Smillert ### if you asked for compression, if you wanted to read or the gzip 284b39c5158Smillert ### magic number is present (redundant with read) 285b39c5158Smillert } elsif( ZLIB and ( 286b39c5158Smillert $compress or MODE_READ->($mode) or $magic =~ GZIP_MAGIC_NUM 287b39c5158Smillert ) 288b39c5158Smillert ) { 289b39c5158Smillert $fh = IO::Zlib->new; 290b39c5158Smillert 291b39c5158Smillert unless( $fh->open( $file, $mode ) ) { 292b39c5158Smillert $self->_error(qq[Could not create filehandle for '$file': $!]); 293b39c5158Smillert return; 294b39c5158Smillert } 295b39c5158Smillert 296b39c5158Smillert ### is it plain tar? 297b39c5158Smillert } else { 298b39c5158Smillert $fh = IO::File->new; 299b39c5158Smillert 300b39c5158Smillert unless( $fh->open( $file, $mode ) ) { 301b39c5158Smillert $self->_error(qq[Could not create filehandle for '$file': $!]); 302b39c5158Smillert return; 303b39c5158Smillert } 304b39c5158Smillert 305b39c5158Smillert ### enable bin mode on tar archives 306b39c5158Smillert binmode $fh; 307b39c5158Smillert } 308b39c5158Smillert } 309b39c5158Smillert 310b39c5158Smillert return $fh; 311b39c5158Smillert} 312b39c5158Smillert 313b39c5158Smillert 314b39c5158Smillertsub _read_tar { 315b39c5158Smillert my $self = shift; 316b39c5158Smillert my $handle = shift or return; 317b39c5158Smillert my $opts = shift || {}; 318b39c5158Smillert 319b39c5158Smillert my $count = $opts->{limit} || 0; 320b39c5158Smillert my $filter = $opts->{filter}; 321898184e3Ssthen my $md5 = $opts->{md5} || 0; # cdrake 322898184e3Ssthen my $filter_cb = $opts->{filter_cb}; 323b39c5158Smillert my $extract = $opts->{extract} || 0; 324b39c5158Smillert 325b39c5158Smillert ### set a cap on the amount of files to extract ### 326b39c5158Smillert my $limit = 0; 327b39c5158Smillert $limit = 1 if $count > 0; 328b39c5158Smillert 329b39c5158Smillert my $tarfile = [ ]; 330b39c5158Smillert my $chunk; 331b39c5158Smillert my $read = 0; 332b39c5158Smillert my $real_name; # to set the name of a file when 333b39c5158Smillert # we're encountering @longlink 334b39c5158Smillert my $data; 335b39c5158Smillert 336b39c5158Smillert LOOP: 337b39c5158Smillert while( $handle->read( $chunk, HEAD ) ) { 338b39c5158Smillert ### IO::Zlib doesn't support this yet 33991f110e0Safresh1 my $offset; 34091f110e0Safresh1 if ( ref($handle) ne 'IO::Zlib' ) { 34191f110e0Safresh1 local $@; 34291f110e0Safresh1 $offset = eval { tell $handle } || 'unknown'; 343898184e3Ssthen $@ = ''; 34491f110e0Safresh1 } 34591f110e0Safresh1 else { 34691f110e0Safresh1 $offset = 'unknown'; 34791f110e0Safresh1 } 348b39c5158Smillert 349b39c5158Smillert unless( $read++ ) { 350b39c5158Smillert my $gzip = GZIP_MAGIC_NUM; 351b39c5158Smillert if( $chunk =~ /$gzip/ ) { 352b39c5158Smillert $self->_error( qq[Cannot read compressed format in tar-mode] ); 353b39c5158Smillert return; 354b39c5158Smillert } 355b39c5158Smillert 356b39c5158Smillert ### size is < HEAD, which means a corrupted file, as the minimum 357b39c5158Smillert ### length is _at least_ HEAD 358b39c5158Smillert if (length $chunk != HEAD) { 359b39c5158Smillert $self->_error( qq[Cannot read enough bytes from the tarfile] ); 360b39c5158Smillert return; 361b39c5158Smillert } 362b39c5158Smillert } 363b39c5158Smillert 364b39c5158Smillert ### if we can't read in all bytes... ### 365b39c5158Smillert last if length $chunk != HEAD; 366b39c5158Smillert 367b39c5158Smillert ### Apparently this should really be two blocks of 512 zeroes, 368b39c5158Smillert ### but GNU tar sometimes gets it wrong. See comment in the 369b39c5158Smillert ### source code (tar.c) to GNU cpio. 370b39c5158Smillert next if $chunk eq TAR_END; 371b39c5158Smillert 372b39c5158Smillert ### according to the posix spec, the last 12 bytes of the header are 373b39c5158Smillert ### null bytes, to pad it to a 512 byte block. That means if these 374898184e3Ssthen ### bytes are NOT null bytes, it's a corrupt header. See: 375b39c5158Smillert ### www.koders.com/c/fidCE473AD3D9F835D690259D60AD5654591D91D5BA.aspx 376b39c5158Smillert ### line 111 377b39c5158Smillert { my $nulls = join '', "\0" x 12; 378b39c5158Smillert unless( $nulls eq substr( $chunk, 500, 12 ) ) { 379b39c5158Smillert $self->_error( qq[Invalid header block at offset $offset] ); 380b39c5158Smillert next LOOP; 381b39c5158Smillert } 382b39c5158Smillert } 383b39c5158Smillert 384b39c5158Smillert ### pass the realname, so we can set it 'proper' right away 385b39c5158Smillert ### some of the heuristics are done on the name, so important 386b39c5158Smillert ### to set it ASAP 387b39c5158Smillert my $entry; 388b39c5158Smillert { my %extra_args = (); 389b39c5158Smillert $extra_args{'name'} = $$real_name if defined $real_name; 390b39c5158Smillert 391b39c5158Smillert unless( $entry = Archive::Tar::File->new( chunk => $chunk, 392b39c5158Smillert %extra_args ) 393b39c5158Smillert ) { 394b39c5158Smillert $self->_error( qq[Couldn't read chunk at offset $offset] ); 395b39c5158Smillert next LOOP; 396b39c5158Smillert } 397b39c5158Smillert } 398b39c5158Smillert 399b39c5158Smillert ### ignore labels: 400898184e3Ssthen ### http://www.gnu.org/software/tar/manual/html_chapter/Media.html#SEC159 401b39c5158Smillert next if $entry->is_label; 402b39c5158Smillert 403b39c5158Smillert if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) { 404b39c5158Smillert 405b39c5158Smillert if ( $entry->is_file && !$entry->validate ) { 406b39c5158Smillert ### sometimes the chunk is rather fux0r3d and a whole 512 407b39c5158Smillert ### bytes ends up in the ->name area. 408b39c5158Smillert ### clean it up, if need be 409b39c5158Smillert my $name = $entry->name; 410b39c5158Smillert $name = substr($name, 0, 100) if length $name > 100; 411b39c5158Smillert $name =~ s/\n/ /g; 412b39c5158Smillert 413b39c5158Smillert $self->_error( $name . qq[: checksum error] ); 414b39c5158Smillert next LOOP; 415b39c5158Smillert } 416b39c5158Smillert 417b39c5158Smillert my $block = BLOCK_SIZE->( $entry->size ); 418b39c5158Smillert 419b39c5158Smillert $data = $entry->get_content_by_ref; 420b39c5158Smillert 421898184e3Ssthen my $skip = 0; 422898184e3Ssthen my $ctx; # cdrake 423898184e3Ssthen ### skip this entry if we're filtering 424898184e3Ssthen 425898184e3Ssthen if($md5) { # cdrake 426898184e3Ssthen $ctx = Digest::MD5->new; # cdrake 427898184e3Ssthen $skip=5; # cdrake 428898184e3Ssthen 429898184e3Ssthen } elsif ($filter && $entry->name !~ $filter) { 430898184e3Ssthen $skip = 1; 431898184e3Ssthen 432b8851fccSafresh1 } elsif ($filter_cb && ! $filter_cb->($entry)) { 433b8851fccSafresh1 $skip = 2; 434b8851fccSafresh1 435898184e3Ssthen ### skip this entry if it's a pax header. This is a special file added 436898184e3Ssthen ### by, among others, git-generated tarballs. It holds comments and is 437898184e3Ssthen ### not meant for extracting. See #38932: pax_global_header extracted 438898184e3Ssthen } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) { 439898184e3Ssthen $skip = 3; 440898184e3Ssthen } 441898184e3Ssthen 442898184e3Ssthen if ($skip) { 443898184e3Ssthen # 444898184e3Ssthen # Since we're skipping, do not allocate memory for the 445898184e3Ssthen # whole file. Read it 64 BLOCKS at a time. Do not 446898184e3Ssthen # complete the skip yet because maybe what we read is a 447898184e3Ssthen # longlink and it won't get skipped after all 448898184e3Ssthen # 449898184e3Ssthen my $amt = $block; 450898184e3Ssthen my $fsz=$entry->size; # cdrake 451898184e3Ssthen while ($amt > 0) { 452898184e3Ssthen $$data = ''; 453898184e3Ssthen my $this = 64 * BLOCK; 454898184e3Ssthen $this = $amt if $this > $amt; 455898184e3Ssthen if( $handle->read( $$data, $this ) < $this ) { 456898184e3Ssthen $self->_error( qq[Read error on tarfile (missing data) ']. 457898184e3Ssthen $entry->full_path ."' at offset $offset" ); 458898184e3Ssthen next LOOP; 459898184e3Ssthen } 460898184e3Ssthen $amt -= $this; 461898184e3Ssthen $fsz -= $this; # cdrake 462898184e3Ssthen substr ($$data, $fsz) = "" if ($fsz<0); # remove external junk prior to md5 # cdrake 463898184e3Ssthen $ctx->add($$data) if($skip==5); # cdrake 464898184e3Ssthen } 465898184e3Ssthen $$data = $ctx->hexdigest if($skip==5 && !$entry->is_longlink && !$entry->is_unknown && !$entry->is_label ) ; # cdrake 466898184e3Ssthen } else { 467898184e3Ssthen 468b39c5158Smillert ### just read everything into memory 469b39c5158Smillert ### can't do lazy loading since IO::Zlib doesn't support 'seek' 470b39c5158Smillert ### this is because Compress::Zlib doesn't support it =/ 471b39c5158Smillert ### this reads in the whole data in one read() call. 472b39c5158Smillert if ( $handle->read( $$data, $block ) < $block ) { 473b39c5158Smillert $self->_error( qq[Read error on tarfile (missing data) ']. 474b39c5158Smillert $entry->full_path ."' at offset $offset" ); 475b39c5158Smillert next LOOP; 476b39c5158Smillert } 477b39c5158Smillert ### throw away trailing garbage ### 478b39c5158Smillert substr ($$data, $entry->size) = "" if defined $$data; 479898184e3Ssthen } 480b39c5158Smillert 481b39c5158Smillert ### part II of the @LongLink munging -- need to do /after/ 482b39c5158Smillert ### the checksum check. 483b39c5158Smillert if( $entry->is_longlink ) { 484b39c5158Smillert ### weird thing in tarfiles -- if the file is actually a 485b39c5158Smillert ### @LongLink, the data part seems to have a trailing ^@ 486b39c5158Smillert ### (unprintable) char. to display, pipe output through less. 487b39c5158Smillert ### but that doesn't *always* happen.. so check if the last 488b39c5158Smillert ### character is a control character, and if so remove it 489b39c5158Smillert ### at any rate, we better remove that character here, or tests 490b39c5158Smillert ### like 'eq' and hash lookups based on names will SO not work 491b39c5158Smillert ### remove it by calculating the proper size, and then 492b39c5158Smillert ### tossing out everything that's longer than that size. 493b39c5158Smillert 494b39c5158Smillert ### count number of nulls 495b39c5158Smillert my $nulls = $$data =~ tr/\0/\0/; 496b39c5158Smillert 497b39c5158Smillert ### cut data + size by that many bytes 498b39c5158Smillert $entry->size( $entry->size - $nulls ); 499b39c5158Smillert substr ($$data, $entry->size) = ""; 500b39c5158Smillert } 501b39c5158Smillert } 502b39c5158Smillert 503b39c5158Smillert ### clean up of the entries.. posix tar /apparently/ has some 504b39c5158Smillert ### weird 'feature' that allows for filenames > 255 characters 505b39c5158Smillert ### they'll put a header in with as name '././@LongLink' and the 506b39c5158Smillert ### contents will be the name of the /next/ file in the archive 507b39c5158Smillert ### pretty crappy and kludgy if you ask me 508b39c5158Smillert 509b39c5158Smillert ### set the name for the next entry if this is a @LongLink; 510b39c5158Smillert ### this is one ugly hack =/ but needed for direct extraction 511b39c5158Smillert if( $entry->is_longlink ) { 512b39c5158Smillert $real_name = $data; 513b39c5158Smillert next LOOP; 514b39c5158Smillert } elsif ( defined $real_name ) { 515b39c5158Smillert $entry->name( $$real_name ); 516b39c5158Smillert $entry->prefix(''); 517b39c5158Smillert undef $real_name; 518b39c5158Smillert } 519b39c5158Smillert 520b39c5158Smillert if ($filter && $entry->name !~ $filter) { 521b39c5158Smillert next LOOP; 522b39c5158Smillert 523b8851fccSafresh1 } elsif ($filter_cb && ! $filter_cb->($entry)) { 524b8851fccSafresh1 next LOOP; 525b8851fccSafresh1 526b39c5158Smillert ### skip this entry if it's a pax header. This is a special file added 527b39c5158Smillert ### by, among others, git-generated tarballs. It holds comments and is 528b39c5158Smillert ### not meant for extracting. See #38932: pax_global_header extracted 529898184e3Ssthen } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) { 530898184e3Ssthen next LOOP; 531b39c5158Smillert } 532b39c5158Smillert 533898184e3Ssthen if ( $extract && !$entry->is_longlink 534b39c5158Smillert && !$entry->is_unknown 535898184e3Ssthen && !$entry->is_label ) { 536898184e3Ssthen $self->_extract_file( $entry ) or return; 537898184e3Ssthen } 538b39c5158Smillert 539b39c5158Smillert ### Guard against tarfiles with garbage at the end 540b39c5158Smillert last LOOP if $entry->name eq ''; 541b39c5158Smillert 542b39c5158Smillert ### push only the name on the rv if we're extracting 543b39c5158Smillert ### -- for extract_archive 544b39c5158Smillert push @$tarfile, ($extract ? $entry->name : $entry); 545b39c5158Smillert 546b39c5158Smillert if( $limit ) { 547b39c5158Smillert $count-- unless $entry->is_longlink || $entry->is_dir; 548b39c5158Smillert last LOOP unless $count; 549b39c5158Smillert } 550b39c5158Smillert } continue { 551b39c5158Smillert undef $data; 552b39c5158Smillert } 553b39c5158Smillert 554b39c5158Smillert return $tarfile; 555b39c5158Smillert} 556b39c5158Smillert 557b39c5158Smillert=head2 $tar->contains_file( $filename ) 558b39c5158Smillert 559b39c5158SmillertCheck if the archive contains a certain file. 560b39c5158SmillertIt will return true if the file is in the archive, false otherwise. 561b39c5158Smillert 562b39c5158SmillertNote however, that this function does an exact match using C<eq> 563b39c5158Smillerton the full path. So it cannot compensate for case-insensitive file- 564b39c5158Smillertsystems or compare 2 paths to see if they would point to the same 565b39c5158Smillertunderlying file. 566b39c5158Smillert 567b39c5158Smillert=cut 568b39c5158Smillert 569b39c5158Smillertsub contains_file { 570b39c5158Smillert my $self = shift; 571b39c5158Smillert my $full = shift; 572b39c5158Smillert 573b39c5158Smillert return unless defined $full; 574b39c5158Smillert 575b39c5158Smillert ### don't warn if the entry isn't there.. that's what this function 576b39c5158Smillert ### is for after all. 577b39c5158Smillert local $WARN = 0; 578b39c5158Smillert return 1 if $self->_find_entry($full); 579b39c5158Smillert return; 580b39c5158Smillert} 581b39c5158Smillert 582b39c5158Smillert=head2 $tar->extract( [@filenames] ) 583b39c5158Smillert 584b39c5158SmillertWrite files whose names are equivalent to any of the names in 585b39c5158SmillertC<@filenames> to disk, creating subdirectories as necessary. This 586b39c5158Smillertmight not work too well under VMS. 587b39c5158SmillertUnder MacPerl, the file's modification time will be converted to the 588b39c5158SmillertMacOS zero of time, and appropriate conversions will be done to the 589b39c5158Smillertpath. However, the length of each element of the path is not 590b39c5158Smillertinspected to see whether it's longer than MacOS currently allows (32 591b39c5158Smillertcharacters). 592b39c5158Smillert 593b39c5158SmillertIf C<extract> is called without a list of file names, the entire 594b39c5158Smillertcontents of the archive are extracted. 595b39c5158Smillert 596b39c5158SmillertReturns a list of filenames extracted. 597b39c5158Smillert 598b39c5158Smillert=cut 599b39c5158Smillert 600b39c5158Smillertsub extract { 601b39c5158Smillert my $self = shift; 602b39c5158Smillert my @args = @_; 603b39c5158Smillert my @files; 604*9f11ffb7Safresh1 my $hashmap; 605b39c5158Smillert 606b39c5158Smillert # use the speed optimization for all extracted files 607b39c5158Smillert local($self->{cwd}) = cwd() unless $self->{cwd}; 608b39c5158Smillert 609898184e3Ssthen ### you requested the extraction of only certain files 610b39c5158Smillert if( @args ) { 611b39c5158Smillert for my $file ( @args ) { 612b39c5158Smillert 613b39c5158Smillert ### it's already an object? 614b39c5158Smillert if( UNIVERSAL::isa( $file, 'Archive::Tar::File' ) ) { 615b39c5158Smillert push @files, $file; 616b39c5158Smillert next; 617b39c5158Smillert 618b39c5158Smillert ### go find it then 619b39c5158Smillert } else { 620b39c5158Smillert 621*9f11ffb7Safresh1 # create hash-map once to speed up lookup 622*9f11ffb7Safresh1 $hashmap = $hashmap || { 623*9f11ffb7Safresh1 map { $_->full_path, $_ } @{$self->_data} 624*9f11ffb7Safresh1 }; 625b39c5158Smillert 626*9f11ffb7Safresh1 if (exists $hashmap->{$file}) { 627b39c5158Smillert ### we found the file you're looking for 628*9f11ffb7Safresh1 push @files, $hashmap->{$file}; 629*9f11ffb7Safresh1 } else { 630b39c5158Smillert return $self->_error( 631b39c5158Smillert qq[Could not find '$file' in archive] ); 632b39c5158Smillert } 633b39c5158Smillert } 634b39c5158Smillert } 635b39c5158Smillert 636b39c5158Smillert ### just grab all the file items 637b39c5158Smillert } else { 638b39c5158Smillert @files = $self->get_files; 639b39c5158Smillert } 640b39c5158Smillert 641b39c5158Smillert ### nothing found? that's an error 642b39c5158Smillert unless( scalar @files ) { 643b39c5158Smillert $self->_error( qq[No files found for ] . $self->_file ); 644b39c5158Smillert return; 645b39c5158Smillert } 646b39c5158Smillert 647b39c5158Smillert ### now extract them 648b39c5158Smillert for my $entry ( @files ) { 649b39c5158Smillert unless( $self->_extract_file( $entry ) ) { 650b39c5158Smillert $self->_error(q[Could not extract ']. $entry->full_path .q['] ); 651b39c5158Smillert return; 652b39c5158Smillert } 653b39c5158Smillert } 654b39c5158Smillert 655b39c5158Smillert return @files; 656b39c5158Smillert} 657b39c5158Smillert 658b39c5158Smillert=head2 $tar->extract_file( $file, [$extract_path] ) 659b39c5158Smillert 660b39c5158SmillertWrite an entry, whose name is equivalent to the file name provided to 661b39c5158Smillertdisk. Optionally takes a second parameter, which is the full native 662b39c5158Smillertpath (including filename) the entry will be written to. 663b39c5158Smillert 664b39c5158SmillertFor example: 665b39c5158Smillert 666b39c5158Smillert $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' ); 667b39c5158Smillert 668b39c5158Smillert $tar->extract_file( $at_file_object, 'name/i/want/to/give/it' ); 669b39c5158Smillert 670b39c5158SmillertReturns true on success, false on failure. 671b39c5158Smillert 672b39c5158Smillert=cut 673b39c5158Smillert 674b39c5158Smillertsub extract_file { 675b39c5158Smillert my $self = shift; 676b39c5158Smillert my $file = shift; return unless defined $file; 677b39c5158Smillert my $alt = shift; 678b39c5158Smillert 679b39c5158Smillert my $entry = $self->_find_entry( $file ) 680b39c5158Smillert or $self->_error( qq[Could not find an entry for '$file'] ), return; 681b39c5158Smillert 682b39c5158Smillert return $self->_extract_file( $entry, $alt ); 683b39c5158Smillert} 684b39c5158Smillert 685b39c5158Smillertsub _extract_file { 686b39c5158Smillert my $self = shift; 687b39c5158Smillert my $entry = shift or return; 688b39c5158Smillert my $alt = shift; 689b39c5158Smillert 690b39c5158Smillert ### you wanted an alternate extraction location ### 691b39c5158Smillert my $name = defined $alt ? $alt : $entry->full_path; 692b39c5158Smillert 693b39c5158Smillert ### splitpath takes a bool at the end to indicate 694b39c5158Smillert ### that it's splitting a dir 695b39c5158Smillert my ($vol,$dirs,$file); 696b39c5158Smillert if ( defined $alt ) { # It's a local-OS path 697b39c5158Smillert ($vol,$dirs,$file) = File::Spec->splitpath( $alt, 698b39c5158Smillert $entry->is_dir ); 699b39c5158Smillert } else { 700b39c5158Smillert ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name, 701b39c5158Smillert $entry->is_dir ); 702b39c5158Smillert } 703b39c5158Smillert 704b39c5158Smillert my $dir; 705b39c5158Smillert ### is $name an absolute path? ### 706b39c5158Smillert if( $vol || File::Spec->file_name_is_absolute( $dirs ) ) { 707b39c5158Smillert 708b39c5158Smillert ### absolute names are not allowed to be in tarballs under 709b39c5158Smillert ### strict mode, so only allow it if a user tells us to do it 710b39c5158Smillert if( not defined $alt and not $INSECURE_EXTRACT_MODE ) { 711b39c5158Smillert $self->_error( 712b39c5158Smillert q[Entry ']. $entry->full_path .q[' is an absolute path. ]. 713b39c5158Smillert q[Not extracting absolute paths under SECURE EXTRACT MODE] 714b39c5158Smillert ); 715b39c5158Smillert return; 716b39c5158Smillert } 717b39c5158Smillert 718b39c5158Smillert ### user asked us to, it's fine. 719b39c5158Smillert $dir = File::Spec->catpath( $vol, $dirs, "" ); 720b39c5158Smillert 721b39c5158Smillert ### it's a relative path ### 722b39c5158Smillert } else { 723b39c5158Smillert my $cwd = (ref $self and defined $self->{cwd}) 724b39c5158Smillert ? $self->{cwd} 725b39c5158Smillert : cwd(); 726b39c5158Smillert 727b39c5158Smillert my @dirs = defined $alt 728b39c5158Smillert ? File::Spec->splitdir( $dirs ) # It's a local-OS path 729b39c5158Smillert : File::Spec::Unix->splitdir( $dirs ); # it's UNIX-style, likely 730b39c5158Smillert # straight from the tarball 731b39c5158Smillert 732b39c5158Smillert if( not defined $alt and 733b39c5158Smillert not $INSECURE_EXTRACT_MODE 734b39c5158Smillert ) { 735b39c5158Smillert 736b39c5158Smillert ### paths that leave the current directory are not allowed under 737b39c5158Smillert ### strict mode, so only allow it if a user tells us to do this. 738b39c5158Smillert if( grep { $_ eq '..' } @dirs ) { 739b39c5158Smillert 740b39c5158Smillert $self->_error( 741b39c5158Smillert q[Entry ']. $entry->full_path .q[' is attempting to leave ]. 742b39c5158Smillert q[the current working directory. Not extracting under ]. 743b39c5158Smillert q[SECURE EXTRACT MODE] 744b39c5158Smillert ); 745b39c5158Smillert return; 746b39c5158Smillert } 747b39c5158Smillert 748b39c5158Smillert ### the archive may be asking us to extract into a symlink. This 749b39c5158Smillert ### is not sane and a possible security issue, as outlined here: 750b39c5158Smillert ### https://rt.cpan.org/Ticket/Display.html?id=30380 751b39c5158Smillert ### https://bugzilla.redhat.com/show_bug.cgi?id=295021 752b39c5158Smillert ### https://issues.rpath.com/browse/RPL-1716 753b39c5158Smillert my $full_path = $cwd; 754b39c5158Smillert for my $d ( @dirs ) { 755b39c5158Smillert $full_path = File::Spec->catdir( $full_path, $d ); 756b39c5158Smillert 757b39c5158Smillert ### we've already checked this one, and it's safe. Move on. 758b39c5158Smillert next if ref $self and $self->{_link_cache}->{$full_path}; 759b39c5158Smillert 760b39c5158Smillert if( -l $full_path ) { 761b39c5158Smillert my $to = readlink $full_path; 762b39c5158Smillert my $diag = "symlinked directory ($full_path => $to)"; 763b39c5158Smillert 764b39c5158Smillert $self->_error( 765b39c5158Smillert q[Entry ']. $entry->full_path .q[' is attempting to ]. 766b39c5158Smillert qq[extract to a $diag. This is considered a security ]. 767b39c5158Smillert q[vulnerability and not allowed under SECURE EXTRACT ]. 768b39c5158Smillert q[MODE] 769b39c5158Smillert ); 770b39c5158Smillert return; 771b39c5158Smillert } 772b39c5158Smillert 773b39c5158Smillert ### XXX keep a cache if possible, so the stats become cheaper: 774b39c5158Smillert $self->{_link_cache}->{$full_path} = 1 if ref $self; 775b39c5158Smillert } 776b39c5158Smillert } 777b39c5158Smillert 778b39c5158Smillert ### '.' is the directory delimiter on VMS, which has to be escaped 779b39c5158Smillert ### or changed to '_' on vms. vmsify is used, because older versions 780b39c5158Smillert ### of vmspath do not handle this properly. 781b39c5158Smillert ### Must not add a '/' to an empty directory though. 782b39c5158Smillert map { length() ? VMS::Filespec::vmsify($_.'/') : $_ } @dirs if ON_VMS; 783b39c5158Smillert 784b39c5158Smillert my ($cwd_vol,$cwd_dir,$cwd_file) 785b39c5158Smillert = File::Spec->splitpath( $cwd ); 786b39c5158Smillert my @cwd = File::Spec->splitdir( $cwd_dir ); 787b39c5158Smillert push @cwd, $cwd_file if length $cwd_file; 788b39c5158Smillert 789898184e3Ssthen ### We need to pass '' as the last element to catpath. Craig Berry 790b39c5158Smillert ### explains why (msgid <p0624083dc311ae541393@[172.16.52.1]>): 791b39c5158Smillert ### The root problem is that splitpath on UNIX always returns the 792b39c5158Smillert ### final path element as a file even if it is a directory, and of 793b39c5158Smillert ### course there is no way it can know the difference without checking 794b39c5158Smillert ### against the filesystem, which it is documented as not doing. When 795b39c5158Smillert ### you turn around and call catpath, on VMS you have to know which bits 796b39c5158Smillert ### are directory bits and which bits are file bits. In this case we 797b39c5158Smillert ### know the result should be a directory. I had thought you could omit 798b39c5158Smillert ### the file argument to catpath in such a case, but apparently on UNIX 799b39c5158Smillert ### you can't. 800b39c5158Smillert $dir = File::Spec->catpath( 801b39c5158Smillert $cwd_vol, File::Spec->catdir( @cwd, @dirs ), '' 802b39c5158Smillert ); 803b39c5158Smillert 804b39c5158Smillert ### catdir() returns undef if the path is longer than 255 chars on 805b39c5158Smillert ### older VMS systems. 806b39c5158Smillert unless ( defined $dir ) { 807b39c5158Smillert $^W && $self->_error( qq[Could not compose a path for '$dirs'\n] ); 808b39c5158Smillert return; 809b39c5158Smillert } 810b39c5158Smillert 811b39c5158Smillert } 812b39c5158Smillert 813b39c5158Smillert if( -e $dir && !-d _ ) { 814b39c5158Smillert $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] ); 815b39c5158Smillert return; 816b39c5158Smillert } 817b39c5158Smillert 818b39c5158Smillert unless ( -d _ ) { 819b39c5158Smillert eval { File::Path::mkpath( $dir, 0, 0777 ) }; 820b39c5158Smillert if( $@ ) { 821b39c5158Smillert my $fp = $entry->full_path; 822b39c5158Smillert $self->_error(qq[Could not create directory '$dir' for '$fp': $@]); 823b39c5158Smillert return; 824b39c5158Smillert } 825b39c5158Smillert 826b39c5158Smillert ### XXX chown here? that might not be the same as in the archive 827b39c5158Smillert ### as we're only chown'ing to the owner of the file we're extracting 828b39c5158Smillert ### not to the owner of the directory itself, which may or may not 829b39c5158Smillert ### be another entry in the archive 830b39c5158Smillert ### Answer: no, gnu tar doesn't do it either, it'd be the wrong 831b39c5158Smillert ### way to go. 832b39c5158Smillert #if( $CHOWN && CAN_CHOWN ) { 833b39c5158Smillert # chown $entry->uid, $entry->gid, $dir or 834b39c5158Smillert # $self->_error( qq[Could not set uid/gid on '$dir'] ); 835b39c5158Smillert #} 836b39c5158Smillert } 837b39c5158Smillert 838b39c5158Smillert ### we're done if we just needed to create a dir ### 839b39c5158Smillert return 1 if $entry->is_dir; 840b39c5158Smillert 841b39c5158Smillert my $full = File::Spec->catfile( $dir, $file ); 842b39c5158Smillert 843b39c5158Smillert if( $entry->is_unknown ) { 844b39c5158Smillert $self->_error( qq[Unknown file type for file '$full'] ); 845b39c5158Smillert return; 846b39c5158Smillert } 847b39c5158Smillert 84826f75d2dSafresh1 ### If a file system already contains a block device with the same name as 84926f75d2dSafresh1 ### the being extracted regular file, we would write the file's content 85026f75d2dSafresh1 ### to the block device. So remove the existing file (block device) now. 85126f75d2dSafresh1 ### If an archive contains multiple same-named entries, the last one 85226f75d2dSafresh1 ### should replace the previous ones. So remove the old file now. 85326f75d2dSafresh1 ### If the old entry is a symlink to a file outside of the CWD, the new 85426f75d2dSafresh1 ### entry would create a file there. This is CVE-2018-12015 85526f75d2dSafresh1 ### <https://rt.cpan.org/Ticket/Display.html?id=125523>. 85626f75d2dSafresh1 if (-l $full || -e _) { 85726f75d2dSafresh1 if (!unlink $full) { 85826f75d2dSafresh1 $self->_error( qq[Could not remove old file '$full': $!] ); 85926f75d2dSafresh1 return; 86026f75d2dSafresh1 } 86126f75d2dSafresh1 } 862b39c5158Smillert if( length $entry->type && $entry->is_file ) { 863b39c5158Smillert my $fh = IO::File->new; 864*9f11ffb7Safresh1 $fh->open( $full, '>' ) or ( 865b39c5158Smillert $self->_error( qq[Could not open file '$full': $!] ), 866b39c5158Smillert return 867b39c5158Smillert ); 868b39c5158Smillert 869b39c5158Smillert if( $entry->size ) { 870b39c5158Smillert binmode $fh; 871b39c5158Smillert syswrite $fh, $entry->data or ( 872b39c5158Smillert $self->_error( qq[Could not write data to '$full'] ), 873b39c5158Smillert return 874b39c5158Smillert ); 875b39c5158Smillert } 876b39c5158Smillert 877b39c5158Smillert close $fh or ( 878b39c5158Smillert $self->_error( qq[Could not close file '$full'] ), 879b39c5158Smillert return 880b39c5158Smillert ); 881b39c5158Smillert 882b39c5158Smillert } else { 883b39c5158Smillert $self->_make_special_file( $entry, $full ) or return; 884b39c5158Smillert } 885b39c5158Smillert 886b39c5158Smillert ### only update the timestamp if it's not a symlink; that will change the 887b39c5158Smillert ### timestamp of the original. This addresses bug #33669: Could not update 888b39c5158Smillert ### timestamp warning on symlinks 889b39c5158Smillert if( not -l $full ) { 890b39c5158Smillert utime time, $entry->mtime - TIME_OFFSET, $full or 891b39c5158Smillert $self->_error( qq[Could not update timestamp] ); 892b39c5158Smillert } 893b39c5158Smillert 89491f110e0Safresh1 if( $CHOWN && CAN_CHOWN->() and not -l $full ) { 895b39c5158Smillert chown $entry->uid, $entry->gid, $full or 896b39c5158Smillert $self->_error( qq[Could not set uid/gid on '$full'] ); 897b39c5158Smillert } 898b39c5158Smillert 899b39c5158Smillert ### only chmod if we're allowed to, but never chmod symlinks, since they'll 900b39c5158Smillert ### change the perms on the file they're linking too... 901b39c5158Smillert if( $CHMOD and not -l $full ) { 902b39c5158Smillert my $mode = $entry->mode; 903b39c5158Smillert unless ($SAME_PERMISSIONS) { 904b39c5158Smillert $mode &= ~(oct(7000) | umask); 905b39c5158Smillert } 906b39c5158Smillert chmod $mode, $full or 907b39c5158Smillert $self->_error( qq[Could not chown '$full' to ] . $entry->mode ); 908b39c5158Smillert } 909b39c5158Smillert 910b39c5158Smillert return 1; 911b39c5158Smillert} 912b39c5158Smillert 913b39c5158Smillertsub _make_special_file { 914b39c5158Smillert my $self = shift; 915b39c5158Smillert my $entry = shift or return; 916b39c5158Smillert my $file = shift; return unless defined $file; 917b39c5158Smillert 918b39c5158Smillert my $err; 919b39c5158Smillert 920b39c5158Smillert if( $entry->is_symlink ) { 921b39c5158Smillert my $fail; 922b39c5158Smillert if( ON_UNIX ) { 923b39c5158Smillert symlink( $entry->linkname, $file ) or $fail++; 924b39c5158Smillert 925b39c5158Smillert } else { 926b39c5158Smillert $self->_extract_special_file_as_plain_file( $entry, $file ) 927b39c5158Smillert or $fail++; 928b39c5158Smillert } 929b39c5158Smillert 930b39c5158Smillert $err = qq[Making symbolic link '$file' to '] . 931b39c5158Smillert $entry->linkname .q[' failed] if $fail; 932b39c5158Smillert 933b39c5158Smillert } elsif ( $entry->is_hardlink ) { 934b39c5158Smillert my $fail; 935b39c5158Smillert if( ON_UNIX ) { 936b39c5158Smillert link( $entry->linkname, $file ) or $fail++; 937b39c5158Smillert 938b39c5158Smillert } else { 939b39c5158Smillert $self->_extract_special_file_as_plain_file( $entry, $file ) 940b39c5158Smillert or $fail++; 941b39c5158Smillert } 942b39c5158Smillert 943b39c5158Smillert $err = qq[Making hard link from '] . $entry->linkname . 944b39c5158Smillert qq[' to '$file' failed] if $fail; 945b39c5158Smillert 946b39c5158Smillert } elsif ( $entry->is_fifo ) { 947b39c5158Smillert ON_UNIX && !system('mknod', $file, 'p') or 948b39c5158Smillert $err = qq[Making fifo ']. $entry->name .qq[' failed]; 949b39c5158Smillert 950b39c5158Smillert } elsif ( $entry->is_blockdev or $entry->is_chardev ) { 951b39c5158Smillert my $mode = $entry->is_blockdev ? 'b' : 'c'; 952b39c5158Smillert 953b39c5158Smillert ON_UNIX && !system('mknod', $file, $mode, 954b39c5158Smillert $entry->devmajor, $entry->devminor) or 955b39c5158Smillert $err = qq[Making block device ']. $entry->name .qq[' (maj=] . 956b39c5158Smillert $entry->devmajor . qq[ min=] . $entry->devminor . 957b39c5158Smillert qq[) failed.]; 958b39c5158Smillert 959b39c5158Smillert } elsif ( $entry->is_socket ) { 960b39c5158Smillert ### the original doesn't do anything special for sockets.... ### 961b39c5158Smillert 1; 962b39c5158Smillert } 963b39c5158Smillert 964b39c5158Smillert return $err ? $self->_error( $err ) : 1; 965b39c5158Smillert} 966b39c5158Smillert 967b39c5158Smillert### don't know how to make symlinks, let's just extract the file as 968b39c5158Smillert### a plain file 969b39c5158Smillertsub _extract_special_file_as_plain_file { 970b39c5158Smillert my $self = shift; 971b39c5158Smillert my $entry = shift or return; 972b39c5158Smillert my $file = shift; return unless defined $file; 973b39c5158Smillert 974b39c5158Smillert my $err; 975b39c5158Smillert TRY: { 9766fb12b70Safresh1 my $orig = $self->_find_entry( $entry->linkname, $entry ); 977b39c5158Smillert 978b39c5158Smillert unless( $orig ) { 979b39c5158Smillert $err = qq[Could not find file '] . $entry->linkname . 980b39c5158Smillert qq[' in memory.]; 981b39c5158Smillert last TRY; 982b39c5158Smillert } 983b39c5158Smillert 984b39c5158Smillert ### clone the entry, make it appear as a normal file ### 9856fb12b70Safresh1 my $clone = $orig->clone; 986b39c5158Smillert $clone->_downgrade_to_plainfile; 987b39c5158Smillert $self->_extract_file( $clone, $file ) or last TRY; 988b39c5158Smillert 989b39c5158Smillert return 1; 990b39c5158Smillert } 991b39c5158Smillert 992b39c5158Smillert return $self->_error($err); 993b39c5158Smillert} 994b39c5158Smillert 995b39c5158Smillert=head2 $tar->list_files( [\@properties] ) 996b39c5158Smillert 997b39c5158SmillertReturns a list of the names of all the files in the archive. 998b39c5158Smillert 999b39c5158SmillertIf C<list_files()> is passed an array reference as its first argument 1000b39c5158Smillertit returns a list of hash references containing the requested 1001b39c5158Smillertproperties of each file. The following list of properties is 1002b39c5158Smillertsupported: name, size, mtime (last modified date), mode, uid, gid, 1003b39c5158Smillertlinkname, uname, gname, devmajor, devminor, prefix. 1004b39c5158Smillert 1005b39c5158SmillertPassing an array reference containing only one element, 'name', is 1006b39c5158Smillertspecial cased to return a list of names rather than a list of hash 1007b39c5158Smillertreferences, making it equivalent to calling C<list_files> without 1008b39c5158Smillertarguments. 1009b39c5158Smillert 1010b39c5158Smillert=cut 1011b39c5158Smillert 1012b39c5158Smillertsub list_files { 1013b39c5158Smillert my $self = shift; 1014b39c5158Smillert my $aref = shift || [ ]; 1015b39c5158Smillert 1016b39c5158Smillert unless( $self->_data ) { 1017b39c5158Smillert $self->read() or return; 1018b39c5158Smillert } 1019b39c5158Smillert 1020b39c5158Smillert if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) { 1021b39c5158Smillert return map { $_->full_path } @{$self->_data}; 1022b39c5158Smillert } else { 1023b39c5158Smillert 1024b39c5158Smillert #my @rv; 1025b39c5158Smillert #for my $obj ( @{$self->_data} ) { 1026b39c5158Smillert # push @rv, { map { $_ => $obj->$_() } @$aref }; 1027b39c5158Smillert #} 1028b39c5158Smillert #return @rv; 1029b39c5158Smillert 1030b39c5158Smillert ### this does the same as the above.. just needs a +{ } 1031b39c5158Smillert ### to make sure perl doesn't confuse it for a block 1032b39c5158Smillert return map { my $o=$_; 1033b39c5158Smillert +{ map { $_ => $o->$_() } @$aref } 1034b39c5158Smillert } @{$self->_data}; 1035b39c5158Smillert } 1036b39c5158Smillert} 1037b39c5158Smillert 1038b39c5158Smillertsub _find_entry { 1039b39c5158Smillert my $self = shift; 1040b39c5158Smillert my $file = shift; 1041b39c5158Smillert 1042b39c5158Smillert unless( defined $file ) { 1043b39c5158Smillert $self->_error( qq[No file specified] ); 1044b39c5158Smillert return; 1045b39c5158Smillert } 1046b39c5158Smillert 1047b39c5158Smillert ### it's an object already 1048b39c5158Smillert return $file if UNIVERSAL::isa( $file, 'Archive::Tar::File' ); 1049b39c5158Smillert 10506fb12b70Safresh1seach_entry: 10516fb12b70Safresh1 if($self->_data){ 1052b39c5158Smillert for my $entry ( @{$self->_data} ) { 1053b39c5158Smillert my $path = $entry->full_path; 1054b39c5158Smillert return $entry if $path eq $file; 1055b39c5158Smillert } 10566fb12b70Safresh1 } 10576fb12b70Safresh1 10586fb12b70Safresh1 if($Archive::Tar::RESOLVE_SYMLINK!~/none/){ 10596fb12b70Safresh1 if(my $link_entry = shift()){#fallback mode when symlinks are using relative notations ( ../a/./b/text.bin ) 10606fb12b70Safresh1 $file = _symlinks_resolver( $link_entry->name, $file ); 10616fb12b70Safresh1 goto seach_entry if $self->_data; 10626fb12b70Safresh1 10636fb12b70Safresh1 #this will be slower than never, but won't failed! 10646fb12b70Safresh1 10656fb12b70Safresh1 my $iterargs = $link_entry->{'_archive'}; 10666fb12b70Safresh1 if($Archive::Tar::RESOLVE_SYMLINK=~/speed/ && @$iterargs==3){ 10676fb12b70Safresh1 #faster but whole archive will be read in memory 10686fb12b70Safresh1 #read whole archive and share data 10696fb12b70Safresh1 my $archive = Archive::Tar->new; 10706fb12b70Safresh1 $archive->read( @$iterargs ); 10716fb12b70Safresh1 push @$iterargs, $archive; #take a trace for destruction 10726fb12b70Safresh1 if($archive->_data){ 10736fb12b70Safresh1 $self->_data( $archive->_data ); 10746fb12b70Safresh1 goto seach_entry; 10756fb12b70Safresh1 } 10766fb12b70Safresh1 }#faster 10776fb12b70Safresh1 10786fb12b70Safresh1 {#slower but lower memory usage 10796fb12b70Safresh1 # $iterargs = [$filename, $compressed, $opts]; 10806fb12b70Safresh1 my $next = Archive::Tar->iter( @$iterargs ); 10816fb12b70Safresh1 while(my $e = $next->()){ 10826fb12b70Safresh1 if($e->full_path eq $file){ 10836fb12b70Safresh1 undef $next; 10846fb12b70Safresh1 return $e; 10856fb12b70Safresh1 } 10866fb12b70Safresh1 } 10876fb12b70Safresh1 }#slower 10886fb12b70Safresh1 } 10896fb12b70Safresh1 } 1090b39c5158Smillert 1091b39c5158Smillert $self->_error( qq[No such file in archive: '$file'] ); 1092b39c5158Smillert return; 1093b39c5158Smillert} 1094b39c5158Smillert 1095b39c5158Smillert=head2 $tar->get_files( [@filenames] ) 1096b39c5158Smillert 1097b39c5158SmillertReturns the C<Archive::Tar::File> objects matching the filenames 1098b39c5158Smillertprovided. If no filename list was passed, all C<Archive::Tar::File> 1099b39c5158Smillertobjects in the current Tar object are returned. 1100b39c5158Smillert 1101b39c5158SmillertPlease refer to the C<Archive::Tar::File> documentation on how to 1102b39c5158Smillerthandle these objects. 1103b39c5158Smillert 1104b39c5158Smillert=cut 1105b39c5158Smillert 1106b39c5158Smillertsub get_files { 1107b39c5158Smillert my $self = shift; 1108b39c5158Smillert 1109b39c5158Smillert return @{ $self->_data } unless @_; 1110b39c5158Smillert 1111b39c5158Smillert my @list; 1112b39c5158Smillert for my $file ( @_ ) { 1113b39c5158Smillert push @list, grep { defined } $self->_find_entry( $file ); 1114b39c5158Smillert } 1115b39c5158Smillert 1116b39c5158Smillert return @list; 1117b39c5158Smillert} 1118b39c5158Smillert 1119b39c5158Smillert=head2 $tar->get_content( $file ) 1120b39c5158Smillert 1121b39c5158SmillertReturn the content of the named file. 1122b39c5158Smillert 1123b39c5158Smillert=cut 1124b39c5158Smillert 1125b39c5158Smillertsub get_content { 1126b39c5158Smillert my $self = shift; 1127b39c5158Smillert my $entry = $self->_find_entry( shift ) or return; 1128b39c5158Smillert 1129b39c5158Smillert return $entry->data; 1130b39c5158Smillert} 1131b39c5158Smillert 1132b39c5158Smillert=head2 $tar->replace_content( $file, $content ) 1133b39c5158Smillert 1134b39c5158SmillertMake the string $content be the content for the file named $file. 1135b39c5158Smillert 1136b39c5158Smillert=cut 1137b39c5158Smillert 1138b39c5158Smillertsub replace_content { 1139b39c5158Smillert my $self = shift; 1140b39c5158Smillert my $entry = $self->_find_entry( shift ) or return; 1141b39c5158Smillert 1142b39c5158Smillert return $entry->replace_content( shift ); 1143b39c5158Smillert} 1144b39c5158Smillert 1145b39c5158Smillert=head2 $tar->rename( $file, $new_name ) 1146b39c5158Smillert 1147b39c5158SmillertRename the file of the in-memory archive to $new_name. 1148b39c5158Smillert 1149b39c5158SmillertNote that you must specify a Unix path for $new_name, since per tar 1150b39c5158Smillertstandard, all files in the archive must be Unix paths. 1151b39c5158Smillert 1152b39c5158SmillertReturns true on success and false on failure. 1153b39c5158Smillert 1154b39c5158Smillert=cut 1155b39c5158Smillert 1156b39c5158Smillertsub rename { 1157b39c5158Smillert my $self = shift; 1158b39c5158Smillert my $file = shift; return unless defined $file; 1159b39c5158Smillert my $new = shift; return unless defined $new; 1160b39c5158Smillert 1161b39c5158Smillert my $entry = $self->_find_entry( $file ) or return; 1162b39c5158Smillert 1163b39c5158Smillert return $entry->rename( $new ); 1164b39c5158Smillert} 1165b39c5158Smillert 1166898184e3Ssthen=head2 $tar->chmod( $file, $mode ) 1167898184e3Ssthen 1168898184e3SsthenChange mode of $file to $mode. 1169898184e3Ssthen 1170898184e3SsthenReturns true on success and false on failure. 1171898184e3Ssthen 1172898184e3Ssthen=cut 1173898184e3Ssthen 1174898184e3Ssthensub chmod { 1175898184e3Ssthen my $self = shift; 1176898184e3Ssthen my $file = shift; return unless defined $file; 1177898184e3Ssthen my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/; 1178898184e3Ssthen my @args = ("$mode"); 1179898184e3Ssthen 1180898184e3Ssthen my $entry = $self->_find_entry( $file ) or return; 1181898184e3Ssthen my $x = $entry->chmod( @args ); 1182898184e3Ssthen return $x; 1183898184e3Ssthen} 1184898184e3Ssthen 1185898184e3Ssthen=head2 $tar->chown( $file, $uname [, $gname] ) 1186898184e3Ssthen 1187898184e3SsthenChange owner $file to $uname and $gname. 1188898184e3Ssthen 1189898184e3SsthenReturns true on success and false on failure. 1190898184e3Ssthen 1191898184e3Ssthen=cut 1192898184e3Ssthen 1193898184e3Ssthensub chown { 1194898184e3Ssthen my $self = shift; 1195898184e3Ssthen my $file = shift; return unless defined $file; 1196898184e3Ssthen my $uname = shift; return unless defined $uname; 1197898184e3Ssthen my @args = ($uname); 1198898184e3Ssthen push(@args, shift); 1199898184e3Ssthen 1200898184e3Ssthen my $entry = $self->_find_entry( $file ) or return; 1201898184e3Ssthen my $x = $entry->chown( @args ); 1202898184e3Ssthen return $x; 1203898184e3Ssthen} 1204898184e3Ssthen 1205b39c5158Smillert=head2 $tar->remove (@filenamelist) 1206b39c5158Smillert 1207b39c5158SmillertRemoves any entries with names matching any of the given filenames 1208b39c5158Smillertfrom the in-memory archive. Returns a list of C<Archive::Tar::File> 1209b39c5158Smillertobjects that remain. 1210b39c5158Smillert 1211b39c5158Smillert=cut 1212b39c5158Smillert 1213b39c5158Smillertsub remove { 1214b39c5158Smillert my $self = shift; 1215b39c5158Smillert my @list = @_; 1216b39c5158Smillert 1217b39c5158Smillert my %seen = map { $_->full_path => $_ } @{$self->_data}; 1218b39c5158Smillert delete $seen{ $_ } for @list; 1219b39c5158Smillert 1220b39c5158Smillert $self->_data( [values %seen] ); 1221b39c5158Smillert 1222b39c5158Smillert return values %seen; 1223b39c5158Smillert} 1224b39c5158Smillert 1225b39c5158Smillert=head2 $tar->clear 1226b39c5158Smillert 1227b39c5158SmillertC<clear> clears the current in-memory archive. This effectively gives 1228b39c5158Smillertyou a 'blank' object, ready to be filled again. Note that C<clear> 1229b39c5158Smillertonly has effect on the object, not the underlying tarfile. 1230b39c5158Smillert 1231b39c5158Smillert=cut 1232b39c5158Smillert 1233b39c5158Smillertsub clear { 1234b39c5158Smillert my $self = shift or return; 1235b39c5158Smillert 1236b39c5158Smillert $self->_data( [] ); 1237b39c5158Smillert $self->_file( '' ); 1238b39c5158Smillert 1239b39c5158Smillert return 1; 1240b39c5158Smillert} 1241b39c5158Smillert 1242b39c5158Smillert 1243b39c5158Smillert=head2 $tar->write ( [$file, $compressed, $prefix] ) 1244b39c5158Smillert 1245b39c5158SmillertWrite the in-memory archive to disk. The first argument can either 1246b39c5158Smillertbe the name of a file or a reference to an already open filehandle (a 1247b39c5158SmillertGLOB reference). 1248b39c5158Smillert 1249b39c5158SmillertThe second argument is used to indicate compression. You can either 1250b39c5158Smillertcompress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed 1251b39c5158Smillertto be the C<gzip> compression level (between 1 and 9), but the use of 1252898184e3Ssthenconstants is preferred: 1253b39c5158Smillert 1254b39c5158Smillert # write a gzip compressed file 1255b39c5158Smillert $tar->write( 'out.tgz', COMPRESS_GZIP ); 1256b39c5158Smillert 1257b39c5158Smillert # write a bzip compressed file 1258b39c5158Smillert $tar->write( 'out.tbz', COMPRESS_BZIP ); 1259b39c5158Smillert 1260b39c5158SmillertNote that when you pass in a filehandle, the compression argument 1261b39c5158Smillertis ignored, as all files are printed verbatim to your filehandle. 1262b39c5158SmillertIf you wish to enable compression with filehandles, use an 1263b39c5158SmillertC<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead. 1264b39c5158Smillert 1265b39c5158SmillertThe third argument is an optional prefix. All files will be tucked 1266b39c5158Smillertaway in the directory you specify as prefix. So if you have files 1267b39c5158Smillert'a' and 'b' in your archive, and you specify 'foo' as prefix, they 1268b39c5158Smillertwill be written to the archive as 'foo/a' and 'foo/b'. 1269b39c5158Smillert 1270b39c5158SmillertIf no arguments are given, C<write> returns the entire formatted 1271b39c5158Smillertarchive as a string, which could be useful if you'd like to stuff the 1272b39c5158Smillertarchive into a socket or a pipe to gzip or something. 1273b39c5158Smillert 1274b39c5158Smillert 1275b39c5158Smillert=cut 1276b39c5158Smillert 1277b39c5158Smillertsub write { 1278b39c5158Smillert my $self = shift; 1279b39c5158Smillert my $file = shift; $file = '' unless defined $file; 1280b39c5158Smillert my $gzip = shift || 0; 1281b39c5158Smillert my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix; 1282b39c5158Smillert my $dummy = ''; 1283b39c5158Smillert 1284b39c5158Smillert ### only need a handle if we have a file to print to ### 1285b39c5158Smillert my $handle = length($file) 1286b39c5158Smillert ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) ) 1287b39c5158Smillert or return ) 1288b39c5158Smillert : $HAS_PERLIO ? do { open my $h, '>', \$dummy; $h } 1289b39c5158Smillert : $HAS_IO_STRING ? IO::String->new 1290b39c5158Smillert : __PACKAGE__->no_string_support(); 1291b39c5158Smillert 1292b39c5158Smillert ### Addresses: #41798: Nonempty $\ when writing a TAR file produces a 1293b39c5158Smillert ### corrupt TAR file. Must clear out $\ to make sure no garbage is 1294b39c5158Smillert ### printed to the archive 1295b39c5158Smillert local $\; 1296b39c5158Smillert 1297b39c5158Smillert for my $entry ( @{$self->_data} ) { 1298b39c5158Smillert ### entries to be written to the tarfile ### 1299b39c5158Smillert my @write_me; 1300b39c5158Smillert 1301b39c5158Smillert ### only now will we change the object to reflect the current state 1302b39c5158Smillert ### of the name and prefix fields -- this needs to be limited to 1303b39c5158Smillert ### write() only! 1304b39c5158Smillert my $clone = $entry->clone; 1305b39c5158Smillert 1306b39c5158Smillert 1307b39c5158Smillert ### so, if you don't want use to use the prefix, we'll stuff 1308b39c5158Smillert ### everything in the name field instead 1309b39c5158Smillert if( $DO_NOT_USE_PREFIX ) { 1310b39c5158Smillert 1311b39c5158Smillert ### you might have an extended prefix, if so, set it in the clone 1312b39c5158Smillert ### XXX is ::Unix right? 1313b39c5158Smillert $clone->name( length $ext_prefix 1314b39c5158Smillert ? File::Spec::Unix->catdir( $ext_prefix, 1315b39c5158Smillert $clone->full_path) 1316b39c5158Smillert : $clone->full_path ); 1317b39c5158Smillert $clone->prefix( '' ); 1318b39c5158Smillert 1319b39c5158Smillert ### otherwise, we'll have to set it properly -- prefix part in the 1320b39c5158Smillert ### prefix and name part in the name field. 1321b39c5158Smillert } else { 1322b39c5158Smillert 1323b39c5158Smillert ### split them here, not before! 1324b39c5158Smillert my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path ); 1325b39c5158Smillert 1326b39c5158Smillert ### you might have an extended prefix, if so, set it in the clone 1327b39c5158Smillert ### XXX is ::Unix right? 1328b39c5158Smillert $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix ) 1329b39c5158Smillert if length $ext_prefix; 1330b39c5158Smillert 1331b39c5158Smillert $clone->prefix( $prefix ); 1332b39c5158Smillert $clone->name( $name ); 1333b39c5158Smillert } 1334b39c5158Smillert 1335b39c5158Smillert ### names are too long, and will get truncated if we don't add a 1336b39c5158Smillert ### '@LongLink' file... 1337b39c5158Smillert my $make_longlink = ( length($clone->name) > NAME_LENGTH or 1338b39c5158Smillert length($clone->prefix) > PREFIX_LENGTH 1339b39c5158Smillert ) || 0; 1340b39c5158Smillert 1341b39c5158Smillert ### perhaps we need to make a longlink file? 1342b39c5158Smillert if( $make_longlink ) { 1343b39c5158Smillert my $longlink = Archive::Tar::File->new( 1344b39c5158Smillert data => LONGLINK_NAME, 1345b39c5158Smillert $clone->full_path, 1346b39c5158Smillert { type => LONGLINK } 1347b39c5158Smillert ); 1348b39c5158Smillert 1349b39c5158Smillert unless( $longlink ) { 1350b39c5158Smillert $self->_error( qq[Could not create 'LongLink' entry for ] . 1351b39c5158Smillert qq[oversize file '] . $clone->full_path ."'" ); 1352b39c5158Smillert return; 1353b39c5158Smillert }; 1354b39c5158Smillert 1355b39c5158Smillert push @write_me, $longlink; 1356b39c5158Smillert } 1357b39c5158Smillert 1358b39c5158Smillert push @write_me, $clone; 1359b39c5158Smillert 1360b39c5158Smillert ### write the one, optionally 2 a::t::file objects to the handle 1361b39c5158Smillert for my $clone (@write_me) { 1362b39c5158Smillert 1363b39c5158Smillert ### if the file is a symlink, there are 2 options: 1364b39c5158Smillert ### either we leave the symlink intact, but then we don't write any 1365b39c5158Smillert ### data OR we follow the symlink, which means we actually make a 1366b39c5158Smillert ### copy. if we do the latter, we have to change the TYPE of the 1367b39c5158Smillert ### clone to 'FILE' 1368b39c5158Smillert my $link_ok = $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK; 1369b39c5158Smillert my $data_ok = !$clone->is_symlink && $clone->has_content; 1370b39c5158Smillert 1371b39c5158Smillert ### downgrade to a 'normal' file if it's a symlink we're going to 1372b39c5158Smillert ### treat as a regular file 1373b39c5158Smillert $clone->_downgrade_to_plainfile if $link_ok; 1374b39c5158Smillert 1375b39c5158Smillert ### get the header for this block 1376b39c5158Smillert my $header = $self->_format_tar_entry( $clone ); 1377b39c5158Smillert unless( $header ) { 1378b39c5158Smillert $self->_error(q[Could not format header for: ] . 1379b39c5158Smillert $clone->full_path ); 1380b39c5158Smillert return; 1381b39c5158Smillert } 1382b39c5158Smillert 1383b39c5158Smillert unless( print $handle $header ) { 1384b39c5158Smillert $self->_error(q[Could not write header for: ] . 1385b39c5158Smillert $clone->full_path); 1386b39c5158Smillert return; 1387b39c5158Smillert } 1388b39c5158Smillert 1389b39c5158Smillert if( $link_ok or $data_ok ) { 1390b39c5158Smillert unless( print $handle $clone->data ) { 1391b39c5158Smillert $self->_error(q[Could not write data for: ] . 1392b39c5158Smillert $clone->full_path); 1393b39c5158Smillert return; 1394b39c5158Smillert } 1395b39c5158Smillert 1396b39c5158Smillert ### pad the end of the clone if required ### 1397b39c5158Smillert print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK 1398b39c5158Smillert } 1399b39c5158Smillert 1400b39c5158Smillert } ### done writing these entries 1401b39c5158Smillert } 1402b39c5158Smillert 1403b39c5158Smillert ### write the end markers ### 1404b39c5158Smillert print $handle TAR_END x 2 or 1405b39c5158Smillert return $self->_error( qq[Could not write tar end markers] ); 1406b39c5158Smillert 1407b39c5158Smillert ### did you want it written to a file, or returned as a string? ### 1408b39c5158Smillert my $rv = length($file) ? 1 1409b39c5158Smillert : $HAS_PERLIO ? $dummy 1410b39c5158Smillert : do { seek $handle, 0, 0; local $/; <$handle> }; 1411b39c5158Smillert 1412898184e3Ssthen ### make sure to close the handle if we created it 1413898184e3Ssthen if ( $file ne $handle ) { 1414898184e3Ssthen unless( close $handle ) { 1415898184e3Ssthen $self->_error( qq[Could not write tar] ); 1416898184e3Ssthen return; 1417898184e3Ssthen } 1418898184e3Ssthen } 1419b39c5158Smillert 1420b39c5158Smillert return $rv; 1421b39c5158Smillert} 1422b39c5158Smillert 1423b39c5158Smillertsub _format_tar_entry { 1424b39c5158Smillert my $self = shift; 1425b39c5158Smillert my $entry = shift or return; 1426b39c5158Smillert my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix; 1427b39c5158Smillert my $no_prefix = shift || 0; 1428b39c5158Smillert 1429b39c5158Smillert my $file = $entry->name; 1430b39c5158Smillert my $prefix = $entry->prefix; $prefix = '' unless defined $prefix; 1431b39c5158Smillert 1432b39c5158Smillert ### remove the prefix from the file name 1433898184e3Ssthen ### not sure if this is still needed --kane 1434b39c5158Smillert ### no it's not -- Archive::Tar::File->_new_from_file will take care of 1435b39c5158Smillert ### this for us. Even worse, this would break if we tried to add a file 1436b39c5158Smillert ### like x/x. 1437b39c5158Smillert #if( length $prefix ) { 1438b39c5158Smillert # $file =~ s/^$match//; 1439b39c5158Smillert #} 1440b39c5158Smillert 1441b39c5158Smillert $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix) 1442b39c5158Smillert if length $ext_prefix; 1443b39c5158Smillert 1444b39c5158Smillert ### not sure why this is... ### 1445b39c5158Smillert my $l = PREFIX_LENGTH; # is ambiguous otherwise... 1446b39c5158Smillert substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH; 1447b39c5158Smillert 1448898184e3Ssthen my $f1 = "%06o"; my $f2 = $ZERO_PAD_NUMBERS ? "%011o" : "%11o"; 1449b39c5158Smillert 1450b39c5158Smillert ### this might be optimizable with a 'changed' flag in the file objects ### 1451b39c5158Smillert my $tar = pack ( 1452b39c5158Smillert PACK, 1453b39c5158Smillert $file, 1454b39c5158Smillert 1455b39c5158Smillert (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]), 1456b39c5158Smillert (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]), 1457b39c5158Smillert 1458b39c5158Smillert "", # checksum field - space padded a bit down 1459b39c5158Smillert 1460b39c5158Smillert (map { $entry->$_() } qw[type linkname magic]), 1461b39c5158Smillert 1462b39c5158Smillert $entry->version || TAR_VERSION, 1463b39c5158Smillert 1464b39c5158Smillert (map { $entry->$_() } qw[uname gname]), 1465b39c5158Smillert (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]), 1466b39c5158Smillert 1467b39c5158Smillert ($no_prefix ? '' : $prefix) 1468b39c5158Smillert ); 1469b39c5158Smillert 1470b39c5158Smillert ### add the checksum ### 1471898184e3Ssthen my $checksum_fmt = $ZERO_PAD_NUMBERS ? "%06o\0" : "%06o\0"; 1472b39c5158Smillert substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar)); 1473b39c5158Smillert 1474b39c5158Smillert return $tar; 1475b39c5158Smillert} 1476b39c5158Smillert 1477b39c5158Smillert=head2 $tar->add_files( @filenamelist ) 1478b39c5158Smillert 1479b39c5158SmillertTakes a list of filenames and adds them to the in-memory archive. 1480b39c5158Smillert 1481b39c5158SmillertThe path to the file is automatically converted to a Unix like 1482b39c5158Smillertequivalent for use in the archive, and, if on MacOS, the file's 1483b39c5158Smillertmodification time is converted from the MacOS epoch to the Unix epoch. 1484b39c5158SmillertSo tar archives created on MacOS with B<Archive::Tar> can be read 1485b39c5158Smillertboth with I<tar> on Unix and applications like I<suntar> or 1486b39c5158SmillertI<Stuffit Expander> on MacOS. 1487b39c5158Smillert 1488b39c5158SmillertBe aware that the file's type/creator and resource fork will be lost, 1489b39c5158Smillertwhich is usually what you want in cross-platform archives. 1490b39c5158Smillert 1491b39c5158SmillertInstead of a filename, you can also pass it an existing C<Archive::Tar::File> 1492b39c5158Smillertobject from, for example, another archive. The object will be clone, and 1493b39c5158Smillerteffectively be a copy of the original, not an alias. 1494b39c5158Smillert 1495b39c5158SmillertReturns a list of C<Archive::Tar::File> objects that were just added. 1496b39c5158Smillert 1497b39c5158Smillert=cut 1498b39c5158Smillert 1499b39c5158Smillertsub add_files { 1500b39c5158Smillert my $self = shift; 1501b39c5158Smillert my @files = @_ or return; 1502b39c5158Smillert 1503b39c5158Smillert my @rv; 1504b39c5158Smillert for my $file ( @files ) { 1505b39c5158Smillert 1506b39c5158Smillert ### you passed an Archive::Tar::File object 1507b39c5158Smillert ### clone it so we don't accidentally have a reference to 1508b39c5158Smillert ### an object from another archive 1509b39c5158Smillert if( UNIVERSAL::isa( $file,'Archive::Tar::File' ) ) { 1510b39c5158Smillert push @rv, $file->clone; 1511b39c5158Smillert next; 1512b39c5158Smillert } 1513b39c5158Smillert 151491f110e0Safresh1 eval { 151591f110e0Safresh1 if( utf8::is_utf8( $file )) { 151691f110e0Safresh1 utf8::encode( $file ); 151791f110e0Safresh1 } 151891f110e0Safresh1 }; 151991f110e0Safresh1 1520b39c5158Smillert unless( -e $file || -l $file ) { 1521b39c5158Smillert $self->_error( qq[No such file: '$file'] ); 1522b39c5158Smillert next; 1523b39c5158Smillert } 1524b39c5158Smillert 1525b39c5158Smillert my $obj = Archive::Tar::File->new( file => $file ); 1526b39c5158Smillert unless( $obj ) { 1527b39c5158Smillert $self->_error( qq[Unable to add file: '$file'] ); 1528b39c5158Smillert next; 1529b39c5158Smillert } 1530b39c5158Smillert 1531b39c5158Smillert push @rv, $obj; 1532b39c5158Smillert } 1533b39c5158Smillert 1534b39c5158Smillert push @{$self->{_data}}, @rv; 1535b39c5158Smillert 1536b39c5158Smillert return @rv; 1537b39c5158Smillert} 1538b39c5158Smillert 1539b39c5158Smillert=head2 $tar->add_data ( $filename, $data, [$opthashref] ) 1540b39c5158Smillert 1541b39c5158SmillertTakes a filename, a scalar full of data and optionally a reference to 1542b39c5158Smillerta hash with specific options. 1543b39c5158Smillert 1544b39c5158SmillertWill add a file to the in-memory archive, with name C<$filename> and 1545b39c5158Smillertcontent C<$data>. Specific properties can be set using C<$opthashref>. 1546b39c5158SmillertThe following list of properties is supported: name, size, mtime 1547b39c5158Smillert(last modified date), mode, uid, gid, linkname, uname, gname, 1548b39c5158Smillertdevmajor, devminor, prefix, type. (On MacOS, the file's path and 1549b39c5158Smillertmodification times are converted to Unix equivalents.) 1550b39c5158Smillert 155191f110e0Safresh1Valid values for the file type are the following constants defined by 155291f110e0Safresh1Archive::Tar::Constant: 1553b39c5158Smillert 1554b39c5158Smillert=over 4 1555b39c5158Smillert 1556b39c5158Smillert=item FILE 1557b39c5158Smillert 1558b39c5158SmillertRegular file. 1559b39c5158Smillert 1560b39c5158Smillert=item HARDLINK 1561b39c5158Smillert 1562b39c5158Smillert=item SYMLINK 1563b39c5158Smillert 1564b39c5158SmillertHard and symbolic ("soft") links; linkname should specify target. 1565b39c5158Smillert 1566b39c5158Smillert=item CHARDEV 1567b39c5158Smillert 1568b39c5158Smillert=item BLOCKDEV 1569b39c5158Smillert 1570b39c5158SmillertCharacter and block devices. devmajor and devminor should specify the major 1571b39c5158Smillertand minor device numbers. 1572b39c5158Smillert 1573b39c5158Smillert=item DIR 1574b39c5158Smillert 1575b39c5158SmillertDirectory. 1576b39c5158Smillert 1577b39c5158Smillert=item FIFO 1578b39c5158Smillert 1579b39c5158SmillertFIFO (named pipe). 1580b39c5158Smillert 1581b39c5158Smillert=item SOCKET 1582b39c5158Smillert 1583b39c5158SmillertSocket. 1584b39c5158Smillert 1585b39c5158Smillert=back 1586b39c5158Smillert 1587b39c5158SmillertReturns the C<Archive::Tar::File> object that was just added, or 1588b39c5158SmillertC<undef> on failure. 1589b39c5158Smillert 1590b39c5158Smillert=cut 1591b39c5158Smillert 1592b39c5158Smillertsub add_data { 1593b39c5158Smillert my $self = shift; 1594b39c5158Smillert my ($file, $data, $opt) = @_; 1595b39c5158Smillert 1596b39c5158Smillert my $obj = Archive::Tar::File->new( data => $file, $data, $opt ); 1597b39c5158Smillert unless( $obj ) { 1598b39c5158Smillert $self->_error( qq[Unable to add file: '$file'] ); 1599b39c5158Smillert return; 1600b39c5158Smillert } 1601b39c5158Smillert 1602b39c5158Smillert push @{$self->{_data}}, $obj; 1603b39c5158Smillert 1604b39c5158Smillert return $obj; 1605b39c5158Smillert} 1606b39c5158Smillert 1607b39c5158Smillert=head2 $tar->error( [$BOOL] ) 1608b39c5158Smillert 1609b39c5158SmillertReturns the current error string (usually, the last error reported). 1610b39c5158SmillertIf a true value was specified, it will give the C<Carp::longmess> 1611b39c5158Smillertequivalent of the error, in effect giving you a stacktrace. 1612b39c5158Smillert 1613b39c5158SmillertFor backwards compatibility, this error is also available as 1614b39c5158SmillertC<$Archive::Tar::error> although it is much recommended you use the 1615b39c5158Smillertmethod call instead. 1616b39c5158Smillert 1617b39c5158Smillert=cut 1618b39c5158Smillert 1619b39c5158Smillert{ 1620b39c5158Smillert $error = ''; 1621b39c5158Smillert my $longmess; 1622b39c5158Smillert 1623b39c5158Smillert sub _error { 1624b39c5158Smillert my $self = shift; 1625b39c5158Smillert my $msg = $error = shift; 1626b39c5158Smillert $longmess = Carp::longmess($error); 1627b39c5158Smillert if (ref $self) { 1628b39c5158Smillert $self->{_error} = $error; 1629b39c5158Smillert $self->{_longmess} = $longmess; 1630b39c5158Smillert } 1631b39c5158Smillert 1632b39c5158Smillert ### set Archive::Tar::WARN to 0 to disable printing 1633b39c5158Smillert ### of errors 1634b39c5158Smillert if( $WARN ) { 1635b39c5158Smillert carp $DEBUG ? $longmess : $msg; 1636b39c5158Smillert } 1637b39c5158Smillert 1638b39c5158Smillert return; 1639b39c5158Smillert } 1640b39c5158Smillert 1641b39c5158Smillert sub error { 1642b39c5158Smillert my $self = shift; 1643b39c5158Smillert if (ref $self) { 1644b39c5158Smillert return shift() ? $self->{_longmess} : $self->{_error}; 1645b39c5158Smillert } else { 1646b39c5158Smillert return shift() ? $longmess : $error; 1647b39c5158Smillert } 1648b39c5158Smillert } 1649b39c5158Smillert} 1650b39c5158Smillert 1651b39c5158Smillert=head2 $tar->setcwd( $cwd ); 1652b39c5158Smillert 1653b39c5158SmillertC<Archive::Tar> needs to know the current directory, and it will run 1654b39c5158SmillertC<Cwd::cwd()> I<every> time it extracts a I<relative> entry from the 1655b39c5158Smillerttarfile and saves it in the file system. (As of version 1.30, however, 1656b39c5158SmillertC<Archive::Tar> will use the speed optimization described below 1657b39c5158Smillertautomatically, so it's only relevant if you're using C<extract_file()>). 1658b39c5158Smillert 1659b39c5158SmillertSince C<Archive::Tar> doesn't change the current directory internally 1660b39c5158Smillertwhile it is extracting the items in a tarball, all calls to C<Cwd::cwd()> 1661b39c5158Smillertcan be avoided if we can guarantee that the current directory doesn't 1662b39c5158Smillertget changed externally. 1663b39c5158Smillert 1664b39c5158SmillertTo use this performance boost, set the current directory via 1665b39c5158Smillert 1666b39c5158Smillert use Cwd; 1667b39c5158Smillert $tar->setcwd( cwd() ); 1668b39c5158Smillert 1669b39c5158Smillertonce before calling a function like C<extract_file> and 1670b39c5158SmillertC<Archive::Tar> will use the current directory setting from then on 1671b39c5158Smillertand won't call C<Cwd::cwd()> internally. 1672b39c5158Smillert 1673b39c5158SmillertTo switch back to the default behaviour, use 1674b39c5158Smillert 1675b39c5158Smillert $tar->setcwd( undef ); 1676b39c5158Smillert 1677b39c5158Smillertand C<Archive::Tar> will call C<Cwd::cwd()> internally again. 1678b39c5158Smillert 1679898184e3SsthenIf you're using C<Archive::Tar>'s C<extract()> method, C<setcwd()> will 1680b39c5158Smillertbe called for you. 1681b39c5158Smillert 1682b39c5158Smillert=cut 1683b39c5158Smillert 1684b39c5158Smillertsub setcwd { 1685b39c5158Smillert my $self = shift; 1686b39c5158Smillert my $cwd = shift; 1687b39c5158Smillert 1688b39c5158Smillert $self->{cwd} = $cwd; 1689b39c5158Smillert} 1690b39c5158Smillert 1691b39c5158Smillert=head1 Class Methods 1692b39c5158Smillert 1693b39c5158Smillert=head2 Archive::Tar->create_archive($file, $compressed, @filelist) 1694b39c5158Smillert 1695b39c5158SmillertCreates a tar file from the list of files provided. The first 1696b39c5158Smillertargument can either be the name of the tar file to create or a 1697b39c5158Smillertreference to an open file handle (e.g. a GLOB reference). 1698b39c5158Smillert 1699b39c5158SmillertThe second argument is used to indicate compression. You can either 1700b39c5158Smillertcompress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed 1701b39c5158Smillertto be the C<gzip> compression level (between 1 and 9), but the use of 1702898184e3Ssthenconstants is preferred: 1703b39c5158Smillert 1704b39c5158Smillert # write a gzip compressed file 1705b39c5158Smillert Archive::Tar->create_archive( 'out.tgz', COMPRESS_GZIP, @filelist ); 1706b39c5158Smillert 1707b39c5158Smillert # write a bzip compressed file 1708b39c5158Smillert Archive::Tar->create_archive( 'out.tbz', COMPRESS_BZIP, @filelist ); 1709b39c5158Smillert 1710b39c5158SmillertNote that when you pass in a filehandle, the compression argument 1711b39c5158Smillertis ignored, as all files are printed verbatim to your filehandle. 1712b39c5158SmillertIf you wish to enable compression with filehandles, use an 1713b39c5158SmillertC<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead. 1714b39c5158Smillert 1715b39c5158SmillertThe remaining arguments list the files to be included in the tar file. 1716b39c5158SmillertThese files must all exist. Any files which don't exist or can't be 1717b39c5158Smillertread are silently ignored. 1718b39c5158Smillert 1719b39c5158SmillertIf the archive creation fails for any reason, C<create_archive> will 1720b39c5158Smillertreturn false. Please use the C<error> method to find the cause of the 1721b39c5158Smillertfailure. 1722b39c5158Smillert 1723b39c5158SmillertNote that this method does not write C<on the fly> as it were; it 1724b39c5158Smillertstill reads all the files into memory before writing out the archive. 1725b39c5158SmillertConsult the FAQ below if this is a problem. 1726b39c5158Smillert 1727b39c5158Smillert=cut 1728b39c5158Smillert 1729b39c5158Smillertsub create_archive { 1730b39c5158Smillert my $class = shift; 1731b39c5158Smillert 1732b39c5158Smillert my $file = shift; return unless defined $file; 1733b39c5158Smillert my $gzip = shift || 0; 1734b39c5158Smillert my @files = @_; 1735b39c5158Smillert 1736b39c5158Smillert unless( @files ) { 1737b39c5158Smillert return $class->_error( qq[Cowardly refusing to create empty archive!] ); 1738b39c5158Smillert } 1739b39c5158Smillert 1740b39c5158Smillert my $tar = $class->new; 1741b39c5158Smillert $tar->add_files( @files ); 1742b39c5158Smillert return $tar->write( $file, $gzip ); 1743b39c5158Smillert} 1744b39c5158Smillert 1745b39c5158Smillert=head2 Archive::Tar->iter( $filename, [ $compressed, {opt => $val} ] ) 1746b39c5158Smillert 1747b39c5158SmillertReturns an iterator function that reads the tar file without loading 1748b39c5158Smillertit all in memory. Each time the function is called it will return the 1749b39c5158Smillertnext file in the tarball. The files are returned as 1750b39c5158SmillertC<Archive::Tar::File> objects. The iterator function returns the 1751b39c5158Smillertempty list once it has exhausted the files contained. 1752b39c5158Smillert 1753b39c5158SmillertThe second argument can be a hash reference with options, which are 1754b39c5158Smillertidentical to the arguments passed to C<read()>. 1755b39c5158Smillert 1756b39c5158SmillertExample usage: 1757b39c5158Smillert 1758b39c5158Smillert my $next = Archive::Tar->iter( "example.tar.gz", 1, {filter => qr/\.pm$/} ); 1759b39c5158Smillert 1760b39c5158Smillert while( my $f = $next->() ) { 1761b39c5158Smillert print $f->name, "\n"; 1762b39c5158Smillert 1763b39c5158Smillert $f->extract or warn "Extraction failed"; 1764b39c5158Smillert 1765b39c5158Smillert # .... 1766b39c5158Smillert } 1767b39c5158Smillert 1768b39c5158Smillert=cut 1769b39c5158Smillert 1770b39c5158Smillert 1771b39c5158Smillertsub iter { 1772b39c5158Smillert my $class = shift; 1773*9f11ffb7Safresh1 my $filename = shift; 1774*9f11ffb7Safresh1 return unless defined $filename; 1775898184e3Ssthen my $compressed = shift || 0; 1776b39c5158Smillert my $opts = shift || {}; 1777b39c5158Smillert 1778b39c5158Smillert ### get a handle to read from. 1779b39c5158Smillert my $handle = $class->_get_handle( 1780b39c5158Smillert $filename, 1781b39c5158Smillert $compressed, 1782b39c5158Smillert READ_ONLY->( ZLIB ) 1783b39c5158Smillert ) or return; 1784b39c5158Smillert 1785b39c5158Smillert my @data; 17866fb12b70Safresh1 my $CONSTRUCT_ARGS = [ $filename, $compressed, $opts ]; 1787b39c5158Smillert return sub { 1788b39c5158Smillert return shift(@data) if @data; # more than one file returned? 1789b39c5158Smillert return unless $handle; # handle exhausted? 1790b39c5158Smillert 1791b39c5158Smillert ### read data, should only return file 1792b39c5158Smillert my $tarfile = $class->_read_tar($handle, { %$opts, limit => 1 }); 1793b39c5158Smillert @data = @$tarfile if ref $tarfile && ref $tarfile eq 'ARRAY'; 17946fb12b70Safresh1 if($Archive::Tar::RESOLVE_SYMLINK!~/none/){ 17956fb12b70Safresh1 foreach(@data){ 17966fb12b70Safresh1 #may refine this heuristic for ON_UNIX? 17976fb12b70Safresh1 if($_->linkname){ 17986fb12b70Safresh1 #is there a better slot to store/share it ? 17996fb12b70Safresh1 $_->{'_archive'} = $CONSTRUCT_ARGS; 18006fb12b70Safresh1 } 18016fb12b70Safresh1 } 18026fb12b70Safresh1 } 1803b39c5158Smillert 1804b39c5158Smillert ### return one piece of data 1805b39c5158Smillert return shift(@data) if @data; 1806b39c5158Smillert 1807b39c5158Smillert ### data is exhausted, free the filehandle 1808b39c5158Smillert undef $handle; 18096fb12b70Safresh1 if(@$CONSTRUCT_ARGS == 4){ 18106fb12b70Safresh1 #free archive in memory 18116fb12b70Safresh1 undef $CONSTRUCT_ARGS->[-1]; 18126fb12b70Safresh1 } 1813b39c5158Smillert return; 1814b39c5158Smillert }; 1815b39c5158Smillert} 1816b39c5158Smillert 1817b39c5158Smillert=head2 Archive::Tar->list_archive($file, $compressed, [\@properties]) 1818b39c5158Smillert 1819b39c5158SmillertReturns a list of the names of all the files in the archive. The 1820b39c5158Smillertfirst argument can either be the name of the tar file to list or a 1821b39c5158Smillertreference to an open file handle (e.g. a GLOB reference). 1822b39c5158Smillert 1823b39c5158SmillertIf C<list_archive()> is passed an array reference as its third 1824b39c5158Smillertargument it returns a list of hash references containing the requested 1825b39c5158Smillertproperties of each file. The following list of properties is 1826b39c5158Smillertsupported: full_path, name, size, mtime (last modified date), mode, 182791f110e0Safresh1uid, gid, linkname, uname, gname, devmajor, devminor, prefix, type. 1828b39c5158Smillert 1829b39c5158SmillertSee C<Archive::Tar::File> for details about supported properties. 1830b39c5158Smillert 1831b39c5158SmillertPassing an array reference containing only one element, 'name', is 1832b39c5158Smillertspecial cased to return a list of names rather than a list of hash 1833b39c5158Smillertreferences. 1834b39c5158Smillert 1835b39c5158Smillert=cut 1836b39c5158Smillert 1837b39c5158Smillertsub list_archive { 1838b39c5158Smillert my $class = shift; 1839b39c5158Smillert my $file = shift; return unless defined $file; 1840b39c5158Smillert my $gzip = shift || 0; 1841b39c5158Smillert 1842b39c5158Smillert my $tar = $class->new($file, $gzip); 1843b39c5158Smillert return unless $tar; 1844b39c5158Smillert 1845b39c5158Smillert return $tar->list_files( @_ ); 1846b39c5158Smillert} 1847b39c5158Smillert 1848b39c5158Smillert=head2 Archive::Tar->extract_archive($file, $compressed) 1849b39c5158Smillert 1850b39c5158SmillertExtracts the contents of the tar file. The first argument can either 1851b39c5158Smillertbe the name of the tar file to create or a reference to an open file 1852b39c5158Smillerthandle (e.g. a GLOB reference). All relative paths in the tar file will 1853b39c5158Smillertbe created underneath the current working directory. 1854b39c5158Smillert 1855b39c5158SmillertC<extract_archive> will return a list of files it extracted. 1856b39c5158SmillertIf the archive extraction fails for any reason, C<extract_archive> 1857b39c5158Smillertwill return false. Please use the C<error> method to find the cause 1858b39c5158Smillertof the failure. 1859b39c5158Smillert 1860b39c5158Smillert=cut 1861b39c5158Smillert 1862b39c5158Smillertsub extract_archive { 1863b39c5158Smillert my $class = shift; 1864b39c5158Smillert my $file = shift; return unless defined $file; 1865b39c5158Smillert my $gzip = shift || 0; 1866b39c5158Smillert 1867b39c5158Smillert my $tar = $class->new( ) or return; 1868b39c5158Smillert 1869b39c5158Smillert return $tar->read( $file, $gzip, { extract => 1 } ); 1870b39c5158Smillert} 1871b39c5158Smillert 1872b39c5158Smillert=head2 $bool = Archive::Tar->has_io_string 1873b39c5158Smillert 1874b39c5158SmillertReturns true if we currently have C<IO::String> support loaded. 1875b39c5158Smillert 1876b39c5158SmillertEither C<IO::String> or C<perlio> support is needed to support writing 1877b39c5158Smillertstringified archives. Currently, C<perlio> is the preferred method, if 1878b39c5158Smillertavailable. 1879b39c5158Smillert 1880b39c5158SmillertSee the C<GLOBAL VARIABLES> section to see how to change this preference. 1881b39c5158Smillert 1882b39c5158Smillert=cut 1883b39c5158Smillert 1884b39c5158Smillertsub has_io_string { return $HAS_IO_STRING; } 1885b39c5158Smillert 1886b39c5158Smillert=head2 $bool = Archive::Tar->has_perlio 1887b39c5158Smillert 1888b39c5158SmillertReturns true if we currently have C<perlio> support loaded. 1889b39c5158Smillert 1890b39c5158SmillertThis requires C<perl-5.8> or higher, compiled with C<perlio> 1891b39c5158Smillert 1892b39c5158SmillertEither C<IO::String> or C<perlio> support is needed to support writing 1893b39c5158Smillertstringified archives. Currently, C<perlio> is the preferred method, if 1894b39c5158Smillertavailable. 1895b39c5158Smillert 1896b39c5158SmillertSee the C<GLOBAL VARIABLES> section to see how to change this preference. 1897b39c5158Smillert 1898b39c5158Smillert=cut 1899b39c5158Smillert 1900b39c5158Smillertsub has_perlio { return $HAS_PERLIO; } 1901b39c5158Smillert 1902b39c5158Smillert=head2 $bool = Archive::Tar->has_zlib_support 1903b39c5158Smillert 1904b39c5158SmillertReturns true if C<Archive::Tar> can extract C<zlib> compressed archives 1905b39c5158Smillert 1906b39c5158Smillert=cut 1907b39c5158Smillert 1908b39c5158Smillertsub has_zlib_support { return ZLIB } 1909b39c5158Smillert 1910b39c5158Smillert=head2 $bool = Archive::Tar->has_bzip2_support 1911b39c5158Smillert 1912b39c5158SmillertReturns true if C<Archive::Tar> can extract C<bzip2> compressed archives 1913b39c5158Smillert 1914b39c5158Smillert=cut 1915b39c5158Smillert 1916b39c5158Smillertsub has_bzip2_support { return BZIP } 1917b39c5158Smillert 1918b39c5158Smillert=head2 Archive::Tar->can_handle_compressed_files 1919b39c5158Smillert 1920b39c5158SmillertA simple checking routine, which will return true if C<Archive::Tar> 1921b39c5158Smillertis able to uncompress compressed archives on the fly with C<IO::Zlib> 1922b39c5158Smillertand C<IO::Compress::Bzip2> or false if not both are installed. 1923b39c5158Smillert 1924b39c5158SmillertYou can use this as a shortcut to determine whether C<Archive::Tar> 1925b39c5158Smillertwill do what you think before passing compressed archives to its 1926b39c5158SmillertC<read> method. 1927b39c5158Smillert 1928b39c5158Smillert=cut 1929b39c5158Smillert 1930b39c5158Smillertsub can_handle_compressed_files { return ZLIB && BZIP ? 1 : 0 } 1931b39c5158Smillert 1932b39c5158Smillertsub no_string_support { 1933b39c5158Smillert croak("You have to install IO::String to support writing archives to strings"); 1934b39c5158Smillert} 1935b39c5158Smillert 19366fb12b70Safresh1sub _symlinks_resolver{ 19376fb12b70Safresh1 my ($src, $trg) = @_; 19386fb12b70Safresh1 my @src = split /[\/\\]/, $src; 19396fb12b70Safresh1 my @trg = split /[\/\\]/, $trg; 19406fb12b70Safresh1 pop @src; #strip out current object name 19416fb12b70Safresh1 if(@trg and $trg[0] eq ''){ 19426fb12b70Safresh1 shift @trg; 19436fb12b70Safresh1 #restart path from scratch 19446fb12b70Safresh1 @src = ( ); 19456fb12b70Safresh1 } 19466fb12b70Safresh1 foreach my $part ( @trg ){ 19476fb12b70Safresh1 next if $part eq '.'; #ignore current 19486fb12b70Safresh1 if($part eq '..'){ 19496fb12b70Safresh1 #got to parent 19506fb12b70Safresh1 pop @src; 19516fb12b70Safresh1 } 19526fb12b70Safresh1 else{ 19536fb12b70Safresh1 #append it 19546fb12b70Safresh1 push @src, $part; 19556fb12b70Safresh1 } 19566fb12b70Safresh1 } 19576fb12b70Safresh1 my $path = join('/', @src); 19586fb12b70Safresh1 warn "_symlinks_resolver('$src','$trg') = $path" if $DEBUG; 19596fb12b70Safresh1 return $path; 19606fb12b70Safresh1} 19616fb12b70Safresh1 1962b39c5158Smillert1; 1963b39c5158Smillert 1964b39c5158Smillert__END__ 1965b39c5158Smillert 1966b39c5158Smillert=head1 GLOBAL VARIABLES 1967b39c5158Smillert 1968b39c5158Smillert=head2 $Archive::Tar::FOLLOW_SYMLINK 1969b39c5158Smillert 1970b39c5158SmillertSet this variable to C<1> to make C<Archive::Tar> effectively make a 1971b39c5158Smillertcopy of the file when extracting. Default is C<0>, which 1972b39c5158Smillertmeans the symlink stays intact. Of course, you will have to pack the 1973b39c5158Smillertfile linked to as well. 1974b39c5158Smillert 1975b39c5158SmillertThis option is checked when you write out the tarfile using C<write> 1976b39c5158Smillertor C<create_archive>. 1977b39c5158Smillert 1978b39c5158SmillertThis works just like C</bin/tar>'s C<-h> option. 1979b39c5158Smillert 1980b39c5158Smillert=head2 $Archive::Tar::CHOWN 1981b39c5158Smillert 1982b39c5158SmillertBy default, C<Archive::Tar> will try to C<chown> your files if it is 1983b39c5158Smillertable to. In some cases, this may not be desired. In that case, set 1984b39c5158Smillertthis variable to C<0> to disable C<chown>-ing, even if it were 1985b39c5158Smillertpossible. 1986b39c5158Smillert 1987b39c5158SmillertThe default is C<1>. 1988b39c5158Smillert 1989b39c5158Smillert=head2 $Archive::Tar::CHMOD 1990b39c5158Smillert 1991b39c5158SmillertBy default, C<Archive::Tar> will try to C<chmod> your files to 1992b39c5158Smillertwhatever mode was specified for the particular file in the archive. 1993b39c5158SmillertIn some cases, this may not be desired. In that case, set this 1994b39c5158Smillertvariable to C<0> to disable C<chmod>-ing. 1995b39c5158Smillert 1996b39c5158SmillertThe default is C<1>. 1997b39c5158Smillert 1998b39c5158Smillert=head2 $Archive::Tar::SAME_PERMISSIONS 1999b39c5158Smillert 2000b39c5158SmillertWhen, C<$Archive::Tar::CHMOD> is enabled, this setting controls whether 2001b39c5158Smillertthe permissions on files from the archive are used without modification 2002b39c5158Smillertof if they are filtered by removing any setid bits and applying the 2003b39c5158Smillertcurrent umask. 2004b39c5158Smillert 2005b39c5158SmillertThe default is C<1> for the root user and C<0> for normal users. 2006b39c5158Smillert 2007b39c5158Smillert=head2 $Archive::Tar::DO_NOT_USE_PREFIX 2008b39c5158Smillert 2009b39c5158SmillertBy default, C<Archive::Tar> will try to put paths that are over 2010b39c5158Smillert100 characters in the C<prefix> field of your tar header, as 2011b39c5158Smillertdefined per POSIX-standard. However, some (older) tar programs 2012b39c5158Smillertdo not implement this spec. To retain compatibility with these older 2013b39c5158Smillertor non-POSIX compliant versions, you can set the C<$DO_NOT_USE_PREFIX> 2014b39c5158Smillertvariable to a true value, and C<Archive::Tar> will use an alternate 2015b39c5158Smillertway of dealing with paths over 100 characters by using the 2016b39c5158SmillertC<GNU Extended Header> feature. 2017b39c5158Smillert 2018b39c5158SmillertNote that clients who do not support the C<GNU Extended Header> 2019b39c5158Smillertfeature will not be able to read these archives. Such clients include 2020b39c5158Smillerttars on C<Solaris>, C<Irix> and C<AIX>. 2021b39c5158Smillert 2022b39c5158SmillertThe default is C<0>. 2023b39c5158Smillert 2024b39c5158Smillert=head2 $Archive::Tar::DEBUG 2025b39c5158Smillert 2026b39c5158SmillertSet this variable to C<1> to always get the C<Carp::longmess> output 2027b39c5158Smillertof the warnings, instead of the regular C<carp>. This is the same 2028b39c5158Smillertmessage you would get by doing: 2029b39c5158Smillert 2030b39c5158Smillert $tar->error(1); 2031b39c5158Smillert 2032b39c5158SmillertDefaults to C<0>. 2033b39c5158Smillert 2034b39c5158Smillert=head2 $Archive::Tar::WARN 2035b39c5158Smillert 2036b39c5158SmillertSet this variable to C<0> if you do not want any warnings printed. 2037b39c5158SmillertPersonally I recommend against doing this, but people asked for the 2038b39c5158Smillertoption. Also, be advised that this is of course not threadsafe. 2039b39c5158Smillert 2040b39c5158SmillertDefaults to C<1>. 2041b39c5158Smillert 2042b39c5158Smillert=head2 $Archive::Tar::error 2043b39c5158Smillert 2044b39c5158SmillertHolds the last reported error. Kept for historical reasons, but its 2045b39c5158Smillertuse is very much discouraged. Use the C<error()> method instead: 2046b39c5158Smillert 2047b39c5158Smillert warn $tar->error unless $tar->extract; 2048b39c5158Smillert 2049b39c5158SmillertNote that in older versions of this module, the C<error()> method 2050b39c5158Smillertwould return an effectively global value even when called an instance 2051b39c5158Smillertmethod as above. This has since been fixed, and multiple instances of 2052b39c5158SmillertC<Archive::Tar> now have separate error strings. 2053b39c5158Smillert 2054b39c5158Smillert=head2 $Archive::Tar::INSECURE_EXTRACT_MODE 2055b39c5158Smillert 2056b39c5158SmillertThis variable indicates whether C<Archive::Tar> should allow 2057b39c5158Smillertfiles to be extracted outside their current working directory. 2058b39c5158Smillert 2059b39c5158SmillertAllowing this could have security implications, as a malicious 2060b39c5158Smillerttar archive could alter or replace any file the extracting user 2061b39c5158Smillerthas permissions to. Therefor, the default is to not allow 2062b39c5158Smillertinsecure extractions. 2063b39c5158Smillert 2064b39c5158SmillertIf you trust the archive, or have other reasons to allow the 2065b39c5158Smillertarchive to write files outside your current working directory, 2066b39c5158Smillertset this variable to C<true>. 2067b39c5158Smillert 2068b39c5158SmillertNote that this is a backwards incompatible change from version 2069b39c5158SmillertC<1.36> and before. 2070b39c5158Smillert 2071b39c5158Smillert=head2 $Archive::Tar::HAS_PERLIO 2072b39c5158Smillert 2073b39c5158SmillertThis variable holds a boolean indicating if we currently have 2074b39c5158SmillertC<perlio> support loaded. This will be enabled for any perl 2075b39c5158Smillertgreater than C<5.8> compiled with C<perlio>. 2076b39c5158Smillert 2077b39c5158SmillertIf you feel strongly about disabling it, set this variable to 2078b39c5158SmillertC<false>. Note that you will then need C<IO::String> installed 2079b39c5158Smillertto support writing stringified archives. 2080b39c5158Smillert 2081b39c5158SmillertDon't change this variable unless you B<really> know what you're 2082b39c5158Smillertdoing. 2083b39c5158Smillert 2084b39c5158Smillert=head2 $Archive::Tar::HAS_IO_STRING 2085b39c5158Smillert 2086b39c5158SmillertThis variable holds a boolean indicating if we currently have 2087b39c5158SmillertC<IO::String> support loaded. This will be enabled for any perl 2088b39c5158Smillertthat has a loadable C<IO::String> module. 2089b39c5158Smillert 2090b39c5158SmillertIf you feel strongly about disabling it, set this variable to 2091b39c5158SmillertC<false>. Note that you will then need C<perlio> support from 2092b39c5158Smillertyour perl to be able to write stringified archives. 2093b39c5158Smillert 2094b39c5158SmillertDon't change this variable unless you B<really> know what you're 2095b39c5158Smillertdoing. 2096b39c5158Smillert 2097898184e3Ssthen=head2 $Archive::Tar::ZERO_PAD_NUMBERS 2098898184e3Ssthen 2099898184e3SsthenThis variable holds a boolean indicating if we will create 2100898184e3Ssthenzero padded numbers for C<size>, C<mtime> and C<checksum>. 2101898184e3SsthenThe default is C<0>, indicating that we will create space padded 2102898184e3Ssthennumbers. Added for compatibility with C<busybox> implementations. 2103898184e3Ssthen 21046fb12b70Safresh1=head2 Tuning the way RESOLVE_SYMLINK will works 21056fb12b70Safresh1 21066fb12b70Safresh1 You can tune the behaviour by setting the $Archive::Tar::RESOLVE_SYMLINK variable, 21076fb12b70Safresh1 or $ENV{PERL5_AT_RESOLVE_SYMLINK} before loading the module Archive::Tar. 21086fb12b70Safresh1 21096fb12b70Safresh1 Values can be one of the following: 21106fb12b70Safresh1 21116fb12b70Safresh1 none 21126fb12b70Safresh1 Disable this mechanism and failed as it was in previous version (<1.88) 21136fb12b70Safresh1 21146fb12b70Safresh1 speed (default) 21156fb12b70Safresh1 If you prefer speed 21166fb12b70Safresh1 this will read again the whole archive using read() so all entries 21176fb12b70Safresh1 will be available 21186fb12b70Safresh1 21196fb12b70Safresh1 memory 21206fb12b70Safresh1 If you prefer memory 21216fb12b70Safresh1 21226fb12b70Safresh1 Limitation 21236fb12b70Safresh1 21246fb12b70Safresh1 It won't work for terminal, pipe or sockets or every non seekable source. 21256fb12b70Safresh1 21266fb12b70Safresh1=cut 21276fb12b70Safresh1 2128b39c5158Smillert=head1 FAQ 2129b39c5158Smillert 2130b39c5158Smillert=over 4 2131b39c5158Smillert 2132b39c5158Smillert=item What's the minimum perl version required to run Archive::Tar? 2133b39c5158Smillert 2134b39c5158SmillertYou will need perl version 5.005_03 or newer. 2135b39c5158Smillert 2136b39c5158Smillert=item Isn't Archive::Tar slow? 2137b39c5158Smillert 2138b39c5158SmillertYes it is. It's pure perl, so it's a lot slower then your C</bin/tar> 2139b39c5158SmillertHowever, it's very portable. If speed is an issue, consider using 2140b39c5158SmillertC</bin/tar> instead. 2141b39c5158Smillert 2142b39c5158Smillert=item Isn't Archive::Tar heavier on memory than /bin/tar? 2143b39c5158Smillert 2144b39c5158SmillertYes it is, see previous answer. Since C<Compress::Zlib> and therefore 2145b39c5158SmillertC<IO::Zlib> doesn't support C<seek> on their filehandles, there is little 2146b39c5158Smillertchoice but to read the archive into memory. 2147b39c5158SmillertThis is ok if you want to do in-memory manipulation of the archive. 2148b39c5158Smillert 2149b39c5158SmillertIf you just want to extract, use the C<extract_archive> class method 2150b39c5158Smillertinstead. It will optimize and write to disk immediately. 2151b39c5158Smillert 2152b39c5158SmillertAnother option is to use the C<iter> class method to iterate over 2153b39c5158Smillertthe files in the tarball without reading them all in memory at once. 2154b39c5158Smillert 2155b39c5158Smillert=item Can you lazy-load data instead? 2156b39c5158Smillert 2157b39c5158SmillertIn some cases, yes. You can use the C<iter> class method to iterate 2158b39c5158Smillertover the files in the tarball without reading them all in memory at once. 2159b39c5158Smillert 2160b39c5158Smillert=item How much memory will an X kb tar file need? 2161b39c5158Smillert 2162b39c5158SmillertProbably more than X kb, since it will all be read into memory. If 2163b39c5158Smillertthis is a problem, and you don't need to do in memory manipulation 2164b39c5158Smillertof the archive, consider using the C<iter> class method, or C</bin/tar> 2165b39c5158Smillertinstead. 2166b39c5158Smillert 2167b39c5158Smillert=item What do you do with unsupported filetypes in an archive? 2168b39c5158Smillert 2169b39c5158SmillertC<Unix> has a few filetypes that aren't supported on other platforms, 2170b39c5158Smillertlike C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just 2171b39c5158Smillerttry to make a copy of the original file, rather than throwing an error. 2172b39c5158Smillert 2173b39c5158SmillertThis does require you to read the entire archive in to memory first, 2174b39c5158Smillertsince otherwise we wouldn't know what data to fill the copy with. 2175b39c5158Smillert(This means that you cannot use the class methods, including C<iter> 2176b39c5158Smillerton archives that have incompatible filetypes and still expect things 2177b39c5158Smillertto work). 2178b39c5158Smillert 2179b39c5158SmillertFor other filetypes, like C<chardevs> and C<blockdevs> we'll warn that 2180b39c5158Smillertthe extraction of this particular item didn't work. 2181b39c5158Smillert 2182b39c5158Smillert=item I'm using WinZip, or some other non-POSIX client, and files are not being extracted properly! 2183b39c5158Smillert 2184b39c5158SmillertBy default, C<Archive::Tar> is in a completely POSIX-compatible 2185b39c5158Smillertmode, which uses the POSIX-specification of C<tar> to store files. 2186898184e3SsthenFor paths greater than 100 characters, this is done using the 2187b39c5158SmillertC<POSIX header prefix>. Non-POSIX-compatible clients may not support 2188b39c5158Smillertthis part of the specification, and may only support the C<GNU Extended 2189b39c5158SmillertHeader> functionality. To facilitate those clients, you can set the 2190b39c5158SmillertC<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. See the 2191b39c5158SmillertC<GLOBAL VARIABLES> section for details on this variable. 2192b39c5158Smillert 2193b39c5158SmillertNote that GNU tar earlier than version 1.14 does not cope well with 2194b39c5158Smillertthe C<POSIX header prefix>. If you use such a version, consider setting 2195b39c5158Smillertthe C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. 2196b39c5158Smillert 2197b39c5158Smillert=item How do I extract only files that have property X from an archive? 2198b39c5158Smillert 2199b39c5158SmillertSometimes, you might not wish to extract a complete archive, just 2200b39c5158Smillertthe files that are relevant to you, based on some criteria. 2201b39c5158Smillert 2202b39c5158SmillertYou can do this by filtering a list of C<Archive::Tar::File> objects 2203b39c5158Smillertbased on your criteria. For example, to extract only files that have 2204b39c5158Smillertthe string C<foo> in their title, you would use: 2205b39c5158Smillert 2206b39c5158Smillert $tar->extract( 2207b39c5158Smillert grep { $_->full_path =~ /foo/ } $tar->get_files 2208b39c5158Smillert ); 2209b39c5158Smillert 2210b39c5158SmillertThis way, you can filter on any attribute of the files in the archive. 2211b39c5158SmillertConsult the C<Archive::Tar::File> documentation on how to use these 2212b39c5158Smillertobjects. 2213b39c5158Smillert 2214b39c5158Smillert=item How do I access .tar.Z files? 2215b39c5158Smillert 2216b39c5158SmillertThe C<Archive::Tar> module can optionally use C<Compress::Zlib> (via 2217b39c5158Smillertthe C<IO::Zlib> module) to access tar files that have been compressed 2218b39c5158Smillertwith C<gzip>. Unfortunately tar files compressed with the Unix C<compress> 2219b39c5158Smillertutility cannot be read by C<Compress::Zlib> and so cannot be directly 2220b39c5158Smillertaccesses by C<Archive::Tar>. 2221b39c5158Smillert 2222b39c5158SmillertIf the C<uncompress> or C<gunzip> programs are available, you can use 2223b39c5158Smillertone of these workarounds to read C<.tar.Z> files from C<Archive::Tar> 2224b39c5158Smillert 2225b39c5158SmillertFirstly with C<uncompress> 2226b39c5158Smillert 2227b39c5158Smillert use Archive::Tar; 2228b39c5158Smillert 2229b39c5158Smillert open F, "uncompress -c $filename |"; 2230b39c5158Smillert my $tar = Archive::Tar->new(*F); 2231b39c5158Smillert ... 2232b39c5158Smillert 2233b39c5158Smillertand this with C<gunzip> 2234b39c5158Smillert 2235b39c5158Smillert use Archive::Tar; 2236b39c5158Smillert 2237b39c5158Smillert open F, "gunzip -c $filename |"; 2238b39c5158Smillert my $tar = Archive::Tar->new(*F); 2239b39c5158Smillert ... 2240b39c5158Smillert 2241b39c5158SmillertSimilarly, if the C<compress> program is available, you can use this to 2242b39c5158Smillertwrite a C<.tar.Z> file 2243b39c5158Smillert 2244b39c5158Smillert use Archive::Tar; 2245b39c5158Smillert use IO::File; 2246b39c5158Smillert 2247b39c5158Smillert my $fh = new IO::File "| compress -c >$filename"; 2248b39c5158Smillert my $tar = Archive::Tar->new(); 2249b39c5158Smillert ... 2250b39c5158Smillert $tar->write($fh); 2251b39c5158Smillert $fh->close ; 2252b39c5158Smillert 2253b39c5158Smillert=item How do I handle Unicode strings? 2254b39c5158Smillert 2255b39c5158SmillertC<Archive::Tar> uses byte semantics for any files it reads from or writes 2256b39c5158Smillertto disk. This is not a problem if you only deal with files and never 2257b39c5158Smillertlook at their content or work solely with byte strings. But if you use 2258b39c5158SmillertUnicode strings with character semantics, some additional steps need 2259b39c5158Smillertto be taken. 2260b39c5158Smillert 2261b39c5158SmillertFor example, if you add a Unicode string like 2262b39c5158Smillert 2263b39c5158Smillert # Problem 2264b39c5158Smillert $tar->add_data('file.txt', "Euro: \x{20AC}"); 2265b39c5158Smillert 2266b39c5158Smillertthen there will be a problem later when the tarfile gets written out 2267*9f11ffb7Safresh1to disk via C<< $tar->write() >>: 2268b39c5158Smillert 2269b39c5158Smillert Wide character in print at .../Archive/Tar.pm line 1014. 2270b39c5158Smillert 2271b39c5158SmillertThe data was added as a Unicode string and when writing it out to disk, 2272b39c5158Smillertthe C<:utf8> line discipline wasn't set by C<Archive::Tar>, so Perl 2273b39c5158Smillerttried to convert the string to ISO-8859 and failed. The written file 2274b39c5158Smillertnow contains garbage. 2275b39c5158Smillert 2276b39c5158SmillertFor this reason, Unicode strings need to be converted to UTF-8-encoded 2277b39c5158Smillertbytestrings before they are handed off to C<add_data()>: 2278b39c5158Smillert 2279b39c5158Smillert use Encode; 2280b39c5158Smillert my $data = "Accented character: \x{20AC}"; 2281b39c5158Smillert $data = encode('utf8', $data); 2282b39c5158Smillert 2283b39c5158Smillert $tar->add_data('file.txt', $data); 2284b39c5158Smillert 2285b39c5158SmillertA opposite problem occurs if you extract a UTF8-encoded file from a 2286b39c5158Smillerttarball. Using C<get_content()> on the C<Archive::Tar::File> object 2287b39c5158Smillertwill return its content as a bytestring, not as a Unicode string. 2288b39c5158Smillert 2289b39c5158SmillertIf you want it to be a Unicode string (because you want character 2290b39c5158Smillertsemantics with operations like regular expression matching), you need 2291b39c5158Smillertto decode the UTF8-encoded content and have Perl convert it into 2292b39c5158Smillerta Unicode string: 2293b39c5158Smillert 2294b39c5158Smillert use Encode; 2295b39c5158Smillert my $data = $tar->get_content(); 2296b39c5158Smillert 2297b39c5158Smillert # Make it a Unicode string 2298b39c5158Smillert $data = decode('utf8', $data); 2299b39c5158Smillert 2300b39c5158SmillertThere is no easy way to provide this functionality in C<Archive::Tar>, 2301b39c5158Smillertbecause a tarball can contain many files, and each of which could be 2302b39c5158Smillertencoded in a different way. 2303b39c5158Smillert 2304b39c5158Smillert=back 2305b39c5158Smillert 2306b39c5158Smillert=head1 CAVEATS 2307b39c5158Smillert 2308b39c5158SmillertThe AIX tar does not fill all unused space in the tar archive with 0x00. 2309b39c5158SmillertThis sometimes leads to warning messages from C<Archive::Tar>. 2310b39c5158Smillert 2311b39c5158Smillert Invalid header block at offset nnn 2312b39c5158Smillert 2313b39c5158SmillertA fix for that problem is scheduled to be released in the following levels 2314b39c5158Smillertof AIX, all of which should be coming out in the 4th quarter of 2009: 2315b39c5158Smillert 2316b39c5158Smillert AIX 5.3 TL7 SP10 2317b39c5158Smillert AIX 5.3 TL8 SP8 2318b39c5158Smillert AIX 5.3 TL9 SP5 2319b39c5158Smillert AIX 5.3 TL10 SP2 2320b39c5158Smillert 2321b39c5158Smillert AIX 6.1 TL0 SP11 2322b39c5158Smillert AIX 6.1 TL1 SP7 2323b39c5158Smillert AIX 6.1 TL2 SP6 2324b39c5158Smillert AIX 6.1 TL3 SP3 2325b39c5158Smillert 2326b39c5158SmillertThe IBM APAR number for this problem is IZ50240 (Reported component ID: 2327b39c5158Smillert5765G0300 / AIX 5.3). It is possible to get an ifix for that problem. 2328b39c5158SmillertIf you need an ifix please contact your local IBM AIX support. 2329b39c5158Smillert 2330b39c5158Smillert=head1 TODO 2331b39c5158Smillert 2332b39c5158Smillert=over 4 2333b39c5158Smillert 2334b39c5158Smillert=item Check if passed in handles are open for read/write 2335b39c5158Smillert 2336b39c5158SmillertCurrently I don't know of any portable pure perl way to do this. 2337b39c5158SmillertSuggestions welcome. 2338b39c5158Smillert 2339b39c5158Smillert=item Allow archives to be passed in as string 2340b39c5158Smillert 2341b39c5158SmillertCurrently, we only allow opened filehandles or filenames, but 2342b39c5158Smillertnot strings. The internals would need some reworking to facilitate 2343b39c5158Smillertstringified archives. 2344b39c5158Smillert 2345b39c5158Smillert=item Facilitate processing an opened filehandle of a compressed archive 2346b39c5158Smillert 2347b39c5158SmillertCurrently, we only support this if the filehandle is an IO::Zlib object. 2348b39c5158SmillertEnvironments, like apache, will present you with an opened filehandle 2349b39c5158Smillertto an uploaded file, which might be a compressed archive. 2350b39c5158Smillert 2351b39c5158Smillert=back 2352b39c5158Smillert 2353b39c5158Smillert=head1 SEE ALSO 2354b39c5158Smillert 2355b39c5158Smillert=over 4 2356b39c5158Smillert 2357b39c5158Smillert=item The GNU tar specification 2358b39c5158Smillert 2359b39c5158SmillertC<http://www.gnu.org/software/tar/manual/tar.html> 2360b39c5158Smillert 2361898184e3Ssthen=item The PAX format specification 2362b39c5158Smillert 2363898184e3SsthenThe specification which tar derives from; C< http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html> 2364b39c5158Smillert 2365b39c5158Smillert=item A comparison of GNU and POSIX tar standards; C<http://www.delorie.com/gnu/docs/tar/tar_114.html> 2366b39c5158Smillert 2367b39c5158Smillert=item GNU tar intends to switch to POSIX compatibility 2368b39c5158Smillert 2369b39c5158SmillertGNU Tar authors have expressed their intention to become completely 2370b39c5158SmillertPOSIX-compatible; C<http://www.gnu.org/software/tar/manual/html_node/Formats.html> 2371b39c5158Smillert 2372b39c5158Smillert=item A Comparison between various tar implementations 2373b39c5158Smillert 2374b39c5158SmillertLists known issues and incompatibilities; C<http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs> 2375b39c5158Smillert 2376b39c5158Smillert=back 2377b39c5158Smillert 2378b39c5158Smillert=head1 AUTHOR 2379b39c5158Smillert 2380b39c5158SmillertThis module by Jos Boumans E<lt>kane@cpan.orgE<gt>. 2381b39c5158Smillert 2382b39c5158SmillertPlease reports bugs to E<lt>bug-archive-tar@rt.cpan.orgE<gt>. 2383b39c5158Smillert 2384b39c5158Smillert=head1 ACKNOWLEDGEMENTS 2385b39c5158Smillert 2386b39c5158SmillertThanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney, Gisle Aas, 2387b39c5158SmillertRainer Tammer and especially Andrew Savige for their help and suggestions. 2388b39c5158Smillert 2389b39c5158Smillert=head1 COPYRIGHT 2390b39c5158Smillert 2391b39c5158SmillertThis module is copyright (c) 2002 - 2009 Jos Boumans 2392b39c5158SmillertE<lt>kane@cpan.orgE<gt>. All rights reserved. 2393b39c5158Smillert 2394b39c5158SmillertThis library is free software; you may redistribute and/or modify 2395b39c5158Smillertit under the same terms as Perl itself. 2396b39c5158Smillert 2397b39c5158Smillert=cut 2398