1b39c5158Smillert### the gnu tar specification: 2b39c5158Smillert### http://www.gnu.org/software/tar/manual/tar.html 3b39c5158Smillert### 4b39c5158Smillert### and the pax format spec, which tar derives from: 5b39c5158Smillert### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html 6b39c5158Smillert 7b39c5158Smillertpackage Archive::Tar; 8b39c5158Smillertrequire 5.005_03; 9b39c5158Smillert 10b39c5158Smillertuse Cwd; 11b39c5158Smillertuse IO::Zlib; 12b39c5158Smillertuse IO::File; 13b39c5158Smillertuse Carp qw(carp croak); 14b39c5158Smillertuse File::Spec (); 15b39c5158Smillertuse File::Spec::Unix (); 16b39c5158Smillertuse File::Path (); 17b39c5158Smillert 18b39c5158Smillertuse Archive::Tar::File; 19b39c5158Smillertuse Archive::Tar::Constant; 20b39c5158Smillert 21b39c5158Smillertrequire Exporter; 22b39c5158Smillert 23b39c5158Smillertuse strict; 24b39c5158Smillertuse vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD 25b39c5158Smillert $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING $SAME_PERMISSIONS 26*898184e3Ssthen $INSECURE_EXTRACT_MODE $ZERO_PAD_NUMBERS @ISA @EXPORT 27b39c5158Smillert ]; 28b39c5158Smillert 29b39c5158Smillert@ISA = qw[Exporter]; 30b39c5158Smillert@EXPORT = qw[ COMPRESS_GZIP COMPRESS_BZIP ]; 31b39c5158Smillert$DEBUG = 0; 32b39c5158Smillert$WARN = 1; 33b39c5158Smillert$FOLLOW_SYMLINK = 0; 34*898184e3Ssthen$VERSION = "1.82"; 35b39c5158Smillert$CHOWN = 1; 36b39c5158Smillert$CHMOD = 1; 37b39c5158Smillert$SAME_PERMISSIONS = $> == 0 ? 1 : 0; 38b39c5158Smillert$DO_NOT_USE_PREFIX = 0; 39b39c5158Smillert$INSECURE_EXTRACT_MODE = 0; 40*898184e3Ssthen$ZERO_PAD_NUMBERS = 0; 41b39c5158Smillert 42b39c5158SmillertBEGIN { 43b39c5158Smillert use Config; 44b39c5158Smillert $HAS_PERLIO = $Config::Config{useperlio}; 45b39c5158Smillert 46b39c5158Smillert ### try and load IO::String anyway, so you can dynamically 47b39c5158Smillert ### switch between perlio and IO::String 48b39c5158Smillert $HAS_IO_STRING = eval { 49b39c5158Smillert require IO::String; 50b39c5158Smillert import IO::String; 51b39c5158Smillert 1; 52b39c5158Smillert } || 0; 53b39c5158Smillert} 54b39c5158Smillert 55b39c5158Smillert=head1 NAME 56b39c5158Smillert 57b39c5158SmillertArchive::Tar - module for manipulations of tar archives 58b39c5158Smillert 59b39c5158Smillert=head1 SYNOPSIS 60b39c5158Smillert 61b39c5158Smillert use Archive::Tar; 62b39c5158Smillert my $tar = Archive::Tar->new; 63b39c5158Smillert 64b39c5158Smillert $tar->read('origin.tgz'); 65b39c5158Smillert $tar->extract(); 66b39c5158Smillert 67b39c5158Smillert $tar->add_files('file/foo.pl', 'docs/README'); 68b39c5158Smillert $tar->add_data('file/baz.txt', 'This is the contents now'); 69b39c5158Smillert 70b39c5158Smillert $tar->rename('oldname', 'new/file/name'); 71*898184e3Ssthen $tar->chown('/', 'root'); 72*898184e3Ssthen $tar->chown('/', 'root:root'); 73*898184e3Ssthen $tar->chmod('/tmp', '1777'); 74b39c5158Smillert 75b39c5158Smillert $tar->write('files.tar'); # plain tar 76b39c5158Smillert $tar->write('files.tgz', COMPRESS_GZIP); # gzip compressed 77b39c5158Smillert $tar->write('files.tbz', COMPRESS_BZIP); # bzip2 compressed 78b39c5158Smillert 79b39c5158Smillert=head1 DESCRIPTION 80b39c5158Smillert 81b39c5158SmillertArchive::Tar provides an object oriented mechanism for handling tar 82b39c5158Smillertfiles. It provides class methods for quick and easy files handling 83b39c5158Smillertwhile also allowing for the creation of tar file objects for custom 84b39c5158Smillertmanipulation. If you have the IO::Zlib module installed, 85b39c5158SmillertArchive::Tar will also support compressed or gzipped tar files. 86b39c5158Smillert 87b39c5158SmillertAn object of class Archive::Tar represents a .tar(.gz) archive full 88b39c5158Smillertof files and things. 89b39c5158Smillert 90b39c5158Smillert=head1 Object Methods 91b39c5158Smillert 92b39c5158Smillert=head2 Archive::Tar->new( [$file, $compressed] ) 93b39c5158Smillert 94b39c5158SmillertReturns a new Tar object. If given any arguments, C<new()> calls the 95b39c5158SmillertC<read()> method automatically, passing on the arguments provided to 96b39c5158Smillertthe C<read()> method. 97b39c5158Smillert 98b39c5158SmillertIf C<new()> is invoked with arguments and the C<read()> method fails 99b39c5158Smillertfor any reason, C<new()> returns undef. 100b39c5158Smillert 101b39c5158Smillert=cut 102b39c5158Smillert 103b39c5158Smillertmy $tmpl = { 104b39c5158Smillert _data => [ ], 105b39c5158Smillert _file => 'Unknown', 106b39c5158Smillert}; 107b39c5158Smillert 108b39c5158Smillert### install get/set accessors for this object. 109b39c5158Smillertfor my $key ( keys %$tmpl ) { 110b39c5158Smillert no strict 'refs'; 111b39c5158Smillert *{__PACKAGE__."::$key"} = sub { 112b39c5158Smillert my $self = shift; 113b39c5158Smillert $self->{$key} = $_[0] if @_; 114b39c5158Smillert return $self->{$key}; 115b39c5158Smillert } 116b39c5158Smillert} 117b39c5158Smillert 118b39c5158Smillertsub new { 119b39c5158Smillert my $class = shift; 120b39c5158Smillert $class = ref $class if ref $class; 121b39c5158Smillert 122b39c5158Smillert ### copying $tmpl here since a shallow copy makes it use the 123b39c5158Smillert ### same aref, causing for files to remain in memory always. 124b39c5158Smillert my $obj = bless { _data => [ ], _file => 'Unknown', _error => '' }, $class; 125b39c5158Smillert 126b39c5158Smillert if (@_) { 127b39c5158Smillert unless ( $obj->read( @_ ) ) { 128b39c5158Smillert $obj->_error(qq[No data could be read from file]); 129b39c5158Smillert return; 130b39c5158Smillert } 131b39c5158Smillert } 132b39c5158Smillert 133b39c5158Smillert return $obj; 134b39c5158Smillert} 135b39c5158Smillert 136b39c5158Smillert=head2 $tar->read ( $filename|$handle, [$compressed, {opt => 'val'}] ) 137b39c5158Smillert 138b39c5158SmillertRead the given tar file into memory. 139b39c5158SmillertThe first argument can either be the name of a file or a reference to 140b39c5158Smillertan already open filehandle (or an IO::Zlib object if it's compressed) 141b39c5158Smillert 142b39c5158SmillertThe C<read> will I<replace> any previous content in C<$tar>! 143b39c5158Smillert 144b39c5158SmillertThe second argument may be considered optional, but remains for 145b39c5158Smillertbackwards compatibility. Archive::Tar now looks at the file 146b39c5158Smillertmagic to determine what class should be used to open the file 147b39c5158Smillertand will transparently Do The Right Thing. 148b39c5158Smillert 149b39c5158SmillertArchive::Tar will warn if you try to pass a bzip2 compressed file and the 150b39c5158SmillertIO::Zlib / IO::Uncompress::Bunzip2 modules are not available and simply return. 151b39c5158Smillert 152b39c5158SmillertNote that you can currently B<not> pass a C<gzip> compressed 153b39c5158Smillertfilehandle, which is not opened with C<IO::Zlib>, a C<bzip2> compressed 154b39c5158Smillertfilehandle, which is not opened with C<IO::Uncompress::Bunzip2>, nor a string 155b39c5158Smillertcontaining the full archive information (either compressed or 156b39c5158Smillertuncompressed). These are worth while features, but not currently 157b39c5158Smillertimplemented. See the C<TODO> section. 158b39c5158Smillert 159b39c5158SmillertThe third argument can be a hash reference with options. Note that 160b39c5158Smillertall options are case-sensitive. 161b39c5158Smillert 162b39c5158Smillert=over 4 163b39c5158Smillert 164b39c5158Smillert=item limit 165b39c5158Smillert 166b39c5158SmillertDo not read more than C<limit> files. This is useful if you have 167b39c5158Smillertvery big archives, and are only interested in the first few files. 168b39c5158Smillert 169b39c5158Smillert=item filter 170b39c5158Smillert 171b39c5158SmillertCan be set to a regular expression. Only files with names that match 172b39c5158Smillertthe expression will be read. 173b39c5158Smillert 174*898184e3Ssthen=item md5 175*898184e3Ssthen 176*898184e3SsthenSet to 1 and the md5sum of files will be returned (instead of file data) 177*898184e3Ssthen my $iter = Archive::Tar->iter( $file, 1, {md5 => 1} ); 178*898184e3Ssthen while( my $f = $iter->() ) { 179*898184e3Ssthen print $f->data . "\t" . $f->full_path . $/; 180*898184e3Ssthen } 181*898184e3Ssthen 182b39c5158Smillert=item extract 183b39c5158Smillert 184b39c5158SmillertIf set to true, immediately extract entries when reading them. This 185b39c5158Smillertgives you the same memory break as the C<extract_archive> function. 186b39c5158SmillertNote however that entries will not be read into memory, but written 187b39c5158Smillertstraight to disk. This means no C<Archive::Tar::File> objects are 188b39c5158Smillertcreated for you to inspect. 189b39c5158Smillert 190b39c5158Smillert=back 191b39c5158Smillert 192b39c5158SmillertAll files are stored internally as C<Archive::Tar::File> objects. 193b39c5158SmillertPlease consult the L<Archive::Tar::File> documentation for details. 194b39c5158Smillert 195b39c5158SmillertReturns the number of files read in scalar context, and a list of 196b39c5158SmillertC<Archive::Tar::File> objects in list context. 197b39c5158Smillert 198b39c5158Smillert=cut 199b39c5158Smillert 200b39c5158Smillertsub read { 201b39c5158Smillert my $self = shift; 202b39c5158Smillert my $file = shift; 203b39c5158Smillert my $gzip = shift || 0; 204b39c5158Smillert my $opts = shift || {}; 205b39c5158Smillert 206b39c5158Smillert unless( defined $file ) { 207b39c5158Smillert $self->_error( qq[No file to read from!] ); 208b39c5158Smillert return; 209b39c5158Smillert } else { 210b39c5158Smillert $self->_file( $file ); 211b39c5158Smillert } 212b39c5158Smillert 213b39c5158Smillert my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) ) 214b39c5158Smillert or return; 215b39c5158Smillert 216b39c5158Smillert my $data = $self->_read_tar( $handle, $opts ) or return; 217b39c5158Smillert 218b39c5158Smillert $self->_data( $data ); 219b39c5158Smillert 220b39c5158Smillert return wantarray ? @$data : scalar @$data; 221b39c5158Smillert} 222b39c5158Smillert 223b39c5158Smillertsub _get_handle { 224b39c5158Smillert my $self = shift; 225b39c5158Smillert my $file = shift; return unless defined $file; 226b39c5158Smillert my $compress = shift || 0; 227b39c5158Smillert my $mode = shift || READ_ONLY->( ZLIB ); # default to read only 228b39c5158Smillert 229*898184e3Ssthen ### Check if file is a file handle or IO glob 230*898184e3Ssthen if ( ref $file ) { 231*898184e3Ssthen return $file if eval{ *$file{IO} }; 232*898184e3Ssthen return $file if eval{ $file->isa(q{IO::Handle}) }; 233*898184e3Ssthen $file = q{}.$file; 234*898184e3Ssthen } 235b39c5158Smillert 236b39c5158Smillert ### get a FH opened to the right class, so we can use it transparently 237b39c5158Smillert ### throughout the program 238b39c5158Smillert my $fh; 239b39c5158Smillert { ### reading magic only makes sense if we're opening a file for 240b39c5158Smillert ### reading. otherwise, just use what the user requested. 241b39c5158Smillert my $magic = ''; 242b39c5158Smillert if( MODE_READ->($mode) ) { 243b39c5158Smillert open my $tmp, $file or do { 244b39c5158Smillert $self->_error( qq[Could not open '$file' for reading: $!] ); 245b39c5158Smillert return; 246b39c5158Smillert }; 247b39c5158Smillert 248b39c5158Smillert ### read the first 4 bites of the file to figure out which class to 249b39c5158Smillert ### use to open the file. 250b39c5158Smillert sysread( $tmp, $magic, 4 ); 251b39c5158Smillert close $tmp; 252b39c5158Smillert } 253b39c5158Smillert 254b39c5158Smillert ### is it bzip? 255b39c5158Smillert ### if you asked specifically for bzip compression, or if we're in 256b39c5158Smillert ### read mode and the magic numbers add up, use bzip 257b39c5158Smillert if( BZIP and ( 258b39c5158Smillert ($compress eq COMPRESS_BZIP) or 259b39c5158Smillert ( MODE_READ->($mode) and $magic =~ BZIP_MAGIC_NUM ) 260b39c5158Smillert ) 261b39c5158Smillert ) { 262b39c5158Smillert 263b39c5158Smillert ### different reader/writer modules, different error vars... sigh 264b39c5158Smillert if( MODE_READ->($mode) ) { 265b39c5158Smillert $fh = IO::Uncompress::Bunzip2->new( $file ) or do { 266b39c5158Smillert $self->_error( qq[Could not read '$file': ] . 267b39c5158Smillert $IO::Uncompress::Bunzip2::Bunzip2Error 268b39c5158Smillert ); 269b39c5158Smillert return; 270b39c5158Smillert }; 271b39c5158Smillert 272b39c5158Smillert } else { 273b39c5158Smillert $fh = IO::Compress::Bzip2->new( $file ) or do { 274b39c5158Smillert $self->_error( qq[Could not write to '$file': ] . 275b39c5158Smillert $IO::Compress::Bzip2::Bzip2Error 276b39c5158Smillert ); 277b39c5158Smillert return; 278b39c5158Smillert }; 279b39c5158Smillert } 280b39c5158Smillert 281b39c5158Smillert ### is it gzip? 282b39c5158Smillert ### if you asked for compression, if you wanted to read or the gzip 283b39c5158Smillert ### magic number is present (redundant with read) 284b39c5158Smillert } elsif( ZLIB and ( 285b39c5158Smillert $compress or MODE_READ->($mode) or $magic =~ GZIP_MAGIC_NUM 286b39c5158Smillert ) 287b39c5158Smillert ) { 288b39c5158Smillert $fh = IO::Zlib->new; 289b39c5158Smillert 290b39c5158Smillert unless( $fh->open( $file, $mode ) ) { 291b39c5158Smillert $self->_error(qq[Could not create filehandle for '$file': $!]); 292b39c5158Smillert return; 293b39c5158Smillert } 294b39c5158Smillert 295b39c5158Smillert ### is it plain tar? 296b39c5158Smillert } else { 297b39c5158Smillert $fh = IO::File->new; 298b39c5158Smillert 299b39c5158Smillert unless( $fh->open( $file, $mode ) ) { 300b39c5158Smillert $self->_error(qq[Could not create filehandle for '$file': $!]); 301b39c5158Smillert return; 302b39c5158Smillert } 303b39c5158Smillert 304b39c5158Smillert ### enable bin mode on tar archives 305b39c5158Smillert binmode $fh; 306b39c5158Smillert } 307b39c5158Smillert } 308b39c5158Smillert 309b39c5158Smillert return $fh; 310b39c5158Smillert} 311b39c5158Smillert 312b39c5158Smillert 313b39c5158Smillertsub _read_tar { 314b39c5158Smillert my $self = shift; 315b39c5158Smillert my $handle = shift or return; 316b39c5158Smillert my $opts = shift || {}; 317b39c5158Smillert 318b39c5158Smillert my $count = $opts->{limit} || 0; 319b39c5158Smillert my $filter = $opts->{filter}; 320*898184e3Ssthen my $md5 = $opts->{md5} || 0; # cdrake 321*898184e3Ssthen my $filter_cb = $opts->{filter_cb}; 322b39c5158Smillert my $extract = $opts->{extract} || 0; 323b39c5158Smillert 324b39c5158Smillert ### set a cap on the amount of files to extract ### 325b39c5158Smillert my $limit = 0; 326b39c5158Smillert $limit = 1 if $count > 0; 327b39c5158Smillert 328b39c5158Smillert my $tarfile = [ ]; 329b39c5158Smillert my $chunk; 330b39c5158Smillert my $read = 0; 331b39c5158Smillert my $real_name; # to set the name of a file when 332b39c5158Smillert # we're encountering @longlink 333b39c5158Smillert my $data; 334b39c5158Smillert 335b39c5158Smillert LOOP: 336b39c5158Smillert while( $handle->read( $chunk, HEAD ) ) { 337b39c5158Smillert ### IO::Zlib doesn't support this yet 338b39c5158Smillert my $offset = eval { tell $handle } || 'unknown'; 339*898184e3Ssthen $@ = ''; 340b39c5158Smillert 341b39c5158Smillert unless( $read++ ) { 342b39c5158Smillert my $gzip = GZIP_MAGIC_NUM; 343b39c5158Smillert if( $chunk =~ /$gzip/ ) { 344b39c5158Smillert $self->_error( qq[Cannot read compressed format in tar-mode] ); 345b39c5158Smillert return; 346b39c5158Smillert } 347b39c5158Smillert 348b39c5158Smillert ### size is < HEAD, which means a corrupted file, as the minimum 349b39c5158Smillert ### length is _at least_ HEAD 350b39c5158Smillert if (length $chunk != HEAD) { 351b39c5158Smillert $self->_error( qq[Cannot read enough bytes from the tarfile] ); 352b39c5158Smillert return; 353b39c5158Smillert } 354b39c5158Smillert } 355b39c5158Smillert 356b39c5158Smillert ### if we can't read in all bytes... ### 357b39c5158Smillert last if length $chunk != HEAD; 358b39c5158Smillert 359b39c5158Smillert ### Apparently this should really be two blocks of 512 zeroes, 360b39c5158Smillert ### but GNU tar sometimes gets it wrong. See comment in the 361b39c5158Smillert ### source code (tar.c) to GNU cpio. 362b39c5158Smillert next if $chunk eq TAR_END; 363b39c5158Smillert 364b39c5158Smillert ### according to the posix spec, the last 12 bytes of the header are 365b39c5158Smillert ### null bytes, to pad it to a 512 byte block. That means if these 366*898184e3Ssthen ### bytes are NOT null bytes, it's a corrupt header. See: 367b39c5158Smillert ### www.koders.com/c/fidCE473AD3D9F835D690259D60AD5654591D91D5BA.aspx 368b39c5158Smillert ### line 111 369b39c5158Smillert { my $nulls = join '', "\0" x 12; 370b39c5158Smillert unless( $nulls eq substr( $chunk, 500, 12 ) ) { 371b39c5158Smillert $self->_error( qq[Invalid header block at offset $offset] ); 372b39c5158Smillert next LOOP; 373b39c5158Smillert } 374b39c5158Smillert } 375b39c5158Smillert 376b39c5158Smillert ### pass the realname, so we can set it 'proper' right away 377b39c5158Smillert ### some of the heuristics are done on the name, so important 378b39c5158Smillert ### to set it ASAP 379b39c5158Smillert my $entry; 380b39c5158Smillert { my %extra_args = (); 381b39c5158Smillert $extra_args{'name'} = $$real_name if defined $real_name; 382b39c5158Smillert 383b39c5158Smillert unless( $entry = Archive::Tar::File->new( chunk => $chunk, 384b39c5158Smillert %extra_args ) 385b39c5158Smillert ) { 386b39c5158Smillert $self->_error( qq[Couldn't read chunk at offset $offset] ); 387b39c5158Smillert next LOOP; 388b39c5158Smillert } 389b39c5158Smillert } 390b39c5158Smillert 391b39c5158Smillert ### ignore labels: 392*898184e3Ssthen ### http://www.gnu.org/software/tar/manual/html_chapter/Media.html#SEC159 393b39c5158Smillert next if $entry->is_label; 394b39c5158Smillert 395b39c5158Smillert if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) { 396b39c5158Smillert 397b39c5158Smillert if ( $entry->is_file && !$entry->validate ) { 398b39c5158Smillert ### sometimes the chunk is rather fux0r3d and a whole 512 399b39c5158Smillert ### bytes ends up in the ->name area. 400b39c5158Smillert ### clean it up, if need be 401b39c5158Smillert my $name = $entry->name; 402b39c5158Smillert $name = substr($name, 0, 100) if length $name > 100; 403b39c5158Smillert $name =~ s/\n/ /g; 404b39c5158Smillert 405b39c5158Smillert $self->_error( $name . qq[: checksum error] ); 406b39c5158Smillert next LOOP; 407b39c5158Smillert } 408b39c5158Smillert 409b39c5158Smillert my $block = BLOCK_SIZE->( $entry->size ); 410b39c5158Smillert 411b39c5158Smillert $data = $entry->get_content_by_ref; 412b39c5158Smillert 413*898184e3Ssthen my $skip = 0; 414*898184e3Ssthen my $ctx; # cdrake 415*898184e3Ssthen ### skip this entry if we're filtering 416*898184e3Ssthen 417*898184e3Ssthen if($md5) { # cdrake 418*898184e3Ssthen $ctx = Digest::MD5->new; # cdrake 419*898184e3Ssthen $skip=5; # cdrake 420*898184e3Ssthen 421*898184e3Ssthen } elsif ($filter && $entry->name !~ $filter) { 422*898184e3Ssthen $skip = 1; 423*898184e3Ssthen 424*898184e3Ssthen ### skip this entry if it's a pax header. This is a special file added 425*898184e3Ssthen ### by, among others, git-generated tarballs. It holds comments and is 426*898184e3Ssthen ### not meant for extracting. See #38932: pax_global_header extracted 427*898184e3Ssthen } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) { 428*898184e3Ssthen $skip = 2; 429*898184e3Ssthen } elsif ($filter_cb && ! $filter_cb->($entry)) { 430*898184e3Ssthen $skip = 3; 431*898184e3Ssthen } 432*898184e3Ssthen 433*898184e3Ssthen if ($skip) { 434*898184e3Ssthen # 435*898184e3Ssthen # Since we're skipping, do not allocate memory for the 436*898184e3Ssthen # whole file. Read it 64 BLOCKS at a time. Do not 437*898184e3Ssthen # complete the skip yet because maybe what we read is a 438*898184e3Ssthen # longlink and it won't get skipped after all 439*898184e3Ssthen # 440*898184e3Ssthen my $amt = $block; 441*898184e3Ssthen my $fsz=$entry->size; # cdrake 442*898184e3Ssthen while ($amt > 0) { 443*898184e3Ssthen $$data = ''; 444*898184e3Ssthen my $this = 64 * BLOCK; 445*898184e3Ssthen $this = $amt if $this > $amt; 446*898184e3Ssthen if( $handle->read( $$data, $this ) < $this ) { 447*898184e3Ssthen $self->_error( qq[Read error on tarfile (missing data) ']. 448*898184e3Ssthen $entry->full_path ."' at offset $offset" ); 449*898184e3Ssthen next LOOP; 450*898184e3Ssthen } 451*898184e3Ssthen $amt -= $this; 452*898184e3Ssthen $fsz -= $this; # cdrake 453*898184e3Ssthen substr ($$data, $fsz) = "" if ($fsz<0); # remove external junk prior to md5 # cdrake 454*898184e3Ssthen $ctx->add($$data) if($skip==5); # cdrake 455*898184e3Ssthen } 456*898184e3Ssthen $$data = $ctx->hexdigest if($skip==5 && !$entry->is_longlink && !$entry->is_unknown && !$entry->is_label ) ; # cdrake 457*898184e3Ssthen } else { 458*898184e3Ssthen 459b39c5158Smillert ### just read everything into memory 460b39c5158Smillert ### can't do lazy loading since IO::Zlib doesn't support 'seek' 461b39c5158Smillert ### this is because Compress::Zlib doesn't support it =/ 462b39c5158Smillert ### this reads in the whole data in one read() call. 463b39c5158Smillert if ( $handle->read( $$data, $block ) < $block ) { 464b39c5158Smillert $self->_error( qq[Read error on tarfile (missing data) ']. 465b39c5158Smillert $entry->full_path ."' at offset $offset" ); 466b39c5158Smillert next LOOP; 467b39c5158Smillert } 468b39c5158Smillert ### throw away trailing garbage ### 469b39c5158Smillert substr ($$data, $entry->size) = "" if defined $$data; 470*898184e3Ssthen } 471b39c5158Smillert 472b39c5158Smillert ### part II of the @LongLink munging -- need to do /after/ 473b39c5158Smillert ### the checksum check. 474b39c5158Smillert if( $entry->is_longlink ) { 475b39c5158Smillert ### weird thing in tarfiles -- if the file is actually a 476b39c5158Smillert ### @LongLink, the data part seems to have a trailing ^@ 477b39c5158Smillert ### (unprintable) char. to display, pipe output through less. 478b39c5158Smillert ### but that doesn't *always* happen.. so check if the last 479b39c5158Smillert ### character is a control character, and if so remove it 480b39c5158Smillert ### at any rate, we better remove that character here, or tests 481b39c5158Smillert ### like 'eq' and hashlook ups based on names will SO not work 482b39c5158Smillert ### remove it by calculating the proper size, and then 483b39c5158Smillert ### tossing out everything that's longer than that size. 484b39c5158Smillert 485b39c5158Smillert ### count number of nulls 486b39c5158Smillert my $nulls = $$data =~ tr/\0/\0/; 487b39c5158Smillert 488b39c5158Smillert ### cut data + size by that many bytes 489b39c5158Smillert $entry->size( $entry->size - $nulls ); 490b39c5158Smillert substr ($$data, $entry->size) = ""; 491b39c5158Smillert } 492b39c5158Smillert } 493b39c5158Smillert 494b39c5158Smillert ### clean up of the entries.. posix tar /apparently/ has some 495b39c5158Smillert ### weird 'feature' that allows for filenames > 255 characters 496b39c5158Smillert ### they'll put a header in with as name '././@LongLink' and the 497b39c5158Smillert ### contents will be the name of the /next/ file in the archive 498b39c5158Smillert ### pretty crappy and kludgy if you ask me 499b39c5158Smillert 500b39c5158Smillert ### set the name for the next entry if this is a @LongLink; 501b39c5158Smillert ### this is one ugly hack =/ but needed for direct extraction 502b39c5158Smillert if( $entry->is_longlink ) { 503b39c5158Smillert $real_name = $data; 504b39c5158Smillert next LOOP; 505b39c5158Smillert } elsif ( defined $real_name ) { 506b39c5158Smillert $entry->name( $$real_name ); 507b39c5158Smillert $entry->prefix(''); 508b39c5158Smillert undef $real_name; 509b39c5158Smillert } 510b39c5158Smillert 511b39c5158Smillert if ($filter && $entry->name !~ $filter) { 512b39c5158Smillert next LOOP; 513b39c5158Smillert 514b39c5158Smillert ### skip this entry if it's a pax header. This is a special file added 515b39c5158Smillert ### by, among others, git-generated tarballs. It holds comments and is 516b39c5158Smillert ### not meant for extracting. See #38932: pax_global_header extracted 517*898184e3Ssthen } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) { 518*898184e3Ssthen next LOOP; 519*898184e3Ssthen } elsif ($filter_cb && ! $filter_cb->($entry)) { 520b39c5158Smillert next LOOP; 521b39c5158Smillert } 522b39c5158Smillert 523*898184e3Ssthen if ( $extract && !$entry->is_longlink 524b39c5158Smillert && !$entry->is_unknown 525*898184e3Ssthen && !$entry->is_label ) { 526*898184e3Ssthen $self->_extract_file( $entry ) or return; 527*898184e3Ssthen } 528b39c5158Smillert 529b39c5158Smillert ### Guard against tarfiles with garbage at the end 530b39c5158Smillert last LOOP if $entry->name eq ''; 531b39c5158Smillert 532b39c5158Smillert ### push only the name on the rv if we're extracting 533b39c5158Smillert ### -- for extract_archive 534b39c5158Smillert push @$tarfile, ($extract ? $entry->name : $entry); 535b39c5158Smillert 536b39c5158Smillert if( $limit ) { 537b39c5158Smillert $count-- unless $entry->is_longlink || $entry->is_dir; 538b39c5158Smillert last LOOP unless $count; 539b39c5158Smillert } 540b39c5158Smillert } continue { 541b39c5158Smillert undef $data; 542b39c5158Smillert } 543b39c5158Smillert 544b39c5158Smillert return $tarfile; 545b39c5158Smillert} 546b39c5158Smillert 547b39c5158Smillert=head2 $tar->contains_file( $filename ) 548b39c5158Smillert 549b39c5158SmillertCheck if the archive contains a certain file. 550b39c5158SmillertIt will return true if the file is in the archive, false otherwise. 551b39c5158Smillert 552b39c5158SmillertNote however, that this function does an exact match using C<eq> 553b39c5158Smillerton the full path. So it cannot compensate for case-insensitive file- 554b39c5158Smillertsystems or compare 2 paths to see if they would point to the same 555b39c5158Smillertunderlying file. 556b39c5158Smillert 557b39c5158Smillert=cut 558b39c5158Smillert 559b39c5158Smillertsub contains_file { 560b39c5158Smillert my $self = shift; 561b39c5158Smillert my $full = shift; 562b39c5158Smillert 563b39c5158Smillert return unless defined $full; 564b39c5158Smillert 565b39c5158Smillert ### don't warn if the entry isn't there.. that's what this function 566b39c5158Smillert ### is for after all. 567b39c5158Smillert local $WARN = 0; 568b39c5158Smillert return 1 if $self->_find_entry($full); 569b39c5158Smillert return; 570b39c5158Smillert} 571b39c5158Smillert 572b39c5158Smillert=head2 $tar->extract( [@filenames] ) 573b39c5158Smillert 574b39c5158SmillertWrite files whose names are equivalent to any of the names in 575b39c5158SmillertC<@filenames> to disk, creating subdirectories as necessary. This 576b39c5158Smillertmight not work too well under VMS. 577b39c5158SmillertUnder MacPerl, the file's modification time will be converted to the 578b39c5158SmillertMacOS zero of time, and appropriate conversions will be done to the 579b39c5158Smillertpath. However, the length of each element of the path is not 580b39c5158Smillertinspected to see whether it's longer than MacOS currently allows (32 581b39c5158Smillertcharacters). 582b39c5158Smillert 583b39c5158SmillertIf C<extract> is called without a list of file names, the entire 584b39c5158Smillertcontents of the archive are extracted. 585b39c5158Smillert 586b39c5158SmillertReturns a list of filenames extracted. 587b39c5158Smillert 588b39c5158Smillert=cut 589b39c5158Smillert 590b39c5158Smillertsub extract { 591b39c5158Smillert my $self = shift; 592b39c5158Smillert my @args = @_; 593b39c5158Smillert my @files; 594b39c5158Smillert 595b39c5158Smillert # use the speed optimization for all extracted files 596b39c5158Smillert local($self->{cwd}) = cwd() unless $self->{cwd}; 597b39c5158Smillert 598*898184e3Ssthen ### you requested the extraction of only certain files 599b39c5158Smillert if( @args ) { 600b39c5158Smillert for my $file ( @args ) { 601b39c5158Smillert 602b39c5158Smillert ### it's already an object? 603b39c5158Smillert if( UNIVERSAL::isa( $file, 'Archive::Tar::File' ) ) { 604b39c5158Smillert push @files, $file; 605b39c5158Smillert next; 606b39c5158Smillert 607b39c5158Smillert ### go find it then 608b39c5158Smillert } else { 609b39c5158Smillert 610b39c5158Smillert my $found; 611b39c5158Smillert for my $entry ( @{$self->_data} ) { 612b39c5158Smillert next unless $file eq $entry->full_path; 613b39c5158Smillert 614b39c5158Smillert ### we found the file you're looking for 615b39c5158Smillert push @files, $entry; 616b39c5158Smillert $found++; 617b39c5158Smillert } 618b39c5158Smillert 619b39c5158Smillert unless( $found ) { 620b39c5158Smillert return $self->_error( 621b39c5158Smillert qq[Could not find '$file' in archive] ); 622b39c5158Smillert } 623b39c5158Smillert } 624b39c5158Smillert } 625b39c5158Smillert 626b39c5158Smillert ### just grab all the file items 627b39c5158Smillert } else { 628b39c5158Smillert @files = $self->get_files; 629b39c5158Smillert } 630b39c5158Smillert 631b39c5158Smillert ### nothing found? that's an error 632b39c5158Smillert unless( scalar @files ) { 633b39c5158Smillert $self->_error( qq[No files found for ] . $self->_file ); 634b39c5158Smillert return; 635b39c5158Smillert } 636b39c5158Smillert 637b39c5158Smillert ### now extract them 638b39c5158Smillert for my $entry ( @files ) { 639b39c5158Smillert unless( $self->_extract_file( $entry ) ) { 640b39c5158Smillert $self->_error(q[Could not extract ']. $entry->full_path .q['] ); 641b39c5158Smillert return; 642b39c5158Smillert } 643b39c5158Smillert } 644b39c5158Smillert 645b39c5158Smillert return @files; 646b39c5158Smillert} 647b39c5158Smillert 648b39c5158Smillert=head2 $tar->extract_file( $file, [$extract_path] ) 649b39c5158Smillert 650b39c5158SmillertWrite an entry, whose name is equivalent to the file name provided to 651b39c5158Smillertdisk. Optionally takes a second parameter, which is the full native 652b39c5158Smillertpath (including filename) the entry will be written to. 653b39c5158Smillert 654b39c5158SmillertFor example: 655b39c5158Smillert 656b39c5158Smillert $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' ); 657b39c5158Smillert 658b39c5158Smillert $tar->extract_file( $at_file_object, 'name/i/want/to/give/it' ); 659b39c5158Smillert 660b39c5158SmillertReturns true on success, false on failure. 661b39c5158Smillert 662b39c5158Smillert=cut 663b39c5158Smillert 664b39c5158Smillertsub extract_file { 665b39c5158Smillert my $self = shift; 666b39c5158Smillert my $file = shift; return unless defined $file; 667b39c5158Smillert my $alt = shift; 668b39c5158Smillert 669b39c5158Smillert my $entry = $self->_find_entry( $file ) 670b39c5158Smillert or $self->_error( qq[Could not find an entry for '$file'] ), return; 671b39c5158Smillert 672b39c5158Smillert return $self->_extract_file( $entry, $alt ); 673b39c5158Smillert} 674b39c5158Smillert 675b39c5158Smillertsub _extract_file { 676b39c5158Smillert my $self = shift; 677b39c5158Smillert my $entry = shift or return; 678b39c5158Smillert my $alt = shift; 679b39c5158Smillert 680b39c5158Smillert ### you wanted an alternate extraction location ### 681b39c5158Smillert my $name = defined $alt ? $alt : $entry->full_path; 682b39c5158Smillert 683b39c5158Smillert ### splitpath takes a bool at the end to indicate 684b39c5158Smillert ### that it's splitting a dir 685b39c5158Smillert my ($vol,$dirs,$file); 686b39c5158Smillert if ( defined $alt ) { # It's a local-OS path 687b39c5158Smillert ($vol,$dirs,$file) = File::Spec->splitpath( $alt, 688b39c5158Smillert $entry->is_dir ); 689b39c5158Smillert } else { 690b39c5158Smillert ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name, 691b39c5158Smillert $entry->is_dir ); 692b39c5158Smillert } 693b39c5158Smillert 694b39c5158Smillert my $dir; 695b39c5158Smillert ### is $name an absolute path? ### 696b39c5158Smillert if( $vol || File::Spec->file_name_is_absolute( $dirs ) ) { 697b39c5158Smillert 698b39c5158Smillert ### absolute names are not allowed to be in tarballs under 699b39c5158Smillert ### strict mode, so only allow it if a user tells us to do it 700b39c5158Smillert if( not defined $alt and not $INSECURE_EXTRACT_MODE ) { 701b39c5158Smillert $self->_error( 702b39c5158Smillert q[Entry ']. $entry->full_path .q[' is an absolute path. ]. 703b39c5158Smillert q[Not extracting absolute paths under SECURE EXTRACT MODE] 704b39c5158Smillert ); 705b39c5158Smillert return; 706b39c5158Smillert } 707b39c5158Smillert 708b39c5158Smillert ### user asked us to, it's fine. 709b39c5158Smillert $dir = File::Spec->catpath( $vol, $dirs, "" ); 710b39c5158Smillert 711b39c5158Smillert ### it's a relative path ### 712b39c5158Smillert } else { 713b39c5158Smillert my $cwd = (ref $self and defined $self->{cwd}) 714b39c5158Smillert ? $self->{cwd} 715b39c5158Smillert : cwd(); 716b39c5158Smillert 717b39c5158Smillert my @dirs = defined $alt 718b39c5158Smillert ? File::Spec->splitdir( $dirs ) # It's a local-OS path 719b39c5158Smillert : File::Spec::Unix->splitdir( $dirs ); # it's UNIX-style, likely 720b39c5158Smillert # straight from the tarball 721b39c5158Smillert 722b39c5158Smillert if( not defined $alt and 723b39c5158Smillert not $INSECURE_EXTRACT_MODE 724b39c5158Smillert ) { 725b39c5158Smillert 726b39c5158Smillert ### paths that leave the current directory are not allowed under 727b39c5158Smillert ### strict mode, so only allow it if a user tells us to do this. 728b39c5158Smillert if( grep { $_ eq '..' } @dirs ) { 729b39c5158Smillert 730b39c5158Smillert $self->_error( 731b39c5158Smillert q[Entry ']. $entry->full_path .q[' is attempting to leave ]. 732b39c5158Smillert q[the current working directory. Not extracting under ]. 733b39c5158Smillert q[SECURE EXTRACT MODE] 734b39c5158Smillert ); 735b39c5158Smillert return; 736b39c5158Smillert } 737b39c5158Smillert 738b39c5158Smillert ### the archive may be asking us to extract into a symlink. This 739b39c5158Smillert ### is not sane and a possible security issue, as outlined here: 740b39c5158Smillert ### https://rt.cpan.org/Ticket/Display.html?id=30380 741b39c5158Smillert ### https://bugzilla.redhat.com/show_bug.cgi?id=295021 742b39c5158Smillert ### https://issues.rpath.com/browse/RPL-1716 743b39c5158Smillert my $full_path = $cwd; 744b39c5158Smillert for my $d ( @dirs ) { 745b39c5158Smillert $full_path = File::Spec->catdir( $full_path, $d ); 746b39c5158Smillert 747b39c5158Smillert ### we've already checked this one, and it's safe. Move on. 748b39c5158Smillert next if ref $self and $self->{_link_cache}->{$full_path}; 749b39c5158Smillert 750b39c5158Smillert if( -l $full_path ) { 751b39c5158Smillert my $to = readlink $full_path; 752b39c5158Smillert my $diag = "symlinked directory ($full_path => $to)"; 753b39c5158Smillert 754b39c5158Smillert $self->_error( 755b39c5158Smillert q[Entry ']. $entry->full_path .q[' is attempting to ]. 756b39c5158Smillert qq[extract to a $diag. This is considered a security ]. 757b39c5158Smillert q[vulnerability and not allowed under SECURE EXTRACT ]. 758b39c5158Smillert q[MODE] 759b39c5158Smillert ); 760b39c5158Smillert return; 761b39c5158Smillert } 762b39c5158Smillert 763b39c5158Smillert ### XXX keep a cache if possible, so the stats become cheaper: 764b39c5158Smillert $self->{_link_cache}->{$full_path} = 1 if ref $self; 765b39c5158Smillert } 766b39c5158Smillert } 767b39c5158Smillert 768b39c5158Smillert ### '.' is the directory delimiter on VMS, which has to be escaped 769b39c5158Smillert ### or changed to '_' on vms. vmsify is used, because older versions 770b39c5158Smillert ### of vmspath do not handle this properly. 771b39c5158Smillert ### Must not add a '/' to an empty directory though. 772b39c5158Smillert map { length() ? VMS::Filespec::vmsify($_.'/') : $_ } @dirs if ON_VMS; 773b39c5158Smillert 774b39c5158Smillert my ($cwd_vol,$cwd_dir,$cwd_file) 775b39c5158Smillert = File::Spec->splitpath( $cwd ); 776b39c5158Smillert my @cwd = File::Spec->splitdir( $cwd_dir ); 777b39c5158Smillert push @cwd, $cwd_file if length $cwd_file; 778b39c5158Smillert 779*898184e3Ssthen ### We need to pass '' as the last element to catpath. Craig Berry 780b39c5158Smillert ### explains why (msgid <p0624083dc311ae541393@[172.16.52.1]>): 781b39c5158Smillert ### The root problem is that splitpath on UNIX always returns the 782b39c5158Smillert ### final path element as a file even if it is a directory, and of 783b39c5158Smillert ### course there is no way it can know the difference without checking 784b39c5158Smillert ### against the filesystem, which it is documented as not doing. When 785b39c5158Smillert ### you turn around and call catpath, on VMS you have to know which bits 786b39c5158Smillert ### are directory bits and which bits are file bits. In this case we 787b39c5158Smillert ### know the result should be a directory. I had thought you could omit 788b39c5158Smillert ### the file argument to catpath in such a case, but apparently on UNIX 789b39c5158Smillert ### you can't. 790b39c5158Smillert $dir = File::Spec->catpath( 791b39c5158Smillert $cwd_vol, File::Spec->catdir( @cwd, @dirs ), '' 792b39c5158Smillert ); 793b39c5158Smillert 794b39c5158Smillert ### catdir() returns undef if the path is longer than 255 chars on 795b39c5158Smillert ### older VMS systems. 796b39c5158Smillert unless ( defined $dir ) { 797b39c5158Smillert $^W && $self->_error( qq[Could not compose a path for '$dirs'\n] ); 798b39c5158Smillert return; 799b39c5158Smillert } 800b39c5158Smillert 801b39c5158Smillert } 802b39c5158Smillert 803b39c5158Smillert if( -e $dir && !-d _ ) { 804b39c5158Smillert $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] ); 805b39c5158Smillert return; 806b39c5158Smillert } 807b39c5158Smillert 808b39c5158Smillert unless ( -d _ ) { 809b39c5158Smillert eval { File::Path::mkpath( $dir, 0, 0777 ) }; 810b39c5158Smillert if( $@ ) { 811b39c5158Smillert my $fp = $entry->full_path; 812b39c5158Smillert $self->_error(qq[Could not create directory '$dir' for '$fp': $@]); 813b39c5158Smillert return; 814b39c5158Smillert } 815b39c5158Smillert 816b39c5158Smillert ### XXX chown here? that might not be the same as in the archive 817b39c5158Smillert ### as we're only chown'ing to the owner of the file we're extracting 818b39c5158Smillert ### not to the owner of the directory itself, which may or may not 819b39c5158Smillert ### be another entry in the archive 820b39c5158Smillert ### Answer: no, gnu tar doesn't do it either, it'd be the wrong 821b39c5158Smillert ### way to go. 822b39c5158Smillert #if( $CHOWN && CAN_CHOWN ) { 823b39c5158Smillert # chown $entry->uid, $entry->gid, $dir or 824b39c5158Smillert # $self->_error( qq[Could not set uid/gid on '$dir'] ); 825b39c5158Smillert #} 826b39c5158Smillert } 827b39c5158Smillert 828b39c5158Smillert ### we're done if we just needed to create a dir ### 829b39c5158Smillert return 1 if $entry->is_dir; 830b39c5158Smillert 831b39c5158Smillert my $full = File::Spec->catfile( $dir, $file ); 832b39c5158Smillert 833b39c5158Smillert if( $entry->is_unknown ) { 834b39c5158Smillert $self->_error( qq[Unknown file type for file '$full'] ); 835b39c5158Smillert return; 836b39c5158Smillert } 837b39c5158Smillert 838b39c5158Smillert if( length $entry->type && $entry->is_file ) { 839b39c5158Smillert my $fh = IO::File->new; 840b39c5158Smillert $fh->open( '>' . $full ) or ( 841b39c5158Smillert $self->_error( qq[Could not open file '$full': $!] ), 842b39c5158Smillert return 843b39c5158Smillert ); 844b39c5158Smillert 845b39c5158Smillert if( $entry->size ) { 846b39c5158Smillert binmode $fh; 847b39c5158Smillert syswrite $fh, $entry->data or ( 848b39c5158Smillert $self->_error( qq[Could not write data to '$full'] ), 849b39c5158Smillert return 850b39c5158Smillert ); 851b39c5158Smillert } 852b39c5158Smillert 853b39c5158Smillert close $fh or ( 854b39c5158Smillert $self->_error( qq[Could not close file '$full'] ), 855b39c5158Smillert return 856b39c5158Smillert ); 857b39c5158Smillert 858b39c5158Smillert } else { 859b39c5158Smillert $self->_make_special_file( $entry, $full ) or return; 860b39c5158Smillert } 861b39c5158Smillert 862b39c5158Smillert ### only update the timestamp if it's not a symlink; that will change the 863b39c5158Smillert ### timestamp of the original. This addresses bug #33669: Could not update 864b39c5158Smillert ### timestamp warning on symlinks 865b39c5158Smillert if( not -l $full ) { 866b39c5158Smillert utime time, $entry->mtime - TIME_OFFSET, $full or 867b39c5158Smillert $self->_error( qq[Could not update timestamp] ); 868b39c5158Smillert } 869b39c5158Smillert 870b39c5158Smillert if( $CHOWN && CAN_CHOWN->() ) { 871b39c5158Smillert chown $entry->uid, $entry->gid, $full or 872b39c5158Smillert $self->_error( qq[Could not set uid/gid on '$full'] ); 873b39c5158Smillert } 874b39c5158Smillert 875b39c5158Smillert ### only chmod if we're allowed to, but never chmod symlinks, since they'll 876b39c5158Smillert ### change the perms on the file they're linking too... 877b39c5158Smillert if( $CHMOD and not -l $full ) { 878b39c5158Smillert my $mode = $entry->mode; 879b39c5158Smillert unless ($SAME_PERMISSIONS) { 880b39c5158Smillert $mode &= ~(oct(7000) | umask); 881b39c5158Smillert } 882b39c5158Smillert chmod $mode, $full or 883b39c5158Smillert $self->_error( qq[Could not chown '$full' to ] . $entry->mode ); 884b39c5158Smillert } 885b39c5158Smillert 886b39c5158Smillert return 1; 887b39c5158Smillert} 888b39c5158Smillert 889b39c5158Smillertsub _make_special_file { 890b39c5158Smillert my $self = shift; 891b39c5158Smillert my $entry = shift or return; 892b39c5158Smillert my $file = shift; return unless defined $file; 893b39c5158Smillert 894b39c5158Smillert my $err; 895b39c5158Smillert 896b39c5158Smillert if( $entry->is_symlink ) { 897b39c5158Smillert my $fail; 898b39c5158Smillert if( ON_UNIX ) { 899b39c5158Smillert symlink( $entry->linkname, $file ) or $fail++; 900b39c5158Smillert 901b39c5158Smillert } else { 902b39c5158Smillert $self->_extract_special_file_as_plain_file( $entry, $file ) 903b39c5158Smillert or $fail++; 904b39c5158Smillert } 905b39c5158Smillert 906b39c5158Smillert $err = qq[Making symbolic link '$file' to '] . 907b39c5158Smillert $entry->linkname .q[' failed] if $fail; 908b39c5158Smillert 909b39c5158Smillert } elsif ( $entry->is_hardlink ) { 910b39c5158Smillert my $fail; 911b39c5158Smillert if( ON_UNIX ) { 912b39c5158Smillert link( $entry->linkname, $file ) or $fail++; 913b39c5158Smillert 914b39c5158Smillert } else { 915b39c5158Smillert $self->_extract_special_file_as_plain_file( $entry, $file ) 916b39c5158Smillert or $fail++; 917b39c5158Smillert } 918b39c5158Smillert 919b39c5158Smillert $err = qq[Making hard link from '] . $entry->linkname . 920b39c5158Smillert qq[' to '$file' failed] if $fail; 921b39c5158Smillert 922b39c5158Smillert } elsif ( $entry->is_fifo ) { 923b39c5158Smillert ON_UNIX && !system('mknod', $file, 'p') or 924b39c5158Smillert $err = qq[Making fifo ']. $entry->name .qq[' failed]; 925b39c5158Smillert 926b39c5158Smillert } elsif ( $entry->is_blockdev or $entry->is_chardev ) { 927b39c5158Smillert my $mode = $entry->is_blockdev ? 'b' : 'c'; 928b39c5158Smillert 929b39c5158Smillert ON_UNIX && !system('mknod', $file, $mode, 930b39c5158Smillert $entry->devmajor, $entry->devminor) or 931b39c5158Smillert $err = qq[Making block device ']. $entry->name .qq[' (maj=] . 932b39c5158Smillert $entry->devmajor . qq[ min=] . $entry->devminor . 933b39c5158Smillert qq[) failed.]; 934b39c5158Smillert 935b39c5158Smillert } elsif ( $entry->is_socket ) { 936b39c5158Smillert ### the original doesn't do anything special for sockets.... ### 937b39c5158Smillert 1; 938b39c5158Smillert } 939b39c5158Smillert 940b39c5158Smillert return $err ? $self->_error( $err ) : 1; 941b39c5158Smillert} 942b39c5158Smillert 943b39c5158Smillert### don't know how to make symlinks, let's just extract the file as 944b39c5158Smillert### a plain file 945b39c5158Smillertsub _extract_special_file_as_plain_file { 946b39c5158Smillert my $self = shift; 947b39c5158Smillert my $entry = shift or return; 948b39c5158Smillert my $file = shift; return unless defined $file; 949b39c5158Smillert 950b39c5158Smillert my $err; 951b39c5158Smillert TRY: { 952b39c5158Smillert my $orig = $self->_find_entry( $entry->linkname ); 953b39c5158Smillert 954b39c5158Smillert unless( $orig ) { 955b39c5158Smillert $err = qq[Could not find file '] . $entry->linkname . 956b39c5158Smillert qq[' in memory.]; 957b39c5158Smillert last TRY; 958b39c5158Smillert } 959b39c5158Smillert 960b39c5158Smillert ### clone the entry, make it appear as a normal file ### 961b39c5158Smillert my $clone = $entry->clone; 962b39c5158Smillert $clone->_downgrade_to_plainfile; 963b39c5158Smillert $self->_extract_file( $clone, $file ) or last TRY; 964b39c5158Smillert 965b39c5158Smillert return 1; 966b39c5158Smillert } 967b39c5158Smillert 968b39c5158Smillert return $self->_error($err); 969b39c5158Smillert} 970b39c5158Smillert 971b39c5158Smillert=head2 $tar->list_files( [\@properties] ) 972b39c5158Smillert 973b39c5158SmillertReturns a list of the names of all the files in the archive. 974b39c5158Smillert 975b39c5158SmillertIf C<list_files()> is passed an array reference as its first argument 976b39c5158Smillertit returns a list of hash references containing the requested 977b39c5158Smillertproperties of each file. The following list of properties is 978b39c5158Smillertsupported: name, size, mtime (last modified date), mode, uid, gid, 979b39c5158Smillertlinkname, uname, gname, devmajor, devminor, prefix. 980b39c5158Smillert 981b39c5158SmillertPassing an array reference containing only one element, 'name', is 982b39c5158Smillertspecial cased to return a list of names rather than a list of hash 983b39c5158Smillertreferences, making it equivalent to calling C<list_files> without 984b39c5158Smillertarguments. 985b39c5158Smillert 986b39c5158Smillert=cut 987b39c5158Smillert 988b39c5158Smillertsub list_files { 989b39c5158Smillert my $self = shift; 990b39c5158Smillert my $aref = shift || [ ]; 991b39c5158Smillert 992b39c5158Smillert unless( $self->_data ) { 993b39c5158Smillert $self->read() or return; 994b39c5158Smillert } 995b39c5158Smillert 996b39c5158Smillert if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) { 997b39c5158Smillert return map { $_->full_path } @{$self->_data}; 998b39c5158Smillert } else { 999b39c5158Smillert 1000b39c5158Smillert #my @rv; 1001b39c5158Smillert #for my $obj ( @{$self->_data} ) { 1002b39c5158Smillert # push @rv, { map { $_ => $obj->$_() } @$aref }; 1003b39c5158Smillert #} 1004b39c5158Smillert #return @rv; 1005b39c5158Smillert 1006b39c5158Smillert ### this does the same as the above.. just needs a +{ } 1007b39c5158Smillert ### to make sure perl doesn't confuse it for a block 1008b39c5158Smillert return map { my $o=$_; 1009b39c5158Smillert +{ map { $_ => $o->$_() } @$aref } 1010b39c5158Smillert } @{$self->_data}; 1011b39c5158Smillert } 1012b39c5158Smillert} 1013b39c5158Smillert 1014b39c5158Smillertsub _find_entry { 1015b39c5158Smillert my $self = shift; 1016b39c5158Smillert my $file = shift; 1017b39c5158Smillert 1018b39c5158Smillert unless( defined $file ) { 1019b39c5158Smillert $self->_error( qq[No file specified] ); 1020b39c5158Smillert return; 1021b39c5158Smillert } 1022b39c5158Smillert 1023b39c5158Smillert ### it's an object already 1024b39c5158Smillert return $file if UNIVERSAL::isa( $file, 'Archive::Tar::File' ); 1025b39c5158Smillert 1026b39c5158Smillert for my $entry ( @{$self->_data} ) { 1027b39c5158Smillert my $path = $entry->full_path; 1028b39c5158Smillert return $entry if $path eq $file; 1029b39c5158Smillert } 1030b39c5158Smillert 1031b39c5158Smillert $self->_error( qq[No such file in archive: '$file'] ); 1032b39c5158Smillert return; 1033b39c5158Smillert} 1034b39c5158Smillert 1035b39c5158Smillert=head2 $tar->get_files( [@filenames] ) 1036b39c5158Smillert 1037b39c5158SmillertReturns the C<Archive::Tar::File> objects matching the filenames 1038b39c5158Smillertprovided. If no filename list was passed, all C<Archive::Tar::File> 1039b39c5158Smillertobjects in the current Tar object are returned. 1040b39c5158Smillert 1041b39c5158SmillertPlease refer to the C<Archive::Tar::File> documentation on how to 1042b39c5158Smillerthandle these objects. 1043b39c5158Smillert 1044b39c5158Smillert=cut 1045b39c5158Smillert 1046b39c5158Smillertsub get_files { 1047b39c5158Smillert my $self = shift; 1048b39c5158Smillert 1049b39c5158Smillert return @{ $self->_data } unless @_; 1050b39c5158Smillert 1051b39c5158Smillert my @list; 1052b39c5158Smillert for my $file ( @_ ) { 1053b39c5158Smillert push @list, grep { defined } $self->_find_entry( $file ); 1054b39c5158Smillert } 1055b39c5158Smillert 1056b39c5158Smillert return @list; 1057b39c5158Smillert} 1058b39c5158Smillert 1059b39c5158Smillert=head2 $tar->get_content( $file ) 1060b39c5158Smillert 1061b39c5158SmillertReturn the content of the named file. 1062b39c5158Smillert 1063b39c5158Smillert=cut 1064b39c5158Smillert 1065b39c5158Smillertsub get_content { 1066b39c5158Smillert my $self = shift; 1067b39c5158Smillert my $entry = $self->_find_entry( shift ) or return; 1068b39c5158Smillert 1069b39c5158Smillert return $entry->data; 1070b39c5158Smillert} 1071b39c5158Smillert 1072b39c5158Smillert=head2 $tar->replace_content( $file, $content ) 1073b39c5158Smillert 1074b39c5158SmillertMake the string $content be the content for the file named $file. 1075b39c5158Smillert 1076b39c5158Smillert=cut 1077b39c5158Smillert 1078b39c5158Smillertsub replace_content { 1079b39c5158Smillert my $self = shift; 1080b39c5158Smillert my $entry = $self->_find_entry( shift ) or return; 1081b39c5158Smillert 1082b39c5158Smillert return $entry->replace_content( shift ); 1083b39c5158Smillert} 1084b39c5158Smillert 1085b39c5158Smillert=head2 $tar->rename( $file, $new_name ) 1086b39c5158Smillert 1087b39c5158SmillertRename the file of the in-memory archive to $new_name. 1088b39c5158Smillert 1089b39c5158SmillertNote that you must specify a Unix path for $new_name, since per tar 1090b39c5158Smillertstandard, all files in the archive must be Unix paths. 1091b39c5158Smillert 1092b39c5158SmillertReturns true on success and false on failure. 1093b39c5158Smillert 1094b39c5158Smillert=cut 1095b39c5158Smillert 1096b39c5158Smillertsub rename { 1097b39c5158Smillert my $self = shift; 1098b39c5158Smillert my $file = shift; return unless defined $file; 1099b39c5158Smillert my $new = shift; return unless defined $new; 1100b39c5158Smillert 1101b39c5158Smillert my $entry = $self->_find_entry( $file ) or return; 1102b39c5158Smillert 1103b39c5158Smillert return $entry->rename( $new ); 1104b39c5158Smillert} 1105b39c5158Smillert 1106*898184e3Ssthen=head2 $tar->chmod( $file, $mode ) 1107*898184e3Ssthen 1108*898184e3SsthenChange mode of $file to $mode. 1109*898184e3Ssthen 1110*898184e3SsthenReturns true on success and false on failure. 1111*898184e3Ssthen 1112*898184e3Ssthen=cut 1113*898184e3Ssthen 1114*898184e3Ssthensub chmod { 1115*898184e3Ssthen my $self = shift; 1116*898184e3Ssthen my $file = shift; return unless defined $file; 1117*898184e3Ssthen my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/; 1118*898184e3Ssthen my @args = ("$mode"); 1119*898184e3Ssthen 1120*898184e3Ssthen my $entry = $self->_find_entry( $file ) or return; 1121*898184e3Ssthen my $x = $entry->chmod( @args ); 1122*898184e3Ssthen return $x; 1123*898184e3Ssthen} 1124*898184e3Ssthen 1125*898184e3Ssthen=head2 $tar->chown( $file, $uname [, $gname] ) 1126*898184e3Ssthen 1127*898184e3SsthenChange owner $file to $uname and $gname. 1128*898184e3Ssthen 1129*898184e3SsthenReturns true on success and false on failure. 1130*898184e3Ssthen 1131*898184e3Ssthen=cut 1132*898184e3Ssthen 1133*898184e3Ssthensub chown { 1134*898184e3Ssthen my $self = shift; 1135*898184e3Ssthen my $file = shift; return unless defined $file; 1136*898184e3Ssthen my $uname = shift; return unless defined $uname; 1137*898184e3Ssthen my @args = ($uname); 1138*898184e3Ssthen push(@args, shift); 1139*898184e3Ssthen 1140*898184e3Ssthen my $entry = $self->_find_entry( $file ) or return; 1141*898184e3Ssthen my $x = $entry->chown( @args ); 1142*898184e3Ssthen return $x; 1143*898184e3Ssthen} 1144*898184e3Ssthen 1145b39c5158Smillert=head2 $tar->remove (@filenamelist) 1146b39c5158Smillert 1147b39c5158SmillertRemoves any entries with names matching any of the given filenames 1148b39c5158Smillertfrom the in-memory archive. Returns a list of C<Archive::Tar::File> 1149b39c5158Smillertobjects that remain. 1150b39c5158Smillert 1151b39c5158Smillert=cut 1152b39c5158Smillert 1153b39c5158Smillertsub remove { 1154b39c5158Smillert my $self = shift; 1155b39c5158Smillert my @list = @_; 1156b39c5158Smillert 1157b39c5158Smillert my %seen = map { $_->full_path => $_ } @{$self->_data}; 1158b39c5158Smillert delete $seen{ $_ } for @list; 1159b39c5158Smillert 1160b39c5158Smillert $self->_data( [values %seen] ); 1161b39c5158Smillert 1162b39c5158Smillert return values %seen; 1163b39c5158Smillert} 1164b39c5158Smillert 1165b39c5158Smillert=head2 $tar->clear 1166b39c5158Smillert 1167b39c5158SmillertC<clear> clears the current in-memory archive. This effectively gives 1168b39c5158Smillertyou a 'blank' object, ready to be filled again. Note that C<clear> 1169b39c5158Smillertonly has effect on the object, not the underlying tarfile. 1170b39c5158Smillert 1171b39c5158Smillert=cut 1172b39c5158Smillert 1173b39c5158Smillertsub clear { 1174b39c5158Smillert my $self = shift or return; 1175b39c5158Smillert 1176b39c5158Smillert $self->_data( [] ); 1177b39c5158Smillert $self->_file( '' ); 1178b39c5158Smillert 1179b39c5158Smillert return 1; 1180b39c5158Smillert} 1181b39c5158Smillert 1182b39c5158Smillert 1183b39c5158Smillert=head2 $tar->write ( [$file, $compressed, $prefix] ) 1184b39c5158Smillert 1185b39c5158SmillertWrite the in-memory archive to disk. The first argument can either 1186b39c5158Smillertbe the name of a file or a reference to an already open filehandle (a 1187b39c5158SmillertGLOB reference). 1188b39c5158Smillert 1189b39c5158SmillertThe second argument is used to indicate compression. You can either 1190b39c5158Smillertcompress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed 1191b39c5158Smillertto be the C<gzip> compression level (between 1 and 9), but the use of 1192*898184e3Ssthenconstants is preferred: 1193b39c5158Smillert 1194b39c5158Smillert # write a gzip compressed file 1195b39c5158Smillert $tar->write( 'out.tgz', COMPRESS_GZIP ); 1196b39c5158Smillert 1197b39c5158Smillert # write a bzip compressed file 1198b39c5158Smillert $tar->write( 'out.tbz', COMPRESS_BZIP ); 1199b39c5158Smillert 1200b39c5158SmillertNote that when you pass in a filehandle, the compression argument 1201b39c5158Smillertis ignored, as all files are printed verbatim to your filehandle. 1202b39c5158SmillertIf you wish to enable compression with filehandles, use an 1203b39c5158SmillertC<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead. 1204b39c5158Smillert 1205b39c5158SmillertThe third argument is an optional prefix. All files will be tucked 1206b39c5158Smillertaway in the directory you specify as prefix. So if you have files 1207b39c5158Smillert'a' and 'b' in your archive, and you specify 'foo' as prefix, they 1208b39c5158Smillertwill be written to the archive as 'foo/a' and 'foo/b'. 1209b39c5158Smillert 1210b39c5158SmillertIf no arguments are given, C<write> returns the entire formatted 1211b39c5158Smillertarchive as a string, which could be useful if you'd like to stuff the 1212b39c5158Smillertarchive into a socket or a pipe to gzip or something. 1213b39c5158Smillert 1214b39c5158Smillert 1215b39c5158Smillert=cut 1216b39c5158Smillert 1217b39c5158Smillertsub write { 1218b39c5158Smillert my $self = shift; 1219b39c5158Smillert my $file = shift; $file = '' unless defined $file; 1220b39c5158Smillert my $gzip = shift || 0; 1221b39c5158Smillert my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix; 1222b39c5158Smillert my $dummy = ''; 1223b39c5158Smillert 1224b39c5158Smillert ### only need a handle if we have a file to print to ### 1225b39c5158Smillert my $handle = length($file) 1226b39c5158Smillert ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) ) 1227b39c5158Smillert or return ) 1228b39c5158Smillert : $HAS_PERLIO ? do { open my $h, '>', \$dummy; $h } 1229b39c5158Smillert : $HAS_IO_STRING ? IO::String->new 1230b39c5158Smillert : __PACKAGE__->no_string_support(); 1231b39c5158Smillert 1232b39c5158Smillert ### Addresses: #41798: Nonempty $\ when writing a TAR file produces a 1233b39c5158Smillert ### corrupt TAR file. Must clear out $\ to make sure no garbage is 1234b39c5158Smillert ### printed to the archive 1235b39c5158Smillert local $\; 1236b39c5158Smillert 1237b39c5158Smillert for my $entry ( @{$self->_data} ) { 1238b39c5158Smillert ### entries to be written to the tarfile ### 1239b39c5158Smillert my @write_me; 1240b39c5158Smillert 1241b39c5158Smillert ### only now will we change the object to reflect the current state 1242b39c5158Smillert ### of the name and prefix fields -- this needs to be limited to 1243b39c5158Smillert ### write() only! 1244b39c5158Smillert my $clone = $entry->clone; 1245b39c5158Smillert 1246b39c5158Smillert 1247b39c5158Smillert ### so, if you don't want use to use the prefix, we'll stuff 1248b39c5158Smillert ### everything in the name field instead 1249b39c5158Smillert if( $DO_NOT_USE_PREFIX ) { 1250b39c5158Smillert 1251b39c5158Smillert ### you might have an extended prefix, if so, set it in the clone 1252b39c5158Smillert ### XXX is ::Unix right? 1253b39c5158Smillert $clone->name( length $ext_prefix 1254b39c5158Smillert ? File::Spec::Unix->catdir( $ext_prefix, 1255b39c5158Smillert $clone->full_path) 1256b39c5158Smillert : $clone->full_path ); 1257b39c5158Smillert $clone->prefix( '' ); 1258b39c5158Smillert 1259b39c5158Smillert ### otherwise, we'll have to set it properly -- prefix part in the 1260b39c5158Smillert ### prefix and name part in the name field. 1261b39c5158Smillert } else { 1262b39c5158Smillert 1263b39c5158Smillert ### split them here, not before! 1264b39c5158Smillert my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path ); 1265b39c5158Smillert 1266b39c5158Smillert ### you might have an extended prefix, if so, set it in the clone 1267b39c5158Smillert ### XXX is ::Unix right? 1268b39c5158Smillert $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix ) 1269b39c5158Smillert if length $ext_prefix; 1270b39c5158Smillert 1271b39c5158Smillert $clone->prefix( $prefix ); 1272b39c5158Smillert $clone->name( $name ); 1273b39c5158Smillert } 1274b39c5158Smillert 1275b39c5158Smillert ### names are too long, and will get truncated if we don't add a 1276b39c5158Smillert ### '@LongLink' file... 1277b39c5158Smillert my $make_longlink = ( length($clone->name) > NAME_LENGTH or 1278b39c5158Smillert length($clone->prefix) > PREFIX_LENGTH 1279b39c5158Smillert ) || 0; 1280b39c5158Smillert 1281b39c5158Smillert ### perhaps we need to make a longlink file? 1282b39c5158Smillert if( $make_longlink ) { 1283b39c5158Smillert my $longlink = Archive::Tar::File->new( 1284b39c5158Smillert data => LONGLINK_NAME, 1285b39c5158Smillert $clone->full_path, 1286b39c5158Smillert { type => LONGLINK } 1287b39c5158Smillert ); 1288b39c5158Smillert 1289b39c5158Smillert unless( $longlink ) { 1290b39c5158Smillert $self->_error( qq[Could not create 'LongLink' entry for ] . 1291b39c5158Smillert qq[oversize file '] . $clone->full_path ."'" ); 1292b39c5158Smillert return; 1293b39c5158Smillert }; 1294b39c5158Smillert 1295b39c5158Smillert push @write_me, $longlink; 1296b39c5158Smillert } 1297b39c5158Smillert 1298b39c5158Smillert push @write_me, $clone; 1299b39c5158Smillert 1300b39c5158Smillert ### write the one, optionally 2 a::t::file objects to the handle 1301b39c5158Smillert for my $clone (@write_me) { 1302b39c5158Smillert 1303b39c5158Smillert ### if the file is a symlink, there are 2 options: 1304b39c5158Smillert ### either we leave the symlink intact, but then we don't write any 1305b39c5158Smillert ### data OR we follow the symlink, which means we actually make a 1306b39c5158Smillert ### copy. if we do the latter, we have to change the TYPE of the 1307b39c5158Smillert ### clone to 'FILE' 1308b39c5158Smillert my $link_ok = $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK; 1309b39c5158Smillert my $data_ok = !$clone->is_symlink && $clone->has_content; 1310b39c5158Smillert 1311b39c5158Smillert ### downgrade to a 'normal' file if it's a symlink we're going to 1312b39c5158Smillert ### treat as a regular file 1313b39c5158Smillert $clone->_downgrade_to_plainfile if $link_ok; 1314b39c5158Smillert 1315b39c5158Smillert ### get the header for this block 1316b39c5158Smillert my $header = $self->_format_tar_entry( $clone ); 1317b39c5158Smillert unless( $header ) { 1318b39c5158Smillert $self->_error(q[Could not format header for: ] . 1319b39c5158Smillert $clone->full_path ); 1320b39c5158Smillert return; 1321b39c5158Smillert } 1322b39c5158Smillert 1323b39c5158Smillert unless( print $handle $header ) { 1324b39c5158Smillert $self->_error(q[Could not write header for: ] . 1325b39c5158Smillert $clone->full_path); 1326b39c5158Smillert return; 1327b39c5158Smillert } 1328b39c5158Smillert 1329b39c5158Smillert if( $link_ok or $data_ok ) { 1330b39c5158Smillert unless( print $handle $clone->data ) { 1331b39c5158Smillert $self->_error(q[Could not write data for: ] . 1332b39c5158Smillert $clone->full_path); 1333b39c5158Smillert return; 1334b39c5158Smillert } 1335b39c5158Smillert 1336b39c5158Smillert ### pad the end of the clone if required ### 1337b39c5158Smillert print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK 1338b39c5158Smillert } 1339b39c5158Smillert 1340b39c5158Smillert } ### done writing these entries 1341b39c5158Smillert } 1342b39c5158Smillert 1343b39c5158Smillert ### write the end markers ### 1344b39c5158Smillert print $handle TAR_END x 2 or 1345b39c5158Smillert return $self->_error( qq[Could not write tar end markers] ); 1346b39c5158Smillert 1347b39c5158Smillert ### did you want it written to a file, or returned as a string? ### 1348b39c5158Smillert my $rv = length($file) ? 1 1349b39c5158Smillert : $HAS_PERLIO ? $dummy 1350b39c5158Smillert : do { seek $handle, 0, 0; local $/; <$handle> }; 1351b39c5158Smillert 1352*898184e3Ssthen ### make sure to close the handle if we created it 1353*898184e3Ssthen if ( $file ne $handle ) { 1354*898184e3Ssthen unless( close $handle ) { 1355*898184e3Ssthen $self->_error( qq[Could not write tar] ); 1356*898184e3Ssthen return; 1357*898184e3Ssthen } 1358*898184e3Ssthen } 1359b39c5158Smillert 1360b39c5158Smillert return $rv; 1361b39c5158Smillert} 1362b39c5158Smillert 1363b39c5158Smillertsub _format_tar_entry { 1364b39c5158Smillert my $self = shift; 1365b39c5158Smillert my $entry = shift or return; 1366b39c5158Smillert my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix; 1367b39c5158Smillert my $no_prefix = shift || 0; 1368b39c5158Smillert 1369b39c5158Smillert my $file = $entry->name; 1370b39c5158Smillert my $prefix = $entry->prefix; $prefix = '' unless defined $prefix; 1371b39c5158Smillert 1372b39c5158Smillert ### remove the prefix from the file name 1373*898184e3Ssthen ### not sure if this is still needed --kane 1374b39c5158Smillert ### no it's not -- Archive::Tar::File->_new_from_file will take care of 1375b39c5158Smillert ### this for us. Even worse, this would break if we tried to add a file 1376b39c5158Smillert ### like x/x. 1377b39c5158Smillert #if( length $prefix ) { 1378b39c5158Smillert # $file =~ s/^$match//; 1379b39c5158Smillert #} 1380b39c5158Smillert 1381b39c5158Smillert $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix) 1382b39c5158Smillert if length $ext_prefix; 1383b39c5158Smillert 1384b39c5158Smillert ### not sure why this is... ### 1385b39c5158Smillert my $l = PREFIX_LENGTH; # is ambiguous otherwise... 1386b39c5158Smillert substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH; 1387b39c5158Smillert 1388*898184e3Ssthen my $f1 = "%06o"; my $f2 = $ZERO_PAD_NUMBERS ? "%011o" : "%11o"; 1389b39c5158Smillert 1390b39c5158Smillert ### this might be optimizable with a 'changed' flag in the file objects ### 1391b39c5158Smillert my $tar = pack ( 1392b39c5158Smillert PACK, 1393b39c5158Smillert $file, 1394b39c5158Smillert 1395b39c5158Smillert (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]), 1396b39c5158Smillert (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]), 1397b39c5158Smillert 1398b39c5158Smillert "", # checksum field - space padded a bit down 1399b39c5158Smillert 1400b39c5158Smillert (map { $entry->$_() } qw[type linkname magic]), 1401b39c5158Smillert 1402b39c5158Smillert $entry->version || TAR_VERSION, 1403b39c5158Smillert 1404b39c5158Smillert (map { $entry->$_() } qw[uname gname]), 1405b39c5158Smillert (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]), 1406b39c5158Smillert 1407b39c5158Smillert ($no_prefix ? '' : $prefix) 1408b39c5158Smillert ); 1409b39c5158Smillert 1410b39c5158Smillert ### add the checksum ### 1411*898184e3Ssthen my $checksum_fmt = $ZERO_PAD_NUMBERS ? "%06o\0" : "%06o\0"; 1412b39c5158Smillert substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar)); 1413b39c5158Smillert 1414b39c5158Smillert return $tar; 1415b39c5158Smillert} 1416b39c5158Smillert 1417b39c5158Smillert=head2 $tar->add_files( @filenamelist ) 1418b39c5158Smillert 1419b39c5158SmillertTakes a list of filenames and adds them to the in-memory archive. 1420b39c5158Smillert 1421b39c5158SmillertThe path to the file is automatically converted to a Unix like 1422b39c5158Smillertequivalent for use in the archive, and, if on MacOS, the file's 1423b39c5158Smillertmodification time is converted from the MacOS epoch to the Unix epoch. 1424b39c5158SmillertSo tar archives created on MacOS with B<Archive::Tar> can be read 1425b39c5158Smillertboth with I<tar> on Unix and applications like I<suntar> or 1426b39c5158SmillertI<Stuffit Expander> on MacOS. 1427b39c5158Smillert 1428b39c5158SmillertBe aware that the file's type/creator and resource fork will be lost, 1429b39c5158Smillertwhich is usually what you want in cross-platform archives. 1430b39c5158Smillert 1431b39c5158SmillertInstead of a filename, you can also pass it an existing C<Archive::Tar::File> 1432b39c5158Smillertobject from, for example, another archive. The object will be clone, and 1433b39c5158Smillerteffectively be a copy of the original, not an alias. 1434b39c5158Smillert 1435b39c5158SmillertReturns a list of C<Archive::Tar::File> objects that were just added. 1436b39c5158Smillert 1437b39c5158Smillert=cut 1438b39c5158Smillert 1439b39c5158Smillertsub add_files { 1440b39c5158Smillert my $self = shift; 1441b39c5158Smillert my @files = @_ or return; 1442b39c5158Smillert 1443b39c5158Smillert my @rv; 1444b39c5158Smillert for my $file ( @files ) { 1445b39c5158Smillert 1446b39c5158Smillert ### you passed an Archive::Tar::File object 1447b39c5158Smillert ### clone it so we don't accidentally have a reference to 1448b39c5158Smillert ### an object from another archive 1449b39c5158Smillert if( UNIVERSAL::isa( $file,'Archive::Tar::File' ) ) { 1450b39c5158Smillert push @rv, $file->clone; 1451b39c5158Smillert next; 1452b39c5158Smillert } 1453b39c5158Smillert 1454b39c5158Smillert unless( -e $file || -l $file ) { 1455b39c5158Smillert $self->_error( qq[No such file: '$file'] ); 1456b39c5158Smillert next; 1457b39c5158Smillert } 1458b39c5158Smillert 1459b39c5158Smillert my $obj = Archive::Tar::File->new( file => $file ); 1460b39c5158Smillert unless( $obj ) { 1461b39c5158Smillert $self->_error( qq[Unable to add file: '$file'] ); 1462b39c5158Smillert next; 1463b39c5158Smillert } 1464b39c5158Smillert 1465b39c5158Smillert push @rv, $obj; 1466b39c5158Smillert } 1467b39c5158Smillert 1468b39c5158Smillert push @{$self->{_data}}, @rv; 1469b39c5158Smillert 1470b39c5158Smillert return @rv; 1471b39c5158Smillert} 1472b39c5158Smillert 1473b39c5158Smillert=head2 $tar->add_data ( $filename, $data, [$opthashref] ) 1474b39c5158Smillert 1475b39c5158SmillertTakes a filename, a scalar full of data and optionally a reference to 1476b39c5158Smillerta hash with specific options. 1477b39c5158Smillert 1478b39c5158SmillertWill add a file to the in-memory archive, with name C<$filename> and 1479b39c5158Smillertcontent C<$data>. Specific properties can be set using C<$opthashref>. 1480b39c5158SmillertThe following list of properties is supported: name, size, mtime 1481b39c5158Smillert(last modified date), mode, uid, gid, linkname, uname, gname, 1482b39c5158Smillertdevmajor, devminor, prefix, type. (On MacOS, the file's path and 1483b39c5158Smillertmodification times are converted to Unix equivalents.) 1484b39c5158Smillert 1485b39c5158SmillertValid values for the file type are the following constants defined in 1486b39c5158SmillertArchive::Tar::Constants: 1487b39c5158Smillert 1488b39c5158Smillert=over 4 1489b39c5158Smillert 1490b39c5158Smillert=item FILE 1491b39c5158Smillert 1492b39c5158SmillertRegular file. 1493b39c5158Smillert 1494b39c5158Smillert=item HARDLINK 1495b39c5158Smillert 1496b39c5158Smillert=item SYMLINK 1497b39c5158Smillert 1498b39c5158SmillertHard and symbolic ("soft") links; linkname should specify target. 1499b39c5158Smillert 1500b39c5158Smillert=item CHARDEV 1501b39c5158Smillert 1502b39c5158Smillert=item BLOCKDEV 1503b39c5158Smillert 1504b39c5158SmillertCharacter and block devices. devmajor and devminor should specify the major 1505b39c5158Smillertand minor device numbers. 1506b39c5158Smillert 1507b39c5158Smillert=item DIR 1508b39c5158Smillert 1509b39c5158SmillertDirectory. 1510b39c5158Smillert 1511b39c5158Smillert=item FIFO 1512b39c5158Smillert 1513b39c5158SmillertFIFO (named pipe). 1514b39c5158Smillert 1515b39c5158Smillert=item SOCKET 1516b39c5158Smillert 1517b39c5158SmillertSocket. 1518b39c5158Smillert 1519b39c5158Smillert=back 1520b39c5158Smillert 1521b39c5158SmillertReturns the C<Archive::Tar::File> object that was just added, or 1522b39c5158SmillertC<undef> on failure. 1523b39c5158Smillert 1524b39c5158Smillert=cut 1525b39c5158Smillert 1526b39c5158Smillertsub add_data { 1527b39c5158Smillert my $self = shift; 1528b39c5158Smillert my ($file, $data, $opt) = @_; 1529b39c5158Smillert 1530b39c5158Smillert my $obj = Archive::Tar::File->new( data => $file, $data, $opt ); 1531b39c5158Smillert unless( $obj ) { 1532b39c5158Smillert $self->_error( qq[Unable to add file: '$file'] ); 1533b39c5158Smillert return; 1534b39c5158Smillert } 1535b39c5158Smillert 1536b39c5158Smillert push @{$self->{_data}}, $obj; 1537b39c5158Smillert 1538b39c5158Smillert return $obj; 1539b39c5158Smillert} 1540b39c5158Smillert 1541b39c5158Smillert=head2 $tar->error( [$BOOL] ) 1542b39c5158Smillert 1543b39c5158SmillertReturns the current errorstring (usually, the last error reported). 1544b39c5158SmillertIf a true value was specified, it will give the C<Carp::longmess> 1545b39c5158Smillertequivalent of the error, in effect giving you a stacktrace. 1546b39c5158Smillert 1547b39c5158SmillertFor backwards compatibility, this error is also available as 1548b39c5158SmillertC<$Archive::Tar::error> although it is much recommended you use the 1549b39c5158Smillertmethod call instead. 1550b39c5158Smillert 1551b39c5158Smillert=cut 1552b39c5158Smillert 1553b39c5158Smillert{ 1554b39c5158Smillert $error = ''; 1555b39c5158Smillert my $longmess; 1556b39c5158Smillert 1557b39c5158Smillert sub _error { 1558b39c5158Smillert my $self = shift; 1559b39c5158Smillert my $msg = $error = shift; 1560b39c5158Smillert $longmess = Carp::longmess($error); 1561b39c5158Smillert if (ref $self) { 1562b39c5158Smillert $self->{_error} = $error; 1563b39c5158Smillert $self->{_longmess} = $longmess; 1564b39c5158Smillert } 1565b39c5158Smillert 1566b39c5158Smillert ### set Archive::Tar::WARN to 0 to disable printing 1567b39c5158Smillert ### of errors 1568b39c5158Smillert if( $WARN ) { 1569b39c5158Smillert carp $DEBUG ? $longmess : $msg; 1570b39c5158Smillert } 1571b39c5158Smillert 1572b39c5158Smillert return; 1573b39c5158Smillert } 1574b39c5158Smillert 1575b39c5158Smillert sub error { 1576b39c5158Smillert my $self = shift; 1577b39c5158Smillert if (ref $self) { 1578b39c5158Smillert return shift() ? $self->{_longmess} : $self->{_error}; 1579b39c5158Smillert } else { 1580b39c5158Smillert return shift() ? $longmess : $error; 1581b39c5158Smillert } 1582b39c5158Smillert } 1583b39c5158Smillert} 1584b39c5158Smillert 1585b39c5158Smillert=head2 $tar->setcwd( $cwd ); 1586b39c5158Smillert 1587b39c5158SmillertC<Archive::Tar> needs to know the current directory, and it will run 1588b39c5158SmillertC<Cwd::cwd()> I<every> time it extracts a I<relative> entry from the 1589b39c5158Smillerttarfile and saves it in the file system. (As of version 1.30, however, 1590b39c5158SmillertC<Archive::Tar> will use the speed optimization described below 1591b39c5158Smillertautomatically, so it's only relevant if you're using C<extract_file()>). 1592b39c5158Smillert 1593b39c5158SmillertSince C<Archive::Tar> doesn't change the current directory internally 1594b39c5158Smillertwhile it is extracting the items in a tarball, all calls to C<Cwd::cwd()> 1595b39c5158Smillertcan be avoided if we can guarantee that the current directory doesn't 1596b39c5158Smillertget changed externally. 1597b39c5158Smillert 1598b39c5158SmillertTo use this performance boost, set the current directory via 1599b39c5158Smillert 1600b39c5158Smillert use Cwd; 1601b39c5158Smillert $tar->setcwd( cwd() ); 1602b39c5158Smillert 1603b39c5158Smillertonce before calling a function like C<extract_file> and 1604b39c5158SmillertC<Archive::Tar> will use the current directory setting from then on 1605b39c5158Smillertand won't call C<Cwd::cwd()> internally. 1606b39c5158Smillert 1607b39c5158SmillertTo switch back to the default behaviour, use 1608b39c5158Smillert 1609b39c5158Smillert $tar->setcwd( undef ); 1610b39c5158Smillert 1611b39c5158Smillertand C<Archive::Tar> will call C<Cwd::cwd()> internally again. 1612b39c5158Smillert 1613*898184e3SsthenIf you're using C<Archive::Tar>'s C<extract()> method, C<setcwd()> will 1614b39c5158Smillertbe called for you. 1615b39c5158Smillert 1616b39c5158Smillert=cut 1617b39c5158Smillert 1618b39c5158Smillertsub setcwd { 1619b39c5158Smillert my $self = shift; 1620b39c5158Smillert my $cwd = shift; 1621b39c5158Smillert 1622b39c5158Smillert $self->{cwd} = $cwd; 1623b39c5158Smillert} 1624b39c5158Smillert 1625b39c5158Smillert=head1 Class Methods 1626b39c5158Smillert 1627b39c5158Smillert=head2 Archive::Tar->create_archive($file, $compressed, @filelist) 1628b39c5158Smillert 1629b39c5158SmillertCreates a tar file from the list of files provided. The first 1630b39c5158Smillertargument can either be the name of the tar file to create or a 1631b39c5158Smillertreference to an open file handle (e.g. a GLOB reference). 1632b39c5158Smillert 1633b39c5158SmillertThe second argument is used to indicate compression. You can either 1634b39c5158Smillertcompress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed 1635b39c5158Smillertto be the C<gzip> compression level (between 1 and 9), but the use of 1636*898184e3Ssthenconstants is preferred: 1637b39c5158Smillert 1638b39c5158Smillert # write a gzip compressed file 1639b39c5158Smillert Archive::Tar->create_archive( 'out.tgz', COMPRESS_GZIP, @filelist ); 1640b39c5158Smillert 1641b39c5158Smillert # write a bzip compressed file 1642b39c5158Smillert Archive::Tar->create_archive( 'out.tbz', COMPRESS_BZIP, @filelist ); 1643b39c5158Smillert 1644b39c5158SmillertNote that when you pass in a filehandle, the compression argument 1645b39c5158Smillertis ignored, as all files are printed verbatim to your filehandle. 1646b39c5158SmillertIf you wish to enable compression with filehandles, use an 1647b39c5158SmillertC<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead. 1648b39c5158Smillert 1649b39c5158SmillertThe remaining arguments list the files to be included in the tar file. 1650b39c5158SmillertThese files must all exist. Any files which don't exist or can't be 1651b39c5158Smillertread are silently ignored. 1652b39c5158Smillert 1653b39c5158SmillertIf the archive creation fails for any reason, C<create_archive> will 1654b39c5158Smillertreturn false. Please use the C<error> method to find the cause of the 1655b39c5158Smillertfailure. 1656b39c5158Smillert 1657b39c5158SmillertNote that this method does not write C<on the fly> as it were; it 1658b39c5158Smillertstill reads all the files into memory before writing out the archive. 1659b39c5158SmillertConsult the FAQ below if this is a problem. 1660b39c5158Smillert 1661b39c5158Smillert=cut 1662b39c5158Smillert 1663b39c5158Smillertsub create_archive { 1664b39c5158Smillert my $class = shift; 1665b39c5158Smillert 1666b39c5158Smillert my $file = shift; return unless defined $file; 1667b39c5158Smillert my $gzip = shift || 0; 1668b39c5158Smillert my @files = @_; 1669b39c5158Smillert 1670b39c5158Smillert unless( @files ) { 1671b39c5158Smillert return $class->_error( qq[Cowardly refusing to create empty archive!] ); 1672b39c5158Smillert } 1673b39c5158Smillert 1674b39c5158Smillert my $tar = $class->new; 1675b39c5158Smillert $tar->add_files( @files ); 1676b39c5158Smillert return $tar->write( $file, $gzip ); 1677b39c5158Smillert} 1678b39c5158Smillert 1679b39c5158Smillert=head2 Archive::Tar->iter( $filename, [ $compressed, {opt => $val} ] ) 1680b39c5158Smillert 1681b39c5158SmillertReturns an iterator function that reads the tar file without loading 1682b39c5158Smillertit all in memory. Each time the function is called it will return the 1683b39c5158Smillertnext file in the tarball. The files are returned as 1684b39c5158SmillertC<Archive::Tar::File> objects. The iterator function returns the 1685b39c5158Smillertempty list once it has exhausted the files contained. 1686b39c5158Smillert 1687b39c5158SmillertThe second argument can be a hash reference with options, which are 1688b39c5158Smillertidentical to the arguments passed to C<read()>. 1689b39c5158Smillert 1690b39c5158SmillertExample usage: 1691b39c5158Smillert 1692b39c5158Smillert my $next = Archive::Tar->iter( "example.tar.gz", 1, {filter => qr/\.pm$/} ); 1693b39c5158Smillert 1694b39c5158Smillert while( my $f = $next->() ) { 1695b39c5158Smillert print $f->name, "\n"; 1696b39c5158Smillert 1697b39c5158Smillert $f->extract or warn "Extraction failed"; 1698b39c5158Smillert 1699b39c5158Smillert # .... 1700b39c5158Smillert } 1701b39c5158Smillert 1702b39c5158Smillert=cut 1703b39c5158Smillert 1704b39c5158Smillert 1705b39c5158Smillertsub iter { 1706b39c5158Smillert my $class = shift; 1707b39c5158Smillert my $filename = shift or return; 1708*898184e3Ssthen my $compressed = shift || 0; 1709b39c5158Smillert my $opts = shift || {}; 1710b39c5158Smillert 1711b39c5158Smillert ### get a handle to read from. 1712b39c5158Smillert my $handle = $class->_get_handle( 1713b39c5158Smillert $filename, 1714b39c5158Smillert $compressed, 1715b39c5158Smillert READ_ONLY->( ZLIB ) 1716b39c5158Smillert ) or return; 1717b39c5158Smillert 1718b39c5158Smillert my @data; 1719b39c5158Smillert return sub { 1720b39c5158Smillert return shift(@data) if @data; # more than one file returned? 1721b39c5158Smillert return unless $handle; # handle exhausted? 1722b39c5158Smillert 1723b39c5158Smillert ### read data, should only return file 1724b39c5158Smillert my $tarfile = $class->_read_tar($handle, { %$opts, limit => 1 }); 1725b39c5158Smillert @data = @$tarfile if ref $tarfile && ref $tarfile eq 'ARRAY'; 1726b39c5158Smillert 1727b39c5158Smillert ### return one piece of data 1728b39c5158Smillert return shift(@data) if @data; 1729b39c5158Smillert 1730b39c5158Smillert ### data is exhausted, free the filehandle 1731b39c5158Smillert undef $handle; 1732b39c5158Smillert return; 1733b39c5158Smillert }; 1734b39c5158Smillert} 1735b39c5158Smillert 1736b39c5158Smillert=head2 Archive::Tar->list_archive($file, $compressed, [\@properties]) 1737b39c5158Smillert 1738b39c5158SmillertReturns a list of the names of all the files in the archive. The 1739b39c5158Smillertfirst argument can either be the name of the tar file to list or a 1740b39c5158Smillertreference to an open file handle (e.g. a GLOB reference). 1741b39c5158Smillert 1742b39c5158SmillertIf C<list_archive()> is passed an array reference as its third 1743b39c5158Smillertargument it returns a list of hash references containing the requested 1744b39c5158Smillertproperties of each file. The following list of properties is 1745b39c5158Smillertsupported: full_path, name, size, mtime (last modified date), mode, 1746b39c5158Smillertuid, gid, linkname, uname, gname, devmajor, devminor, prefix. 1747b39c5158Smillert 1748b39c5158SmillertSee C<Archive::Tar::File> for details about supported properties. 1749b39c5158Smillert 1750b39c5158SmillertPassing an array reference containing only one element, 'name', is 1751b39c5158Smillertspecial cased to return a list of names rather than a list of hash 1752b39c5158Smillertreferences. 1753b39c5158Smillert 1754b39c5158Smillert=cut 1755b39c5158Smillert 1756b39c5158Smillertsub list_archive { 1757b39c5158Smillert my $class = shift; 1758b39c5158Smillert my $file = shift; return unless defined $file; 1759b39c5158Smillert my $gzip = shift || 0; 1760b39c5158Smillert 1761b39c5158Smillert my $tar = $class->new($file, $gzip); 1762b39c5158Smillert return unless $tar; 1763b39c5158Smillert 1764b39c5158Smillert return $tar->list_files( @_ ); 1765b39c5158Smillert} 1766b39c5158Smillert 1767b39c5158Smillert=head2 Archive::Tar->extract_archive($file, $compressed) 1768b39c5158Smillert 1769b39c5158SmillertExtracts the contents of the tar file. The first argument can either 1770b39c5158Smillertbe the name of the tar file to create or a reference to an open file 1771b39c5158Smillerthandle (e.g. a GLOB reference). All relative paths in the tar file will 1772b39c5158Smillertbe created underneath the current working directory. 1773b39c5158Smillert 1774b39c5158SmillertC<extract_archive> will return a list of files it extracted. 1775b39c5158SmillertIf the archive extraction fails for any reason, C<extract_archive> 1776b39c5158Smillertwill return false. Please use the C<error> method to find the cause 1777b39c5158Smillertof the failure. 1778b39c5158Smillert 1779b39c5158Smillert=cut 1780b39c5158Smillert 1781b39c5158Smillertsub extract_archive { 1782b39c5158Smillert my $class = shift; 1783b39c5158Smillert my $file = shift; return unless defined $file; 1784b39c5158Smillert my $gzip = shift || 0; 1785b39c5158Smillert 1786b39c5158Smillert my $tar = $class->new( ) or return; 1787b39c5158Smillert 1788b39c5158Smillert return $tar->read( $file, $gzip, { extract => 1 } ); 1789b39c5158Smillert} 1790b39c5158Smillert 1791b39c5158Smillert=head2 $bool = Archive::Tar->has_io_string 1792b39c5158Smillert 1793b39c5158SmillertReturns true if we currently have C<IO::String> support loaded. 1794b39c5158Smillert 1795b39c5158SmillertEither C<IO::String> or C<perlio> support is needed to support writing 1796b39c5158Smillertstringified archives. Currently, C<perlio> is the preferred method, if 1797b39c5158Smillertavailable. 1798b39c5158Smillert 1799b39c5158SmillertSee the C<GLOBAL VARIABLES> section to see how to change this preference. 1800b39c5158Smillert 1801b39c5158Smillert=cut 1802b39c5158Smillert 1803b39c5158Smillertsub has_io_string { return $HAS_IO_STRING; } 1804b39c5158Smillert 1805b39c5158Smillert=head2 $bool = Archive::Tar->has_perlio 1806b39c5158Smillert 1807b39c5158SmillertReturns true if we currently have C<perlio> support loaded. 1808b39c5158Smillert 1809b39c5158SmillertThis requires C<perl-5.8> or higher, compiled with C<perlio> 1810b39c5158Smillert 1811b39c5158SmillertEither C<IO::String> or C<perlio> support is needed to support writing 1812b39c5158Smillertstringified archives. Currently, C<perlio> is the preferred method, if 1813b39c5158Smillertavailable. 1814b39c5158Smillert 1815b39c5158SmillertSee the C<GLOBAL VARIABLES> section to see how to change this preference. 1816b39c5158Smillert 1817b39c5158Smillert=cut 1818b39c5158Smillert 1819b39c5158Smillertsub has_perlio { return $HAS_PERLIO; } 1820b39c5158Smillert 1821b39c5158Smillert=head2 $bool = Archive::Tar->has_zlib_support 1822b39c5158Smillert 1823b39c5158SmillertReturns true if C<Archive::Tar> can extract C<zlib> compressed archives 1824b39c5158Smillert 1825b39c5158Smillert=cut 1826b39c5158Smillert 1827b39c5158Smillertsub has_zlib_support { return ZLIB } 1828b39c5158Smillert 1829b39c5158Smillert=head2 $bool = Archive::Tar->has_bzip2_support 1830b39c5158Smillert 1831b39c5158SmillertReturns true if C<Archive::Tar> can extract C<bzip2> compressed archives 1832b39c5158Smillert 1833b39c5158Smillert=cut 1834b39c5158Smillert 1835b39c5158Smillertsub has_bzip2_support { return BZIP } 1836b39c5158Smillert 1837b39c5158Smillert=head2 Archive::Tar->can_handle_compressed_files 1838b39c5158Smillert 1839b39c5158SmillertA simple checking routine, which will return true if C<Archive::Tar> 1840b39c5158Smillertis able to uncompress compressed archives on the fly with C<IO::Zlib> 1841b39c5158Smillertand C<IO::Compress::Bzip2> or false if not both are installed. 1842b39c5158Smillert 1843b39c5158SmillertYou can use this as a shortcut to determine whether C<Archive::Tar> 1844b39c5158Smillertwill do what you think before passing compressed archives to its 1845b39c5158SmillertC<read> method. 1846b39c5158Smillert 1847b39c5158Smillert=cut 1848b39c5158Smillert 1849b39c5158Smillertsub can_handle_compressed_files { return ZLIB && BZIP ? 1 : 0 } 1850b39c5158Smillert 1851b39c5158Smillertsub no_string_support { 1852b39c5158Smillert croak("You have to install IO::String to support writing archives to strings"); 1853b39c5158Smillert} 1854b39c5158Smillert 1855b39c5158Smillert1; 1856b39c5158Smillert 1857b39c5158Smillert__END__ 1858b39c5158Smillert 1859b39c5158Smillert=head1 GLOBAL VARIABLES 1860b39c5158Smillert 1861b39c5158Smillert=head2 $Archive::Tar::FOLLOW_SYMLINK 1862b39c5158Smillert 1863b39c5158SmillertSet this variable to C<1> to make C<Archive::Tar> effectively make a 1864b39c5158Smillertcopy of the file when extracting. Default is C<0>, which 1865b39c5158Smillertmeans the symlink stays intact. Of course, you will have to pack the 1866b39c5158Smillertfile linked to as well. 1867b39c5158Smillert 1868b39c5158SmillertThis option is checked when you write out the tarfile using C<write> 1869b39c5158Smillertor C<create_archive>. 1870b39c5158Smillert 1871b39c5158SmillertThis works just like C</bin/tar>'s C<-h> option. 1872b39c5158Smillert 1873b39c5158Smillert=head2 $Archive::Tar::CHOWN 1874b39c5158Smillert 1875b39c5158SmillertBy default, C<Archive::Tar> will try to C<chown> your files if it is 1876b39c5158Smillertable to. In some cases, this may not be desired. In that case, set 1877b39c5158Smillertthis variable to C<0> to disable C<chown>-ing, even if it were 1878b39c5158Smillertpossible. 1879b39c5158Smillert 1880b39c5158SmillertThe default is C<1>. 1881b39c5158Smillert 1882b39c5158Smillert=head2 $Archive::Tar::CHMOD 1883b39c5158Smillert 1884b39c5158SmillertBy default, C<Archive::Tar> will try to C<chmod> your files to 1885b39c5158Smillertwhatever mode was specified for the particular file in the archive. 1886b39c5158SmillertIn some cases, this may not be desired. In that case, set this 1887b39c5158Smillertvariable to C<0> to disable C<chmod>-ing. 1888b39c5158Smillert 1889b39c5158SmillertThe default is C<1>. 1890b39c5158Smillert 1891b39c5158Smillert=head2 $Archive::Tar::SAME_PERMISSIONS 1892b39c5158Smillert 1893b39c5158SmillertWhen, C<$Archive::Tar::CHMOD> is enabled, this setting controls whether 1894b39c5158Smillertthe permissions on files from the archive are used without modification 1895b39c5158Smillertof if they are filtered by removing any setid bits and applying the 1896b39c5158Smillertcurrent umask. 1897b39c5158Smillert 1898b39c5158SmillertThe default is C<1> for the root user and C<0> for normal users. 1899b39c5158Smillert 1900b39c5158Smillert=head2 $Archive::Tar::DO_NOT_USE_PREFIX 1901b39c5158Smillert 1902b39c5158SmillertBy default, C<Archive::Tar> will try to put paths that are over 1903b39c5158Smillert100 characters in the C<prefix> field of your tar header, as 1904b39c5158Smillertdefined per POSIX-standard. However, some (older) tar programs 1905b39c5158Smillertdo not implement this spec. To retain compatibility with these older 1906b39c5158Smillertor non-POSIX compliant versions, you can set the C<$DO_NOT_USE_PREFIX> 1907b39c5158Smillertvariable to a true value, and C<Archive::Tar> will use an alternate 1908b39c5158Smillertway of dealing with paths over 100 characters by using the 1909b39c5158SmillertC<GNU Extended Header> feature. 1910b39c5158Smillert 1911b39c5158SmillertNote that clients who do not support the C<GNU Extended Header> 1912b39c5158Smillertfeature will not be able to read these archives. Such clients include 1913b39c5158Smillerttars on C<Solaris>, C<Irix> and C<AIX>. 1914b39c5158Smillert 1915b39c5158SmillertThe default is C<0>. 1916b39c5158Smillert 1917b39c5158Smillert=head2 $Archive::Tar::DEBUG 1918b39c5158Smillert 1919b39c5158SmillertSet this variable to C<1> to always get the C<Carp::longmess> output 1920b39c5158Smillertof the warnings, instead of the regular C<carp>. This is the same 1921b39c5158Smillertmessage you would get by doing: 1922b39c5158Smillert 1923b39c5158Smillert $tar->error(1); 1924b39c5158Smillert 1925b39c5158SmillertDefaults to C<0>. 1926b39c5158Smillert 1927b39c5158Smillert=head2 $Archive::Tar::WARN 1928b39c5158Smillert 1929b39c5158SmillertSet this variable to C<0> if you do not want any warnings printed. 1930b39c5158SmillertPersonally I recommend against doing this, but people asked for the 1931b39c5158Smillertoption. Also, be advised that this is of course not threadsafe. 1932b39c5158Smillert 1933b39c5158SmillertDefaults to C<1>. 1934b39c5158Smillert 1935b39c5158Smillert=head2 $Archive::Tar::error 1936b39c5158Smillert 1937b39c5158SmillertHolds the last reported error. Kept for historical reasons, but its 1938b39c5158Smillertuse is very much discouraged. Use the C<error()> method instead: 1939b39c5158Smillert 1940b39c5158Smillert warn $tar->error unless $tar->extract; 1941b39c5158Smillert 1942b39c5158SmillertNote that in older versions of this module, the C<error()> method 1943b39c5158Smillertwould return an effectively global value even when called an instance 1944b39c5158Smillertmethod as above. This has since been fixed, and multiple instances of 1945b39c5158SmillertC<Archive::Tar> now have separate error strings. 1946b39c5158Smillert 1947b39c5158Smillert=head2 $Archive::Tar::INSECURE_EXTRACT_MODE 1948b39c5158Smillert 1949b39c5158SmillertThis variable indicates whether C<Archive::Tar> should allow 1950b39c5158Smillertfiles to be extracted outside their current working directory. 1951b39c5158Smillert 1952b39c5158SmillertAllowing this could have security implications, as a malicious 1953b39c5158Smillerttar archive could alter or replace any file the extracting user 1954b39c5158Smillerthas permissions to. Therefor, the default is to not allow 1955b39c5158Smillertinsecure extractions. 1956b39c5158Smillert 1957b39c5158SmillertIf you trust the archive, or have other reasons to allow the 1958b39c5158Smillertarchive to write files outside your current working directory, 1959b39c5158Smillertset this variable to C<true>. 1960b39c5158Smillert 1961b39c5158SmillertNote that this is a backwards incompatible change from version 1962b39c5158SmillertC<1.36> and before. 1963b39c5158Smillert 1964b39c5158Smillert=head2 $Archive::Tar::HAS_PERLIO 1965b39c5158Smillert 1966b39c5158SmillertThis variable holds a boolean indicating if we currently have 1967b39c5158SmillertC<perlio> support loaded. This will be enabled for any perl 1968b39c5158Smillertgreater than C<5.8> compiled with C<perlio>. 1969b39c5158Smillert 1970b39c5158SmillertIf you feel strongly about disabling it, set this variable to 1971b39c5158SmillertC<false>. Note that you will then need C<IO::String> installed 1972b39c5158Smillertto support writing stringified archives. 1973b39c5158Smillert 1974b39c5158SmillertDon't change this variable unless you B<really> know what you're 1975b39c5158Smillertdoing. 1976b39c5158Smillert 1977b39c5158Smillert=head2 $Archive::Tar::HAS_IO_STRING 1978b39c5158Smillert 1979b39c5158SmillertThis variable holds a boolean indicating if we currently have 1980b39c5158SmillertC<IO::String> support loaded. This will be enabled for any perl 1981b39c5158Smillertthat has a loadable C<IO::String> module. 1982b39c5158Smillert 1983b39c5158SmillertIf you feel strongly about disabling it, set this variable to 1984b39c5158SmillertC<false>. Note that you will then need C<perlio> support from 1985b39c5158Smillertyour perl to be able to write stringified archives. 1986b39c5158Smillert 1987b39c5158SmillertDon't change this variable unless you B<really> know what you're 1988b39c5158Smillertdoing. 1989b39c5158Smillert 1990*898184e3Ssthen=head2 $Archive::Tar::ZERO_PAD_NUMBERS 1991*898184e3Ssthen 1992*898184e3SsthenThis variable holds a boolean indicating if we will create 1993*898184e3Ssthenzero padded numbers for C<size>, C<mtime> and C<checksum>. 1994*898184e3SsthenThe default is C<0>, indicating that we will create space padded 1995*898184e3Ssthennumbers. Added for compatibility with C<busybox> implementations. 1996*898184e3Ssthen 1997b39c5158Smillert=head1 FAQ 1998b39c5158Smillert 1999b39c5158Smillert=over 4 2000b39c5158Smillert 2001b39c5158Smillert=item What's the minimum perl version required to run Archive::Tar? 2002b39c5158Smillert 2003b39c5158SmillertYou will need perl version 5.005_03 or newer. 2004b39c5158Smillert 2005b39c5158Smillert=item Isn't Archive::Tar slow? 2006b39c5158Smillert 2007b39c5158SmillertYes it is. It's pure perl, so it's a lot slower then your C</bin/tar> 2008b39c5158SmillertHowever, it's very portable. If speed is an issue, consider using 2009b39c5158SmillertC</bin/tar> instead. 2010b39c5158Smillert 2011b39c5158Smillert=item Isn't Archive::Tar heavier on memory than /bin/tar? 2012b39c5158Smillert 2013b39c5158SmillertYes it is, see previous answer. Since C<Compress::Zlib> and therefore 2014b39c5158SmillertC<IO::Zlib> doesn't support C<seek> on their filehandles, there is little 2015b39c5158Smillertchoice but to read the archive into memory. 2016b39c5158SmillertThis is ok if you want to do in-memory manipulation of the archive. 2017b39c5158Smillert 2018b39c5158SmillertIf you just want to extract, use the C<extract_archive> class method 2019b39c5158Smillertinstead. It will optimize and write to disk immediately. 2020b39c5158Smillert 2021b39c5158SmillertAnother option is to use the C<iter> class method to iterate over 2022b39c5158Smillertthe files in the tarball without reading them all in memory at once. 2023b39c5158Smillert 2024b39c5158Smillert=item Can you lazy-load data instead? 2025b39c5158Smillert 2026b39c5158SmillertIn some cases, yes. You can use the C<iter> class method to iterate 2027b39c5158Smillertover the files in the tarball without reading them all in memory at once. 2028b39c5158Smillert 2029b39c5158Smillert=item How much memory will an X kb tar file need? 2030b39c5158Smillert 2031b39c5158SmillertProbably more than X kb, since it will all be read into memory. If 2032b39c5158Smillertthis is a problem, and you don't need to do in memory manipulation 2033b39c5158Smillertof the archive, consider using the C<iter> class method, or C</bin/tar> 2034b39c5158Smillertinstead. 2035b39c5158Smillert 2036b39c5158Smillert=item What do you do with unsupported filetypes in an archive? 2037b39c5158Smillert 2038b39c5158SmillertC<Unix> has a few filetypes that aren't supported on other platforms, 2039b39c5158Smillertlike C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just 2040b39c5158Smillerttry to make a copy of the original file, rather than throwing an error. 2041b39c5158Smillert 2042b39c5158SmillertThis does require you to read the entire archive in to memory first, 2043b39c5158Smillertsince otherwise we wouldn't know what data to fill the copy with. 2044b39c5158Smillert(This means that you cannot use the class methods, including C<iter> 2045b39c5158Smillerton archives that have incompatible filetypes and still expect things 2046b39c5158Smillertto work). 2047b39c5158Smillert 2048b39c5158SmillertFor other filetypes, like C<chardevs> and C<blockdevs> we'll warn that 2049b39c5158Smillertthe extraction of this particular item didn't work. 2050b39c5158Smillert 2051b39c5158Smillert=item I'm using WinZip, or some other non-POSIX client, and files are not being extracted properly! 2052b39c5158Smillert 2053b39c5158SmillertBy default, C<Archive::Tar> is in a completely POSIX-compatible 2054b39c5158Smillertmode, which uses the POSIX-specification of C<tar> to store files. 2055*898184e3SsthenFor paths greater than 100 characters, this is done using the 2056b39c5158SmillertC<POSIX header prefix>. Non-POSIX-compatible clients may not support 2057b39c5158Smillertthis part of the specification, and may only support the C<GNU Extended 2058b39c5158SmillertHeader> functionality. To facilitate those clients, you can set the 2059b39c5158SmillertC<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. See the 2060b39c5158SmillertC<GLOBAL VARIABLES> section for details on this variable. 2061b39c5158Smillert 2062b39c5158SmillertNote that GNU tar earlier than version 1.14 does not cope well with 2063b39c5158Smillertthe C<POSIX header prefix>. If you use such a version, consider setting 2064b39c5158Smillertthe C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. 2065b39c5158Smillert 2066b39c5158Smillert=item How do I extract only files that have property X from an archive? 2067b39c5158Smillert 2068b39c5158SmillertSometimes, you might not wish to extract a complete archive, just 2069b39c5158Smillertthe files that are relevant to you, based on some criteria. 2070b39c5158Smillert 2071b39c5158SmillertYou can do this by filtering a list of C<Archive::Tar::File> objects 2072b39c5158Smillertbased on your criteria. For example, to extract only files that have 2073b39c5158Smillertthe string C<foo> in their title, you would use: 2074b39c5158Smillert 2075b39c5158Smillert $tar->extract( 2076b39c5158Smillert grep { $_->full_path =~ /foo/ } $tar->get_files 2077b39c5158Smillert ); 2078b39c5158Smillert 2079b39c5158SmillertThis way, you can filter on any attribute of the files in the archive. 2080b39c5158SmillertConsult the C<Archive::Tar::File> documentation on how to use these 2081b39c5158Smillertobjects. 2082b39c5158Smillert 2083b39c5158Smillert=item How do I access .tar.Z files? 2084b39c5158Smillert 2085b39c5158SmillertThe C<Archive::Tar> module can optionally use C<Compress::Zlib> (via 2086b39c5158Smillertthe C<IO::Zlib> module) to access tar files that have been compressed 2087b39c5158Smillertwith C<gzip>. Unfortunately tar files compressed with the Unix C<compress> 2088b39c5158Smillertutility cannot be read by C<Compress::Zlib> and so cannot be directly 2089b39c5158Smillertaccesses by C<Archive::Tar>. 2090b39c5158Smillert 2091b39c5158SmillertIf the C<uncompress> or C<gunzip> programs are available, you can use 2092b39c5158Smillertone of these workarounds to read C<.tar.Z> files from C<Archive::Tar> 2093b39c5158Smillert 2094b39c5158SmillertFirstly with C<uncompress> 2095b39c5158Smillert 2096b39c5158Smillert use Archive::Tar; 2097b39c5158Smillert 2098b39c5158Smillert open F, "uncompress -c $filename |"; 2099b39c5158Smillert my $tar = Archive::Tar->new(*F); 2100b39c5158Smillert ... 2101b39c5158Smillert 2102b39c5158Smillertand this with C<gunzip> 2103b39c5158Smillert 2104b39c5158Smillert use Archive::Tar; 2105b39c5158Smillert 2106b39c5158Smillert open F, "gunzip -c $filename |"; 2107b39c5158Smillert my $tar = Archive::Tar->new(*F); 2108b39c5158Smillert ... 2109b39c5158Smillert 2110b39c5158SmillertSimilarly, if the C<compress> program is available, you can use this to 2111b39c5158Smillertwrite a C<.tar.Z> file 2112b39c5158Smillert 2113b39c5158Smillert use Archive::Tar; 2114b39c5158Smillert use IO::File; 2115b39c5158Smillert 2116b39c5158Smillert my $fh = new IO::File "| compress -c >$filename"; 2117b39c5158Smillert my $tar = Archive::Tar->new(); 2118b39c5158Smillert ... 2119b39c5158Smillert $tar->write($fh); 2120b39c5158Smillert $fh->close ; 2121b39c5158Smillert 2122b39c5158Smillert=item How do I handle Unicode strings? 2123b39c5158Smillert 2124b39c5158SmillertC<Archive::Tar> uses byte semantics for any files it reads from or writes 2125b39c5158Smillertto disk. This is not a problem if you only deal with files and never 2126b39c5158Smillertlook at their content or work solely with byte strings. But if you use 2127b39c5158SmillertUnicode strings with character semantics, some additional steps need 2128b39c5158Smillertto be taken. 2129b39c5158Smillert 2130b39c5158SmillertFor example, if you add a Unicode string like 2131b39c5158Smillert 2132b39c5158Smillert # Problem 2133b39c5158Smillert $tar->add_data('file.txt', "Euro: \x{20AC}"); 2134b39c5158Smillert 2135b39c5158Smillertthen there will be a problem later when the tarfile gets written out 2136b39c5158Smillertto disk via C<$tar->write()>: 2137b39c5158Smillert 2138b39c5158Smillert Wide character in print at .../Archive/Tar.pm line 1014. 2139b39c5158Smillert 2140b39c5158SmillertThe data was added as a Unicode string and when writing it out to disk, 2141b39c5158Smillertthe C<:utf8> line discipline wasn't set by C<Archive::Tar>, so Perl 2142b39c5158Smillerttried to convert the string to ISO-8859 and failed. The written file 2143b39c5158Smillertnow contains garbage. 2144b39c5158Smillert 2145b39c5158SmillertFor this reason, Unicode strings need to be converted to UTF-8-encoded 2146b39c5158Smillertbytestrings before they are handed off to C<add_data()>: 2147b39c5158Smillert 2148b39c5158Smillert use Encode; 2149b39c5158Smillert my $data = "Accented character: \x{20AC}"; 2150b39c5158Smillert $data = encode('utf8', $data); 2151b39c5158Smillert 2152b39c5158Smillert $tar->add_data('file.txt', $data); 2153b39c5158Smillert 2154b39c5158SmillertA opposite problem occurs if you extract a UTF8-encoded file from a 2155b39c5158Smillerttarball. Using C<get_content()> on the C<Archive::Tar::File> object 2156b39c5158Smillertwill return its content as a bytestring, not as a Unicode string. 2157b39c5158Smillert 2158b39c5158SmillertIf you want it to be a Unicode string (because you want character 2159b39c5158Smillertsemantics with operations like regular expression matching), you need 2160b39c5158Smillertto decode the UTF8-encoded content and have Perl convert it into 2161b39c5158Smillerta Unicode string: 2162b39c5158Smillert 2163b39c5158Smillert use Encode; 2164b39c5158Smillert my $data = $tar->get_content(); 2165b39c5158Smillert 2166b39c5158Smillert # Make it a Unicode string 2167b39c5158Smillert $data = decode('utf8', $data); 2168b39c5158Smillert 2169b39c5158SmillertThere is no easy way to provide this functionality in C<Archive::Tar>, 2170b39c5158Smillertbecause a tarball can contain many files, and each of which could be 2171b39c5158Smillertencoded in a different way. 2172b39c5158Smillert 2173b39c5158Smillert=back 2174b39c5158Smillert 2175b39c5158Smillert=head1 CAVEATS 2176b39c5158Smillert 2177b39c5158SmillertThe AIX tar does not fill all unused space in the tar archive with 0x00. 2178b39c5158SmillertThis sometimes leads to warning messages from C<Archive::Tar>. 2179b39c5158Smillert 2180b39c5158Smillert Invalid header block at offset nnn 2181b39c5158Smillert 2182b39c5158SmillertA fix for that problem is scheduled to be released in the following levels 2183b39c5158Smillertof AIX, all of which should be coming out in the 4th quarter of 2009: 2184b39c5158Smillert 2185b39c5158Smillert AIX 5.3 TL7 SP10 2186b39c5158Smillert AIX 5.3 TL8 SP8 2187b39c5158Smillert AIX 5.3 TL9 SP5 2188b39c5158Smillert AIX 5.3 TL10 SP2 2189b39c5158Smillert 2190b39c5158Smillert AIX 6.1 TL0 SP11 2191b39c5158Smillert AIX 6.1 TL1 SP7 2192b39c5158Smillert AIX 6.1 TL2 SP6 2193b39c5158Smillert AIX 6.1 TL3 SP3 2194b39c5158Smillert 2195b39c5158SmillertThe IBM APAR number for this problem is IZ50240 (Reported component ID: 2196b39c5158Smillert5765G0300 / AIX 5.3). It is possible to get an ifix for that problem. 2197b39c5158SmillertIf you need an ifix please contact your local IBM AIX support. 2198b39c5158Smillert 2199b39c5158Smillert=head1 TODO 2200b39c5158Smillert 2201b39c5158Smillert=over 4 2202b39c5158Smillert 2203b39c5158Smillert=item Check if passed in handles are open for read/write 2204b39c5158Smillert 2205b39c5158SmillertCurrently I don't know of any portable pure perl way to do this. 2206b39c5158SmillertSuggestions welcome. 2207b39c5158Smillert 2208b39c5158Smillert=item Allow archives to be passed in as string 2209b39c5158Smillert 2210b39c5158SmillertCurrently, we only allow opened filehandles or filenames, but 2211b39c5158Smillertnot strings. The internals would need some reworking to facilitate 2212b39c5158Smillertstringified archives. 2213b39c5158Smillert 2214b39c5158Smillert=item Facilitate processing an opened filehandle of a compressed archive 2215b39c5158Smillert 2216b39c5158SmillertCurrently, we only support this if the filehandle is an IO::Zlib object. 2217b39c5158SmillertEnvironments, like apache, will present you with an opened filehandle 2218b39c5158Smillertto an uploaded file, which might be a compressed archive. 2219b39c5158Smillert 2220b39c5158Smillert=back 2221b39c5158Smillert 2222b39c5158Smillert=head1 SEE ALSO 2223b39c5158Smillert 2224b39c5158Smillert=over 4 2225b39c5158Smillert 2226b39c5158Smillert=item The GNU tar specification 2227b39c5158Smillert 2228b39c5158SmillertC<http://www.gnu.org/software/tar/manual/tar.html> 2229b39c5158Smillert 2230*898184e3Ssthen=item The PAX format specification 2231b39c5158Smillert 2232*898184e3SsthenThe specification which tar derives from; C< http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html> 2233b39c5158Smillert 2234b39c5158Smillert=item A comparison of GNU and POSIX tar standards; C<http://www.delorie.com/gnu/docs/tar/tar_114.html> 2235b39c5158Smillert 2236b39c5158Smillert=item GNU tar intends to switch to POSIX compatibility 2237b39c5158Smillert 2238b39c5158SmillertGNU Tar authors have expressed their intention to become completely 2239b39c5158SmillertPOSIX-compatible; C<http://www.gnu.org/software/tar/manual/html_node/Formats.html> 2240b39c5158Smillert 2241b39c5158Smillert=item A Comparison between various tar implementations 2242b39c5158Smillert 2243b39c5158SmillertLists known issues and incompatibilities; C<http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs> 2244b39c5158Smillert 2245b39c5158Smillert=back 2246b39c5158Smillert 2247b39c5158Smillert=head1 AUTHOR 2248b39c5158Smillert 2249b39c5158SmillertThis module by Jos Boumans E<lt>kane@cpan.orgE<gt>. 2250b39c5158Smillert 2251b39c5158SmillertPlease reports bugs to E<lt>bug-archive-tar@rt.cpan.orgE<gt>. 2252b39c5158Smillert 2253b39c5158Smillert=head1 ACKNOWLEDGEMENTS 2254b39c5158Smillert 2255b39c5158SmillertThanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney, Gisle Aas, 2256b39c5158SmillertRainer Tammer and especially Andrew Savige for their help and suggestions. 2257b39c5158Smillert 2258b39c5158Smillert=head1 COPYRIGHT 2259b39c5158Smillert 2260b39c5158SmillertThis module is copyright (c) 2002 - 2009 Jos Boumans 2261b39c5158SmillertE<lt>kane@cpan.orgE<gt>. All rights reserved. 2262b39c5158Smillert 2263b39c5158SmillertThis library is free software; you may redistribute and/or modify 2264b39c5158Smillertit under the same terms as Perl itself. 2265b39c5158Smillert 2266b39c5158Smillert=cut 2267