1package Archive::Tar::File; 2use strict; 3 4use Carp (); 5use IO::File; 6use File::Spec::Unix (); 7use File::Spec (); 8use File::Basename (); 9 10use Archive::Tar::Constant; 11 12use vars qw[@ISA $VERSION]; 13#@ISA = qw[Archive::Tar]; 14$VERSION = '3.02_001'; 15 16### set value to 1 to oct() it during the unpack ### 17 18my $tmpl = [ 19 name => 0, # string A100 20 mode => 1, # octal A8 21 uid => 1, # octal A8 22 gid => 1, # octal A8 23 size => 0, # octal # cdrake - not *always* octal.. A12 24 mtime => 1, # octal A12 25 chksum => 1, # octal A8 26 type => 0, # character A1 27 linkname => 0, # string A100 28 magic => 0, # string A6 29 version => 0, # 2 bytes A2 30 uname => 0, # string A32 31 gname => 0, # string A32 32 devmajor => 1, # octal A8 33 devminor => 1, # octal A8 34 prefix => 0, # A155 x 12 35 36### end UNPACK items ### 37 raw => 0, # the raw data chunk 38 data => 0, # the data associated with the file -- 39 # This might be very memory intensive 40]; 41 42### install get/set accessors for this object. 43for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) { 44 my $key = $tmpl->[$i]; 45 no strict 'refs'; 46 *{__PACKAGE__."::$key"} = sub { 47 my $self = shift; 48 $self->{$key} = $_[0] if @_; 49 50 ### just in case the key is not there or undef or something ### 51 { local $^W = 0; 52 return $self->{$key}; 53 } 54 } 55} 56 57=head1 NAME 58 59Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar 60 61=head1 SYNOPSIS 62 63 my @items = $tar->get_files; 64 65 print $_->name, ' ', $_->size, "\n" for @items; 66 67 print $object->get_content; 68 $object->replace_content('new content'); 69 70 $object->rename( 'new/full/path/to/file.c' ); 71 72=head1 DESCRIPTION 73 74Archive::Tar::File provides a neat little object layer for in-memory 75extracted files. It's mostly used internally in Archive::Tar to tidy 76up the code, but there's no reason users shouldn't use this API as 77well. 78 79=head2 Accessors 80 81A lot of the methods in this package are accessors to the various 82fields in the tar header: 83 84=over 4 85 86=item name 87 88The file's name 89 90=item mode 91 92The file's mode 93 94=item uid 95 96The user id owning the file 97 98=item gid 99 100The group id owning the file 101 102=item size 103 104File size in bytes 105 106=item mtime 107 108Modification time. Adjusted to mac-time on MacOS if required 109 110=item chksum 111 112Checksum field for the tar header 113 114=item type 115 116File type -- numeric, but comparable to exported constants -- see 117Archive::Tar's documentation 118 119=item linkname 120 121If the file is a symlink, the file it's pointing to 122 123=item magic 124 125Tar magic string -- not useful for most users 126 127=item version 128 129Tar version string -- not useful for most users 130 131=item uname 132 133The user name that owns the file 134 135=item gname 136 137The group name that owns the file 138 139=item devmajor 140 141Device major number in case of a special file 142 143=item devminor 144 145Device minor number in case of a special file 146 147=item prefix 148 149Any directory to prefix to the extraction path, if any 150 151=item raw 152 153Raw tar header -- not useful for most users 154 155=back 156 157=head1 Methods 158 159=head2 Archive::Tar::File->new( file => $path ) 160 161Returns a new Archive::Tar::File object from an existing file. 162 163Returns undef on failure. 164 165=head2 Archive::Tar::File->new( data => $path, $data, $opt ) 166 167Returns a new Archive::Tar::File object from data. 168 169C<$path> defines the file name (which need not exist), C<$data> the 170file contents, and C<$opt> is a reference to a hash of attributes 171which may be used to override the default attributes (fields in the 172tar header), which are described above in the Accessors section. 173 174Returns undef on failure. 175 176=head2 Archive::Tar::File->new( chunk => $chunk ) 177 178Returns a new Archive::Tar::File object from a raw 512-byte tar 179archive chunk. 180 181Returns undef on failure. 182 183=cut 184 185sub new { 186 my $class = shift; 187 my $what = shift; 188 189 my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) : 190 ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) : 191 ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) : 192 undef; 193 194 return $obj; 195} 196 197### copies the data, creates a clone ### 198sub clone { 199 my $self = shift; 200 return bless { %$self }, ref $self; 201} 202 203sub _new_from_chunk { 204 my $class = shift; 205 my $chunk = shift or return; # 512 bytes of tar header 206 my %hash = @_; 207 208 ### filter any arguments on defined-ness of values. 209 ### this allows overriding from what the tar-header is saying 210 ### about this tar-entry. Particularly useful for @LongLink files 211 my %args = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash; 212 213 ### makes it start at 0 actually... :) ### 214 my $i = -1; 215 my %entry = map { 216 my ($s,$v)=($tmpl->[++$i],$tmpl->[++$i]); # cdrake 217 ($_)=($_=~/^([^\0]*)/) unless($s eq 'size'); # cdrake 218 $s=> $v ? oct $_ : $_ # cdrake 219 # $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_ # removed by cdrake - mucks up binary sizes >8gb 220 } unpack( UNPACK, $chunk ); # cdrake 221 # } map { /^([^\0]*)/ } unpack( UNPACK, $chunk ); # old - replaced now by cdrake 222 223 224 if(substr($entry{'size'}, 0, 1) eq "\x80") { # binary size extension for files >8gigs (> octal 77777777777777) # cdrake 225 my @sz=unpack("aCSNN",$entry{'size'}); $entry{'size'}=$sz[4]+(2**32)*$sz[3]+$sz[2]*(2**64); # Use the low 80 bits (should use the upper 15 as well, but as at year 2011, that seems unlikely to ever be needed - the numbers are just too big...) # cdrake 226 } else { # cdrake 227 ($entry{'size'})=($entry{'size'}=~/^([^\0]*)/); $entry{'size'}=oct $entry{'size'}; # cdrake 228 } # cdrake 229 230 231 my $obj = bless { %entry, %args }, $class; 232 233 ### magic is a filetype string.. it should have something like 'ustar' or 234 ### something similar... if the chunk is garbage, skip it 235 return unless $obj->magic !~ /\W/; 236 237 ### store the original chunk ### 238 $obj->raw( $chunk ); 239 240 $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) ); 241 $obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m|/$|) ); 242 243 244 return $obj; 245 246} 247 248sub _new_from_file { 249 my $class = shift; 250 my $path = shift; 251 252 ### path has to at least exist 253 return unless defined $path; 254 255 my $type = __PACKAGE__->_filetype($path); 256 my $data = ''; 257 258 READ: { 259 unless ($type == DIR ) { 260 my $fh = IO::File->new; 261 262 unless( $fh->open($path) ) { 263 ### dangling symlinks are fine, stop reading but continue 264 ### creating the object 265 last READ if $type == SYMLINK; 266 267 ### otherwise, return from this function -- 268 ### anything that's *not* a symlink should be 269 ### resolvable 270 return; 271 } 272 273 ### binmode needed to read files properly on win32 ### 274 binmode $fh; 275 $data = do { local $/; <$fh> }; 276 close $fh; 277 } 278 } 279 280 my @items = qw[mode uid gid size mtime]; 281 my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9]; 282 283 if (ON_VMS) { 284 ### VMS has two UID modes, traditional and POSIX. Normally POSIX is 285 ### not used. We currently do not have an easy way to see if we are in 286 ### POSIX mode. In traditional mode, the UID is actually the VMS UIC. 287 ### The VMS UIC has the upper 16 bits is the GID, which in many cases 288 ### the VMS UIC will be larger than 209715, the largest that TAR can 289 ### handle. So for now, assume it is traditional if the UID is larger 290 ### than 0x10000. 291 292 if ($hash{uid} > 0x10000) { 293 $hash{uid} = $hash{uid} & 0xFFFF; 294 } 295 296 ### The file length from stat() is the physical length of the file 297 ### However the amount of data read in may be more for some file types. 298 ### Fixed length files are read past the logical EOF to end of the block 299 ### containing. Other file types get expanded on read because record 300 ### delimiters are added. 301 302 my $data_len = length $data; 303 $hash{size} = $data_len if $hash{size} < $data_len; 304 305 } 306 ### you *must* set size == 0 on symlinks, or the next entry will be 307 ### though of as the contents of the symlink, which is wrong. 308 ### this fixes bug #7937 309 $hash{size} = 0 if ($type == DIR or $type == SYMLINK); 310 $hash{mtime} -= TIME_OFFSET; 311 312 ### strip the high bits off the mode, which we don't need to store 313 $hash{mode} = STRIP_MODE->( $hash{mode} ); 314 315 316 ### probably requires some file path munging here ... ### 317 ### name and prefix are set later 318 my $obj = { 319 %hash, 320 name => '', 321 chksum => CHECK_SUM, 322 type => $type, 323 linkname => ($type == SYMLINK and CAN_READLINK) 324 ? readlink $path 325 : '', 326 magic => MAGIC, 327 version => TAR_VERSION, 328 uname => UNAME->( $hash{uid} ), 329 gname => GNAME->( $hash{gid} ), 330 devmajor => 0, # not handled 331 devminor => 0, # not handled 332 prefix => '', 333 data => $data, 334 }; 335 336 bless $obj, $class; 337 338 ### fix up the prefix and file from the path 339 my($prefix,$file) = $obj->_prefix_and_file( $path ); 340 $obj->prefix( $prefix ); 341 $obj->name( $file ); 342 343 return $obj; 344} 345 346sub _new_from_data { 347 my $class = shift; 348 my $path = shift; return unless defined $path; 349 my $data = shift; return unless defined $data; 350 my $opt = shift; 351 352 my $obj = { 353 data => $data, 354 name => '', 355 mode => MODE, 356 uid => UID, 357 gid => GID, 358 size => length $data, 359 mtime => time - TIME_OFFSET, 360 chksum => CHECK_SUM, 361 type => FILE, 362 linkname => '', 363 magic => MAGIC, 364 version => TAR_VERSION, 365 uname => UNAME->( UID ), 366 gname => GNAME->( GID ), 367 devminor => 0, 368 devmajor => 0, 369 prefix => '', 370 }; 371 372 ### overwrite with user options, if provided ### 373 if( $opt and ref $opt eq 'HASH' ) { 374 for my $key ( keys %$opt ) { 375 376 ### don't write bogus options ### 377 next unless exists $obj->{$key}; 378 $obj->{$key} = $opt->{$key}; 379 } 380 } 381 382 bless $obj, $class; 383 384 ### fix up the prefix and file from the path 385 my($prefix,$file) = $obj->_prefix_and_file( $path ); 386 $obj->prefix( $prefix ); 387 $obj->name( $file ); 388 389 return $obj; 390} 391 392sub _prefix_and_file { 393 my $self = shift; 394 my $path = shift; 395 396 my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir ); 397 my @dirs = File::Spec->splitdir( File::Spec->canonpath($dirs) ); 398 399 ### if it's a directory, then $file might be empty 400 $file = pop @dirs if $self->is_dir and not length $file; 401 402 ### splitting ../ gives you the relative path in native syntax 403 ### Remove the root (000000) directory 404 ### The volume from splitpath will also be in native syntax 405 if (ON_VMS) { 406 map { $_ = '..' if $_ eq '-'; $_ = '' if $_ eq '000000' } @dirs; 407 if (length($vol)) { 408 $vol = VMS::Filespec::unixify($vol); 409 unshift @dirs, $vol; 410 } 411 } 412 413 my $prefix = File::Spec::Unix->catdir(@dirs); 414 return( $prefix, $file ); 415} 416 417sub _filetype { 418 my $self = shift; 419 my $file = shift; 420 421 return unless defined $file; 422 423 return SYMLINK if (-l $file); # Symlink 424 425 return FILE if (-f _); # Plain file 426 427 return DIR if (-d _); # Directory 428 429 return FIFO if (-p _); # Named pipe 430 431 return SOCKET if (-S _); # Socket 432 433 return BLOCKDEV if (-b _); # Block special 434 435 return CHARDEV if (-c _); # Character special 436 437 ### shouldn't happen, this is when making archives, not reading ### 438 return LONGLINK if ( $file eq LONGLINK_NAME ); 439 440 return UNKNOWN; # Something else (like what?) 441 442} 443 444### this method 'downgrades' a file to plain file -- this is used for 445### symlinks when FOLLOW_SYMLINKS is true. 446sub _downgrade_to_plainfile { 447 my $entry = shift; 448 $entry->type( FILE ); 449 $entry->mode( MODE ); 450 $entry->linkname(''); 451 452 return 1; 453} 454 455=head2 $bool = $file->extract( [ $alternative_name ] ) 456 457Extract this object, optionally to an alternative name. 458 459See C<< Archive::Tar->extract_file >> for details. 460 461Returns true on success and false on failure. 462 463=cut 464 465sub extract { 466 my $self = shift; 467 468 local $Carp::CarpLevel += 1; 469 470 ### avoid circular use, so only require; 471 require Archive::Tar; 472 return Archive::Tar->_extract_file( $self, @_ ); 473} 474 475=head2 $path = $file->full_path 476 477Returns the full path from the tar header; this is basically a 478concatenation of the C<prefix> and C<name> fields. 479 480=cut 481 482sub full_path { 483 my $self = shift; 484 485 ### if prefix field is empty 486 return $self->name unless defined $self->prefix and length $self->prefix; 487 488 ### or otherwise, catfile'd 489 my $path = File::Spec::Unix->catfile( $self->prefix, $self->name ); 490 $path .= "/" if $self->name =~ m{/$}; # Re-add trailing slash if necessary, as catfile() strips them off. 491 return $path; 492} 493 494 495=head2 $bool = $file->validate 496 497Done by Archive::Tar internally when reading the tar file: 498validate the header against the checksum to ensure integer tar file. 499 500Returns true on success, false on failure 501 502=cut 503 504sub validate { 505 my $self = shift; 506 507 my $raw = $self->raw; 508 509 ### don't know why this one is different from the one we /write/ ### 510 substr ($raw, 148, 8) = " "; 511 512 ### bug #43513: [PATCH] Accept wrong checksums from SunOS and HP-UX tar 513 ### like GNU tar does. See here for details: 514 ### http://www.gnu.org/software/tar/manual/tar.html#SEC139 515 ### so we do both a signed AND unsigned validate. if one succeeds, that's 516 ### good enough 517 return ( (unpack ("%16C*", $raw) == $self->chksum) 518 or (unpack ("%16c*", $raw) == $self->chksum)) ? 1 : 0; 519} 520 521=head2 $bool = $file->has_content 522 523Returns a boolean to indicate whether the current object has content. 524Some special files like directories and so on never will have any 525content. This method is mainly to make sure you don't get warnings 526for using uninitialized values when looking at an object's content. 527 528=cut 529 530sub has_content { 531 my $self = shift; 532 return defined $self->data() && length $self->data() ? 1 : 0; 533} 534 535=head2 $content = $file->get_content 536 537Returns the current content for the in-memory file 538 539=cut 540 541sub get_content { 542 my $self = shift; 543 $self->data( ); 544} 545 546=head2 $cref = $file->get_content_by_ref 547 548Returns the current content for the in-memory file as a scalar 549reference. Normal users won't need this, but it will save memory if 550you are dealing with very large data files in your tar archive, since 551it will pass the contents by reference, rather than make a copy of it 552first. 553 554=cut 555 556sub get_content_by_ref { 557 my $self = shift; 558 559 return \$self->{data}; 560} 561 562=head2 $bool = $file->replace_content( $content ) 563 564Replace the current content of the file with the new content. This 565only affects the in-memory archive, not the on-disk version until 566you write it. 567 568Returns true on success, false on failure. 569 570=cut 571 572sub replace_content { 573 my $self = shift; 574 my $data = shift || ''; 575 576 $self->data( $data ); 577 $self->size( length $data ); 578 return 1; 579} 580 581=head2 $bool = $file->rename( $new_name ) 582 583Rename the current file to $new_name. 584 585Note that you must specify a Unix path for $new_name, since per tar 586standard, all files in the archive must be Unix paths. 587 588Returns true on success and false on failure. 589 590=cut 591 592sub rename { 593 my $self = shift; 594 my $path = shift; 595 596 return unless defined $path; 597 598 my ($prefix,$file) = $self->_prefix_and_file( $path ); 599 600 $self->name( $file ); 601 $self->prefix( $prefix ); 602 603 return 1; 604} 605 606=head2 $bool = $file->chmod( $mode ) 607 608Change mode of $file to $mode. The mode can be a string or a number 609which is interpreted as octal whether or not a leading 0 is given. 610 611Returns true on success and false on failure. 612 613=cut 614 615sub chmod { 616 my $self = shift; 617 my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/; 618 $self->{mode} = oct($mode); 619 return 1; 620} 621 622=head2 $bool = $file->chown( $user [, $group]) 623 624Change owner of $file to $user. If a $group is given that is changed 625as well. You can also pass a single parameter with a colon separating the 626use and group as in 'root:wheel'. 627 628Returns true on success and false on failure. 629 630=cut 631 632sub chown { 633 my $self = shift; 634 my $uname = shift; 635 return unless defined $uname; 636 my $gname; 637 if (-1 != index($uname, ':')) { 638 ($uname, $gname) = split(/:/, $uname); 639 } else { 640 $gname = shift if @_ > 0; 641 } 642 643 $self->uname( $uname ); 644 $self->gname( $gname ) if $gname; 645 return 1; 646} 647 648=head1 Convenience methods 649 650To quickly check the type of a C<Archive::Tar::File> object, you can 651use the following methods: 652 653=over 4 654 655=item $file->is_file 656 657Returns true if the file is of type C<file> 658 659=item $file->is_dir 660 661Returns true if the file is of type C<dir> 662 663=item $file->is_hardlink 664 665Returns true if the file is of type C<hardlink> 666 667=item $file->is_symlink 668 669Returns true if the file is of type C<symlink> 670 671=item $file->is_chardev 672 673Returns true if the file is of type C<chardev> 674 675=item $file->is_blockdev 676 677Returns true if the file is of type C<blockdev> 678 679=item $file->is_fifo 680 681Returns true if the file is of type C<fifo> 682 683=item $file->is_socket 684 685Returns true if the file is of type C<socket> 686 687=item $file->is_longlink 688 689Returns true if the file is of type C<LongLink>. 690Should not happen after a successful C<read>. 691 692=item $file->is_label 693 694Returns true if the file is of type C<Label>. 695Should not happen after a successful C<read>. 696 697=item $file->is_unknown 698 699Returns true if the file type is C<unknown> 700 701=back 702 703=cut 704 705#stupid perl5.5.3 needs to warn if it's not numeric 706sub is_file { local $^W; FILE == $_[0]->type } 707sub is_dir { local $^W; DIR == $_[0]->type } 708sub is_hardlink { local $^W; HARDLINK == $_[0]->type } 709sub is_symlink { local $^W; SYMLINK == $_[0]->type } 710sub is_chardev { local $^W; CHARDEV == $_[0]->type } 711sub is_blockdev { local $^W; BLOCKDEV == $_[0]->type } 712sub is_fifo { local $^W; FIFO == $_[0]->type } 713sub is_socket { local $^W; SOCKET == $_[0]->type } 714sub is_unknown { local $^W; UNKNOWN == $_[0]->type } 715sub is_longlink { local $^W; LONGLINK eq $_[0]->type } 716sub is_label { local $^W; LABEL eq $_[0]->type } 717 7181; 719