1package Archive::Tar::File; 2use strict; 3 4use Carp (); 5use IO::File; 6use File::Spec::Unix (); 7use File::Spec (); 8use File::Basename (); 9 10### avoid circular use, so only require; 11require Archive::Tar; 12use Archive::Tar::Constant; 13 14use vars qw[@ISA $VERSION]; 15#@ISA = qw[Archive::Tar]; 16$VERSION = '1.90'; 17 18### set value to 1 to oct() it during the unpack ### 19 20my $tmpl = [ 21 name => 0, # string A100 22 mode => 1, # octal A8 23 uid => 1, # octal A8 24 gid => 1, # octal A8 25 size => 0, # octal # cdrake - not *always* octal.. A12 26 mtime => 1, # octal A12 27 chksum => 1, # octal A8 28 type => 0, # character A1 29 linkname => 0, # string A100 30 magic => 0, # string A6 31 version => 0, # 2 bytes A2 32 uname => 0, # string A32 33 gname => 0, # string A32 34 devmajor => 1, # octal A8 35 devminor => 1, # octal A8 36 prefix => 0, # A155 x 12 37 38### end UNPACK items ### 39 raw => 0, # the raw data chunk 40 data => 0, # the data associated with the file -- 41 # This might be very memory intensive 42]; 43 44### install get/set accessors for this object. 45for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) { 46 my $key = $tmpl->[$i]; 47 no strict 'refs'; 48 *{__PACKAGE__."::$key"} = sub { 49 my $self = shift; 50 $self->{$key} = $_[0] if @_; 51 52 ### just in case the key is not there or undef or something ### 53 { local $^W = 0; 54 return $self->{$key}; 55 } 56 } 57} 58 59=head1 NAME 60 61Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar 62 63=head1 SYNOPSIS 64 65 my @items = $tar->get_files; 66 67 print $_->name, ' ', $_->size, "\n" for @items; 68 69 print $object->get_content; 70 $object->replace_content('new content'); 71 72 $object->rename( 'new/full/path/to/file.c' ); 73 74=head1 DESCRIPTION 75 76Archive::Tar::Files provides a neat little object layer for in-memory 77extracted files. It's mostly used internally in Archive::Tar to tidy 78up the code, but there's no reason users shouldn't use this API as 79well. 80 81=head2 Accessors 82 83A lot of the methods in this package are accessors to the various 84fields in the tar header: 85 86=over 4 87 88=item name 89 90The file's name 91 92=item mode 93 94The file's mode 95 96=item uid 97 98The user id owning the file 99 100=item gid 101 102The group id owning the file 103 104=item size 105 106File size in bytes 107 108=item mtime 109 110Modification time. Adjusted to mac-time on MacOS if required 111 112=item chksum 113 114Checksum field for the tar header 115 116=item type 117 118File type -- numeric, but comparable to exported constants -- see 119Archive::Tar's documentation 120 121=item linkname 122 123If the file is a symlink, the file it's pointing to 124 125=item magic 126 127Tar magic string -- not useful for most users 128 129=item version 130 131Tar version string -- not useful for most users 132 133=item uname 134 135The user name that owns the file 136 137=item gname 138 139The group name that owns the file 140 141=item devmajor 142 143Device major number in case of a special file 144 145=item devminor 146 147Device minor number in case of a special file 148 149=item prefix 150 151Any directory to prefix to the extraction path, if any 152 153=item raw 154 155Raw tar header -- not useful for most users 156 157=back 158 159=head1 Methods 160 161=head2 Archive::Tar::File->new( file => $path ) 162 163Returns a new Archive::Tar::File object from an existing file. 164 165Returns undef on failure. 166 167=head2 Archive::Tar::File->new( data => $path, $data, $opt ) 168 169Returns a new Archive::Tar::File object from data. 170 171C<$path> defines the file name (which need not exist), C<$data> the 172file contents, and C<$opt> is a reference to a hash of attributes 173which may be used to override the default attributes (fields in the 174tar header), which are described above in the Accessors section. 175 176Returns undef on failure. 177 178=head2 Archive::Tar::File->new( chunk => $chunk ) 179 180Returns a new Archive::Tar::File object from a raw 512-byte tar 181archive chunk. 182 183Returns undef on failure. 184 185=cut 186 187sub new { 188 my $class = shift; 189 my $what = shift; 190 191 my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) : 192 ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) : 193 ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) : 194 undef; 195 196 return $obj; 197} 198 199### copies the data, creates a clone ### 200sub clone { 201 my $self = shift; 202 return bless { %$self }, ref $self; 203} 204 205sub _new_from_chunk { 206 my $class = shift; 207 my $chunk = shift or return; # 512 bytes of tar header 208 my %hash = @_; 209 210 ### filter any arguments on defined-ness of values. 211 ### this allows overriding from what the tar-header is saying 212 ### about this tar-entry. Particularly useful for @LongLink files 213 my %args = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash; 214 215 ### makes it start at 0 actually... :) ### 216 my $i = -1; 217 my %entry = map { 218 my ($s,$v)=($tmpl->[++$i],$tmpl->[++$i]); # cdrake 219 ($_)=($_=~/^([^\0]*)/) unless($s eq 'size'); # cdrake 220 $s=> $v ? oct $_ : $_ # cdrake 221 # $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_ # removed by cdrake - mucks up binary sizes >8gb 222 } unpack( UNPACK, $chunk ); # cdrake 223 # } map { /^([^\0]*)/ } unpack( UNPACK, $chunk ); # old - replaced now by cdrake 224 225 226 if(substr($entry{'size'}, 0, 1) eq "\x80") { # binary size extension for files >8gigs (> octal 77777777777777) # cdrake 227 my @sz=unpack("aCSNN",$entry{'size'}); $entry{'size'}=$sz[4]+(2**32)*$sz[3]+$sz[2]*(2**64); # Use the low 80 bits (should use the upper 15 as well, but as at year 2011, that seems unlikley to ever be needed - the numbers are just too big...) # cdrake 228 } else { # cdrake 229 ($entry{'size'})=($entry{'size'}=~/^([^\0]*)/); $entry{'size'}=oct $entry{'size'}; # cdrake 230 } # cdrake 231 232 233 my $obj = bless { %entry, %args }, $class; 234 235 ### magic is a filetype string.. it should have something like 'ustar' or 236 ### something similar... if the chunk is garbage, skip it 237 return unless $obj->magic !~ /\W/; 238 239 ### store the original chunk ### 240 $obj->raw( $chunk ); 241 242 $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) ); 243 $obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m|/$|) ); 244 245 246 return $obj; 247 248} 249 250sub _new_from_file { 251 my $class = shift; 252 my $path = shift; 253 254 ### path has to at least exist 255 return unless defined $path; 256 257 my $type = __PACKAGE__->_filetype($path); 258 my $data = ''; 259 260 READ: { 261 unless ($type == DIR ) { 262 my $fh = IO::File->new; 263 264 unless( $fh->open($path) ) { 265 ### dangling symlinks are fine, stop reading but continue 266 ### creating the object 267 last READ if $type == SYMLINK; 268 269 ### otherwise, return from this function -- 270 ### anything that's *not* a symlink should be 271 ### resolvable 272 return; 273 } 274 275 ### binmode needed to read files properly on win32 ### 276 binmode $fh; 277 $data = do { local $/; <$fh> }; 278 close $fh; 279 } 280 } 281 282 my @items = qw[mode uid gid size mtime]; 283 my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9]; 284 285 if (ON_VMS) { 286 ### VMS has two UID modes, traditional and POSIX. Normally POSIX is 287 ### not used. We currently do not have an easy way to see if we are in 288 ### POSIX mode. In traditional mode, the UID is actually the VMS UIC. 289 ### The VMS UIC has the upper 16 bits is the GID, which in many cases 290 ### the VMS UIC will be larger than 209715, the largest that TAR can 291 ### handle. So for now, assume it is traditional if the UID is larger 292 ### than 0x10000. 293 294 if ($hash{uid} > 0x10000) { 295 $hash{uid} = $hash{uid} & 0xFFFF; 296 } 297 298 ### The file length from stat() is the physical length of the file 299 ### However the amount of data read in may be more for some file types. 300 ### Fixed length files are read past the logical EOF to end of the block 301 ### containing. Other file types get expanded on read because record 302 ### delimiters are added. 303 304 my $data_len = length $data; 305 $hash{size} = $data_len if $hash{size} < $data_len; 306 307 } 308 ### you *must* set size == 0 on symlinks, or the next entry will be 309 ### though of as the contents of the symlink, which is wrong. 310 ### this fixes bug #7937 311 $hash{size} = 0 if ($type == DIR or $type == SYMLINK); 312 $hash{mtime} -= TIME_OFFSET; 313 314 ### strip the high bits off the mode, which we don't need to store 315 $hash{mode} = STRIP_MODE->( $hash{mode} ); 316 317 318 ### probably requires some file path munging here ... ### 319 ### name and prefix are set later 320 my $obj = { 321 %hash, 322 name => '', 323 chksum => CHECK_SUM, 324 type => $type, 325 linkname => ($type == SYMLINK and CAN_READLINK) 326 ? readlink $path 327 : '', 328 magic => MAGIC, 329 version => TAR_VERSION, 330 uname => UNAME->( $hash{uid} ), 331 gname => GNAME->( $hash{gid} ), 332 devmajor => 0, # not handled 333 devminor => 0, # not handled 334 prefix => '', 335 data => $data, 336 }; 337 338 bless $obj, $class; 339 340 ### fix up the prefix and file from the path 341 my($prefix,$file) = $obj->_prefix_and_file( $path ); 342 $obj->prefix( $prefix ); 343 $obj->name( $file ); 344 345 return $obj; 346} 347 348sub _new_from_data { 349 my $class = shift; 350 my $path = shift; return unless defined $path; 351 my $data = shift; return unless defined $data; 352 my $opt = shift; 353 354 my $obj = { 355 data => $data, 356 name => '', 357 mode => MODE, 358 uid => UID, 359 gid => GID, 360 size => length $data, 361 mtime => time - TIME_OFFSET, 362 chksum => CHECK_SUM, 363 type => FILE, 364 linkname => '', 365 magic => MAGIC, 366 version => TAR_VERSION, 367 uname => UNAME->( UID ), 368 gname => GNAME->( GID ), 369 devminor => 0, 370 devmajor => 0, 371 prefix => '', 372 }; 373 374 ### overwrite with user options, if provided ### 375 if( $opt and ref $opt eq 'HASH' ) { 376 for my $key ( keys %$opt ) { 377 378 ### don't write bogus options ### 379 next unless exists $obj->{$key}; 380 $obj->{$key} = $opt->{$key}; 381 } 382 } 383 384 bless $obj, $class; 385 386 ### fix up the prefix and file from the path 387 my($prefix,$file) = $obj->_prefix_and_file( $path ); 388 $obj->prefix( $prefix ); 389 $obj->name( $file ); 390 391 return $obj; 392} 393 394sub _prefix_and_file { 395 my $self = shift; 396 my $path = shift; 397 398 my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir ); 399 my @dirs = File::Spec->splitdir( $dirs ); 400 401 ### so sometimes the last element is '' -- probably when trailing 402 ### dir slashes are encountered... this is of course pointless, 403 ### so remove it 404 pop @dirs while @dirs and not length $dirs[-1]; 405 406 ### if it's a directory, then $file might be empty 407 $file = pop @dirs if $self->is_dir and not length $file; 408 409 ### splitting ../ gives you the relative path in native syntax 410 map { $_ = '..' if $_ eq '-' } @dirs if ON_VMS; 411 412 my $prefix = File::Spec::Unix->catdir( 413 grep { length } $vol, @dirs 414 ); 415 return( $prefix, $file ); 416} 417 418sub _filetype { 419 my $self = shift; 420 my $file = shift; 421 422 return unless defined $file; 423 424 return SYMLINK if (-l $file); # Symlink 425 426 return FILE if (-f _); # Plain file 427 428 return DIR if (-d _); # Directory 429 430 return FIFO if (-p _); # Named pipe 431 432 return SOCKET if (-S _); # Socket 433 434 return BLOCKDEV if (-b _); # Block special 435 436 return CHARDEV if (-c _); # Character special 437 438 ### shouldn't happen, this is when making archives, not reading ### 439 return LONGLINK if ( $file eq LONGLINK_NAME ); 440 441 return UNKNOWN; # Something else (like what?) 442 443} 444 445### this method 'downgrades' a file to plain file -- this is used for 446### symlinks when FOLLOW_SYMLINKS is true. 447sub _downgrade_to_plainfile { 448 my $entry = shift; 449 $entry->type( FILE ); 450 $entry->mode( MODE ); 451 $entry->linkname(''); 452 453 return 1; 454} 455 456=head2 $bool = $file->extract( [ $alternative_name ] ) 457 458Extract this object, optionally to an alternative name. 459 460See C<< Archive::Tar->extract_file >> for details. 461 462Returns true on success and false on failure. 463 464=cut 465 466sub extract { 467 my $self = shift; 468 469 local $Carp::CarpLevel += 1; 470 471 return Archive::Tar->_extract_file( $self, @_ ); 472} 473 474=head2 $path = $file->full_path 475 476Returns the full path from the tar header; this is basically a 477concatenation of the C<prefix> and C<name> fields. 478 479=cut 480 481sub full_path { 482 my $self = shift; 483 484 ### if prefix field is emtpy 485 return $self->name unless defined $self->prefix and length $self->prefix; 486 487 ### or otherwise, catfile'd 488 return File::Spec::Unix->catfile( $self->prefix, $self->name ); 489} 490 491 492=head2 $bool = $file->validate 493 494Done by Archive::Tar internally when reading the tar file: 495validate the header against the checksum to ensure integer tar file. 496 497Returns true on success, false on failure 498 499=cut 500 501sub validate { 502 my $self = shift; 503 504 my $raw = $self->raw; 505 506 ### don't know why this one is different from the one we /write/ ### 507 substr ($raw, 148, 8) = " "; 508 509 ### bug #43513: [PATCH] Accept wrong checksums from SunOS and HP-UX tar 510 ### like GNU tar does. See here for details: 511 ### http://www.gnu.org/software/tar/manual/tar.html#SEC139 512 ### so we do both a signed AND unsigned validate. if one succeeds, that's 513 ### good enough 514 return ( (unpack ("%16C*", $raw) == $self->chksum) 515 or (unpack ("%16c*", $raw) == $self->chksum)) ? 1 : 0; 516} 517 518=head2 $bool = $file->has_content 519 520Returns a boolean to indicate whether the current object has content. 521Some special files like directories and so on never will have any 522content. This method is mainly to make sure you don't get warnings 523for using uninitialized values when looking at an object's content. 524 525=cut 526 527sub has_content { 528 my $self = shift; 529 return defined $self->data() && length $self->data() ? 1 : 0; 530} 531 532=head2 $content = $file->get_content 533 534Returns the current content for the in-memory file 535 536=cut 537 538sub get_content { 539 my $self = shift; 540 $self->data( ); 541} 542 543=head2 $cref = $file->get_content_by_ref 544 545Returns the current content for the in-memory file as a scalar 546reference. Normal users won't need this, but it will save memory if 547you are dealing with very large data files in your tar archive, since 548it will pass the contents by reference, rather than make a copy of it 549first. 550 551=cut 552 553sub get_content_by_ref { 554 my $self = shift; 555 556 return \$self->{data}; 557} 558 559=head2 $bool = $file->replace_content( $content ) 560 561Replace the current content of the file with the new content. This 562only affects the in-memory archive, not the on-disk version until 563you write it. 564 565Returns true on success, false on failure. 566 567=cut 568 569sub replace_content { 570 my $self = shift; 571 my $data = shift || ''; 572 573 $self->data( $data ); 574 $self->size( length $data ); 575 return 1; 576} 577 578=head2 $bool = $file->rename( $new_name ) 579 580Rename the current file to $new_name. 581 582Note that you must specify a Unix path for $new_name, since per tar 583standard, all files in the archive must be Unix paths. 584 585Returns true on success and false on failure. 586 587=cut 588 589sub rename { 590 my $self = shift; 591 my $path = shift; 592 593 return unless defined $path; 594 595 my ($prefix,$file) = $self->_prefix_and_file( $path ); 596 597 $self->name( $file ); 598 $self->prefix( $prefix ); 599 600 return 1; 601} 602 603=head2 $bool = $file->chmod $mode) 604 605Change mode of $file to $mode. The mode can be a string or a number 606which is interpreted as octal whether or not a leading 0 is given. 607 608Returns true on success and false on failure. 609 610=cut 611 612sub chmod { 613 my $self = shift; 614 my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/; 615 $self->{mode} = oct($mode); 616 return 1; 617} 618 619=head2 $bool = $file->chown( $user [, $group]) 620 621Change owner of $file to $user. If a $group is given that is changed 622as well. You can also pass a single parameter with a colon separating the 623use and group as in 'root:wheel'. 624 625Returns true on success and false on failure. 626 627=cut 628 629sub chown { 630 my $self = shift; 631 my $uname = shift; 632 return unless defined $uname; 633 my $gname; 634 if (-1 != index($uname, ':')) { 635 ($uname, $gname) = split(/:/, $uname); 636 } else { 637 $gname = shift if @_ > 0; 638 } 639 640 $self->uname( $uname ); 641 $self->gname( $gname ) if $gname; 642 return 1; 643} 644 645=head1 Convenience methods 646 647To quickly check the type of a C<Archive::Tar::File> object, you can 648use the following methods: 649 650=over 4 651 652=item $file->is_file 653 654Returns true if the file is of type C<file> 655 656=item $file->is_dir 657 658Returns true if the file is of type C<dir> 659 660=item $file->is_hardlink 661 662Returns true if the file is of type C<hardlink> 663 664=item $file->is_symlink 665 666Returns true if the file is of type C<symlink> 667 668=item $file->is_chardev 669 670Returns true if the file is of type C<chardev> 671 672=item $file->is_blockdev 673 674Returns true if the file is of type C<blockdev> 675 676=item $file->is_fifo 677 678Returns true if the file is of type C<fifo> 679 680=item $file->is_socket 681 682Returns true if the file is of type C<socket> 683 684=item $file->is_longlink 685 686Returns true if the file is of type C<LongLink>. 687Should not happen after a successful C<read>. 688 689=item $file->is_label 690 691Returns true if the file is of type C<Label>. 692Should not happen after a successful C<read>. 693 694=item $file->is_unknown 695 696Returns true if the file type is C<unknown> 697 698=back 699 700=cut 701 702#stupid perl5.5.3 needs to warn if it's not numeric 703sub is_file { local $^W; FILE == $_[0]->type } 704sub is_dir { local $^W; DIR == $_[0]->type } 705sub is_hardlink { local $^W; HARDLINK == $_[0]->type } 706sub is_symlink { local $^W; SYMLINK == $_[0]->type } 707sub is_chardev { local $^W; CHARDEV == $_[0]->type } 708sub is_blockdev { local $^W; BLOCKDEV == $_[0]->type } 709sub is_fifo { local $^W; FIFO == $_[0]->type } 710sub is_socket { local $^W; SOCKET == $_[0]->type } 711sub is_unknown { local $^W; UNKNOWN == $_[0]->type } 712sub is_longlink { local $^W; LONGLINK eq $_[0]->type } 713sub is_label { local $^W; LABEL eq $_[0]->type } 714 7151; 716