1package Archive::Tar::File; 2use strict; 3 4use Carp (); 5use IO::File; 6use File::Spec::Unix (); 7use File::Spec (); 8use File::Basename (); 9 10use Archive::Tar::Constant; 11 12use vars qw[@ISA $VERSION]; 13#@ISA = qw[Archive::Tar]; 14$VERSION = '2.40'; 15 16### set value to 1 to oct() it during the unpack ### 17 18my $tmpl = [ 19 name => 0, # string A100 20 mode => 1, # octal A8 21 uid => 1, # octal A8 22 gid => 1, # octal A8 23 size => 0, # octal # cdrake - not *always* octal.. A12 24 mtime => 1, # octal A12 25 chksum => 1, # octal A8 26 type => 0, # character A1 27 linkname => 0, # string A100 28 magic => 0, # string A6 29 version => 0, # 2 bytes A2 30 uname => 0, # string A32 31 gname => 0, # string A32 32 devmajor => 1, # octal A8 33 devminor => 1, # octal A8 34 prefix => 0, # A155 x 12 35 36### end UNPACK items ### 37 raw => 0, # the raw data chunk 38 data => 0, # the data associated with the file -- 39 # This might be very memory intensive 40]; 41 42### install get/set accessors for this object. 43for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) { 44 my $key = $tmpl->[$i]; 45 no strict 'refs'; 46 *{__PACKAGE__."::$key"} = sub { 47 my $self = shift; 48 $self->{$key} = $_[0] if @_; 49 50 ### just in case the key is not there or undef or something ### 51 { local $^W = 0; 52 return $self->{$key}; 53 } 54 } 55} 56 57=head1 NAME 58 59Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar 60 61=head1 SYNOPSIS 62 63 my @items = $tar->get_files; 64 65 print $_->name, ' ', $_->size, "\n" for @items; 66 67 print $object->get_content; 68 $object->replace_content('new content'); 69 70 $object->rename( 'new/full/path/to/file.c' ); 71 72=head1 DESCRIPTION 73 74Archive::Tar::Files provides a neat little object layer for in-memory 75extracted files. It's mostly used internally in Archive::Tar to tidy 76up the code, but there's no reason users shouldn't use this API as 77well. 78 79=head2 Accessors 80 81A lot of the methods in this package are accessors to the various 82fields in the tar header: 83 84=over 4 85 86=item name 87 88The file's name 89 90=item mode 91 92The file's mode 93 94=item uid 95 96The user id owning the file 97 98=item gid 99 100The group id owning the file 101 102=item size 103 104File size in bytes 105 106=item mtime 107 108Modification time. Adjusted to mac-time on MacOS if required 109 110=item chksum 111 112Checksum field for the tar header 113 114=item type 115 116File type -- numeric, but comparable to exported constants -- see 117Archive::Tar's documentation 118 119=item linkname 120 121If the file is a symlink, the file it's pointing to 122 123=item magic 124 125Tar magic string -- not useful for most users 126 127=item version 128 129Tar version string -- not useful for most users 130 131=item uname 132 133The user name that owns the file 134 135=item gname 136 137The group name that owns the file 138 139=item devmajor 140 141Device major number in case of a special file 142 143=item devminor 144 145Device minor number in case of a special file 146 147=item prefix 148 149Any directory to prefix to the extraction path, if any 150 151=item raw 152 153Raw tar header -- not useful for most users 154 155=back 156 157=head1 Methods 158 159=head2 Archive::Tar::File->new( file => $path ) 160 161Returns a new Archive::Tar::File object from an existing file. 162 163Returns undef on failure. 164 165=head2 Archive::Tar::File->new( data => $path, $data, $opt ) 166 167Returns a new Archive::Tar::File object from data. 168 169C<$path> defines the file name (which need not exist), C<$data> the 170file contents, and C<$opt> is a reference to a hash of attributes 171which may be used to override the default attributes (fields in the 172tar header), which are described above in the Accessors section. 173 174Returns undef on failure. 175 176=head2 Archive::Tar::File->new( chunk => $chunk ) 177 178Returns a new Archive::Tar::File object from a raw 512-byte tar 179archive chunk. 180 181Returns undef on failure. 182 183=cut 184 185sub new { 186 my $class = shift; 187 my $what = shift; 188 189 my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) : 190 ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) : 191 ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) : 192 undef; 193 194 return $obj; 195} 196 197### copies the data, creates a clone ### 198sub clone { 199 my $self = shift; 200 return bless { %$self }, ref $self; 201} 202 203sub _new_from_chunk { 204 my $class = shift; 205 my $chunk = shift or return; # 512 bytes of tar header 206 my %hash = @_; 207 208 ### filter any arguments on defined-ness of values. 209 ### this allows overriding from what the tar-header is saying 210 ### about this tar-entry. Particularly useful for @LongLink files 211 my %args = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash; 212 213 ### makes it start at 0 actually... :) ### 214 my $i = -1; 215 my %entry = map { 216 my ($s,$v)=($tmpl->[++$i],$tmpl->[++$i]); # cdrake 217 ($_)=($_=~/^([^\0]*)/) unless($s eq 'size'); # cdrake 218 $s=> $v ? oct $_ : $_ # cdrake 219 # $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_ # removed by cdrake - mucks up binary sizes >8gb 220 } unpack( UNPACK, $chunk ); # cdrake 221 # } map { /^([^\0]*)/ } unpack( UNPACK, $chunk ); # old - replaced now by cdrake 222 223 224 if(substr($entry{'size'}, 0, 1) eq "\x80") { # binary size extension for files >8gigs (> octal 77777777777777) # cdrake 225 my @sz=unpack("aCSNN",$entry{'size'}); $entry{'size'}=$sz[4]+(2**32)*$sz[3]+$sz[2]*(2**64); # Use the low 80 bits (should use the upper 15 as well, but as at year 2011, that seems unlikely to ever be needed - the numbers are just too big...) # cdrake 226 } else { # cdrake 227 ($entry{'size'})=($entry{'size'}=~/^([^\0]*)/); $entry{'size'}=oct $entry{'size'}; # cdrake 228 } # cdrake 229 230 231 my $obj = bless { %entry, %args }, $class; 232 233 ### magic is a filetype string.. it should have something like 'ustar' or 234 ### something similar... if the chunk is garbage, skip it 235 return unless $obj->magic !~ /\W/; 236 237 ### store the original chunk ### 238 $obj->raw( $chunk ); 239 240 $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) ); 241 $obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m|/$|) ); 242 243 244 return $obj; 245 246} 247 248sub _new_from_file { 249 my $class = shift; 250 my $path = shift; 251 252 ### path has to at least exist 253 return unless defined $path; 254 255 my $type = __PACKAGE__->_filetype($path); 256 my $data = ''; 257 258 READ: { 259 unless ($type == DIR ) { 260 my $fh = IO::File->new; 261 262 unless( $fh->open($path) ) { 263 ### dangling symlinks are fine, stop reading but continue 264 ### creating the object 265 last READ if $type == SYMLINK; 266 267 ### otherwise, return from this function -- 268 ### anything that's *not* a symlink should be 269 ### resolvable 270 return; 271 } 272 273 ### binmode needed to read files properly on win32 ### 274 binmode $fh; 275 $data = do { local $/; <$fh> }; 276 close $fh; 277 } 278 } 279 280 my @items = qw[mode uid gid size mtime]; 281 my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9]; 282 283 if (ON_VMS) { 284 ### VMS has two UID modes, traditional and POSIX. Normally POSIX is 285 ### not used. We currently do not have an easy way to see if we are in 286 ### POSIX mode. In traditional mode, the UID is actually the VMS UIC. 287 ### The VMS UIC has the upper 16 bits is the GID, which in many cases 288 ### the VMS UIC will be larger than 209715, the largest that TAR can 289 ### handle. So for now, assume it is traditional if the UID is larger 290 ### than 0x10000. 291 292 if ($hash{uid} > 0x10000) { 293 $hash{uid} = $hash{uid} & 0xFFFF; 294 } 295 296 ### The file length from stat() is the physical length of the file 297 ### However the amount of data read in may be more for some file types. 298 ### Fixed length files are read past the logical EOF to end of the block 299 ### containing. Other file types get expanded on read because record 300 ### delimiters are added. 301 302 my $data_len = length $data; 303 $hash{size} = $data_len if $hash{size} < $data_len; 304 305 } 306 ### you *must* set size == 0 on symlinks, or the next entry will be 307 ### though of as the contents of the symlink, which is wrong. 308 ### this fixes bug #7937 309 $hash{size} = 0 if ($type == DIR or $type == SYMLINK); 310 $hash{mtime} -= TIME_OFFSET; 311 312 ### strip the high bits off the mode, which we don't need to store 313 $hash{mode} = STRIP_MODE->( $hash{mode} ); 314 315 316 ### probably requires some file path munging here ... ### 317 ### name and prefix are set later 318 my $obj = { 319 %hash, 320 name => '', 321 chksum => CHECK_SUM, 322 type => $type, 323 linkname => ($type == SYMLINK and CAN_READLINK) 324 ? readlink $path 325 : '', 326 magic => MAGIC, 327 version => TAR_VERSION, 328 uname => UNAME->( $hash{uid} ), 329 gname => GNAME->( $hash{gid} ), 330 devmajor => 0, # not handled 331 devminor => 0, # not handled 332 prefix => '', 333 data => $data, 334 }; 335 336 bless $obj, $class; 337 338 ### fix up the prefix and file from the path 339 my($prefix,$file) = $obj->_prefix_and_file( $path ); 340 $obj->prefix( $prefix ); 341 $obj->name( $file ); 342 343 return $obj; 344} 345 346sub _new_from_data { 347 my $class = shift; 348 my $path = shift; return unless defined $path; 349 my $data = shift; return unless defined $data; 350 my $opt = shift; 351 352 my $obj = { 353 data => $data, 354 name => '', 355 mode => MODE, 356 uid => UID, 357 gid => GID, 358 size => length $data, 359 mtime => time - TIME_OFFSET, 360 chksum => CHECK_SUM, 361 type => FILE, 362 linkname => '', 363 magic => MAGIC, 364 version => TAR_VERSION, 365 uname => UNAME->( UID ), 366 gname => GNAME->( GID ), 367 devminor => 0, 368 devmajor => 0, 369 prefix => '', 370 }; 371 372 ### overwrite with user options, if provided ### 373 if( $opt and ref $opt eq 'HASH' ) { 374 for my $key ( keys %$opt ) { 375 376 ### don't write bogus options ### 377 next unless exists $obj->{$key}; 378 $obj->{$key} = $opt->{$key}; 379 } 380 } 381 382 bless $obj, $class; 383 384 ### fix up the prefix and file from the path 385 my($prefix,$file) = $obj->_prefix_and_file( $path ); 386 $obj->prefix( $prefix ); 387 $obj->name( $file ); 388 389 return $obj; 390} 391 392sub _prefix_and_file { 393 my $self = shift; 394 my $path = shift; 395 396 my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir ); 397 my @dirs = File::Spec->splitdir( File::Spec->canonpath($dirs) ); 398 399 ### if it's a directory, then $file might be empty 400 $file = pop @dirs if $self->is_dir and not length $file; 401 402 ### splitting ../ gives you the relative path in native syntax 403 ### Remove the root (000000) directory 404 ### The volume from splitpath will also be in native syntax 405 if (ON_VMS) { 406 map { $_ = '..' if $_ eq '-'; $_ = '' if $_ eq '000000' } @dirs; 407 if (length($vol)) { 408 $vol = VMS::Filespec::unixify($vol); 409 unshift @dirs, $vol; 410 } 411 } 412 413 my $prefix = File::Spec::Unix->catdir(@dirs); 414 return( $prefix, $file ); 415} 416 417sub _filetype { 418 my $self = shift; 419 my $file = shift; 420 421 return unless defined $file; 422 423 return SYMLINK if (-l $file); # Symlink 424 425 return FILE if (-f _); # Plain file 426 427 return DIR if (-d _); # Directory 428 429 return FIFO if (-p _); # Named pipe 430 431 return SOCKET if (-S _); # Socket 432 433 return BLOCKDEV if (-b _); # Block special 434 435 return CHARDEV if (-c _); # Character special 436 437 ### shouldn't happen, this is when making archives, not reading ### 438 return LONGLINK if ( $file eq LONGLINK_NAME ); 439 440 return UNKNOWN; # Something else (like what?) 441 442} 443 444### this method 'downgrades' a file to plain file -- this is used for 445### symlinks when FOLLOW_SYMLINKS is true. 446sub _downgrade_to_plainfile { 447 my $entry = shift; 448 $entry->type( FILE ); 449 $entry->mode( MODE ); 450 $entry->linkname(''); 451 452 return 1; 453} 454 455=head2 $bool = $file->extract( [ $alternative_name ] ) 456 457Extract this object, optionally to an alternative name. 458 459See C<< Archive::Tar->extract_file >> for details. 460 461Returns true on success and false on failure. 462 463=cut 464 465sub extract { 466 my $self = shift; 467 468 local $Carp::CarpLevel += 1; 469 470 ### avoid circular use, so only require; 471 require Archive::Tar; 472 return Archive::Tar->_extract_file( $self, @_ ); 473} 474 475=head2 $path = $file->full_path 476 477Returns the full path from the tar header; this is basically a 478concatenation of the C<prefix> and C<name> fields. 479 480=cut 481 482sub full_path { 483 my $self = shift; 484 485 ### if prefix field is empty 486 return $self->name unless defined $self->prefix and length $self->prefix; 487 488 ### or otherwise, catfile'd 489 return File::Spec::Unix->catfile( $self->prefix, $self->name ); 490} 491 492 493=head2 $bool = $file->validate 494 495Done by Archive::Tar internally when reading the tar file: 496validate the header against the checksum to ensure integer tar file. 497 498Returns true on success, false on failure 499 500=cut 501 502sub validate { 503 my $self = shift; 504 505 my $raw = $self->raw; 506 507 ### don't know why this one is different from the one we /write/ ### 508 substr ($raw, 148, 8) = " "; 509 510 ### bug #43513: [PATCH] Accept wrong checksums from SunOS and HP-UX tar 511 ### like GNU tar does. See here for details: 512 ### http://www.gnu.org/software/tar/manual/tar.html#SEC139 513 ### so we do both a signed AND unsigned validate. if one succeeds, that's 514 ### good enough 515 return ( (unpack ("%16C*", $raw) == $self->chksum) 516 or (unpack ("%16c*", $raw) == $self->chksum)) ? 1 : 0; 517} 518 519=head2 $bool = $file->has_content 520 521Returns a boolean to indicate whether the current object has content. 522Some special files like directories and so on never will have any 523content. This method is mainly to make sure you don't get warnings 524for using uninitialized values when looking at an object's content. 525 526=cut 527 528sub has_content { 529 my $self = shift; 530 return defined $self->data() && length $self->data() ? 1 : 0; 531} 532 533=head2 $content = $file->get_content 534 535Returns the current content for the in-memory file 536 537=cut 538 539sub get_content { 540 my $self = shift; 541 $self->data( ); 542} 543 544=head2 $cref = $file->get_content_by_ref 545 546Returns the current content for the in-memory file as a scalar 547reference. Normal users won't need this, but it will save memory if 548you are dealing with very large data files in your tar archive, since 549it will pass the contents by reference, rather than make a copy of it 550first. 551 552=cut 553 554sub get_content_by_ref { 555 my $self = shift; 556 557 return \$self->{data}; 558} 559 560=head2 $bool = $file->replace_content( $content ) 561 562Replace the current content of the file with the new content. This 563only affects the in-memory archive, not the on-disk version until 564you write it. 565 566Returns true on success, false on failure. 567 568=cut 569 570sub replace_content { 571 my $self = shift; 572 my $data = shift || ''; 573 574 $self->data( $data ); 575 $self->size( length $data ); 576 return 1; 577} 578 579=head2 $bool = $file->rename( $new_name ) 580 581Rename the current file to $new_name. 582 583Note that you must specify a Unix path for $new_name, since per tar 584standard, all files in the archive must be Unix paths. 585 586Returns true on success and false on failure. 587 588=cut 589 590sub rename { 591 my $self = shift; 592 my $path = shift; 593 594 return unless defined $path; 595 596 my ($prefix,$file) = $self->_prefix_and_file( $path ); 597 598 $self->name( $file ); 599 $self->prefix( $prefix ); 600 601 return 1; 602} 603 604=head2 $bool = $file->chmod $mode) 605 606Change mode of $file to $mode. The mode can be a string or a number 607which is interpreted as octal whether or not a leading 0 is given. 608 609Returns true on success and false on failure. 610 611=cut 612 613sub chmod { 614 my $self = shift; 615 my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/; 616 $self->{mode} = oct($mode); 617 return 1; 618} 619 620=head2 $bool = $file->chown( $user [, $group]) 621 622Change owner of $file to $user. If a $group is given that is changed 623as well. You can also pass a single parameter with a colon separating the 624use and group as in 'root:wheel'. 625 626Returns true on success and false on failure. 627 628=cut 629 630sub chown { 631 my $self = shift; 632 my $uname = shift; 633 return unless defined $uname; 634 my $gname; 635 if (-1 != index($uname, ':')) { 636 ($uname, $gname) = split(/:/, $uname); 637 } else { 638 $gname = shift if @_ > 0; 639 } 640 641 $self->uname( $uname ); 642 $self->gname( $gname ) if $gname; 643 return 1; 644} 645 646=head1 Convenience methods 647 648To quickly check the type of a C<Archive::Tar::File> object, you can 649use the following methods: 650 651=over 4 652 653=item $file->is_file 654 655Returns true if the file is of type C<file> 656 657=item $file->is_dir 658 659Returns true if the file is of type C<dir> 660 661=item $file->is_hardlink 662 663Returns true if the file is of type C<hardlink> 664 665=item $file->is_symlink 666 667Returns true if the file is of type C<symlink> 668 669=item $file->is_chardev 670 671Returns true if the file is of type C<chardev> 672 673=item $file->is_blockdev 674 675Returns true if the file is of type C<blockdev> 676 677=item $file->is_fifo 678 679Returns true if the file is of type C<fifo> 680 681=item $file->is_socket 682 683Returns true if the file is of type C<socket> 684 685=item $file->is_longlink 686 687Returns true if the file is of type C<LongLink>. 688Should not happen after a successful C<read>. 689 690=item $file->is_label 691 692Returns true if the file is of type C<Label>. 693Should not happen after a successful C<read>. 694 695=item $file->is_unknown 696 697Returns true if the file type is C<unknown> 698 699=back 700 701=cut 702 703#stupid perl5.5.3 needs to warn if it's not numeric 704sub is_file { local $^W; FILE == $_[0]->type } 705sub is_dir { local $^W; DIR == $_[0]->type } 706sub is_hardlink { local $^W; HARDLINK == $_[0]->type } 707sub is_symlink { local $^W; SYMLINK == $_[0]->type } 708sub is_chardev { local $^W; CHARDEV == $_[0]->type } 709sub is_blockdev { local $^W; BLOCKDEV == $_[0]->type } 710sub is_fifo { local $^W; FIFO == $_[0]->type } 711sub is_socket { local $^W; SOCKET == $_[0]->type } 712sub is_unknown { local $^W; UNKNOWN == $_[0]->type } 713sub is_longlink { local $^W; LONGLINK eq $_[0]->type } 714sub is_label { local $^W; LABEL eq $_[0]->type } 715 7161; 717