1package Archive::Tar::File; 2use strict; 3 4use Carp (); 5use IO::File; 6use File::Spec::Unix (); 7use File::Spec (); 8use File::Basename (); 9 10### avoid circular use, so only require; 11require Archive::Tar; 12use Archive::Tar::Constant; 13 14use vars qw[@ISA $VERSION]; 15#@ISA = qw[Archive::Tar]; 16$VERSION = '2.32'; 17 18### set value to 1 to oct() it during the unpack ### 19 20my $tmpl = [ 21 name => 0, # string A100 22 mode => 1, # octal A8 23 uid => 1, # octal A8 24 gid => 1, # octal A8 25 size => 0, # octal # cdrake - not *always* octal.. A12 26 mtime => 1, # octal A12 27 chksum => 1, # octal A8 28 type => 0, # character A1 29 linkname => 0, # string A100 30 magic => 0, # string A6 31 version => 0, # 2 bytes A2 32 uname => 0, # string A32 33 gname => 0, # string A32 34 devmajor => 1, # octal A8 35 devminor => 1, # octal A8 36 prefix => 0, # A155 x 12 37 38### end UNPACK items ### 39 raw => 0, # the raw data chunk 40 data => 0, # the data associated with the file -- 41 # This might be very memory intensive 42]; 43 44### install get/set accessors for this object. 45for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) { 46 my $key = $tmpl->[$i]; 47 no strict 'refs'; 48 *{__PACKAGE__."::$key"} = sub { 49 my $self = shift; 50 $self->{$key} = $_[0] if @_; 51 52 ### just in case the key is not there or undef or something ### 53 { local $^W = 0; 54 return $self->{$key}; 55 } 56 } 57} 58 59=head1 NAME 60 61Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar 62 63=head1 SYNOPSIS 64 65 my @items = $tar->get_files; 66 67 print $_->name, ' ', $_->size, "\n" for @items; 68 69 print $object->get_content; 70 $object->replace_content('new content'); 71 72 $object->rename( 'new/full/path/to/file.c' ); 73 74=head1 DESCRIPTION 75 76Archive::Tar::Files provides a neat little object layer for in-memory 77extracted files. It's mostly used internally in Archive::Tar to tidy 78up the code, but there's no reason users shouldn't use this API as 79well. 80 81=head2 Accessors 82 83A lot of the methods in this package are accessors to the various 84fields in the tar header: 85 86=over 4 87 88=item name 89 90The file's name 91 92=item mode 93 94The file's mode 95 96=item uid 97 98The user id owning the file 99 100=item gid 101 102The group id owning the file 103 104=item size 105 106File size in bytes 107 108=item mtime 109 110Modification time. Adjusted to mac-time on MacOS if required 111 112=item chksum 113 114Checksum field for the tar header 115 116=item type 117 118File type -- numeric, but comparable to exported constants -- see 119Archive::Tar's documentation 120 121=item linkname 122 123If the file is a symlink, the file it's pointing to 124 125=item magic 126 127Tar magic string -- not useful for most users 128 129=item version 130 131Tar version string -- not useful for most users 132 133=item uname 134 135The user name that owns the file 136 137=item gname 138 139The group name that owns the file 140 141=item devmajor 142 143Device major number in case of a special file 144 145=item devminor 146 147Device minor number in case of a special file 148 149=item prefix 150 151Any directory to prefix to the extraction path, if any 152 153=item raw 154 155Raw tar header -- not useful for most users 156 157=back 158 159=head1 Methods 160 161=head2 Archive::Tar::File->new( file => $path ) 162 163Returns a new Archive::Tar::File object from an existing file. 164 165Returns undef on failure. 166 167=head2 Archive::Tar::File->new( data => $path, $data, $opt ) 168 169Returns a new Archive::Tar::File object from data. 170 171C<$path> defines the file name (which need not exist), C<$data> the 172file contents, and C<$opt> is a reference to a hash of attributes 173which may be used to override the default attributes (fields in the 174tar header), which are described above in the Accessors section. 175 176Returns undef on failure. 177 178=head2 Archive::Tar::File->new( chunk => $chunk ) 179 180Returns a new Archive::Tar::File object from a raw 512-byte tar 181archive chunk. 182 183Returns undef on failure. 184 185=cut 186 187sub new { 188 my $class = shift; 189 my $what = shift; 190 191 my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) : 192 ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) : 193 ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) : 194 undef; 195 196 return $obj; 197} 198 199### copies the data, creates a clone ### 200sub clone { 201 my $self = shift; 202 return bless { %$self }, ref $self; 203} 204 205sub _new_from_chunk { 206 my $class = shift; 207 my $chunk = shift or return; # 512 bytes of tar header 208 my %hash = @_; 209 210 ### filter any arguments on defined-ness of values. 211 ### this allows overriding from what the tar-header is saying 212 ### about this tar-entry. Particularly useful for @LongLink files 213 my %args = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash; 214 215 ### makes it start at 0 actually... :) ### 216 my $i = -1; 217 my %entry = map { 218 my ($s,$v)=($tmpl->[++$i],$tmpl->[++$i]); # cdrake 219 ($_)=($_=~/^([^\0]*)/) unless($s eq 'size'); # cdrake 220 $s=> $v ? oct $_ : $_ # cdrake 221 # $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_ # removed by cdrake - mucks up binary sizes >8gb 222 } unpack( UNPACK, $chunk ); # cdrake 223 # } map { /^([^\0]*)/ } unpack( UNPACK, $chunk ); # old - replaced now by cdrake 224 225 226 if(substr($entry{'size'}, 0, 1) eq "\x80") { # binary size extension for files >8gigs (> octal 77777777777777) # cdrake 227 my @sz=unpack("aCSNN",$entry{'size'}); $entry{'size'}=$sz[4]+(2**32)*$sz[3]+$sz[2]*(2**64); # Use the low 80 bits (should use the upper 15 as well, but as at year 2011, that seems unlikely to ever be needed - the numbers are just too big...) # cdrake 228 } else { # cdrake 229 ($entry{'size'})=($entry{'size'}=~/^([^\0]*)/); $entry{'size'}=oct $entry{'size'}; # cdrake 230 } # cdrake 231 232 233 my $obj = bless { %entry, %args }, $class; 234 235 ### magic is a filetype string.. it should have something like 'ustar' or 236 ### something similar... if the chunk is garbage, skip it 237 return unless $obj->magic !~ /\W/; 238 239 ### store the original chunk ### 240 $obj->raw( $chunk ); 241 242 $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) ); 243 $obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m|/$|) ); 244 245 246 return $obj; 247 248} 249 250sub _new_from_file { 251 my $class = shift; 252 my $path = shift; 253 254 ### path has to at least exist 255 return unless defined $path; 256 257 my $type = __PACKAGE__->_filetype($path); 258 my $data = ''; 259 260 READ: { 261 unless ($type == DIR ) { 262 my $fh = IO::File->new; 263 264 unless( $fh->open($path) ) { 265 ### dangling symlinks are fine, stop reading but continue 266 ### creating the object 267 last READ if $type == SYMLINK; 268 269 ### otherwise, return from this function -- 270 ### anything that's *not* a symlink should be 271 ### resolvable 272 return; 273 } 274 275 ### binmode needed to read files properly on win32 ### 276 binmode $fh; 277 $data = do { local $/; <$fh> }; 278 close $fh; 279 } 280 } 281 282 my @items = qw[mode uid gid size mtime]; 283 my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9]; 284 285 if (ON_VMS) { 286 ### VMS has two UID modes, traditional and POSIX. Normally POSIX is 287 ### not used. We currently do not have an easy way to see if we are in 288 ### POSIX mode. In traditional mode, the UID is actually the VMS UIC. 289 ### The VMS UIC has the upper 16 bits is the GID, which in many cases 290 ### the VMS UIC will be larger than 209715, the largest that TAR can 291 ### handle. So for now, assume it is traditional if the UID is larger 292 ### than 0x10000. 293 294 if ($hash{uid} > 0x10000) { 295 $hash{uid} = $hash{uid} & 0xFFFF; 296 } 297 298 ### The file length from stat() is the physical length of the file 299 ### However the amount of data read in may be more for some file types. 300 ### Fixed length files are read past the logical EOF to end of the block 301 ### containing. Other file types get expanded on read because record 302 ### delimiters are added. 303 304 my $data_len = length $data; 305 $hash{size} = $data_len if $hash{size} < $data_len; 306 307 } 308 ### you *must* set size == 0 on symlinks, or the next entry will be 309 ### though of as the contents of the symlink, which is wrong. 310 ### this fixes bug #7937 311 $hash{size} = 0 if ($type == DIR or $type == SYMLINK); 312 $hash{mtime} -= TIME_OFFSET; 313 314 ### strip the high bits off the mode, which we don't need to store 315 $hash{mode} = STRIP_MODE->( $hash{mode} ); 316 317 318 ### probably requires some file path munging here ... ### 319 ### name and prefix are set later 320 my $obj = { 321 %hash, 322 name => '', 323 chksum => CHECK_SUM, 324 type => $type, 325 linkname => ($type == SYMLINK and CAN_READLINK) 326 ? readlink $path 327 : '', 328 magic => MAGIC, 329 version => TAR_VERSION, 330 uname => UNAME->( $hash{uid} ), 331 gname => GNAME->( $hash{gid} ), 332 devmajor => 0, # not handled 333 devminor => 0, # not handled 334 prefix => '', 335 data => $data, 336 }; 337 338 bless $obj, $class; 339 340 ### fix up the prefix and file from the path 341 my($prefix,$file) = $obj->_prefix_and_file( $path ); 342 $obj->prefix( $prefix ); 343 $obj->name( $file ); 344 345 return $obj; 346} 347 348sub _new_from_data { 349 my $class = shift; 350 my $path = shift; return unless defined $path; 351 my $data = shift; return unless defined $data; 352 my $opt = shift; 353 354 my $obj = { 355 data => $data, 356 name => '', 357 mode => MODE, 358 uid => UID, 359 gid => GID, 360 size => length $data, 361 mtime => time - TIME_OFFSET, 362 chksum => CHECK_SUM, 363 type => FILE, 364 linkname => '', 365 magic => MAGIC, 366 version => TAR_VERSION, 367 uname => UNAME->( UID ), 368 gname => GNAME->( GID ), 369 devminor => 0, 370 devmajor => 0, 371 prefix => '', 372 }; 373 374 ### overwrite with user options, if provided ### 375 if( $opt and ref $opt eq 'HASH' ) { 376 for my $key ( keys %$opt ) { 377 378 ### don't write bogus options ### 379 next unless exists $obj->{$key}; 380 $obj->{$key} = $opt->{$key}; 381 } 382 } 383 384 bless $obj, $class; 385 386 ### fix up the prefix and file from the path 387 my($prefix,$file) = $obj->_prefix_and_file( $path ); 388 $obj->prefix( $prefix ); 389 $obj->name( $file ); 390 391 return $obj; 392} 393 394sub _prefix_and_file { 395 my $self = shift; 396 my $path = shift; 397 398 my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir ); 399 my @dirs = File::Spec->splitdir( File::Spec->canonpath($dirs) ); 400 401 ### if it's a directory, then $file might be empty 402 $file = pop @dirs if $self->is_dir and not length $file; 403 404 ### splitting ../ gives you the relative path in native syntax 405 ### Remove the root (000000) directory 406 ### The volume from splitpath will also be in native syntax 407 if (ON_VMS) { 408 map { $_ = '..' if $_ eq '-'; $_ = '' if $_ eq '000000' } @dirs; 409 if (length($vol)) { 410 $vol = VMS::Filespec::unixify($vol); 411 unshift @dirs, $vol; 412 } 413 } 414 415 my $prefix = File::Spec::Unix->catdir(@dirs); 416 return( $prefix, $file ); 417} 418 419sub _filetype { 420 my $self = shift; 421 my $file = shift; 422 423 return unless defined $file; 424 425 return SYMLINK if (-l $file); # Symlink 426 427 return FILE if (-f _); # Plain file 428 429 return DIR if (-d _); # Directory 430 431 return FIFO if (-p _); # Named pipe 432 433 return SOCKET if (-S _); # Socket 434 435 return BLOCKDEV if (-b _); # Block special 436 437 return CHARDEV if (-c _); # Character special 438 439 ### shouldn't happen, this is when making archives, not reading ### 440 return LONGLINK if ( $file eq LONGLINK_NAME ); 441 442 return UNKNOWN; # Something else (like what?) 443 444} 445 446### this method 'downgrades' a file to plain file -- this is used for 447### symlinks when FOLLOW_SYMLINKS is true. 448sub _downgrade_to_plainfile { 449 my $entry = shift; 450 $entry->type( FILE ); 451 $entry->mode( MODE ); 452 $entry->linkname(''); 453 454 return 1; 455} 456 457=head2 $bool = $file->extract( [ $alternative_name ] ) 458 459Extract this object, optionally to an alternative name. 460 461See C<< Archive::Tar->extract_file >> for details. 462 463Returns true on success and false on failure. 464 465=cut 466 467sub extract { 468 my $self = shift; 469 470 local $Carp::CarpLevel += 1; 471 472 return Archive::Tar->_extract_file( $self, @_ ); 473} 474 475=head2 $path = $file->full_path 476 477Returns the full path from the tar header; this is basically a 478concatenation of the C<prefix> and C<name> fields. 479 480=cut 481 482sub full_path { 483 my $self = shift; 484 485 ### if prefix field is empty 486 return $self->name unless defined $self->prefix and length $self->prefix; 487 488 ### or otherwise, catfile'd 489 return File::Spec::Unix->catfile( $self->prefix, $self->name ); 490} 491 492 493=head2 $bool = $file->validate 494 495Done by Archive::Tar internally when reading the tar file: 496validate the header against the checksum to ensure integer tar file. 497 498Returns true on success, false on failure 499 500=cut 501 502sub validate { 503 my $self = shift; 504 505 my $raw = $self->raw; 506 507 ### don't know why this one is different from the one we /write/ ### 508 substr ($raw, 148, 8) = " "; 509 510 ### bug #43513: [PATCH] Accept wrong checksums from SunOS and HP-UX tar 511 ### like GNU tar does. See here for details: 512 ### http://www.gnu.org/software/tar/manual/tar.html#SEC139 513 ### so we do both a signed AND unsigned validate. if one succeeds, that's 514 ### good enough 515 return ( (unpack ("%16C*", $raw) == $self->chksum) 516 or (unpack ("%16c*", $raw) == $self->chksum)) ? 1 : 0; 517} 518 519=head2 $bool = $file->has_content 520 521Returns a boolean to indicate whether the current object has content. 522Some special files like directories and so on never will have any 523content. This method is mainly to make sure you don't get warnings 524for using uninitialized values when looking at an object's content. 525 526=cut 527 528sub has_content { 529 my $self = shift; 530 return defined $self->data() && length $self->data() ? 1 : 0; 531} 532 533=head2 $content = $file->get_content 534 535Returns the current content for the in-memory file 536 537=cut 538 539sub get_content { 540 my $self = shift; 541 $self->data( ); 542} 543 544=head2 $cref = $file->get_content_by_ref 545 546Returns the current content for the in-memory file as a scalar 547reference. Normal users won't need this, but it will save memory if 548you are dealing with very large data files in your tar archive, since 549it will pass the contents by reference, rather than make a copy of it 550first. 551 552=cut 553 554sub get_content_by_ref { 555 my $self = shift; 556 557 return \$self->{data}; 558} 559 560=head2 $bool = $file->replace_content( $content ) 561 562Replace the current content of the file with the new content. This 563only affects the in-memory archive, not the on-disk version until 564you write it. 565 566Returns true on success, false on failure. 567 568=cut 569 570sub replace_content { 571 my $self = shift; 572 my $data = shift || ''; 573 574 $self->data( $data ); 575 $self->size( length $data ); 576 return 1; 577} 578 579=head2 $bool = $file->rename( $new_name ) 580 581Rename the current file to $new_name. 582 583Note that you must specify a Unix path for $new_name, since per tar 584standard, all files in the archive must be Unix paths. 585 586Returns true on success and false on failure. 587 588=cut 589 590sub rename { 591 my $self = shift; 592 my $path = shift; 593 594 return unless defined $path; 595 596 my ($prefix,$file) = $self->_prefix_and_file( $path ); 597 598 $self->name( $file ); 599 $self->prefix( $prefix ); 600 601 return 1; 602} 603 604=head2 $bool = $file->chmod $mode) 605 606Change mode of $file to $mode. The mode can be a string or a number 607which is interpreted as octal whether or not a leading 0 is given. 608 609Returns true on success and false on failure. 610 611=cut 612 613sub chmod { 614 my $self = shift; 615 my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/; 616 $self->{mode} = oct($mode); 617 return 1; 618} 619 620=head2 $bool = $file->chown( $user [, $group]) 621 622Change owner of $file to $user. If a $group is given that is changed 623as well. You can also pass a single parameter with a colon separating the 624use and group as in 'root:wheel'. 625 626Returns true on success and false on failure. 627 628=cut 629 630sub chown { 631 my $self = shift; 632 my $uname = shift; 633 return unless defined $uname; 634 my $gname; 635 if (-1 != index($uname, ':')) { 636 ($uname, $gname) = split(/:/, $uname); 637 } else { 638 $gname = shift if @_ > 0; 639 } 640 641 $self->uname( $uname ); 642 $self->gname( $gname ) if $gname; 643 return 1; 644} 645 646=head1 Convenience methods 647 648To quickly check the type of a C<Archive::Tar::File> object, you can 649use the following methods: 650 651=over 4 652 653=item $file->is_file 654 655Returns true if the file is of type C<file> 656 657=item $file->is_dir 658 659Returns true if the file is of type C<dir> 660 661=item $file->is_hardlink 662 663Returns true if the file is of type C<hardlink> 664 665=item $file->is_symlink 666 667Returns true if the file is of type C<symlink> 668 669=item $file->is_chardev 670 671Returns true if the file is of type C<chardev> 672 673=item $file->is_blockdev 674 675Returns true if the file is of type C<blockdev> 676 677=item $file->is_fifo 678 679Returns true if the file is of type C<fifo> 680 681=item $file->is_socket 682 683Returns true if the file is of type C<socket> 684 685=item $file->is_longlink 686 687Returns true if the file is of type C<LongLink>. 688Should not happen after a successful C<read>. 689 690=item $file->is_label 691 692Returns true if the file is of type C<Label>. 693Should not happen after a successful C<read>. 694 695=item $file->is_unknown 696 697Returns true if the file type is C<unknown> 698 699=back 700 701=cut 702 703#stupid perl5.5.3 needs to warn if it's not numeric 704sub is_file { local $^W; FILE == $_[0]->type } 705sub is_dir { local $^W; DIR == $_[0]->type } 706sub is_hardlink { local $^W; HARDLINK == $_[0]->type } 707sub is_symlink { local $^W; SYMLINK == $_[0]->type } 708sub is_chardev { local $^W; CHARDEV == $_[0]->type } 709sub is_blockdev { local $^W; BLOCKDEV == $_[0]->type } 710sub is_fifo { local $^W; FIFO == $_[0]->type } 711sub is_socket { local $^W; SOCKET == $_[0]->type } 712sub is_unknown { local $^W; UNKNOWN == $_[0]->type } 713sub is_longlink { local $^W; LONGLINK eq $_[0]->type } 714sub is_label { local $^W; LABEL eq $_[0]->type } 715 7161; 717