1package Archive::Tar::File; 2use strict; 3 4use Carp (); 5use IO::File; 6use File::Spec::Unix (); 7use File::Spec (); 8use File::Basename (); 9 10### avoid circular use, so only require; 11require Archive::Tar; 12use Archive::Tar::Constant; 13 14use vars qw[@ISA $VERSION]; 15#@ISA = qw[Archive::Tar]; 16$VERSION = '0.02'; 17 18### set value to 1 to oct() it during the unpack ### 19my $tmpl = [ 20 name => 0, # string 21 mode => 1, # octal 22 uid => 1, # octal 23 gid => 1, # octal 24 size => 1, # octal 25 mtime => 1, # octal 26 chksum => 1, # octal 27 type => 0, # character 28 linkname => 0, # string 29 magic => 0, # string 30 version => 0, # 2 bytes 31 uname => 0, # string 32 gname => 0, # string 33 devmajor => 1, # octal 34 devminor => 1, # octal 35 prefix => 0, 36 37### end UNPACK items ### 38 raw => 0, # the raw data chunk 39 data => 0, # the data associated with the file -- 40 # This might be very memory intensive 41]; 42 43### install get/set accessors for this object. 44for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) { 45 my $key = $tmpl->[$i]; 46 no strict 'refs'; 47 *{__PACKAGE__."::$key"} = sub { 48 my $self = shift; 49 $self->{$key} = $_[0] if @_; 50 51 ### just in case the key is not there or undef or something ### 52 { local $^W = 0; 53 return $self->{$key}; 54 } 55 } 56} 57 58=head1 NAME 59 60Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar 61 62=head1 SYNOPSIS 63 64 my @items = $tar->get_files; 65 66 print $_->name, ' ', $_->size, "\n" for @items; 67 68 print $object->get_content; 69 $object->replace_content('new content'); 70 71 $object->rename( 'new/full/path/to/file.c' ); 72 73=head1 DESCRIPTION 74 75Archive::Tar::Files provides a neat little object layer for in-memory 76extracted files. It's mostly used internally in Archive::Tar to tidy 77up the code, but there's no reason users shouldn't use this API as 78well. 79 80=head2 Accessors 81 82A lot of the methods in this package are accessors to the various 83fields in the tar header: 84 85=over 4 86 87=item name 88 89The file's name 90 91=item mode 92 93The file's mode 94 95=item uid 96 97The user id owning the file 98 99=item gid 100 101The group id owning the file 102 103=item size 104 105File size in bytes 106 107=item mtime 108 109Modification time. Adjusted to mac-time on MacOS if required 110 111=item chksum 112 113Checksum field for the tar header 114 115=item type 116 117File type -- numeric, but comparable to exported constants -- see 118Archive::Tar's documentation 119 120=item linkname 121 122If the file is a symlink, the file it's pointing to 123 124=item magic 125 126Tar magic string -- not useful for most users 127 128=item version 129 130Tar version string -- not useful for most users 131 132=item uname 133 134The user name that owns the file 135 136=item gname 137 138The group name that owns the file 139 140=item devmajor 141 142Device major number in case of a special file 143 144=item devminor 145 146Device minor number in case of a special file 147 148=item prefix 149 150Any directory to prefix to the extraction path, if any 151 152=item raw 153 154Raw tar header -- not useful for most users 155 156=back 157 158=head1 Methods 159 160=head2 Archive::Tar::File->new( file => $path ) 161 162Returns a new Archive::Tar::File object from an existing file. 163 164Returns undef on failure. 165 166=head2 Archive::Tar::File->new( data => $path, $data, $opt ) 167 168Returns a new Archive::Tar::File object from data. 169 170C<$path> defines the file name (which need not exist), C<$data> the 171file contents, and C<$opt> is a reference to a hash of attributes 172which may be used to override the default attributes (fields in the 173tar header), which are described above in the Accessors section. 174 175Returns undef on failure. 176 177=head2 Archive::Tar::File->new( chunk => $chunk ) 178 179Returns a new Archive::Tar::File object from a raw 512-byte tar 180archive chunk. 181 182Returns undef on failure. 183 184=cut 185 186sub new { 187 my $class = shift; 188 my $what = shift; 189 190 my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) : 191 ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) : 192 ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) : 193 undef; 194 195 return $obj; 196} 197 198### copies the data, creates a clone ### 199sub clone { 200 my $self = shift; 201 return bless { %$self }, ref $self; 202} 203 204sub _new_from_chunk { 205 my $class = shift; 206 my $chunk = shift or return; # 512 bytes of tar header 207 my %hash = @_; 208 209 ### filter any arguments on defined-ness of values. 210 ### this allows overriding from what the tar-header is saying 211 ### about this tar-entry. Particularly useful for @LongLink files 212 my %args = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash; 213 214 ### makes it start at 0 actually... :) ### 215 my $i = -1; 216 my %entry = map { 217 $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_ 218 } map { /^([^\0]*)/ } unpack( UNPACK, $chunk ); 219 220 my $obj = bless { %entry, %args }, $class; 221 222 ### magic is a filetype string.. it should have something like 'ustar' or 223 ### something similar... if the chunk is garbage, skip it 224 return unless $obj->magic !~ /\W/; 225 226 ### store the original chunk ### 227 $obj->raw( $chunk ); 228 229 $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) ); 230 $obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m|/$|) ); 231 232 233 return $obj; 234 235} 236 237sub _new_from_file { 238 my $class = shift; 239 my $path = shift; 240 241 ### path has to at least exist 242 return unless defined $path; 243 244 my $type = __PACKAGE__->_filetype($path); 245 my $data = ''; 246 247 READ: { 248 unless ($type == DIR ) { 249 my $fh = IO::File->new; 250 251 unless( $fh->open($path) ) { 252 ### dangling symlinks are fine, stop reading but continue 253 ### creating the object 254 last READ if $type == SYMLINK; 255 256 ### otherwise, return from this function -- 257 ### anything that's *not* a symlink should be 258 ### resolvable 259 return; 260 } 261 262 ### binmode needed to read files properly on win32 ### 263 binmode $fh; 264 $data = do { local $/; <$fh> }; 265 close $fh; 266 } 267 } 268 269 my @items = qw[mode uid gid size mtime]; 270 my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9]; 271 272 if (ON_VMS) { 273 ### VMS has two UID modes, traditional and POSIX. Normally POSIX is 274 ### not used. We currently do not have an easy way to see if we are in 275 ### POSIX mode. In traditional mode, the UID is actually the VMS UIC. 276 ### The VMS UIC has the upper 16 bits is the GID, which in many cases 277 ### the VMS UIC will be larger than 209715, the largest that TAR can 278 ### handle. So for now, assume it is traditional if the UID is larger 279 ### than 0x10000. 280 281 if ($hash{uid} > 0x10000) { 282 $hash{uid} = $hash{uid} & 0xFFFF; 283 } 284 285 ### The file length from stat() is the physical length of the file 286 ### However the amount of data read in may be more for some file types. 287 ### Fixed length files are read past the logical EOF to end of the block 288 ### containing. Other file types get expanded on read because record 289 ### delimiters are added. 290 291 my $data_len = length $data; 292 $hash{size} = $data_len if $hash{size} < $data_len; 293 294 } 295 ### you *must* set size == 0 on symlinks, or the next entry will be 296 ### though of as the contents of the symlink, which is wrong. 297 ### this fixes bug #7937 298 $hash{size} = 0 if ($type == DIR or $type == SYMLINK); 299 $hash{mtime} -= TIME_OFFSET; 300 301 ### strip the high bits off the mode, which we don't need to store 302 $hash{mode} = STRIP_MODE->( $hash{mode} ); 303 304 305 ### probably requires some file path munging here ... ### 306 ### name and prefix are set later 307 my $obj = { 308 %hash, 309 name => '', 310 chksum => CHECK_SUM, 311 type => $type, 312 linkname => ($type == SYMLINK and CAN_READLINK) 313 ? readlink $path 314 : '', 315 magic => MAGIC, 316 version => TAR_VERSION, 317 uname => UNAME->( $hash{uid} ), 318 gname => GNAME->( $hash{gid} ), 319 devmajor => 0, # not handled 320 devminor => 0, # not handled 321 prefix => '', 322 data => $data, 323 }; 324 325 bless $obj, $class; 326 327 ### fix up the prefix and file from the path 328 my($prefix,$file) = $obj->_prefix_and_file( $path ); 329 $obj->prefix( $prefix ); 330 $obj->name( $file ); 331 332 return $obj; 333} 334 335sub _new_from_data { 336 my $class = shift; 337 my $path = shift; return unless defined $path; 338 my $data = shift; return unless defined $data; 339 my $opt = shift; 340 341 my $obj = { 342 data => $data, 343 name => '', 344 mode => MODE, 345 uid => UID, 346 gid => GID, 347 size => length $data, 348 mtime => time - TIME_OFFSET, 349 chksum => CHECK_SUM, 350 type => FILE, 351 linkname => '', 352 magic => MAGIC, 353 version => TAR_VERSION, 354 uname => UNAME->( UID ), 355 gname => GNAME->( GID ), 356 devminor => 0, 357 devmajor => 0, 358 prefix => '', 359 }; 360 361 ### overwrite with user options, if provided ### 362 if( $opt and ref $opt eq 'HASH' ) { 363 for my $key ( keys %$opt ) { 364 365 ### don't write bogus options ### 366 next unless exists $obj->{$key}; 367 $obj->{$key} = $opt->{$key}; 368 } 369 } 370 371 bless $obj, $class; 372 373 ### fix up the prefix and file from the path 374 my($prefix,$file) = $obj->_prefix_and_file( $path ); 375 $obj->prefix( $prefix ); 376 $obj->name( $file ); 377 378 return $obj; 379} 380 381sub _prefix_and_file { 382 my $self = shift; 383 my $path = shift; 384 385 my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir ); 386 my @dirs = File::Spec->splitdir( $dirs ); 387 388 ### so sometimes the last element is '' -- probably when trailing 389 ### dir slashes are encountered... this is of course pointless, 390 ### so remove it 391 pop @dirs while @dirs and not length $dirs[-1]; 392 393 ### if it's a directory, then $file might be empty 394 $file = pop @dirs if $self->is_dir and not length $file; 395 396 ### splitting ../ gives you the relative path in native syntax 397 map { $_ = '..' if $_ eq '-' } @dirs if ON_VMS; 398 399 my $prefix = File::Spec::Unix->catdir( 400 grep { length } $vol, @dirs 401 ); 402 return( $prefix, $file ); 403} 404 405sub _filetype { 406 my $self = shift; 407 my $file = shift; 408 409 return unless defined $file; 410 411 return SYMLINK if (-l $file); # Symlink 412 413 return FILE if (-f _); # Plain file 414 415 return DIR if (-d _); # Directory 416 417 return FIFO if (-p _); # Named pipe 418 419 return SOCKET if (-S _); # Socket 420 421 return BLOCKDEV if (-b _); # Block special 422 423 return CHARDEV if (-c _); # Character special 424 425 ### shouldn't happen, this is when making archives, not reading ### 426 return LONGLINK if ( $file eq LONGLINK_NAME ); 427 428 return UNKNOWN; # Something else (like what?) 429 430} 431 432### this method 'downgrades' a file to plain file -- this is used for 433### symlinks when FOLLOW_SYMLINKS is true. 434sub _downgrade_to_plainfile { 435 my $entry = shift; 436 $entry->type( FILE ); 437 $entry->mode( MODE ); 438 $entry->linkname(''); 439 440 return 1; 441} 442 443=head2 $bool = $file->extract( [ $alternative_name ] ) 444 445Extract this object, optionally to an alternative name. 446 447See C<< Archive::Tar->extract_file >> for details. 448 449Returns true on success and false on failure. 450 451=cut 452 453sub extract { 454 my $self = shift; 455 456 local $Carp::CarpLevel += 1; 457 458 return Archive::Tar->_extract_file( $self, @_ ); 459} 460 461=head2 $path = $file->full_path 462 463Returns the full path from the tar header; this is basically a 464concatenation of the C<prefix> and C<name> fields. 465 466=cut 467 468sub full_path { 469 my $self = shift; 470 471 ### if prefix field is emtpy 472 return $self->name unless defined $self->prefix and length $self->prefix; 473 474 ### or otherwise, catfile'd 475 return File::Spec::Unix->catfile( $self->prefix, $self->name ); 476} 477 478 479=head2 $bool = $file->validate 480 481Done by Archive::Tar internally when reading the tar file: 482validate the header against the checksum to ensure integer tar file. 483 484Returns true on success, false on failure 485 486=cut 487 488sub validate { 489 my $self = shift; 490 491 my $raw = $self->raw; 492 493 ### don't know why this one is different from the one we /write/ ### 494 substr ($raw, 148, 8) = " "; 495 496 ### bug #43513: [PATCH] Accept wrong checksums from SunOS and HP-UX tar 497 ### like GNU tar does. See here for details: 498 ### http://www.gnu.org/software/tar/manual/tar.html#SEC139 499 ### so we do both a signed AND unsigned validate. if one succeeds, that's 500 ### good enough 501 return ( (unpack ("%16C*", $raw) == $self->chksum) 502 or (unpack ("%16c*", $raw) == $self->chksum)) ? 1 : 0; 503} 504 505=head2 $bool = $file->has_content 506 507Returns a boolean to indicate whether the current object has content. 508Some special files like directories and so on never will have any 509content. This method is mainly to make sure you don't get warnings 510for using uninitialized values when looking at an object's content. 511 512=cut 513 514sub has_content { 515 my $self = shift; 516 return defined $self->data() && length $self->data() ? 1 : 0; 517} 518 519=head2 $content = $file->get_content 520 521Returns the current content for the in-memory file 522 523=cut 524 525sub get_content { 526 my $self = shift; 527 $self->data( ); 528} 529 530=head2 $cref = $file->get_content_by_ref 531 532Returns the current content for the in-memory file as a scalar 533reference. Normal users won't need this, but it will save memory if 534you are dealing with very large data files in your tar archive, since 535it will pass the contents by reference, rather than make a copy of it 536first. 537 538=cut 539 540sub get_content_by_ref { 541 my $self = shift; 542 543 return \$self->{data}; 544} 545 546=head2 $bool = $file->replace_content( $content ) 547 548Replace the current content of the file with the new content. This 549only affects the in-memory archive, not the on-disk version until 550you write it. 551 552Returns true on success, false on failure. 553 554=cut 555 556sub replace_content { 557 my $self = shift; 558 my $data = shift || ''; 559 560 $self->data( $data ); 561 $self->size( length $data ); 562 return 1; 563} 564 565=head2 $bool = $file->rename( $new_name ) 566 567Rename the current file to $new_name. 568 569Note that you must specify a Unix path for $new_name, since per tar 570standard, all files in the archive must be Unix paths. 571 572Returns true on success and false on failure. 573 574=cut 575 576sub rename { 577 my $self = shift; 578 my $path = shift; 579 580 return unless defined $path; 581 582 my ($prefix,$file) = $self->_prefix_and_file( $path ); 583 584 $self->name( $file ); 585 $self->prefix( $prefix ); 586 587 return 1; 588} 589 590=head1 Convenience methods 591 592To quickly check the type of a C<Archive::Tar::File> object, you can 593use the following methods: 594 595=over 4 596 597=item $file->is_file 598 599Returns true if the file is of type C<file> 600 601=item $file->is_dir 602 603Returns true if the file is of type C<dir> 604 605=item $file->is_hardlink 606 607Returns true if the file is of type C<hardlink> 608 609=item $file->is_symlink 610 611Returns true if the file is of type C<symlink> 612 613=item $file->is_chardev 614 615Returns true if the file is of type C<chardev> 616 617=item $file->is_blockdev 618 619Returns true if the file is of type C<blockdev> 620 621=item $file->is_fifo 622 623Returns true if the file is of type C<fifo> 624 625=item $file->is_socket 626 627Returns true if the file is of type C<socket> 628 629=item $file->is_longlink 630 631Returns true if the file is of type C<LongLink>. 632Should not happen after a successful C<read>. 633 634=item $file->is_label 635 636Returns true if the file is of type C<Label>. 637Should not happen after a successful C<read>. 638 639=item $file->is_unknown 640 641Returns true if the file type is C<unknown> 642 643=back 644 645=cut 646 647#stupid perl5.5.3 needs to warn if it's not numeric 648sub is_file { local $^W; FILE == $_[0]->type } 649sub is_dir { local $^W; DIR == $_[0]->type } 650sub is_hardlink { local $^W; HARDLINK == $_[0]->type } 651sub is_symlink { local $^W; SYMLINK == $_[0]->type } 652sub is_chardev { local $^W; CHARDEV == $_[0]->type } 653sub is_blockdev { local $^W; BLOCKDEV == $_[0]->type } 654sub is_fifo { local $^W; FIFO == $_[0]->type } 655sub is_socket { local $^W; SOCKET == $_[0]->type } 656sub is_unknown { local $^W; UNKNOWN == $_[0]->type } 657sub is_longlink { local $^W; LONGLINK eq $_[0]->type } 658sub is_label { local $^W; LABEL eq $_[0]->type } 659 6601; 661