1package Archive::Tar::File;
2use strict;
3
4use Carp                ();
5use IO::File;
6use File::Spec::Unix    ();
7use File::Spec          ();
8use File::Basename      ();
9
10### avoid circular use, so only require;
11require Archive::Tar;
12use Archive::Tar::Constant;
13
14use vars qw[@ISA $VERSION];
15#@ISA        = qw[Archive::Tar];
16$VERSION    = '2.04_01';
17
18### set value to 1 to oct() it during the unpack ###
19
20my $tmpl = [
21        name        => 0,   # string					A100
22        mode        => 1,   # octal					A8
23        uid         => 1,   # octal					A8
24        gid         => 1,   # octal					A8
25        size        => 0,   # octal	# cdrake - not *always* octal..	A12
26        mtime       => 1,   # octal					A12
27        chksum      => 1,   # octal					A8
28        type        => 0,   # character					A1
29        linkname    => 0,   # string					A100
30        magic       => 0,   # string					A6
31        version     => 0,   # 2 bytes					A2
32        uname       => 0,   # string					A32
33        gname       => 0,   # string					A32
34        devmajor    => 1,   # octal					A8
35        devminor    => 1,   # octal					A8
36        prefix      => 0,	#					A155 x 12
37
38### end UNPACK items ###
39        raw         => 0,   # the raw data chunk
40        data        => 0,   # the data associated with the file --
41                            # This  might be very memory intensive
42];
43
44### install get/set accessors for this object.
45for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) {
46    my $key = $tmpl->[$i];
47    no strict 'refs';
48    *{__PACKAGE__."::$key"} = sub {
49        my $self = shift;
50        $self->{$key} = $_[0] if @_;
51
52        ### just in case the key is not there or undef or something ###
53        {   local $^W = 0;
54            return $self->{$key};
55        }
56    }
57}
58
59=head1 NAME
60
61Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar
62
63=head1 SYNOPSIS
64
65    my @items = $tar->get_files;
66
67    print $_->name, ' ', $_->size, "\n" for @items;
68
69    print $object->get_content;
70    $object->replace_content('new content');
71
72    $object->rename( 'new/full/path/to/file.c' );
73
74=head1 DESCRIPTION
75
76Archive::Tar::Files provides a neat little object layer for in-memory
77extracted files. It's mostly used internally in Archive::Tar to tidy
78up the code, but there's no reason users shouldn't use this API as
79well.
80
81=head2 Accessors
82
83A lot of the methods in this package are accessors to the various
84fields in the tar header:
85
86=over 4
87
88=item name
89
90The file's name
91
92=item mode
93
94The file's mode
95
96=item uid
97
98The user id owning the file
99
100=item gid
101
102The group id owning the file
103
104=item size
105
106File size in bytes
107
108=item mtime
109
110Modification time. Adjusted to mac-time on MacOS if required
111
112=item chksum
113
114Checksum field for the tar header
115
116=item type
117
118File type -- numeric, but comparable to exported constants -- see
119Archive::Tar's documentation
120
121=item linkname
122
123If the file is a symlink, the file it's pointing to
124
125=item magic
126
127Tar magic string -- not useful for most users
128
129=item version
130
131Tar version string -- not useful for most users
132
133=item uname
134
135The user name that owns the file
136
137=item gname
138
139The group name that owns the file
140
141=item devmajor
142
143Device major number in case of a special file
144
145=item devminor
146
147Device minor number in case of a special file
148
149=item prefix
150
151Any directory to prefix to the extraction path, if any
152
153=item raw
154
155Raw tar header -- not useful for most users
156
157=back
158
159=head1 Methods
160
161=head2 Archive::Tar::File->new( file => $path )
162
163Returns a new Archive::Tar::File object from an existing file.
164
165Returns undef on failure.
166
167=head2 Archive::Tar::File->new( data => $path, $data, $opt )
168
169Returns a new Archive::Tar::File object from data.
170
171C<$path> defines the file name (which need not exist), C<$data> the
172file contents, and C<$opt> is a reference to a hash of attributes
173which may be used to override the default attributes (fields in the
174tar header), which are described above in the Accessors section.
175
176Returns undef on failure.
177
178=head2 Archive::Tar::File->new( chunk => $chunk )
179
180Returns a new Archive::Tar::File object from a raw 512-byte tar
181archive chunk.
182
183Returns undef on failure.
184
185=cut
186
187sub new {
188    my $class   = shift;
189    my $what    = shift;
190
191    my $obj =   ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) :
192                ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) :
193                ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) :
194                undef;
195
196    return $obj;
197}
198
199### copies the data, creates a clone ###
200sub clone {
201    my $self = shift;
202    return bless { %$self }, ref $self;
203}
204
205sub _new_from_chunk {
206    my $class = shift;
207    my $chunk = shift or return;    # 512 bytes of tar header
208    my %hash  = @_;
209
210    ### filter any arguments on defined-ness of values.
211    ### this allows overriding from what the tar-header is saying
212    ### about this tar-entry. Particularly useful for @LongLink files
213    my %args  = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash;
214
215    ### makes it start at 0 actually... :) ###
216    my $i = -1;
217    my %entry = map {
218	my ($s,$v)=($tmpl->[++$i],$tmpl->[++$i]);	# cdrake
219	($_)=($_=~/^([^\0]*)/) unless($s eq 'size');	# cdrake
220	$s=> $v ? oct $_ : $_				# cdrake
221	# $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_	# removed by cdrake - mucks up binary sizes >8gb
222    } unpack( UNPACK, $chunk );				# cdrake
223    # } map { /^([^\0]*)/ } unpack( UNPACK, $chunk );	# old - replaced now by cdrake
224
225
226    if(substr($entry{'size'}, 0, 1) eq "\x80") {	# binary size extension for files >8gigs (> octal 77777777777777)	# cdrake
227      my @sz=unpack("aCSNN",$entry{'size'}); $entry{'size'}=$sz[4]+(2**32)*$sz[3]+$sz[2]*(2**64);	# Use the low 80 bits (should use the upper 15 as well, but as at year 2011, that seems unlikely to ever be needed - the numbers are just too big...) # cdrake
228    } else {	# cdrake
229      ($entry{'size'})=($entry{'size'}=~/^([^\0]*)/); $entry{'size'}=oct $entry{'size'};	# cdrake
230    }	# cdrake
231
232
233    my $obj = bless { %entry, %args }, $class;
234
235	### magic is a filetype string.. it should have something like 'ustar' or
236	### something similar... if the chunk is garbage, skip it
237	return unless $obj->magic !~ /\W/;
238
239    ### store the original chunk ###
240    $obj->raw( $chunk );
241
242    $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) );
243    $obj->type(DIR)  if ( ($obj->is_file) && ($obj->name =~ m|/$|) );
244
245
246    return $obj;
247
248}
249
250sub _new_from_file {
251    my $class       = shift;
252    my $path        = shift;
253
254    ### path has to at least exist
255    return unless defined $path;
256
257    my $type        = __PACKAGE__->_filetype($path);
258    my $data        = '';
259
260    READ: {
261        unless ($type == DIR ) {
262            my $fh = IO::File->new;
263
264            unless( $fh->open($path) ) {
265                ### dangling symlinks are fine, stop reading but continue
266                ### creating the object
267                last READ if $type == SYMLINK;
268
269                ### otherwise, return from this function --
270                ### anything that's *not* a symlink should be
271                ### resolvable
272                return;
273            }
274
275            ### binmode needed to read files properly on win32 ###
276            binmode $fh;
277            $data = do { local $/; <$fh> };
278            close $fh;
279        }
280    }
281
282    my @items       = qw[mode uid gid size mtime];
283    my %hash        = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9];
284
285    if (ON_VMS) {
286        ### VMS has two UID modes, traditional and POSIX.  Normally POSIX is
287        ### not used.  We currently do not have an easy way to see if we are in
288        ### POSIX mode.  In traditional mode, the UID is actually the VMS UIC.
289        ### The VMS UIC has the upper 16 bits is the GID, which in many cases
290        ### the VMS UIC will be larger than 209715, the largest that TAR can
291        ### handle.  So for now, assume it is traditional if the UID is larger
292        ### than 0x10000.
293
294        if ($hash{uid} > 0x10000) {
295            $hash{uid} = $hash{uid} & 0xFFFF;
296        }
297
298        ### The file length from stat() is the physical length of the file
299        ### However the amount of data read in may be more for some file types.
300        ### Fixed length files are read past the logical EOF to end of the block
301        ### containing.  Other file types get expanded on read because record
302        ### delimiters are added.
303
304        my $data_len = length $data;
305        $hash{size} = $data_len if $hash{size} < $data_len;
306
307    }
308    ### you *must* set size == 0 on symlinks, or the next entry will be
309    ### though of as the contents of the symlink, which is wrong.
310    ### this fixes bug #7937
311    $hash{size}     = 0 if ($type == DIR or $type == SYMLINK);
312    $hash{mtime}    -= TIME_OFFSET;
313
314    ### strip the high bits off the mode, which we don't need to store
315    $hash{mode}     = STRIP_MODE->( $hash{mode} );
316
317
318    ### probably requires some file path munging here ... ###
319    ### name and prefix are set later
320    my $obj = {
321        %hash,
322        name        => '',
323        chksum      => CHECK_SUM,
324        type        => $type,
325        linkname    => ($type == SYMLINK and CAN_READLINK)
326                            ? readlink $path
327                            : '',
328        magic       => MAGIC,
329        version     => TAR_VERSION,
330        uname       => UNAME->( $hash{uid} ),
331        gname       => GNAME->( $hash{gid} ),
332        devmajor    => 0,   # not handled
333        devminor    => 0,   # not handled
334        prefix      => '',
335        data        => $data,
336    };
337
338    bless $obj, $class;
339
340    ### fix up the prefix and file from the path
341    my($prefix,$file) = $obj->_prefix_and_file( $path );
342    $obj->prefix( $prefix );
343    $obj->name( $file );
344
345    return $obj;
346}
347
348sub _new_from_data {
349    my $class   = shift;
350    my $path    = shift;    return unless defined $path;
351    my $data    = shift;    return unless defined $data;
352    my $opt     = shift;
353
354    my $obj = {
355        data        => $data,
356        name        => '',
357        mode        => MODE,
358        uid         => UID,
359        gid         => GID,
360        size        => length $data,
361        mtime       => time - TIME_OFFSET,
362        chksum      => CHECK_SUM,
363        type        => FILE,
364        linkname    => '',
365        magic       => MAGIC,
366        version     => TAR_VERSION,
367        uname       => UNAME->( UID ),
368        gname       => GNAME->( GID ),
369        devminor    => 0,
370        devmajor    => 0,
371        prefix      => '',
372    };
373
374    ### overwrite with user options, if provided ###
375    if( $opt and ref $opt eq 'HASH' ) {
376        for my $key ( keys %$opt ) {
377
378            ### don't write bogus options ###
379            next unless exists $obj->{$key};
380            $obj->{$key} = $opt->{$key};
381        }
382    }
383
384    bless $obj, $class;
385
386    ### fix up the prefix and file from the path
387    my($prefix,$file) = $obj->_prefix_and_file( $path );
388    $obj->prefix( $prefix );
389    $obj->name( $file );
390
391    return $obj;
392}
393
394sub _prefix_and_file {
395    my $self = shift;
396    my $path = shift;
397
398    my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir );
399    my @dirs = File::Spec->splitdir( $dirs );
400
401    ### so sometimes the last element is '' -- probably when trailing
402    ### dir slashes are encountered... this is of course pointless,
403    ### so remove it
404    pop @dirs while @dirs and not length $dirs[-1];
405
406    ### if it's a directory, then $file might be empty
407    $file = pop @dirs if $self->is_dir and not length $file;
408
409    ### splitting ../ gives you the relative path in native syntax
410    map { $_ = '..' if $_  eq '-' } @dirs if ON_VMS;
411
412    my $prefix = File::Spec::Unix->catdir(
413                        grep { length } $vol, @dirs
414                    );
415    return( $prefix, $file );
416}
417
418sub _filetype {
419    my $self = shift;
420    my $file = shift;
421
422    return unless defined $file;
423
424    return SYMLINK  if (-l $file);	# Symlink
425
426    return FILE     if (-f _);		# Plain file
427
428    return DIR      if (-d _);		# Directory
429
430    return FIFO     if (-p _);		# Named pipe
431
432    return SOCKET   if (-S _);		# Socket
433
434    return BLOCKDEV if (-b _);		# Block special
435
436    return CHARDEV  if (-c _);		# Character special
437
438    ### shouldn't happen, this is when making archives, not reading ###
439    return LONGLINK if ( $file eq LONGLINK_NAME );
440
441    return UNKNOWN;		            # Something else (like what?)
442
443}
444
445### this method 'downgrades' a file to plain file -- this is used for
446### symlinks when FOLLOW_SYMLINKS is true.
447sub _downgrade_to_plainfile {
448    my $entry = shift;
449    $entry->type( FILE );
450    $entry->mode( MODE );
451    $entry->linkname('');
452
453    return 1;
454}
455
456=head2 $bool = $file->extract( [ $alternative_name ] )
457
458Extract this object, optionally to an alternative name.
459
460See C<< Archive::Tar->extract_file >> for details.
461
462Returns true on success and false on failure.
463
464=cut
465
466sub extract {
467    my $self = shift;
468
469    local $Carp::CarpLevel += 1;
470
471    return Archive::Tar->_extract_file( $self, @_ );
472}
473
474=head2 $path = $file->full_path
475
476Returns the full path from the tar header; this is basically a
477concatenation of the C<prefix> and C<name> fields.
478
479=cut
480
481sub full_path {
482    my $self = shift;
483
484    ### if prefix field is empty
485    return $self->name unless defined $self->prefix and length $self->prefix;
486
487    ### or otherwise, catfile'd
488    return File::Spec::Unix->catfile( $self->prefix, $self->name );
489}
490
491
492=head2 $bool = $file->validate
493
494Done by Archive::Tar internally when reading the tar file:
495validate the header against the checksum to ensure integer tar file.
496
497Returns true on success, false on failure
498
499=cut
500
501sub validate {
502    my $self = shift;
503
504    my $raw = $self->raw;
505
506    ### don't know why this one is different from the one we /write/ ###
507    substr ($raw, 148, 8) = "        ";
508
509    ### bug #43513: [PATCH] Accept wrong checksums from SunOS and HP-UX tar
510    ### like GNU tar does. See here for details:
511    ### http://www.gnu.org/software/tar/manual/tar.html#SEC139
512    ### so we do both a signed AND unsigned validate. if one succeeds, that's
513    ### good enough
514	return (   (unpack ("%16C*", $raw) == $self->chksum)
515	        or (unpack ("%16c*", $raw) == $self->chksum)) ? 1 : 0;
516}
517
518=head2 $bool = $file->has_content
519
520Returns a boolean to indicate whether the current object has content.
521Some special files like directories and so on never will have any
522content. This method is mainly to make sure you don't get warnings
523for using uninitialized values when looking at an object's content.
524
525=cut
526
527sub has_content {
528    my $self = shift;
529    return defined $self->data() && length $self->data() ? 1 : 0;
530}
531
532=head2 $content = $file->get_content
533
534Returns the current content for the in-memory file
535
536=cut
537
538sub get_content {
539    my $self = shift;
540    $self->data( );
541}
542
543=head2 $cref = $file->get_content_by_ref
544
545Returns the current content for the in-memory file as a scalar
546reference. Normal users won't need this, but it will save memory if
547you are dealing with very large data files in your tar archive, since
548it will pass the contents by reference, rather than make a copy of it
549first.
550
551=cut
552
553sub get_content_by_ref {
554    my $self = shift;
555
556    return \$self->{data};
557}
558
559=head2 $bool = $file->replace_content( $content )
560
561Replace the current content of the file with the new content. This
562only affects the in-memory archive, not the on-disk version until
563you write it.
564
565Returns true on success, false on failure.
566
567=cut
568
569sub replace_content {
570    my $self = shift;
571    my $data = shift || '';
572
573    $self->data( $data );
574    $self->size( length $data );
575    return 1;
576}
577
578=head2 $bool = $file->rename( $new_name )
579
580Rename the current file to $new_name.
581
582Note that you must specify a Unix path for $new_name, since per tar
583standard, all files in the archive must be Unix paths.
584
585Returns true on success and false on failure.
586
587=cut
588
589sub rename {
590    my $self = shift;
591    my $path = shift;
592
593    return unless defined $path;
594
595    my ($prefix,$file) = $self->_prefix_and_file( $path );
596
597    $self->name( $file );
598    $self->prefix( $prefix );
599
600	return 1;
601}
602
603=head2 $bool = $file->chmod $mode)
604
605Change mode of $file to $mode. The mode can be a string or a number
606which is interpreted as octal whether or not a leading 0 is given.
607
608Returns true on success and false on failure.
609
610=cut
611
612sub chmod {
613    my $self  = shift;
614    my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/;
615    $self->{mode} = oct($mode);
616    return 1;
617}
618
619=head2 $bool = $file->chown( $user [, $group])
620
621Change owner of $file to $user. If a $group is given that is changed
622as well. You can also pass a single parameter with a colon separating the
623use and group as in 'root:wheel'.
624
625Returns true on success and false on failure.
626
627=cut
628
629sub chown {
630    my $self = shift;
631    my $uname = shift;
632    return unless defined $uname;
633    my $gname;
634    if (-1 != index($uname, ':')) {
635	($uname, $gname) = split(/:/, $uname);
636    } else {
637	$gname = shift if @_ > 0;
638    }
639
640    $self->uname( $uname );
641    $self->gname( $gname ) if $gname;
642	return 1;
643}
644
645=head1 Convenience methods
646
647To quickly check the type of a C<Archive::Tar::File> object, you can
648use the following methods:
649
650=over 4
651
652=item $file->is_file
653
654Returns true if the file is of type C<file>
655
656=item $file->is_dir
657
658Returns true if the file is of type C<dir>
659
660=item $file->is_hardlink
661
662Returns true if the file is of type C<hardlink>
663
664=item $file->is_symlink
665
666Returns true if the file is of type C<symlink>
667
668=item $file->is_chardev
669
670Returns true if the file is of type C<chardev>
671
672=item $file->is_blockdev
673
674Returns true if the file is of type C<blockdev>
675
676=item $file->is_fifo
677
678Returns true if the file is of type C<fifo>
679
680=item $file->is_socket
681
682Returns true if the file is of type C<socket>
683
684=item $file->is_longlink
685
686Returns true if the file is of type C<LongLink>.
687Should not happen after a successful C<read>.
688
689=item $file->is_label
690
691Returns true if the file is of type C<Label>.
692Should not happen after a successful C<read>.
693
694=item $file->is_unknown
695
696Returns true if the file type is C<unknown>
697
698=back
699
700=cut
701
702#stupid perl5.5.3 needs to warn if it's not numeric
703sub is_file     { local $^W;    FILE      == $_[0]->type }
704sub is_dir      { local $^W;    DIR       == $_[0]->type }
705sub is_hardlink { local $^W;    HARDLINK  == $_[0]->type }
706sub is_symlink  { local $^W;    SYMLINK   == $_[0]->type }
707sub is_chardev  { local $^W;    CHARDEV   == $_[0]->type }
708sub is_blockdev { local $^W;    BLOCKDEV  == $_[0]->type }
709sub is_fifo     { local $^W;    FIFO      == $_[0]->type }
710sub is_socket   { local $^W;    SOCKET    == $_[0]->type }
711sub is_unknown  { local $^W;    UNKNOWN   == $_[0]->type }
712sub is_longlink { local $^W;    LONGLINK  eq $_[0]->type }
713sub is_label    { local $^W;    LABEL     eq $_[0]->type }
714
7151;
716