1package Archive::Tar::File;
2use strict;
3
4use Carp                ();
5use IO::File;
6use File::Spec::Unix    ();
7use File::Spec          ();
8use File::Basename      ();
9
10use Archive::Tar::Constant;
11
12use vars qw[@ISA $VERSION];
13#@ISA        = qw[Archive::Tar];
14$VERSION    = '3.02_001';
15
16### set value to 1 to oct() it during the unpack ###
17
18my $tmpl = [
19        name        => 0,   # string					A100
20        mode        => 1,   # octal					A8
21        uid         => 1,   # octal					A8
22        gid         => 1,   # octal					A8
23        size        => 0,   # octal	# cdrake - not *always* octal..	A12
24        mtime       => 1,   # octal					A12
25        chksum      => 1,   # octal					A8
26        type        => 0,   # character					A1
27        linkname    => 0,   # string					A100
28        magic       => 0,   # string					A6
29        version     => 0,   # 2 bytes					A2
30        uname       => 0,   # string					A32
31        gname       => 0,   # string					A32
32        devmajor    => 1,   # octal					A8
33        devminor    => 1,   # octal					A8
34        prefix      => 0,	#					A155 x 12
35
36### end UNPACK items ###
37        raw         => 0,   # the raw data chunk
38        data        => 0,   # the data associated with the file --
39                            # This  might be very memory intensive
40];
41
42### install get/set accessors for this object.
43for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) {
44    my $key = $tmpl->[$i];
45    no strict 'refs';
46    *{__PACKAGE__."::$key"} = sub {
47        my $self = shift;
48        $self->{$key} = $_[0] if @_;
49
50        ### just in case the key is not there or undef or something ###
51        {   local $^W = 0;
52            return $self->{$key};
53        }
54    }
55}
56
57=head1 NAME
58
59Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar
60
61=head1 SYNOPSIS
62
63    my @items = $tar->get_files;
64
65    print $_->name, ' ', $_->size, "\n" for @items;
66
67    print $object->get_content;
68    $object->replace_content('new content');
69
70    $object->rename( 'new/full/path/to/file.c' );
71
72=head1 DESCRIPTION
73
74Archive::Tar::File provides a neat little object layer for in-memory
75extracted files. It's mostly used internally in Archive::Tar to tidy
76up the code, but there's no reason users shouldn't use this API as
77well.
78
79=head2 Accessors
80
81A lot of the methods in this package are accessors to the various
82fields in the tar header:
83
84=over 4
85
86=item name
87
88The file's name
89
90=item mode
91
92The file's mode
93
94=item uid
95
96The user id owning the file
97
98=item gid
99
100The group id owning the file
101
102=item size
103
104File size in bytes
105
106=item mtime
107
108Modification time. Adjusted to mac-time on MacOS if required
109
110=item chksum
111
112Checksum field for the tar header
113
114=item type
115
116File type -- numeric, but comparable to exported constants -- see
117Archive::Tar's documentation
118
119=item linkname
120
121If the file is a symlink, the file it's pointing to
122
123=item magic
124
125Tar magic string -- not useful for most users
126
127=item version
128
129Tar version string -- not useful for most users
130
131=item uname
132
133The user name that owns the file
134
135=item gname
136
137The group name that owns the file
138
139=item devmajor
140
141Device major number in case of a special file
142
143=item devminor
144
145Device minor number in case of a special file
146
147=item prefix
148
149Any directory to prefix to the extraction path, if any
150
151=item raw
152
153Raw tar header -- not useful for most users
154
155=back
156
157=head1 Methods
158
159=head2 Archive::Tar::File->new( file => $path )
160
161Returns a new Archive::Tar::File object from an existing file.
162
163Returns undef on failure.
164
165=head2 Archive::Tar::File->new( data => $path, $data, $opt )
166
167Returns a new Archive::Tar::File object from data.
168
169C<$path> defines the file name (which need not exist), C<$data> the
170file contents, and C<$opt> is a reference to a hash of attributes
171which may be used to override the default attributes (fields in the
172tar header), which are described above in the Accessors section.
173
174Returns undef on failure.
175
176=head2 Archive::Tar::File->new( chunk => $chunk )
177
178Returns a new Archive::Tar::File object from a raw 512-byte tar
179archive chunk.
180
181Returns undef on failure.
182
183=cut
184
185sub new {
186    my $class   = shift;
187    my $what    = shift;
188
189    my $obj =   ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) :
190                ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) :
191                ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) :
192                undef;
193
194    return $obj;
195}
196
197### copies the data, creates a clone ###
198sub clone {
199    my $self = shift;
200    return bless { %$self }, ref $self;
201}
202
203sub _new_from_chunk {
204    my $class = shift;
205    my $chunk = shift or return;    # 512 bytes of tar header
206    my %hash  = @_;
207
208    ### filter any arguments on defined-ness of values.
209    ### this allows overriding from what the tar-header is saying
210    ### about this tar-entry. Particularly useful for @LongLink files
211    my %args  = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash;
212
213    ### makes it start at 0 actually... :) ###
214    my $i = -1;
215    my %entry = map {
216	my ($s,$v)=($tmpl->[++$i],$tmpl->[++$i]);	# cdrake
217	($_)=($_=~/^([^\0]*)/) unless($s eq 'size');	# cdrake
218	$s=> $v ? oct $_ : $_				# cdrake
219	# $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_	# removed by cdrake - mucks up binary sizes >8gb
220    } unpack( UNPACK, $chunk );				# cdrake
221    # } map { /^([^\0]*)/ } unpack( UNPACK, $chunk );	# old - replaced now by cdrake
222
223
224    if(substr($entry{'size'}, 0, 1) eq "\x80") {	# binary size extension for files >8gigs (> octal 77777777777777)	# cdrake
225      my @sz=unpack("aCSNN",$entry{'size'}); $entry{'size'}=$sz[4]+(2**32)*$sz[3]+$sz[2]*(2**64);	# Use the low 80 bits (should use the upper 15 as well, but as at year 2011, that seems unlikely to ever be needed - the numbers are just too big...) # cdrake
226    } else {	# cdrake
227      ($entry{'size'})=($entry{'size'}=~/^([^\0]*)/); $entry{'size'}=oct $entry{'size'};	# cdrake
228    }	# cdrake
229
230
231    my $obj = bless { %entry, %args }, $class;
232
233	### magic is a filetype string.. it should have something like 'ustar' or
234	### something similar... if the chunk is garbage, skip it
235	return unless $obj->magic !~ /\W/;
236
237    ### store the original chunk ###
238    $obj->raw( $chunk );
239
240    $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) );
241    $obj->type(DIR)  if ( ($obj->is_file) && ($obj->name =~ m|/$|) );
242
243
244    return $obj;
245
246}
247
248sub _new_from_file {
249    my $class       = shift;
250    my $path        = shift;
251
252    ### path has to at least exist
253    return unless defined $path;
254
255    my $type        = __PACKAGE__->_filetype($path);
256    my $data        = '';
257
258    READ: {
259        unless ($type == DIR ) {
260            my $fh = IO::File->new;
261
262            unless( $fh->open($path) ) {
263                ### dangling symlinks are fine, stop reading but continue
264                ### creating the object
265                last READ if $type == SYMLINK;
266
267                ### otherwise, return from this function --
268                ### anything that's *not* a symlink should be
269                ### resolvable
270                return;
271            }
272
273            ### binmode needed to read files properly on win32 ###
274            binmode $fh;
275            $data = do { local $/; <$fh> };
276            close $fh;
277        }
278    }
279
280    my @items       = qw[mode uid gid size mtime];
281    my %hash        = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9];
282
283    if (ON_VMS) {
284        ### VMS has two UID modes, traditional and POSIX.  Normally POSIX is
285        ### not used.  We currently do not have an easy way to see if we are in
286        ### POSIX mode.  In traditional mode, the UID is actually the VMS UIC.
287        ### The VMS UIC has the upper 16 bits is the GID, which in many cases
288        ### the VMS UIC will be larger than 209715, the largest that TAR can
289        ### handle.  So for now, assume it is traditional if the UID is larger
290        ### than 0x10000.
291
292        if ($hash{uid} > 0x10000) {
293            $hash{uid} = $hash{uid} & 0xFFFF;
294        }
295
296        ### The file length from stat() is the physical length of the file
297        ### However the amount of data read in may be more for some file types.
298        ### Fixed length files are read past the logical EOF to end of the block
299        ### containing.  Other file types get expanded on read because record
300        ### delimiters are added.
301
302        my $data_len = length $data;
303        $hash{size} = $data_len if $hash{size} < $data_len;
304
305    }
306    ### you *must* set size == 0 on symlinks, or the next entry will be
307    ### though of as the contents of the symlink, which is wrong.
308    ### this fixes bug #7937
309    $hash{size}     = 0 if ($type == DIR or $type == SYMLINK);
310    $hash{mtime}    -= TIME_OFFSET;
311
312    ### strip the high bits off the mode, which we don't need to store
313    $hash{mode}     = STRIP_MODE->( $hash{mode} );
314
315
316    ### probably requires some file path munging here ... ###
317    ### name and prefix are set later
318    my $obj = {
319        %hash,
320        name        => '',
321        chksum      => CHECK_SUM,
322        type        => $type,
323        linkname    => ($type == SYMLINK and CAN_READLINK)
324                            ? readlink $path
325                            : '',
326        magic       => MAGIC,
327        version     => TAR_VERSION,
328        uname       => UNAME->( $hash{uid} ),
329        gname       => GNAME->( $hash{gid} ),
330        devmajor    => 0,   # not handled
331        devminor    => 0,   # not handled
332        prefix      => '',
333        data        => $data,
334    };
335
336    bless $obj, $class;
337
338    ### fix up the prefix and file from the path
339    my($prefix,$file) = $obj->_prefix_and_file( $path );
340    $obj->prefix( $prefix );
341    $obj->name( $file );
342
343    return $obj;
344}
345
346sub _new_from_data {
347    my $class   = shift;
348    my $path    = shift;    return unless defined $path;
349    my $data    = shift;    return unless defined $data;
350    my $opt     = shift;
351
352    my $obj = {
353        data        => $data,
354        name        => '',
355        mode        => MODE,
356        uid         => UID,
357        gid         => GID,
358        size        => length $data,
359        mtime       => time - TIME_OFFSET,
360        chksum      => CHECK_SUM,
361        type        => FILE,
362        linkname    => '',
363        magic       => MAGIC,
364        version     => TAR_VERSION,
365        uname       => UNAME->( UID ),
366        gname       => GNAME->( GID ),
367        devminor    => 0,
368        devmajor    => 0,
369        prefix      => '',
370    };
371
372    ### overwrite with user options, if provided ###
373    if( $opt and ref $opt eq 'HASH' ) {
374        for my $key ( keys %$opt ) {
375
376            ### don't write bogus options ###
377            next unless exists $obj->{$key};
378            $obj->{$key} = $opt->{$key};
379        }
380    }
381
382    bless $obj, $class;
383
384    ### fix up the prefix and file from the path
385    my($prefix,$file) = $obj->_prefix_and_file( $path );
386    $obj->prefix( $prefix );
387    $obj->name( $file );
388
389    return $obj;
390}
391
392sub _prefix_and_file {
393    my $self = shift;
394    my $path = shift;
395
396    my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir );
397    my @dirs = File::Spec->splitdir( File::Spec->canonpath($dirs) );
398
399    ### if it's a directory, then $file might be empty
400    $file = pop @dirs if $self->is_dir and not length $file;
401
402    ### splitting ../ gives you the relative path in native syntax
403    ### Remove the root (000000) directory
404    ### The volume from splitpath will also be in native syntax
405    if (ON_VMS) {
406        map { $_ = '..' if $_  eq '-'; $_ = '' if $_ eq '000000' } @dirs;
407        if (length($vol)) {
408            $vol = VMS::Filespec::unixify($vol);
409            unshift @dirs, $vol;
410        }
411    }
412
413    my $prefix = File::Spec::Unix->catdir(@dirs);
414    return( $prefix, $file );
415}
416
417sub _filetype {
418    my $self = shift;
419    my $file = shift;
420
421    return unless defined $file;
422
423    return SYMLINK  if (-l $file);	# Symlink
424
425    return FILE     if (-f _);		# Plain file
426
427    return DIR      if (-d _);		# Directory
428
429    return FIFO     if (-p _);		# Named pipe
430
431    return SOCKET   if (-S _);		# Socket
432
433    return BLOCKDEV if (-b _);		# Block special
434
435    return CHARDEV  if (-c _);		# Character special
436
437    ### shouldn't happen, this is when making archives, not reading ###
438    return LONGLINK if ( $file eq LONGLINK_NAME );
439
440    return UNKNOWN;		            # Something else (like what?)
441
442}
443
444### this method 'downgrades' a file to plain file -- this is used for
445### symlinks when FOLLOW_SYMLINKS is true.
446sub _downgrade_to_plainfile {
447    my $entry = shift;
448    $entry->type( FILE );
449    $entry->mode( MODE );
450    $entry->linkname('');
451
452    return 1;
453}
454
455=head2 $bool = $file->extract( [ $alternative_name ] )
456
457Extract this object, optionally to an alternative name.
458
459See C<< Archive::Tar->extract_file >> for details.
460
461Returns true on success and false on failure.
462
463=cut
464
465sub extract {
466    my $self = shift;
467
468    local $Carp::CarpLevel += 1;
469
470    ### avoid circular use, so only require;
471    require Archive::Tar;
472    return Archive::Tar->_extract_file( $self, @_ );
473}
474
475=head2 $path = $file->full_path
476
477Returns the full path from the tar header; this is basically a
478concatenation of the C<prefix> and C<name> fields.
479
480=cut
481
482sub full_path {
483    my $self = shift;
484
485    ### if prefix field is empty
486    return $self->name unless defined $self->prefix and length $self->prefix;
487
488    ### or otherwise, catfile'd
489    my $path = File::Spec::Unix->catfile( $self->prefix, $self->name );
490    $path .= "/" if $self->name =~ m{/$};   # Re-add trailing slash if necessary, as catfile() strips them off.
491    return $path;
492}
493
494
495=head2 $bool = $file->validate
496
497Done by Archive::Tar internally when reading the tar file:
498validate the header against the checksum to ensure integer tar file.
499
500Returns true on success, false on failure
501
502=cut
503
504sub validate {
505    my $self = shift;
506
507    my $raw = $self->raw;
508
509    ### don't know why this one is different from the one we /write/ ###
510    substr ($raw, 148, 8) = "        ";
511
512    ### bug #43513: [PATCH] Accept wrong checksums from SunOS and HP-UX tar
513    ### like GNU tar does. See here for details:
514    ### http://www.gnu.org/software/tar/manual/tar.html#SEC139
515    ### so we do both a signed AND unsigned validate. if one succeeds, that's
516    ### good enough
517	return (   (unpack ("%16C*", $raw) == $self->chksum)
518	        or (unpack ("%16c*", $raw) == $self->chksum)) ? 1 : 0;
519}
520
521=head2 $bool = $file->has_content
522
523Returns a boolean to indicate whether the current object has content.
524Some special files like directories and so on never will have any
525content. This method is mainly to make sure you don't get warnings
526for using uninitialized values when looking at an object's content.
527
528=cut
529
530sub has_content {
531    my $self = shift;
532    return defined $self->data() && length $self->data() ? 1 : 0;
533}
534
535=head2 $content = $file->get_content
536
537Returns the current content for the in-memory file
538
539=cut
540
541sub get_content {
542    my $self = shift;
543    $self->data( );
544}
545
546=head2 $cref = $file->get_content_by_ref
547
548Returns the current content for the in-memory file as a scalar
549reference. Normal users won't need this, but it will save memory if
550you are dealing with very large data files in your tar archive, since
551it will pass the contents by reference, rather than make a copy of it
552first.
553
554=cut
555
556sub get_content_by_ref {
557    my $self = shift;
558
559    return \$self->{data};
560}
561
562=head2 $bool = $file->replace_content( $content )
563
564Replace the current content of the file with the new content. This
565only affects the in-memory archive, not the on-disk version until
566you write it.
567
568Returns true on success, false on failure.
569
570=cut
571
572sub replace_content {
573    my $self = shift;
574    my $data = shift || '';
575
576    $self->data( $data );
577    $self->size( length $data );
578    return 1;
579}
580
581=head2 $bool = $file->rename( $new_name )
582
583Rename the current file to $new_name.
584
585Note that you must specify a Unix path for $new_name, since per tar
586standard, all files in the archive must be Unix paths.
587
588Returns true on success and false on failure.
589
590=cut
591
592sub rename {
593    my $self = shift;
594    my $path = shift;
595
596    return unless defined $path;
597
598    my ($prefix,$file) = $self->_prefix_and_file( $path );
599
600    $self->name( $file );
601    $self->prefix( $prefix );
602
603	return 1;
604}
605
606=head2 $bool = $file->chmod( $mode )
607
608Change mode of $file to $mode. The mode can be a string or a number
609which is interpreted as octal whether or not a leading 0 is given.
610
611Returns true on success and false on failure.
612
613=cut
614
615sub chmod {
616    my $self  = shift;
617    my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/;
618    $self->{mode} = oct($mode);
619    return 1;
620}
621
622=head2 $bool = $file->chown( $user [, $group])
623
624Change owner of $file to $user. If a $group is given that is changed
625as well. You can also pass a single parameter with a colon separating the
626use and group as in 'root:wheel'.
627
628Returns true on success and false on failure.
629
630=cut
631
632sub chown {
633    my $self = shift;
634    my $uname = shift;
635    return unless defined $uname;
636    my $gname;
637    if (-1 != index($uname, ':')) {
638	($uname, $gname) = split(/:/, $uname);
639    } else {
640	$gname = shift if @_ > 0;
641    }
642
643    $self->uname( $uname );
644    $self->gname( $gname ) if $gname;
645	return 1;
646}
647
648=head1 Convenience methods
649
650To quickly check the type of a C<Archive::Tar::File> object, you can
651use the following methods:
652
653=over 4
654
655=item $file->is_file
656
657Returns true if the file is of type C<file>
658
659=item $file->is_dir
660
661Returns true if the file is of type C<dir>
662
663=item $file->is_hardlink
664
665Returns true if the file is of type C<hardlink>
666
667=item $file->is_symlink
668
669Returns true if the file is of type C<symlink>
670
671=item $file->is_chardev
672
673Returns true if the file is of type C<chardev>
674
675=item $file->is_blockdev
676
677Returns true if the file is of type C<blockdev>
678
679=item $file->is_fifo
680
681Returns true if the file is of type C<fifo>
682
683=item $file->is_socket
684
685Returns true if the file is of type C<socket>
686
687=item $file->is_longlink
688
689Returns true if the file is of type C<LongLink>.
690Should not happen after a successful C<read>.
691
692=item $file->is_label
693
694Returns true if the file is of type C<Label>.
695Should not happen after a successful C<read>.
696
697=item $file->is_unknown
698
699Returns true if the file type is C<unknown>
700
701=back
702
703=cut
704
705#stupid perl5.5.3 needs to warn if it's not numeric
706sub is_file     { local $^W;    FILE      == $_[0]->type }
707sub is_dir      { local $^W;    DIR       == $_[0]->type }
708sub is_hardlink { local $^W;    HARDLINK  == $_[0]->type }
709sub is_symlink  { local $^W;    SYMLINK   == $_[0]->type }
710sub is_chardev  { local $^W;    CHARDEV   == $_[0]->type }
711sub is_blockdev { local $^W;    BLOCKDEV  == $_[0]->type }
712sub is_fifo     { local $^W;    FIFO      == $_[0]->type }
713sub is_socket   { local $^W;    SOCKET    == $_[0]->type }
714sub is_unknown  { local $^W;    UNKNOWN   == $_[0]->type }
715sub is_longlink { local $^W;    LONGLINK  eq $_[0]->type }
716sub is_label    { local $^W;    LABEL     eq $_[0]->type }
717
7181;
719