1package Archive::Tar::File;
2use strict;
3
4use Carp                ();
5use IO::File;
6use File::Spec::Unix    ();
7use File::Spec          ();
8use File::Basename      ();
9
10use Archive::Tar::Constant;
11
12use vars qw[@ISA $VERSION];
13#@ISA        = qw[Archive::Tar];
14$VERSION    = '2.38';
15
16### set value to 1 to oct() it during the unpack ###
17
18my $tmpl = [
19        name        => 0,   # string					A100
20        mode        => 1,   # octal					A8
21        uid         => 1,   # octal					A8
22        gid         => 1,   # octal					A8
23        size        => 0,   # octal	# cdrake - not *always* octal..	A12
24        mtime       => 1,   # octal					A12
25        chksum      => 1,   # octal					A8
26        type        => 0,   # character					A1
27        linkname    => 0,   # string					A100
28        magic       => 0,   # string					A6
29        version     => 0,   # 2 bytes					A2
30        uname       => 0,   # string					A32
31        gname       => 0,   # string					A32
32        devmajor    => 1,   # octal					A8
33        devminor    => 1,   # octal					A8
34        prefix      => 0,	#					A155 x 12
35
36### end UNPACK items ###
37        raw         => 0,   # the raw data chunk
38        data        => 0,   # the data associated with the file --
39                            # This  might be very memory intensive
40];
41
42### install get/set accessors for this object.
43for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) {
44    my $key = $tmpl->[$i];
45    no strict 'refs';
46    *{__PACKAGE__."::$key"} = sub {
47        my $self = shift;
48        $self->{$key} = $_[0] if @_;
49
50        ### just in case the key is not there or undef or something ###
51        {   local $^W = 0;
52            return $self->{$key};
53        }
54    }
55}
56
57=head1 NAME
58
59Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar
60
61=head1 SYNOPSIS
62
63    my @items = $tar->get_files;
64
65    print $_->name, ' ', $_->size, "\n" for @items;
66
67    print $object->get_content;
68    $object->replace_content('new content');
69
70    $object->rename( 'new/full/path/to/file.c' );
71
72=head1 DESCRIPTION
73
74Archive::Tar::Files provides a neat little object layer for in-memory
75extracted files. It's mostly used internally in Archive::Tar to tidy
76up the code, but there's no reason users shouldn't use this API as
77well.
78
79=head2 Accessors
80
81A lot of the methods in this package are accessors to the various
82fields in the tar header:
83
84=over 4
85
86=item name
87
88The file's name
89
90=item mode
91
92The file's mode
93
94=item uid
95
96The user id owning the file
97
98=item gid
99
100The group id owning the file
101
102=item size
103
104File size in bytes
105
106=item mtime
107
108Modification time. Adjusted to mac-time on MacOS if required
109
110=item chksum
111
112Checksum field for the tar header
113
114=item type
115
116File type -- numeric, but comparable to exported constants -- see
117Archive::Tar's documentation
118
119=item linkname
120
121If the file is a symlink, the file it's pointing to
122
123=item magic
124
125Tar magic string -- not useful for most users
126
127=item version
128
129Tar version string -- not useful for most users
130
131=item uname
132
133The user name that owns the file
134
135=item gname
136
137The group name that owns the file
138
139=item devmajor
140
141Device major number in case of a special file
142
143=item devminor
144
145Device minor number in case of a special file
146
147=item prefix
148
149Any directory to prefix to the extraction path, if any
150
151=item raw
152
153Raw tar header -- not useful for most users
154
155=back
156
157=head1 Methods
158
159=head2 Archive::Tar::File->new( file => $path )
160
161Returns a new Archive::Tar::File object from an existing file.
162
163Returns undef on failure.
164
165=head2 Archive::Tar::File->new( data => $path, $data, $opt )
166
167Returns a new Archive::Tar::File object from data.
168
169C<$path> defines the file name (which need not exist), C<$data> the
170file contents, and C<$opt> is a reference to a hash of attributes
171which may be used to override the default attributes (fields in the
172tar header), which are described above in the Accessors section.
173
174Returns undef on failure.
175
176=head2 Archive::Tar::File->new( chunk => $chunk )
177
178Returns a new Archive::Tar::File object from a raw 512-byte tar
179archive chunk.
180
181Returns undef on failure.
182
183=cut
184
185sub new {
186    my $class   = shift;
187    my $what    = shift;
188
189    my $obj =   ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) :
190                ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) :
191                ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) :
192                undef;
193
194    return $obj;
195}
196
197### copies the data, creates a clone ###
198sub clone {
199    my $self = shift;
200    return bless { %$self }, ref $self;
201}
202
203sub _new_from_chunk {
204    my $class = shift;
205    my $chunk = shift or return;    # 512 bytes of tar header
206    my %hash  = @_;
207
208    ### filter any arguments on defined-ness of values.
209    ### this allows overriding from what the tar-header is saying
210    ### about this tar-entry. Particularly useful for @LongLink files
211    my %args  = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash;
212
213    ### makes it start at 0 actually... :) ###
214    my $i = -1;
215    my %entry = map {
216	my ($s,$v)=($tmpl->[++$i],$tmpl->[++$i]);	# cdrake
217	($_)=($_=~/^([^\0]*)/) unless($s eq 'size');	# cdrake
218	$s=> $v ? oct $_ : $_				# cdrake
219	# $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_	# removed by cdrake - mucks up binary sizes >8gb
220    } unpack( UNPACK, $chunk );				# cdrake
221    # } map { /^([^\0]*)/ } unpack( UNPACK, $chunk );	# old - replaced now by cdrake
222
223
224    if(substr($entry{'size'}, 0, 1) eq "\x80") {	# binary size extension for files >8gigs (> octal 77777777777777)	# cdrake
225      my @sz=unpack("aCSNN",$entry{'size'}); $entry{'size'}=$sz[4]+(2**32)*$sz[3]+$sz[2]*(2**64);	# Use the low 80 bits (should use the upper 15 as well, but as at year 2011, that seems unlikely to ever be needed - the numbers are just too big...) # cdrake
226    } else {	# cdrake
227      ($entry{'size'})=($entry{'size'}=~/^([^\0]*)/); $entry{'size'}=oct $entry{'size'};	# cdrake
228    }	# cdrake
229
230
231    my $obj = bless { %entry, %args }, $class;
232
233	### magic is a filetype string.. it should have something like 'ustar' or
234	### something similar... if the chunk is garbage, skip it
235	return unless $obj->magic !~ /\W/;
236
237    ### store the original chunk ###
238    $obj->raw( $chunk );
239
240    $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) );
241    $obj->type(DIR)  if ( ($obj->is_file) && ($obj->name =~ m|/$|) );
242
243
244    return $obj;
245
246}
247
248sub _new_from_file {
249    my $class       = shift;
250    my $path        = shift;
251
252    ### path has to at least exist
253    return unless defined $path;
254
255    my $type        = __PACKAGE__->_filetype($path);
256    my $data        = '';
257
258    READ: {
259        unless ($type == DIR ) {
260            my $fh = IO::File->new;
261
262            unless( $fh->open($path) ) {
263                ### dangling symlinks are fine, stop reading but continue
264                ### creating the object
265                last READ if $type == SYMLINK;
266
267                ### otherwise, return from this function --
268                ### anything that's *not* a symlink should be
269                ### resolvable
270                return;
271            }
272
273            ### binmode needed to read files properly on win32 ###
274            binmode $fh;
275            $data = do { local $/; <$fh> };
276            close $fh;
277        }
278    }
279
280    my @items       = qw[mode uid gid size mtime];
281    my %hash        = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9];
282
283    if (ON_VMS) {
284        ### VMS has two UID modes, traditional and POSIX.  Normally POSIX is
285        ### not used.  We currently do not have an easy way to see if we are in
286        ### POSIX mode.  In traditional mode, the UID is actually the VMS UIC.
287        ### The VMS UIC has the upper 16 bits is the GID, which in many cases
288        ### the VMS UIC will be larger than 209715, the largest that TAR can
289        ### handle.  So for now, assume it is traditional if the UID is larger
290        ### than 0x10000.
291
292        if ($hash{uid} > 0x10000) {
293            $hash{uid} = $hash{uid} & 0xFFFF;
294        }
295
296        ### The file length from stat() is the physical length of the file
297        ### However the amount of data read in may be more for some file types.
298        ### Fixed length files are read past the logical EOF to end of the block
299        ### containing.  Other file types get expanded on read because record
300        ### delimiters are added.
301
302        my $data_len = length $data;
303        $hash{size} = $data_len if $hash{size} < $data_len;
304
305    }
306    ### you *must* set size == 0 on symlinks, or the next entry will be
307    ### though of as the contents of the symlink, which is wrong.
308    ### this fixes bug #7937
309    $hash{size}     = 0 if ($type == DIR or $type == SYMLINK);
310    $hash{mtime}    -= TIME_OFFSET;
311
312    ### strip the high bits off the mode, which we don't need to store
313    $hash{mode}     = STRIP_MODE->( $hash{mode} );
314
315
316    ### probably requires some file path munging here ... ###
317    ### name and prefix are set later
318    my $obj = {
319        %hash,
320        name        => '',
321        chksum      => CHECK_SUM,
322        type        => $type,
323        linkname    => ($type == SYMLINK and CAN_READLINK)
324                            ? readlink $path
325                            : '',
326        magic       => MAGIC,
327        version     => TAR_VERSION,
328        uname       => UNAME->( $hash{uid} ),
329        gname       => GNAME->( $hash{gid} ),
330        devmajor    => 0,   # not handled
331        devminor    => 0,   # not handled
332        prefix      => '',
333        data        => $data,
334    };
335
336    bless $obj, $class;
337
338    ### fix up the prefix and file from the path
339    my($prefix,$file) = $obj->_prefix_and_file( $path );
340    $obj->prefix( $prefix );
341    $obj->name( $file );
342
343    return $obj;
344}
345
346sub _new_from_data {
347    my $class   = shift;
348    my $path    = shift;    return unless defined $path;
349    my $data    = shift;    return unless defined $data;
350    my $opt     = shift;
351
352    my $obj = {
353        data        => $data,
354        name        => '',
355        mode        => MODE,
356        uid         => UID,
357        gid         => GID,
358        size        => length $data,
359        mtime       => time - TIME_OFFSET,
360        chksum      => CHECK_SUM,
361        type        => FILE,
362        linkname    => '',
363        magic       => MAGIC,
364        version     => TAR_VERSION,
365        uname       => UNAME->( UID ),
366        gname       => GNAME->( GID ),
367        devminor    => 0,
368        devmajor    => 0,
369        prefix      => '',
370    };
371
372    ### overwrite with user options, if provided ###
373    if( $opt and ref $opt eq 'HASH' ) {
374        for my $key ( keys %$opt ) {
375
376            ### don't write bogus options ###
377            next unless exists $obj->{$key};
378            $obj->{$key} = $opt->{$key};
379        }
380    }
381
382    bless $obj, $class;
383
384    ### fix up the prefix and file from the path
385    my($prefix,$file) = $obj->_prefix_and_file( $path );
386    $obj->prefix( $prefix );
387    $obj->name( $file );
388
389    return $obj;
390}
391
392sub _prefix_and_file {
393    my $self = shift;
394    my $path = shift;
395
396    my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir );
397    my @dirs = File::Spec->splitdir( File::Spec->canonpath($dirs) );
398
399    ### if it's a directory, then $file might be empty
400    $file = pop @dirs if $self->is_dir and not length $file;
401
402    ### splitting ../ gives you the relative path in native syntax
403    ### Remove the root (000000) directory
404    ### The volume from splitpath will also be in native syntax
405    if (ON_VMS) {
406        map { $_ = '..' if $_  eq '-'; $_ = '' if $_ eq '000000' } @dirs;
407        if (length($vol)) {
408            $vol = VMS::Filespec::unixify($vol);
409            unshift @dirs, $vol;
410        }
411    }
412
413    my $prefix = File::Spec::Unix->catdir(@dirs);
414    return( $prefix, $file );
415}
416
417sub _filetype {
418    my $self = shift;
419    my $file = shift;
420
421    return unless defined $file;
422
423    return SYMLINK  if (-l $file);	# Symlink
424
425    return FILE     if (-f _);		# Plain file
426
427    return DIR      if (-d _);		# Directory
428
429    return FIFO     if (-p _);		# Named pipe
430
431    return SOCKET   if (-S _);		# Socket
432
433    return BLOCKDEV if (-b _);		# Block special
434
435    return CHARDEV  if (-c _);		# Character special
436
437    ### shouldn't happen, this is when making archives, not reading ###
438    return LONGLINK if ( $file eq LONGLINK_NAME );
439
440    return UNKNOWN;		            # Something else (like what?)
441
442}
443
444### this method 'downgrades' a file to plain file -- this is used for
445### symlinks when FOLLOW_SYMLINKS is true.
446sub _downgrade_to_plainfile {
447    my $entry = shift;
448    $entry->type( FILE );
449    $entry->mode( MODE );
450    $entry->linkname('');
451
452    return 1;
453}
454
455=head2 $bool = $file->extract( [ $alternative_name ] )
456
457Extract this object, optionally to an alternative name.
458
459See C<< Archive::Tar->extract_file >> for details.
460
461Returns true on success and false on failure.
462
463=cut
464
465sub extract {
466    my $self = shift;
467
468    local $Carp::CarpLevel += 1;
469
470    ### avoid circular use, so only require;
471    require Archive::Tar;
472    return Archive::Tar->_extract_file( $self, @_ );
473}
474
475=head2 $path = $file->full_path
476
477Returns the full path from the tar header; this is basically a
478concatenation of the C<prefix> and C<name> fields.
479
480=cut
481
482sub full_path {
483    my $self = shift;
484
485    ### if prefix field is empty
486    return $self->name unless defined $self->prefix and length $self->prefix;
487
488    ### or otherwise, catfile'd
489    return File::Spec::Unix->catfile( $self->prefix, $self->name );
490}
491
492
493=head2 $bool = $file->validate
494
495Done by Archive::Tar internally when reading the tar file:
496validate the header against the checksum to ensure integer tar file.
497
498Returns true on success, false on failure
499
500=cut
501
502sub validate {
503    my $self = shift;
504
505    my $raw = $self->raw;
506
507    ### don't know why this one is different from the one we /write/ ###
508    substr ($raw, 148, 8) = "        ";
509
510    ### bug #43513: [PATCH] Accept wrong checksums from SunOS and HP-UX tar
511    ### like GNU tar does. See here for details:
512    ### http://www.gnu.org/software/tar/manual/tar.html#SEC139
513    ### so we do both a signed AND unsigned validate. if one succeeds, that's
514    ### good enough
515	return (   (unpack ("%16C*", $raw) == $self->chksum)
516	        or (unpack ("%16c*", $raw) == $self->chksum)) ? 1 : 0;
517}
518
519=head2 $bool = $file->has_content
520
521Returns a boolean to indicate whether the current object has content.
522Some special files like directories and so on never will have any
523content. This method is mainly to make sure you don't get warnings
524for using uninitialized values when looking at an object's content.
525
526=cut
527
528sub has_content {
529    my $self = shift;
530    return defined $self->data() && length $self->data() ? 1 : 0;
531}
532
533=head2 $content = $file->get_content
534
535Returns the current content for the in-memory file
536
537=cut
538
539sub get_content {
540    my $self = shift;
541    $self->data( );
542}
543
544=head2 $cref = $file->get_content_by_ref
545
546Returns the current content for the in-memory file as a scalar
547reference. Normal users won't need this, but it will save memory if
548you are dealing with very large data files in your tar archive, since
549it will pass the contents by reference, rather than make a copy of it
550first.
551
552=cut
553
554sub get_content_by_ref {
555    my $self = shift;
556
557    return \$self->{data};
558}
559
560=head2 $bool = $file->replace_content( $content )
561
562Replace the current content of the file with the new content. This
563only affects the in-memory archive, not the on-disk version until
564you write it.
565
566Returns true on success, false on failure.
567
568=cut
569
570sub replace_content {
571    my $self = shift;
572    my $data = shift || '';
573
574    $self->data( $data );
575    $self->size( length $data );
576    return 1;
577}
578
579=head2 $bool = $file->rename( $new_name )
580
581Rename the current file to $new_name.
582
583Note that you must specify a Unix path for $new_name, since per tar
584standard, all files in the archive must be Unix paths.
585
586Returns true on success and false on failure.
587
588=cut
589
590sub rename {
591    my $self = shift;
592    my $path = shift;
593
594    return unless defined $path;
595
596    my ($prefix,$file) = $self->_prefix_and_file( $path );
597
598    $self->name( $file );
599    $self->prefix( $prefix );
600
601	return 1;
602}
603
604=head2 $bool = $file->chmod $mode)
605
606Change mode of $file to $mode. The mode can be a string or a number
607which is interpreted as octal whether or not a leading 0 is given.
608
609Returns true on success and false on failure.
610
611=cut
612
613sub chmod {
614    my $self  = shift;
615    my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/;
616    $self->{mode} = oct($mode);
617    return 1;
618}
619
620=head2 $bool = $file->chown( $user [, $group])
621
622Change owner of $file to $user. If a $group is given that is changed
623as well. You can also pass a single parameter with a colon separating the
624use and group as in 'root:wheel'.
625
626Returns true on success and false on failure.
627
628=cut
629
630sub chown {
631    my $self = shift;
632    my $uname = shift;
633    return unless defined $uname;
634    my $gname;
635    if (-1 != index($uname, ':')) {
636	($uname, $gname) = split(/:/, $uname);
637    } else {
638	$gname = shift if @_ > 0;
639    }
640
641    $self->uname( $uname );
642    $self->gname( $gname ) if $gname;
643	return 1;
644}
645
646=head1 Convenience methods
647
648To quickly check the type of a C<Archive::Tar::File> object, you can
649use the following methods:
650
651=over 4
652
653=item $file->is_file
654
655Returns true if the file is of type C<file>
656
657=item $file->is_dir
658
659Returns true if the file is of type C<dir>
660
661=item $file->is_hardlink
662
663Returns true if the file is of type C<hardlink>
664
665=item $file->is_symlink
666
667Returns true if the file is of type C<symlink>
668
669=item $file->is_chardev
670
671Returns true if the file is of type C<chardev>
672
673=item $file->is_blockdev
674
675Returns true if the file is of type C<blockdev>
676
677=item $file->is_fifo
678
679Returns true if the file is of type C<fifo>
680
681=item $file->is_socket
682
683Returns true if the file is of type C<socket>
684
685=item $file->is_longlink
686
687Returns true if the file is of type C<LongLink>.
688Should not happen after a successful C<read>.
689
690=item $file->is_label
691
692Returns true if the file is of type C<Label>.
693Should not happen after a successful C<read>.
694
695=item $file->is_unknown
696
697Returns true if the file type is C<unknown>
698
699=back
700
701=cut
702
703#stupid perl5.5.3 needs to warn if it's not numeric
704sub is_file     { local $^W;    FILE      == $_[0]->type }
705sub is_dir      { local $^W;    DIR       == $_[0]->type }
706sub is_hardlink { local $^W;    HARDLINK  == $_[0]->type }
707sub is_symlink  { local $^W;    SYMLINK   == $_[0]->type }
708sub is_chardev  { local $^W;    CHARDEV   == $_[0]->type }
709sub is_blockdev { local $^W;    BLOCKDEV  == $_[0]->type }
710sub is_fifo     { local $^W;    FIFO      == $_[0]->type }
711sub is_socket   { local $^W;    SOCKET    == $_[0]->type }
712sub is_unknown  { local $^W;    UNKNOWN   == $_[0]->type }
713sub is_longlink { local $^W;    LONGLINK  eq $_[0]->type }
714sub is_label    { local $^W;    LABEL     eq $_[0]->type }
715
7161;
717