1package Archive::Tar::File;
2use strict;
3
4use Carp                ();
5use IO::File;
6use File::Spec::Unix    ();
7use File::Spec          ();
8use File::Basename      ();
9
10### avoid circular use, so only require;
11require Archive::Tar;
12use Archive::Tar::Constant;
13
14use vars qw[@ISA $VERSION];
15#@ISA        = qw[Archive::Tar];
16$VERSION    = '2.32';
17
18### set value to 1 to oct() it during the unpack ###
19
20my $tmpl = [
21        name        => 0,   # string					A100
22        mode        => 1,   # octal					A8
23        uid         => 1,   # octal					A8
24        gid         => 1,   # octal					A8
25        size        => 0,   # octal	# cdrake - not *always* octal..	A12
26        mtime       => 1,   # octal					A12
27        chksum      => 1,   # octal					A8
28        type        => 0,   # character					A1
29        linkname    => 0,   # string					A100
30        magic       => 0,   # string					A6
31        version     => 0,   # 2 bytes					A2
32        uname       => 0,   # string					A32
33        gname       => 0,   # string					A32
34        devmajor    => 1,   # octal					A8
35        devminor    => 1,   # octal					A8
36        prefix      => 0,	#					A155 x 12
37
38### end UNPACK items ###
39        raw         => 0,   # the raw data chunk
40        data        => 0,   # the data associated with the file --
41                            # This  might be very memory intensive
42];
43
44### install get/set accessors for this object.
45for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) {
46    my $key = $tmpl->[$i];
47    no strict 'refs';
48    *{__PACKAGE__."::$key"} = sub {
49        my $self = shift;
50        $self->{$key} = $_[0] if @_;
51
52        ### just in case the key is not there or undef or something ###
53        {   local $^W = 0;
54            return $self->{$key};
55        }
56    }
57}
58
59=head1 NAME
60
61Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar
62
63=head1 SYNOPSIS
64
65    my @items = $tar->get_files;
66
67    print $_->name, ' ', $_->size, "\n" for @items;
68
69    print $object->get_content;
70    $object->replace_content('new content');
71
72    $object->rename( 'new/full/path/to/file.c' );
73
74=head1 DESCRIPTION
75
76Archive::Tar::Files provides a neat little object layer for in-memory
77extracted files. It's mostly used internally in Archive::Tar to tidy
78up the code, but there's no reason users shouldn't use this API as
79well.
80
81=head2 Accessors
82
83A lot of the methods in this package are accessors to the various
84fields in the tar header:
85
86=over 4
87
88=item name
89
90The file's name
91
92=item mode
93
94The file's mode
95
96=item uid
97
98The user id owning the file
99
100=item gid
101
102The group id owning the file
103
104=item size
105
106File size in bytes
107
108=item mtime
109
110Modification time. Adjusted to mac-time on MacOS if required
111
112=item chksum
113
114Checksum field for the tar header
115
116=item type
117
118File type -- numeric, but comparable to exported constants -- see
119Archive::Tar's documentation
120
121=item linkname
122
123If the file is a symlink, the file it's pointing to
124
125=item magic
126
127Tar magic string -- not useful for most users
128
129=item version
130
131Tar version string -- not useful for most users
132
133=item uname
134
135The user name that owns the file
136
137=item gname
138
139The group name that owns the file
140
141=item devmajor
142
143Device major number in case of a special file
144
145=item devminor
146
147Device minor number in case of a special file
148
149=item prefix
150
151Any directory to prefix to the extraction path, if any
152
153=item raw
154
155Raw tar header -- not useful for most users
156
157=back
158
159=head1 Methods
160
161=head2 Archive::Tar::File->new( file => $path )
162
163Returns a new Archive::Tar::File object from an existing file.
164
165Returns undef on failure.
166
167=head2 Archive::Tar::File->new( data => $path, $data, $opt )
168
169Returns a new Archive::Tar::File object from data.
170
171C<$path> defines the file name (which need not exist), C<$data> the
172file contents, and C<$opt> is a reference to a hash of attributes
173which may be used to override the default attributes (fields in the
174tar header), which are described above in the Accessors section.
175
176Returns undef on failure.
177
178=head2 Archive::Tar::File->new( chunk => $chunk )
179
180Returns a new Archive::Tar::File object from a raw 512-byte tar
181archive chunk.
182
183Returns undef on failure.
184
185=cut
186
187sub new {
188    my $class   = shift;
189    my $what    = shift;
190
191    my $obj =   ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) :
192                ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) :
193                ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) :
194                undef;
195
196    return $obj;
197}
198
199### copies the data, creates a clone ###
200sub clone {
201    my $self = shift;
202    return bless { %$self }, ref $self;
203}
204
205sub _new_from_chunk {
206    my $class = shift;
207    my $chunk = shift or return;    # 512 bytes of tar header
208    my %hash  = @_;
209
210    ### filter any arguments on defined-ness of values.
211    ### this allows overriding from what the tar-header is saying
212    ### about this tar-entry. Particularly useful for @LongLink files
213    my %args  = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash;
214
215    ### makes it start at 0 actually... :) ###
216    my $i = -1;
217    my %entry = map {
218	my ($s,$v)=($tmpl->[++$i],$tmpl->[++$i]);	# cdrake
219	($_)=($_=~/^([^\0]*)/) unless($s eq 'size');	# cdrake
220	$s=> $v ? oct $_ : $_				# cdrake
221	# $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_	# removed by cdrake - mucks up binary sizes >8gb
222    } unpack( UNPACK, $chunk );				# cdrake
223    # } map { /^([^\0]*)/ } unpack( UNPACK, $chunk );	# old - replaced now by cdrake
224
225
226    if(substr($entry{'size'}, 0, 1) eq "\x80") {	# binary size extension for files >8gigs (> octal 77777777777777)	# cdrake
227      my @sz=unpack("aCSNN",$entry{'size'}); $entry{'size'}=$sz[4]+(2**32)*$sz[3]+$sz[2]*(2**64);	# Use the low 80 bits (should use the upper 15 as well, but as at year 2011, that seems unlikely to ever be needed - the numbers are just too big...) # cdrake
228    } else {	# cdrake
229      ($entry{'size'})=($entry{'size'}=~/^([^\0]*)/); $entry{'size'}=oct $entry{'size'};	# cdrake
230    }	# cdrake
231
232
233    my $obj = bless { %entry, %args }, $class;
234
235	### magic is a filetype string.. it should have something like 'ustar' or
236	### something similar... if the chunk is garbage, skip it
237	return unless $obj->magic !~ /\W/;
238
239    ### store the original chunk ###
240    $obj->raw( $chunk );
241
242    $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) );
243    $obj->type(DIR)  if ( ($obj->is_file) && ($obj->name =~ m|/$|) );
244
245
246    return $obj;
247
248}
249
250sub _new_from_file {
251    my $class       = shift;
252    my $path        = shift;
253
254    ### path has to at least exist
255    return unless defined $path;
256
257    my $type        = __PACKAGE__->_filetype($path);
258    my $data        = '';
259
260    READ: {
261        unless ($type == DIR ) {
262            my $fh = IO::File->new;
263
264            unless( $fh->open($path) ) {
265                ### dangling symlinks are fine, stop reading but continue
266                ### creating the object
267                last READ if $type == SYMLINK;
268
269                ### otherwise, return from this function --
270                ### anything that's *not* a symlink should be
271                ### resolvable
272                return;
273            }
274
275            ### binmode needed to read files properly on win32 ###
276            binmode $fh;
277            $data = do { local $/; <$fh> };
278            close $fh;
279        }
280    }
281
282    my @items       = qw[mode uid gid size mtime];
283    my %hash        = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9];
284
285    if (ON_VMS) {
286        ### VMS has two UID modes, traditional and POSIX.  Normally POSIX is
287        ### not used.  We currently do not have an easy way to see if we are in
288        ### POSIX mode.  In traditional mode, the UID is actually the VMS UIC.
289        ### The VMS UIC has the upper 16 bits is the GID, which in many cases
290        ### the VMS UIC will be larger than 209715, the largest that TAR can
291        ### handle.  So for now, assume it is traditional if the UID is larger
292        ### than 0x10000.
293
294        if ($hash{uid} > 0x10000) {
295            $hash{uid} = $hash{uid} & 0xFFFF;
296        }
297
298        ### The file length from stat() is the physical length of the file
299        ### However the amount of data read in may be more for some file types.
300        ### Fixed length files are read past the logical EOF to end of the block
301        ### containing.  Other file types get expanded on read because record
302        ### delimiters are added.
303
304        my $data_len = length $data;
305        $hash{size} = $data_len if $hash{size} < $data_len;
306
307    }
308    ### you *must* set size == 0 on symlinks, or the next entry will be
309    ### though of as the contents of the symlink, which is wrong.
310    ### this fixes bug #7937
311    $hash{size}     = 0 if ($type == DIR or $type == SYMLINK);
312    $hash{mtime}    -= TIME_OFFSET;
313
314    ### strip the high bits off the mode, which we don't need to store
315    $hash{mode}     = STRIP_MODE->( $hash{mode} );
316
317
318    ### probably requires some file path munging here ... ###
319    ### name and prefix are set later
320    my $obj = {
321        %hash,
322        name        => '',
323        chksum      => CHECK_SUM,
324        type        => $type,
325        linkname    => ($type == SYMLINK and CAN_READLINK)
326                            ? readlink $path
327                            : '',
328        magic       => MAGIC,
329        version     => TAR_VERSION,
330        uname       => UNAME->( $hash{uid} ),
331        gname       => GNAME->( $hash{gid} ),
332        devmajor    => 0,   # not handled
333        devminor    => 0,   # not handled
334        prefix      => '',
335        data        => $data,
336    };
337
338    bless $obj, $class;
339
340    ### fix up the prefix and file from the path
341    my($prefix,$file) = $obj->_prefix_and_file( $path );
342    $obj->prefix( $prefix );
343    $obj->name( $file );
344
345    return $obj;
346}
347
348sub _new_from_data {
349    my $class   = shift;
350    my $path    = shift;    return unless defined $path;
351    my $data    = shift;    return unless defined $data;
352    my $opt     = shift;
353
354    my $obj = {
355        data        => $data,
356        name        => '',
357        mode        => MODE,
358        uid         => UID,
359        gid         => GID,
360        size        => length $data,
361        mtime       => time - TIME_OFFSET,
362        chksum      => CHECK_SUM,
363        type        => FILE,
364        linkname    => '',
365        magic       => MAGIC,
366        version     => TAR_VERSION,
367        uname       => UNAME->( UID ),
368        gname       => GNAME->( GID ),
369        devminor    => 0,
370        devmajor    => 0,
371        prefix      => '',
372    };
373
374    ### overwrite with user options, if provided ###
375    if( $opt and ref $opt eq 'HASH' ) {
376        for my $key ( keys %$opt ) {
377
378            ### don't write bogus options ###
379            next unless exists $obj->{$key};
380            $obj->{$key} = $opt->{$key};
381        }
382    }
383
384    bless $obj, $class;
385
386    ### fix up the prefix and file from the path
387    my($prefix,$file) = $obj->_prefix_and_file( $path );
388    $obj->prefix( $prefix );
389    $obj->name( $file );
390
391    return $obj;
392}
393
394sub _prefix_and_file {
395    my $self = shift;
396    my $path = shift;
397
398    my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir );
399    my @dirs = File::Spec->splitdir( File::Spec->canonpath($dirs) );
400
401    ### if it's a directory, then $file might be empty
402    $file = pop @dirs if $self->is_dir and not length $file;
403
404    ### splitting ../ gives you the relative path in native syntax
405    ### Remove the root (000000) directory
406    ### The volume from splitpath will also be in native syntax
407    if (ON_VMS) {
408        map { $_ = '..' if $_  eq '-'; $_ = '' if $_ eq '000000' } @dirs;
409        if (length($vol)) {
410            $vol = VMS::Filespec::unixify($vol);
411            unshift @dirs, $vol;
412        }
413    }
414
415    my $prefix = File::Spec::Unix->catdir(@dirs);
416    return( $prefix, $file );
417}
418
419sub _filetype {
420    my $self = shift;
421    my $file = shift;
422
423    return unless defined $file;
424
425    return SYMLINK  if (-l $file);	# Symlink
426
427    return FILE     if (-f _);		# Plain file
428
429    return DIR      if (-d _);		# Directory
430
431    return FIFO     if (-p _);		# Named pipe
432
433    return SOCKET   if (-S _);		# Socket
434
435    return BLOCKDEV if (-b _);		# Block special
436
437    return CHARDEV  if (-c _);		# Character special
438
439    ### shouldn't happen, this is when making archives, not reading ###
440    return LONGLINK if ( $file eq LONGLINK_NAME );
441
442    return UNKNOWN;		            # Something else (like what?)
443
444}
445
446### this method 'downgrades' a file to plain file -- this is used for
447### symlinks when FOLLOW_SYMLINKS is true.
448sub _downgrade_to_plainfile {
449    my $entry = shift;
450    $entry->type( FILE );
451    $entry->mode( MODE );
452    $entry->linkname('');
453
454    return 1;
455}
456
457=head2 $bool = $file->extract( [ $alternative_name ] )
458
459Extract this object, optionally to an alternative name.
460
461See C<< Archive::Tar->extract_file >> for details.
462
463Returns true on success and false on failure.
464
465=cut
466
467sub extract {
468    my $self = shift;
469
470    local $Carp::CarpLevel += 1;
471
472    return Archive::Tar->_extract_file( $self, @_ );
473}
474
475=head2 $path = $file->full_path
476
477Returns the full path from the tar header; this is basically a
478concatenation of the C<prefix> and C<name> fields.
479
480=cut
481
482sub full_path {
483    my $self = shift;
484
485    ### if prefix field is empty
486    return $self->name unless defined $self->prefix and length $self->prefix;
487
488    ### or otherwise, catfile'd
489    return File::Spec::Unix->catfile( $self->prefix, $self->name );
490}
491
492
493=head2 $bool = $file->validate
494
495Done by Archive::Tar internally when reading the tar file:
496validate the header against the checksum to ensure integer tar file.
497
498Returns true on success, false on failure
499
500=cut
501
502sub validate {
503    my $self = shift;
504
505    my $raw = $self->raw;
506
507    ### don't know why this one is different from the one we /write/ ###
508    substr ($raw, 148, 8) = "        ";
509
510    ### bug #43513: [PATCH] Accept wrong checksums from SunOS and HP-UX tar
511    ### like GNU tar does. See here for details:
512    ### http://www.gnu.org/software/tar/manual/tar.html#SEC139
513    ### so we do both a signed AND unsigned validate. if one succeeds, that's
514    ### good enough
515	return (   (unpack ("%16C*", $raw) == $self->chksum)
516	        or (unpack ("%16c*", $raw) == $self->chksum)) ? 1 : 0;
517}
518
519=head2 $bool = $file->has_content
520
521Returns a boolean to indicate whether the current object has content.
522Some special files like directories and so on never will have any
523content. This method is mainly to make sure you don't get warnings
524for using uninitialized values when looking at an object's content.
525
526=cut
527
528sub has_content {
529    my $self = shift;
530    return defined $self->data() && length $self->data() ? 1 : 0;
531}
532
533=head2 $content = $file->get_content
534
535Returns the current content for the in-memory file
536
537=cut
538
539sub get_content {
540    my $self = shift;
541    $self->data( );
542}
543
544=head2 $cref = $file->get_content_by_ref
545
546Returns the current content for the in-memory file as a scalar
547reference. Normal users won't need this, but it will save memory if
548you are dealing with very large data files in your tar archive, since
549it will pass the contents by reference, rather than make a copy of it
550first.
551
552=cut
553
554sub get_content_by_ref {
555    my $self = shift;
556
557    return \$self->{data};
558}
559
560=head2 $bool = $file->replace_content( $content )
561
562Replace the current content of the file with the new content. This
563only affects the in-memory archive, not the on-disk version until
564you write it.
565
566Returns true on success, false on failure.
567
568=cut
569
570sub replace_content {
571    my $self = shift;
572    my $data = shift || '';
573
574    $self->data( $data );
575    $self->size( length $data );
576    return 1;
577}
578
579=head2 $bool = $file->rename( $new_name )
580
581Rename the current file to $new_name.
582
583Note that you must specify a Unix path for $new_name, since per tar
584standard, all files in the archive must be Unix paths.
585
586Returns true on success and false on failure.
587
588=cut
589
590sub rename {
591    my $self = shift;
592    my $path = shift;
593
594    return unless defined $path;
595
596    my ($prefix,$file) = $self->_prefix_and_file( $path );
597
598    $self->name( $file );
599    $self->prefix( $prefix );
600
601	return 1;
602}
603
604=head2 $bool = $file->chmod $mode)
605
606Change mode of $file to $mode. The mode can be a string or a number
607which is interpreted as octal whether or not a leading 0 is given.
608
609Returns true on success and false on failure.
610
611=cut
612
613sub chmod {
614    my $self  = shift;
615    my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/;
616    $self->{mode} = oct($mode);
617    return 1;
618}
619
620=head2 $bool = $file->chown( $user [, $group])
621
622Change owner of $file to $user. If a $group is given that is changed
623as well. You can also pass a single parameter with a colon separating the
624use and group as in 'root:wheel'.
625
626Returns true on success and false on failure.
627
628=cut
629
630sub chown {
631    my $self = shift;
632    my $uname = shift;
633    return unless defined $uname;
634    my $gname;
635    if (-1 != index($uname, ':')) {
636	($uname, $gname) = split(/:/, $uname);
637    } else {
638	$gname = shift if @_ > 0;
639    }
640
641    $self->uname( $uname );
642    $self->gname( $gname ) if $gname;
643	return 1;
644}
645
646=head1 Convenience methods
647
648To quickly check the type of a C<Archive::Tar::File> object, you can
649use the following methods:
650
651=over 4
652
653=item $file->is_file
654
655Returns true if the file is of type C<file>
656
657=item $file->is_dir
658
659Returns true if the file is of type C<dir>
660
661=item $file->is_hardlink
662
663Returns true if the file is of type C<hardlink>
664
665=item $file->is_symlink
666
667Returns true if the file is of type C<symlink>
668
669=item $file->is_chardev
670
671Returns true if the file is of type C<chardev>
672
673=item $file->is_blockdev
674
675Returns true if the file is of type C<blockdev>
676
677=item $file->is_fifo
678
679Returns true if the file is of type C<fifo>
680
681=item $file->is_socket
682
683Returns true if the file is of type C<socket>
684
685=item $file->is_longlink
686
687Returns true if the file is of type C<LongLink>.
688Should not happen after a successful C<read>.
689
690=item $file->is_label
691
692Returns true if the file is of type C<Label>.
693Should not happen after a successful C<read>.
694
695=item $file->is_unknown
696
697Returns true if the file type is C<unknown>
698
699=back
700
701=cut
702
703#stupid perl5.5.3 needs to warn if it's not numeric
704sub is_file     { local $^W;    FILE      == $_[0]->type }
705sub is_dir      { local $^W;    DIR       == $_[0]->type }
706sub is_hardlink { local $^W;    HARDLINK  == $_[0]->type }
707sub is_symlink  { local $^W;    SYMLINK   == $_[0]->type }
708sub is_chardev  { local $^W;    CHARDEV   == $_[0]->type }
709sub is_blockdev { local $^W;    BLOCKDEV  == $_[0]->type }
710sub is_fifo     { local $^W;    FIFO      == $_[0]->type }
711sub is_socket   { local $^W;    SOCKET    == $_[0]->type }
712sub is_unknown  { local $^W;    UNKNOWN   == $_[0]->type }
713sub is_longlink { local $^W;    LONGLINK  eq $_[0]->type }
714sub is_label    { local $^W;    LABEL     eq $_[0]->type }
715
7161;
717