1b39c5158Smillert### the gnu tar specification:
2b39c5158Smillert### http://www.gnu.org/software/tar/manual/tar.html
3b39c5158Smillert###
4b39c5158Smillert### and the pax format spec, which tar derives from:
5b39c5158Smillert### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html
6b39c5158Smillert
7b39c5158Smillertpackage Archive::Tar;
8b39c5158Smillertrequire 5.005_03;
9b39c5158Smillert
10b39c5158Smillertuse Cwd;
11b39c5158Smillertuse IO::Zlib;
12b39c5158Smillertuse IO::File;
13b39c5158Smillertuse Carp                qw(carp croak);
14b39c5158Smillertuse File::Spec          ();
15b39c5158Smillertuse File::Spec::Unix    ();
16b39c5158Smillertuse File::Path          ();
17b39c5158Smillert
18b39c5158Smillertuse Archive::Tar::File;
19b39c5158Smillertuse Archive::Tar::Constant;
20b39c5158Smillert
21b39c5158Smillertrequire Exporter;
22b39c5158Smillert
23b39c5158Smillertuse strict;
24b39c5158Smillertuse vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD
25b39c5158Smillert            $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING $SAME_PERMISSIONS
266fb12b70Safresh1            $INSECURE_EXTRACT_MODE $ZERO_PAD_NUMBERS @ISA @EXPORT $RESOLVE_SYMLINK
27b39c5158Smillert         ];
28b39c5158Smillert
29b39c5158Smillert@ISA                    = qw[Exporter];
3056d68f1eSafresh1@EXPORT                 = qw[ COMPRESS_GZIP COMPRESS_BZIP COMPRESS_XZ ];
31b39c5158Smillert$DEBUG                  = 0;
32b39c5158Smillert$WARN                   = 1;
33b39c5158Smillert$FOLLOW_SYMLINK         = 0;
34*eac174f2Safresh1$VERSION                = "2.40";
35b39c5158Smillert$CHOWN                  = 1;
36b39c5158Smillert$CHMOD                  = 1;
37b39c5158Smillert$SAME_PERMISSIONS       = $> == 0 ? 1 : 0;
38b39c5158Smillert$DO_NOT_USE_PREFIX      = 0;
39b39c5158Smillert$INSECURE_EXTRACT_MODE  = 0;
40898184e3Ssthen$ZERO_PAD_NUMBERS       = 0;
416fb12b70Safresh1$RESOLVE_SYMLINK        = $ENV{'PERL5_AT_RESOLVE_SYMLINK'} || 'speed';
42b39c5158Smillert
43b39c5158SmillertBEGIN {
44b39c5158Smillert    use Config;
45b39c5158Smillert    $HAS_PERLIO = $Config::Config{useperlio};
46b39c5158Smillert
47b39c5158Smillert    ### try and load IO::String anyway, so you can dynamically
48b39c5158Smillert    ### switch between perlio and IO::String
49b39c5158Smillert    $HAS_IO_STRING = eval {
50b39c5158Smillert        require IO::String;
51*eac174f2Safresh1        IO::String->import;
52b39c5158Smillert        1;
53b39c5158Smillert    } || 0;
54b39c5158Smillert}
55b39c5158Smillert
56b39c5158Smillert=head1 NAME
57b39c5158Smillert
58b39c5158SmillertArchive::Tar - module for manipulations of tar archives
59b39c5158Smillert
60b39c5158Smillert=head1 SYNOPSIS
61b39c5158Smillert
62b39c5158Smillert    use Archive::Tar;
63b39c5158Smillert    my $tar = Archive::Tar->new;
64b39c5158Smillert
65b39c5158Smillert    $tar->read('origin.tgz');
66b39c5158Smillert    $tar->extract();
67b39c5158Smillert
68b39c5158Smillert    $tar->add_files('file/foo.pl', 'docs/README');
69b39c5158Smillert    $tar->add_data('file/baz.txt', 'This is the contents now');
70b39c5158Smillert
71b39c5158Smillert    $tar->rename('oldname', 'new/file/name');
72898184e3Ssthen    $tar->chown('/', 'root');
73898184e3Ssthen    $tar->chown('/', 'root:root');
74898184e3Ssthen    $tar->chmod('/tmp', '1777');
75b39c5158Smillert
76b39c5158Smillert    $tar->write('files.tar');                   # plain tar
77b39c5158Smillert    $tar->write('files.tgz', COMPRESS_GZIP);    # gzip compressed
78b39c5158Smillert    $tar->write('files.tbz', COMPRESS_BZIP);    # bzip2 compressed
7956d68f1eSafresh1    $tar->write('files.txz', COMPRESS_XZ);      # xz compressed
80b39c5158Smillert
81b39c5158Smillert=head1 DESCRIPTION
82b39c5158Smillert
83b39c5158SmillertArchive::Tar provides an object oriented mechanism for handling tar
84b39c5158Smillertfiles.  It provides class methods for quick and easy files handling
85b39c5158Smillertwhile also allowing for the creation of tar file objects for custom
86b39c5158Smillertmanipulation.  If you have the IO::Zlib module installed,
87b39c5158SmillertArchive::Tar will also support compressed or gzipped tar files.
88b39c5158Smillert
89b39c5158SmillertAn object of class Archive::Tar represents a .tar(.gz) archive full
90b39c5158Smillertof files and things.
91b39c5158Smillert
92b39c5158Smillert=head1 Object Methods
93b39c5158Smillert
94b39c5158Smillert=head2 Archive::Tar->new( [$file, $compressed] )
95b39c5158Smillert
96b39c5158SmillertReturns a new Tar object. If given any arguments, C<new()> calls the
97b39c5158SmillertC<read()> method automatically, passing on the arguments provided to
98b39c5158Smillertthe C<read()> method.
99b39c5158Smillert
100b39c5158SmillertIf C<new()> is invoked with arguments and the C<read()> method fails
101b39c5158Smillertfor any reason, C<new()> returns undef.
102b39c5158Smillert
103b39c5158Smillert=cut
104b39c5158Smillert
105b39c5158Smillertmy $tmpl = {
106b39c5158Smillert    _data   => [ ],
107b39c5158Smillert    _file   => 'Unknown',
108b39c5158Smillert};
109b39c5158Smillert
110b39c5158Smillert### install get/set accessors for this object.
111b39c5158Smillertfor my $key ( keys %$tmpl ) {
112b39c5158Smillert    no strict 'refs';
113b39c5158Smillert    *{__PACKAGE__."::$key"} = sub {
114b39c5158Smillert        my $self = shift;
115b39c5158Smillert        $self->{$key} = $_[0] if @_;
116b39c5158Smillert        return $self->{$key};
117b39c5158Smillert    }
118b39c5158Smillert}
119b39c5158Smillert
120b39c5158Smillertsub new {
121b39c5158Smillert    my $class = shift;
122b39c5158Smillert    $class = ref $class if ref $class;
123b39c5158Smillert
124b39c5158Smillert    ### copying $tmpl here since a shallow copy makes it use the
125b39c5158Smillert    ### same aref, causing for files to remain in memory always.
126b39c5158Smillert    my $obj = bless { _data => [ ], _file => 'Unknown', _error => '' }, $class;
127b39c5158Smillert
128b39c5158Smillert    if (@_) {
129b39c5158Smillert        unless ( $obj->read( @_ ) ) {
130b39c5158Smillert            $obj->_error(qq[No data could be read from file]);
131b39c5158Smillert            return;
132b39c5158Smillert        }
133b39c5158Smillert    }
134b39c5158Smillert
135b39c5158Smillert    return $obj;
136b39c5158Smillert}
137b39c5158Smillert
138b39c5158Smillert=head2 $tar->read ( $filename|$handle, [$compressed, {opt => 'val'}] )
139b39c5158Smillert
140b39c5158SmillertRead the given tar file into memory.
141b39c5158SmillertThe first argument can either be the name of a file or a reference to
142b39c5158Smillertan already open filehandle (or an IO::Zlib object if it's compressed)
143b39c5158Smillert
144b39c5158SmillertThe C<read> will I<replace> any previous content in C<$tar>!
145b39c5158Smillert
146b39c5158SmillertThe second argument may be considered optional, but remains for
147b39c5158Smillertbackwards compatibility. Archive::Tar now looks at the file
148b39c5158Smillertmagic to determine what class should be used to open the file
149b39c5158Smillertand will transparently Do The Right Thing.
150b39c5158Smillert
15156d68f1eSafresh1Archive::Tar will warn if you try to pass a bzip2 / xz compressed file and the
15256d68f1eSafresh1IO::Uncompress::Bunzip2 / IO::Uncompress::UnXz are not available and simply return.
153b39c5158Smillert
154b39c5158SmillertNote that you can currently B<not> pass a C<gzip> compressed
155b39c5158Smillertfilehandle, which is not opened with C<IO::Zlib>, a C<bzip2> compressed
15656d68f1eSafresh1filehandle, which is not opened with C<IO::Uncompress::Bunzip2>, a C<xz> compressed
15756d68f1eSafresh1filehandle, which is not opened with C<IO::Uncompress::UnXz>, nor a string
158b39c5158Smillertcontaining the full archive information (either compressed or
159b39c5158Smillertuncompressed). These are worth while features, but not currently
160b39c5158Smillertimplemented. See the C<TODO> section.
161b39c5158Smillert
162b39c5158SmillertThe third argument can be a hash reference with options. Note that
163b39c5158Smillertall options are case-sensitive.
164b39c5158Smillert
165b39c5158Smillert=over 4
166b39c5158Smillert
167b39c5158Smillert=item limit
168b39c5158Smillert
169b39c5158SmillertDo not read more than C<limit> files. This is useful if you have
170b39c5158Smillertvery big archives, and are only interested in the first few files.
171b39c5158Smillert
172b39c5158Smillert=item filter
173b39c5158Smillert
174b39c5158SmillertCan be set to a regular expression.  Only files with names that match
175b39c5158Smillertthe expression will be read.
176b39c5158Smillert
177898184e3Ssthen=item md5
178898184e3Ssthen
179898184e3SsthenSet to 1 and the md5sum of files will be returned (instead of file data)
180898184e3Ssthen    my $iter = Archive::Tar->iter( $file,  1, {md5 => 1} );
181898184e3Ssthen    while( my $f = $iter->() ) {
182898184e3Ssthen        print $f->data . "\t" . $f->full_path . $/;
183898184e3Ssthen    }
184898184e3Ssthen
185b39c5158Smillert=item extract
186b39c5158Smillert
187b39c5158SmillertIf set to true, immediately extract entries when reading them. This
188b39c5158Smillertgives you the same memory break as the C<extract_archive> function.
189b39c5158SmillertNote however that entries will not be read into memory, but written
190b39c5158Smillertstraight to disk. This means no C<Archive::Tar::File> objects are
191b39c5158Smillertcreated for you to inspect.
192b39c5158Smillert
193b39c5158Smillert=back
194b39c5158Smillert
195b39c5158SmillertAll files are stored internally as C<Archive::Tar::File> objects.
196b39c5158SmillertPlease consult the L<Archive::Tar::File> documentation for details.
197b39c5158Smillert
198b39c5158SmillertReturns the number of files read in scalar context, and a list of
199b39c5158SmillertC<Archive::Tar::File> objects in list context.
200b39c5158Smillert
201b39c5158Smillert=cut
202b39c5158Smillert
203b39c5158Smillertsub read {
204b39c5158Smillert    my $self = shift;
205b39c5158Smillert    my $file = shift;
206b39c5158Smillert    my $gzip = shift || 0;
207b39c5158Smillert    my $opts = shift || {};
208b39c5158Smillert
209b39c5158Smillert    unless( defined $file ) {
210b39c5158Smillert        $self->_error( qq[No file to read from!] );
211b39c5158Smillert        return;
212b39c5158Smillert    } else {
213b39c5158Smillert        $self->_file( $file );
214b39c5158Smillert    }
215b39c5158Smillert
216b39c5158Smillert    my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) )
217b39c5158Smillert                    or return;
218b39c5158Smillert
219b39c5158Smillert    my $data = $self->_read_tar( $handle, $opts ) or return;
220b39c5158Smillert
221b39c5158Smillert    $self->_data( $data );
222b39c5158Smillert
223b39c5158Smillert    return wantarray ? @$data : scalar @$data;
224b39c5158Smillert}
225b39c5158Smillert
226b39c5158Smillertsub _get_handle {
227b39c5158Smillert    my $self     = shift;
228b39c5158Smillert    my $file     = shift;   return unless defined $file;
229b39c5158Smillert    my $compress = shift || 0;
230b39c5158Smillert    my $mode     = shift || READ_ONLY->( ZLIB ); # default to read only
231b39c5158Smillert
232898184e3Ssthen    ### Check if file is a file handle or IO glob
233898184e3Ssthen    if ( ref $file ) {
234898184e3Ssthen	return $file if eval{ *$file{IO} };
235898184e3Ssthen	return $file if eval{ $file->isa(q{IO::Handle}) };
236898184e3Ssthen	$file = q{}.$file;
237898184e3Ssthen    }
238b39c5158Smillert
239b39c5158Smillert    ### get a FH opened to the right class, so we can use it transparently
240b39c5158Smillert    ### throughout the program
241b39c5158Smillert    my $fh;
242b39c5158Smillert    {   ### reading magic only makes sense if we're opening a file for
243b39c5158Smillert        ### reading. otherwise, just use what the user requested.
244b39c5158Smillert        my $magic = '';
245b39c5158Smillert        if( MODE_READ->($mode) ) {
246b39c5158Smillert            open my $tmp, $file or do {
247b39c5158Smillert                $self->_error( qq[Could not open '$file' for reading: $!] );
248b39c5158Smillert                return;
249b39c5158Smillert            };
250b39c5158Smillert
25156d68f1eSafresh1            ### read the first 6 bytes of the file to figure out which class to
252b39c5158Smillert            ### use to open the file.
25356d68f1eSafresh1            sysread( $tmp, $magic, 6 );
254b39c5158Smillert            close $tmp;
255b39c5158Smillert        }
256b39c5158Smillert
25756d68f1eSafresh1        ### is it xz?
25856d68f1eSafresh1        ### if you asked specifically for xz compression, or if we're in
25956d68f1eSafresh1        ### read mode and the magic numbers add up, use xz
26056d68f1eSafresh1        if( XZ and (
26156d68f1eSafresh1               ($compress eq COMPRESS_XZ) or
26256d68f1eSafresh1               ( MODE_READ->($mode) and $magic =~ XZ_MAGIC_NUM )
26356d68f1eSafresh1            )
26456d68f1eSafresh1        ) {
26556d68f1eSafresh1            if( MODE_READ->($mode) ) {
26656d68f1eSafresh1                $fh = IO::Uncompress::UnXz->new( $file ) or do {
26756d68f1eSafresh1                    $self->_error( qq[Could not read '$file': ] .
26856d68f1eSafresh1                        $IO::Uncompress::UnXz::UnXzError
26956d68f1eSafresh1                    );
27056d68f1eSafresh1                    return;
27156d68f1eSafresh1                };
27256d68f1eSafresh1            } else {
27356d68f1eSafresh1                $fh = IO::Compress::Xz->new( $file ) or do {
27456d68f1eSafresh1                    $self->_error( qq[Could not write to '$file': ] .
27556d68f1eSafresh1                        $IO::Compress::Xz::XzError
27656d68f1eSafresh1                    );
27756d68f1eSafresh1                    return;
27856d68f1eSafresh1                };
27956d68f1eSafresh1            }
28056d68f1eSafresh1
281b39c5158Smillert        ### is it bzip?
282b39c5158Smillert        ### if you asked specifically for bzip compression, or if we're in
283b39c5158Smillert        ### read mode and the magic numbers add up, use bzip
28456d68f1eSafresh1        } elsif( BZIP and (
285b39c5158Smillert                ($compress eq COMPRESS_BZIP) or
286b39c5158Smillert                ( MODE_READ->($mode) and $magic =~ BZIP_MAGIC_NUM )
287b39c5158Smillert            )
288b39c5158Smillert        ) {
289b39c5158Smillert
290b39c5158Smillert            ### different reader/writer modules, different error vars... sigh
291b39c5158Smillert            if( MODE_READ->($mode) ) {
2929f11ffb7Safresh1                $fh = IO::Uncompress::Bunzip2->new( $file, MultiStream => 1 ) or do {
293b39c5158Smillert                    $self->_error( qq[Could not read '$file': ] .
294b39c5158Smillert                        $IO::Uncompress::Bunzip2::Bunzip2Error
295b39c5158Smillert                    );
296b39c5158Smillert                    return;
297b39c5158Smillert                };
298b39c5158Smillert
299b39c5158Smillert            } else {
300b39c5158Smillert                $fh = IO::Compress::Bzip2->new( $file ) or do {
301b39c5158Smillert                    $self->_error( qq[Could not write to '$file': ] .
302b39c5158Smillert                        $IO::Compress::Bzip2::Bzip2Error
303b39c5158Smillert                    );
304b39c5158Smillert                    return;
305b39c5158Smillert                };
306b39c5158Smillert            }
307b39c5158Smillert
308b39c5158Smillert        ### is it gzip?
309b39c5158Smillert        ### if you asked for compression, if you wanted to read or the gzip
310b39c5158Smillert        ### magic number is present (redundant with read)
311b39c5158Smillert        } elsif( ZLIB and (
312b39c5158Smillert                    $compress or MODE_READ->($mode) or $magic =~ GZIP_MAGIC_NUM
313b39c5158Smillert                 )
314b39c5158Smillert        ) {
315b39c5158Smillert            $fh = IO::Zlib->new;
316b39c5158Smillert
317b39c5158Smillert            unless( $fh->open( $file, $mode ) ) {
318b39c5158Smillert                $self->_error(qq[Could not create filehandle for '$file': $!]);
319b39c5158Smillert                return;
320b39c5158Smillert            }
321b39c5158Smillert
322b39c5158Smillert        ### is it plain tar?
323b39c5158Smillert        } else {
324b39c5158Smillert            $fh = IO::File->new;
325b39c5158Smillert
326b39c5158Smillert            unless( $fh->open( $file, $mode ) ) {
327b39c5158Smillert                $self->_error(qq[Could not create filehandle for '$file': $!]);
328b39c5158Smillert                return;
329b39c5158Smillert            }
330b39c5158Smillert
331b39c5158Smillert            ### enable bin mode on tar archives
332b39c5158Smillert            binmode $fh;
333b39c5158Smillert        }
334b39c5158Smillert    }
335b39c5158Smillert
336b39c5158Smillert    return $fh;
337b39c5158Smillert}
338b39c5158Smillert
339b39c5158Smillert
340b39c5158Smillertsub _read_tar {
341b39c5158Smillert    my $self    = shift;
342b39c5158Smillert    my $handle  = shift or return;
343b39c5158Smillert    my $opts    = shift || {};
344b39c5158Smillert
345b39c5158Smillert    my $count   = $opts->{limit}    || 0;
346b39c5158Smillert    my $filter  = $opts->{filter};
347898184e3Ssthen    my $md5  = $opts->{md5} || 0;	# cdrake
348898184e3Ssthen    my $filter_cb = $opts->{filter_cb};
349b39c5158Smillert    my $extract = $opts->{extract}  || 0;
350b39c5158Smillert
351b39c5158Smillert    ### set a cap on the amount of files to extract ###
352b39c5158Smillert    my $limit   = 0;
353b39c5158Smillert    $limit = 1 if $count > 0;
354b39c5158Smillert
355b39c5158Smillert    my $tarfile = [ ];
356b39c5158Smillert    my $chunk;
357b39c5158Smillert    my $read = 0;
358b39c5158Smillert    my $real_name;  # to set the name of a file when
359b39c5158Smillert                    # we're encountering @longlink
360b39c5158Smillert    my $data;
361b39c5158Smillert
362b39c5158Smillert    LOOP:
363b39c5158Smillert    while( $handle->read( $chunk, HEAD ) ) {
364b39c5158Smillert        ### IO::Zlib doesn't support this yet
36591f110e0Safresh1        my $offset;
36691f110e0Safresh1        if ( ref($handle) ne 'IO::Zlib' ) {
36791f110e0Safresh1            local $@;
36891f110e0Safresh1            $offset = eval { tell $handle } || 'unknown';
369898184e3Ssthen            $@ = '';
37091f110e0Safresh1        }
37191f110e0Safresh1        else {
37291f110e0Safresh1            $offset = 'unknown';
37391f110e0Safresh1        }
374b39c5158Smillert
375b39c5158Smillert        unless( $read++ ) {
376b39c5158Smillert            my $gzip = GZIP_MAGIC_NUM;
377b39c5158Smillert            if( $chunk =~ /$gzip/ ) {
378b39c5158Smillert                $self->_error( qq[Cannot read compressed format in tar-mode] );
379b39c5158Smillert                return;
380b39c5158Smillert            }
381b39c5158Smillert
382b39c5158Smillert            ### size is < HEAD, which means a corrupted file, as the minimum
383b39c5158Smillert            ### length is _at least_ HEAD
384b39c5158Smillert            if (length $chunk != HEAD) {
385b39c5158Smillert                $self->_error( qq[Cannot read enough bytes from the tarfile] );
386b39c5158Smillert                return;
387b39c5158Smillert            }
388b39c5158Smillert        }
389b39c5158Smillert
390b39c5158Smillert        ### if we can't read in all bytes... ###
391b39c5158Smillert        last if length $chunk != HEAD;
392b39c5158Smillert
393b39c5158Smillert        ### Apparently this should really be two blocks of 512 zeroes,
394b39c5158Smillert        ### but GNU tar sometimes gets it wrong. See comment in the
395b39c5158Smillert        ### source code (tar.c) to GNU cpio.
396b39c5158Smillert        next if $chunk eq TAR_END;
397b39c5158Smillert
398b39c5158Smillert        ### according to the posix spec, the last 12 bytes of the header are
399b39c5158Smillert        ### null bytes, to pad it to a 512 byte block. That means if these
400898184e3Ssthen        ### bytes are NOT null bytes, it's a corrupt header. See:
401b39c5158Smillert        ### www.koders.com/c/fidCE473AD3D9F835D690259D60AD5654591D91D5BA.aspx
402b39c5158Smillert        ### line 111
403b39c5158Smillert        {   my $nulls = join '', "\0" x 12;
404b39c5158Smillert            unless( $nulls eq substr( $chunk, 500, 12 ) ) {
405b39c5158Smillert                $self->_error( qq[Invalid header block at offset $offset] );
406b39c5158Smillert                next LOOP;
407b39c5158Smillert            }
408b39c5158Smillert        }
409b39c5158Smillert
410b39c5158Smillert        ### pass the realname, so we can set it 'proper' right away
411b39c5158Smillert        ### some of the heuristics are done on the name, so important
412b39c5158Smillert        ### to set it ASAP
413b39c5158Smillert        my $entry;
414b39c5158Smillert        {   my %extra_args = ();
415b39c5158Smillert            $extra_args{'name'} = $$real_name if defined $real_name;
416b39c5158Smillert
417b39c5158Smillert            unless( $entry = Archive::Tar::File->new(   chunk => $chunk,
418b39c5158Smillert                                                        %extra_args )
419b39c5158Smillert            ) {
420b39c5158Smillert                $self->_error( qq[Couldn't read chunk at offset $offset] );
421b39c5158Smillert                next LOOP;
422b39c5158Smillert            }
423b39c5158Smillert        }
424b39c5158Smillert
425b39c5158Smillert        ### ignore labels:
426898184e3Ssthen        ### http://www.gnu.org/software/tar/manual/html_chapter/Media.html#SEC159
427b39c5158Smillert        next if $entry->is_label;
428b39c5158Smillert
429b39c5158Smillert        if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) {
430b39c5158Smillert
431b39c5158Smillert            if ( $entry->is_file && !$entry->validate ) {
432b39c5158Smillert                ### sometimes the chunk is rather fux0r3d and a whole 512
433b39c5158Smillert                ### bytes ends up in the ->name area.
434b39c5158Smillert                ### clean it up, if need be
435b39c5158Smillert                my $name = $entry->name;
436b39c5158Smillert                $name = substr($name, 0, 100) if length $name > 100;
437b39c5158Smillert                $name =~ s/\n/ /g;
438b39c5158Smillert
439b39c5158Smillert                $self->_error( $name . qq[: checksum error] );
440b39c5158Smillert                next LOOP;
441b39c5158Smillert            }
442b39c5158Smillert
443b39c5158Smillert            my $block = BLOCK_SIZE->( $entry->size );
444b39c5158Smillert
445b39c5158Smillert            $data = $entry->get_content_by_ref;
446b39c5158Smillert
447898184e3Ssthen	    my $skip = 0;
448898184e3Ssthen	    my $ctx;			# cdrake
449898184e3Ssthen	    ### skip this entry if we're filtering
450898184e3Ssthen
451898184e3Ssthen	    if($md5) {			# cdrake
452898184e3Ssthen	      $ctx = Digest::MD5->new;	# cdrake
453898184e3Ssthen	        $skip=5;		# cdrake
454898184e3Ssthen
455898184e3Ssthen	    } elsif ($filter && $entry->name !~ $filter) {
456898184e3Ssthen		$skip = 1;
457898184e3Ssthen
458b8851fccSafresh1	    } elsif ($filter_cb && ! $filter_cb->($entry)) {
459b8851fccSafresh1		$skip = 2;
460b8851fccSafresh1
461898184e3Ssthen	    ### skip this entry if it's a pax header. This is a special file added
462898184e3Ssthen	    ### by, among others, git-generated tarballs. It holds comments and is
463898184e3Ssthen	    ### not meant for extracting. See #38932: pax_global_header extracted
464898184e3Ssthen	    } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) {
465898184e3Ssthen		$skip = 3;
466898184e3Ssthen	    }
467898184e3Ssthen
468898184e3Ssthen	    if ($skip) {
469898184e3Ssthen		#
470898184e3Ssthen		# Since we're skipping, do not allocate memory for the
471898184e3Ssthen		# whole file.  Read it 64 BLOCKS at a time.  Do not
472898184e3Ssthen		# complete the skip yet because maybe what we read is a
473898184e3Ssthen		# longlink and it won't get skipped after all
474898184e3Ssthen		#
475898184e3Ssthen		my $amt = $block;
476898184e3Ssthen		my $fsz=$entry->size;	# cdrake
477898184e3Ssthen		while ($amt > 0) {
478898184e3Ssthen		    $$data = '';
479898184e3Ssthen		    my $this = 64 * BLOCK;
480898184e3Ssthen		    $this = $amt if $this > $amt;
481898184e3Ssthen		    if( $handle->read( $$data, $this ) < $this ) {
482898184e3Ssthen			$self->_error( qq[Read error on tarfile (missing data) '].
483898184e3Ssthen					    $entry->full_path ."' at offset $offset" );
484898184e3Ssthen			next LOOP;
485898184e3Ssthen		    }
486898184e3Ssthen		    $amt -= $this;
487898184e3Ssthen		    $fsz -= $this;	# cdrake
488898184e3Ssthen		substr ($$data, $fsz) = "" if ($fsz<0);	# remove external junk prior to md5	# cdrake
489898184e3Ssthen		$ctx->add($$data) if($skip==5);	# cdrake
490898184e3Ssthen		}
491898184e3Ssthen		$$data = $ctx->hexdigest if($skip==5 && !$entry->is_longlink && !$entry->is_unknown && !$entry->is_label ) ;	# cdrake
492898184e3Ssthen            } else {
493898184e3Ssthen
494b39c5158Smillert		### just read everything into memory
495b39c5158Smillert		### can't do lazy loading since IO::Zlib doesn't support 'seek'
496b39c5158Smillert		### this is because Compress::Zlib doesn't support it =/
497b39c5158Smillert		### this reads in the whole data in one read() call.
498b39c5158Smillert		if ( $handle->read( $$data, $block ) < $block ) {
499b39c5158Smillert		    $self->_error( qq[Read error on tarfile (missing data) '].
500b39c5158Smillert                                    $entry->full_path ."' at offset $offset" );
501b39c5158Smillert		    next LOOP;
502b39c5158Smillert		}
503b39c5158Smillert		### throw away trailing garbage ###
504b39c5158Smillert		substr ($$data, $entry->size) = "" if defined $$data;
505898184e3Ssthen            }
506b39c5158Smillert
507b39c5158Smillert            ### part II of the @LongLink munging -- need to do /after/
508b39c5158Smillert            ### the checksum check.
509b39c5158Smillert            if( $entry->is_longlink ) {
510b39c5158Smillert                ### weird thing in tarfiles -- if the file is actually a
511b39c5158Smillert                ### @LongLink, the data part seems to have a trailing ^@
512b39c5158Smillert                ### (unprintable) char. to display, pipe output through less.
513b39c5158Smillert                ### but that doesn't *always* happen.. so check if the last
514b39c5158Smillert                ### character is a control character, and if so remove it
515b39c5158Smillert                ### at any rate, we better remove that character here, or tests
516b39c5158Smillert                ### like 'eq' and hash lookups based on names will SO not work
517b39c5158Smillert                ### remove it by calculating the proper size, and then
518b39c5158Smillert                ### tossing out everything that's longer than that size.
519b39c5158Smillert
520b39c5158Smillert                ### count number of nulls
521b39c5158Smillert                my $nulls = $$data =~ tr/\0/\0/;
522b39c5158Smillert
523b39c5158Smillert                ### cut data + size by that many bytes
524b39c5158Smillert                $entry->size( $entry->size - $nulls );
525b39c5158Smillert                substr ($$data, $entry->size) = "";
526b39c5158Smillert            }
527b39c5158Smillert        }
528b39c5158Smillert
529b39c5158Smillert        ### clean up of the entries.. posix tar /apparently/ has some
530b39c5158Smillert        ### weird 'feature' that allows for filenames > 255 characters
531b39c5158Smillert        ### they'll put a header in with as name '././@LongLink' and the
532b39c5158Smillert        ### contents will be the name of the /next/ file in the archive
533b39c5158Smillert        ### pretty crappy and kludgy if you ask me
534b39c5158Smillert
535b39c5158Smillert        ### set the name for the next entry if this is a @LongLink;
536b39c5158Smillert        ### this is one ugly hack =/ but needed for direct extraction
537b39c5158Smillert        if( $entry->is_longlink ) {
538b39c5158Smillert            $real_name = $data;
539b39c5158Smillert            next LOOP;
540b39c5158Smillert        } elsif ( defined $real_name ) {
541b39c5158Smillert            $entry->name( $$real_name );
542b39c5158Smillert            $entry->prefix('');
543b39c5158Smillert            undef $real_name;
544b39c5158Smillert        }
545b39c5158Smillert
546b39c5158Smillert	if ($filter && $entry->name !~ $filter) {
547b39c5158Smillert	    next LOOP;
548b39c5158Smillert
549b8851fccSafresh1	} elsif ($filter_cb && ! $filter_cb->($entry)) {
550b8851fccSafresh1	    next LOOP;
551b8851fccSafresh1
552b39c5158Smillert	### skip this entry if it's a pax header. This is a special file added
553b39c5158Smillert	### by, among others, git-generated tarballs. It holds comments and is
554b39c5158Smillert	### not meant for extracting. See #38932: pax_global_header extracted
555898184e3Ssthen	} elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) {
556898184e3Ssthen	    next LOOP;
557b39c5158Smillert	}
558b39c5158Smillert
559898184e3Ssthen        if ( $extract && !$entry->is_longlink
560b39c5158Smillert                      && !$entry->is_unknown
561898184e3Ssthen                      && !$entry->is_label ) {
562898184e3Ssthen            $self->_extract_file( $entry ) or return;
563898184e3Ssthen        }
564b39c5158Smillert
565b39c5158Smillert        ### Guard against tarfiles with garbage at the end
566b39c5158Smillert	    last LOOP if $entry->name eq '';
567b39c5158Smillert
568b39c5158Smillert        ### push only the name on the rv if we're extracting
569b39c5158Smillert        ### -- for extract_archive
570b39c5158Smillert        push @$tarfile, ($extract ? $entry->name : $entry);
571b39c5158Smillert
572b39c5158Smillert        if( $limit ) {
573b39c5158Smillert            $count-- unless $entry->is_longlink || $entry->is_dir;
574b39c5158Smillert            last LOOP unless $count;
575b39c5158Smillert        }
576b39c5158Smillert    } continue {
577b39c5158Smillert        undef $data;
578b39c5158Smillert    }
579b39c5158Smillert
580b39c5158Smillert    return $tarfile;
581b39c5158Smillert}
582b39c5158Smillert
583b39c5158Smillert=head2 $tar->contains_file( $filename )
584b39c5158Smillert
585b39c5158SmillertCheck if the archive contains a certain file.
586b39c5158SmillertIt will return true if the file is in the archive, false otherwise.
587b39c5158Smillert
588b39c5158SmillertNote however, that this function does an exact match using C<eq>
589b39c5158Smillerton the full path. So it cannot compensate for case-insensitive file-
590b39c5158Smillertsystems or compare 2 paths to see if they would point to the same
591b39c5158Smillertunderlying file.
592b39c5158Smillert
593b39c5158Smillert=cut
594b39c5158Smillert
595b39c5158Smillertsub contains_file {
596b39c5158Smillert    my $self = shift;
597b39c5158Smillert    my $full = shift;
598b39c5158Smillert
599b39c5158Smillert    return unless defined $full;
600b39c5158Smillert
601b39c5158Smillert    ### don't warn if the entry isn't there.. that's what this function
602b39c5158Smillert    ### is for after all.
603b39c5158Smillert    local $WARN = 0;
604b39c5158Smillert    return 1 if $self->_find_entry($full);
605b39c5158Smillert    return;
606b39c5158Smillert}
607b39c5158Smillert
608b39c5158Smillert=head2 $tar->extract( [@filenames] )
609b39c5158Smillert
610b39c5158SmillertWrite files whose names are equivalent to any of the names in
611b39c5158SmillertC<@filenames> to disk, creating subdirectories as necessary. This
612b39c5158Smillertmight not work too well under VMS.
613b39c5158SmillertUnder MacPerl, the file's modification time will be converted to the
614b39c5158SmillertMacOS zero of time, and appropriate conversions will be done to the
615b39c5158Smillertpath.  However, the length of each element of the path is not
616b39c5158Smillertinspected to see whether it's longer than MacOS currently allows (32
617b39c5158Smillertcharacters).
618b39c5158Smillert
619b39c5158SmillertIf C<extract> is called without a list of file names, the entire
620b39c5158Smillertcontents of the archive are extracted.
621b39c5158Smillert
622b39c5158SmillertReturns a list of filenames extracted.
623b39c5158Smillert
624b39c5158Smillert=cut
625b39c5158Smillert
626b39c5158Smillertsub extract {
627b39c5158Smillert    my $self    = shift;
628b39c5158Smillert    my @args    = @_;
629b39c5158Smillert    my @files;
6309f11ffb7Safresh1    my $hashmap;
631b39c5158Smillert
632b39c5158Smillert    # use the speed optimization for all extracted files
633b39c5158Smillert    local($self->{cwd}) = cwd() unless $self->{cwd};
634b39c5158Smillert
635898184e3Ssthen    ### you requested the extraction of only certain files
636b39c5158Smillert    if( @args ) {
637b39c5158Smillert        for my $file ( @args ) {
638b39c5158Smillert
639b39c5158Smillert            ### it's already an object?
640b39c5158Smillert            if( UNIVERSAL::isa( $file, 'Archive::Tar::File' ) ) {
641b39c5158Smillert                push @files, $file;
642b39c5158Smillert                next;
643b39c5158Smillert
644b39c5158Smillert            ### go find it then
645b39c5158Smillert            } else {
646b39c5158Smillert
6479f11ffb7Safresh1                # create hash-map once to speed up lookup
6489f11ffb7Safresh1                $hashmap = $hashmap || {
6499f11ffb7Safresh1                    map { $_->full_path, $_ } @{$self->_data}
6509f11ffb7Safresh1                };
651b39c5158Smillert
6529f11ffb7Safresh1                if (exists $hashmap->{$file}) {
653b39c5158Smillert                    ### we found the file you're looking for
6549f11ffb7Safresh1                    push @files, $hashmap->{$file};
6559f11ffb7Safresh1                } else {
656b39c5158Smillert                    return $self->_error(
657b39c5158Smillert                        qq[Could not find '$file' in archive] );
658b39c5158Smillert                }
659b39c5158Smillert            }
660b39c5158Smillert        }
661b39c5158Smillert
662b39c5158Smillert    ### just grab all the file items
663b39c5158Smillert    } else {
664b39c5158Smillert        @files = $self->get_files;
665b39c5158Smillert    }
666b39c5158Smillert
667b39c5158Smillert    ### nothing found? that's an error
668b39c5158Smillert    unless( scalar @files ) {
669b39c5158Smillert        $self->_error( qq[No files found for ] . $self->_file );
670b39c5158Smillert        return;
671b39c5158Smillert    }
672b39c5158Smillert
673b39c5158Smillert    ### now extract them
674b39c5158Smillert    for my $entry ( @files ) {
675b39c5158Smillert        unless( $self->_extract_file( $entry ) ) {
676b39c5158Smillert            $self->_error(q[Could not extract ']. $entry->full_path .q['] );
677b39c5158Smillert            return;
678b39c5158Smillert        }
679b39c5158Smillert    }
680b39c5158Smillert
681b39c5158Smillert    return @files;
682b39c5158Smillert}
683b39c5158Smillert
684b39c5158Smillert=head2 $tar->extract_file( $file, [$extract_path] )
685b39c5158Smillert
686b39c5158SmillertWrite an entry, whose name is equivalent to the file name provided to
687b39c5158Smillertdisk. Optionally takes a second parameter, which is the full native
688b39c5158Smillertpath (including filename) the entry will be written to.
689b39c5158Smillert
690b39c5158SmillertFor example:
691b39c5158Smillert
692b39c5158Smillert    $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' );
693b39c5158Smillert
694b39c5158Smillert    $tar->extract_file( $at_file_object,   'name/i/want/to/give/it' );
695b39c5158Smillert
696b39c5158SmillertReturns true on success, false on failure.
697b39c5158Smillert
698b39c5158Smillert=cut
699b39c5158Smillert
700b39c5158Smillertsub extract_file {
701b39c5158Smillert    my $self = shift;
702b39c5158Smillert    my $file = shift;   return unless defined $file;
703b39c5158Smillert    my $alt  = shift;
704b39c5158Smillert
705b39c5158Smillert    my $entry = $self->_find_entry( $file )
706b39c5158Smillert        or $self->_error( qq[Could not find an entry for '$file'] ), return;
707b39c5158Smillert
708b39c5158Smillert    return $self->_extract_file( $entry, $alt );
709b39c5158Smillert}
710b39c5158Smillert
711b39c5158Smillertsub _extract_file {
712b39c5158Smillert    my $self    = shift;
713b39c5158Smillert    my $entry   = shift or return;
714b39c5158Smillert    my $alt     = shift;
715b39c5158Smillert
716b39c5158Smillert    ### you wanted an alternate extraction location ###
717b39c5158Smillert    my $name = defined $alt ? $alt : $entry->full_path;
718b39c5158Smillert
719b39c5158Smillert                            ### splitpath takes a bool at the end to indicate
720b39c5158Smillert                            ### that it's splitting a dir
721b39c5158Smillert    my ($vol,$dirs,$file);
722b39c5158Smillert    if ( defined $alt ) { # It's a local-OS path
723b39c5158Smillert        ($vol,$dirs,$file) = File::Spec->splitpath(       $alt,
724b39c5158Smillert                                                          $entry->is_dir );
725b39c5158Smillert    } else {
726b39c5158Smillert        ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name,
727b39c5158Smillert                                                          $entry->is_dir );
728b39c5158Smillert    }
729b39c5158Smillert
730b39c5158Smillert    my $dir;
731b39c5158Smillert    ### is $name an absolute path? ###
732b39c5158Smillert    if( $vol || File::Spec->file_name_is_absolute( $dirs ) ) {
733b39c5158Smillert
734b39c5158Smillert        ### absolute names are not allowed to be in tarballs under
735b39c5158Smillert        ### strict mode, so only allow it if a user tells us to do it
736b39c5158Smillert        if( not defined $alt and not $INSECURE_EXTRACT_MODE ) {
737b39c5158Smillert            $self->_error(
738b39c5158Smillert                q[Entry ']. $entry->full_path .q[' is an absolute path. ].
739b39c5158Smillert                q[Not extracting absolute paths under SECURE EXTRACT MODE]
740b39c5158Smillert            );
741b39c5158Smillert            return;
742b39c5158Smillert        }
743b39c5158Smillert
744b39c5158Smillert        ### user asked us to, it's fine.
745b39c5158Smillert        $dir = File::Spec->catpath( $vol, $dirs, "" );
746b39c5158Smillert
747b39c5158Smillert    ### it's a relative path ###
748b39c5158Smillert    } else {
749b39c5158Smillert        my $cwd     = (ref $self and defined $self->{cwd})
750b39c5158Smillert                        ? $self->{cwd}
751b39c5158Smillert                        : cwd();
752b39c5158Smillert
753b39c5158Smillert        my @dirs = defined $alt
754b39c5158Smillert            ? File::Spec->splitdir( $dirs )         # It's a local-OS path
755b39c5158Smillert            : File::Spec::Unix->splitdir( $dirs );  # it's UNIX-style, likely
756b39c5158Smillert                                                    # straight from the tarball
757b39c5158Smillert
758b39c5158Smillert        if( not defined $alt            and
759b39c5158Smillert            not $INSECURE_EXTRACT_MODE
760b39c5158Smillert        ) {
761b39c5158Smillert
762b39c5158Smillert            ### paths that leave the current directory are not allowed under
763b39c5158Smillert            ### strict mode, so only allow it if a user tells us to do this.
764b39c5158Smillert            if( grep { $_ eq '..' } @dirs ) {
765b39c5158Smillert
766b39c5158Smillert                $self->_error(
767b39c5158Smillert                    q[Entry ']. $entry->full_path .q[' is attempting to leave ].
768b39c5158Smillert                    q[the current working directory. Not extracting under ].
769b39c5158Smillert                    q[SECURE EXTRACT MODE]
770b39c5158Smillert                );
771b39c5158Smillert                return;
772b39c5158Smillert            }
773b39c5158Smillert
774b39c5158Smillert            ### the archive may be asking us to extract into a symlink. This
775b39c5158Smillert            ### is not sane and a possible security issue, as outlined here:
776b39c5158Smillert            ### https://rt.cpan.org/Ticket/Display.html?id=30380
777b39c5158Smillert            ### https://bugzilla.redhat.com/show_bug.cgi?id=295021
778b39c5158Smillert            ### https://issues.rpath.com/browse/RPL-1716
779b39c5158Smillert            my $full_path = $cwd;
780b39c5158Smillert            for my $d ( @dirs ) {
781b39c5158Smillert                $full_path = File::Spec->catdir( $full_path, $d );
782b39c5158Smillert
783b39c5158Smillert                ### we've already checked this one, and it's safe. Move on.
784b39c5158Smillert                next if ref $self and $self->{_link_cache}->{$full_path};
785b39c5158Smillert
786b39c5158Smillert                if( -l $full_path ) {
787b39c5158Smillert                    my $to   = readlink $full_path;
788b39c5158Smillert                    my $diag = "symlinked directory ($full_path => $to)";
789b39c5158Smillert
790b39c5158Smillert                    $self->_error(
791b39c5158Smillert                        q[Entry ']. $entry->full_path .q[' is attempting to ].
792b39c5158Smillert                        qq[extract to a $diag. This is considered a security ].
793b39c5158Smillert                        q[vulnerability and not allowed under SECURE EXTRACT ].
794b39c5158Smillert                        q[MODE]
795b39c5158Smillert                    );
796b39c5158Smillert                    return;
797b39c5158Smillert                }
798b39c5158Smillert
799b39c5158Smillert                ### XXX keep a cache if possible, so the stats become cheaper:
800b39c5158Smillert                $self->{_link_cache}->{$full_path} = 1 if ref $self;
801b39c5158Smillert            }
802b39c5158Smillert        }
803b39c5158Smillert
804b39c5158Smillert        ### '.' is the directory delimiter on VMS, which has to be escaped
805b39c5158Smillert        ### or changed to '_' on vms.  vmsify is used, because older versions
806b39c5158Smillert        ### of vmspath do not handle this properly.
807b39c5158Smillert        ### Must not add a '/' to an empty directory though.
808b39c5158Smillert        map { length() ? VMS::Filespec::vmsify($_.'/') : $_ } @dirs if ON_VMS;
809b39c5158Smillert
810b39c5158Smillert        my ($cwd_vol,$cwd_dir,$cwd_file)
811b39c5158Smillert                    = File::Spec->splitpath( $cwd );
812b39c5158Smillert        my @cwd     = File::Spec->splitdir( $cwd_dir );
813b39c5158Smillert        push @cwd, $cwd_file if length $cwd_file;
814b39c5158Smillert
815898184e3Ssthen        ### We need to pass '' as the last element to catpath. Craig Berry
816b39c5158Smillert        ### explains why (msgid <p0624083dc311ae541393@[172.16.52.1]>):
817b39c5158Smillert        ### The root problem is that splitpath on UNIX always returns the
818b39c5158Smillert        ### final path element as a file even if it is a directory, and of
819b39c5158Smillert        ### course there is no way it can know the difference without checking
820b39c5158Smillert        ### against the filesystem, which it is documented as not doing.  When
821b39c5158Smillert        ### you turn around and call catpath, on VMS you have to know which bits
822b39c5158Smillert        ### are directory bits and which bits are file bits.  In this case we
823b39c5158Smillert        ### know the result should be a directory.  I had thought you could omit
824b39c5158Smillert        ### the file argument to catpath in such a case, but apparently on UNIX
825b39c5158Smillert        ### you can't.
826b39c5158Smillert        $dir        = File::Spec->catpath(
827b39c5158Smillert                            $cwd_vol, File::Spec->catdir( @cwd, @dirs ), ''
828b39c5158Smillert                        );
829b39c5158Smillert
830b39c5158Smillert        ### catdir() returns undef if the path is longer than 255 chars on
831b39c5158Smillert        ### older VMS systems.
832b39c5158Smillert        unless ( defined $dir ) {
833b39c5158Smillert            $^W && $self->_error( qq[Could not compose a path for '$dirs'\n] );
834b39c5158Smillert            return;
835b39c5158Smillert        }
836b39c5158Smillert
837b39c5158Smillert    }
838b39c5158Smillert
839b39c5158Smillert    if( -e $dir && !-d _ ) {
840b39c5158Smillert        $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] );
841b39c5158Smillert        return;
842b39c5158Smillert    }
843b39c5158Smillert
844b39c5158Smillert    unless ( -d _ ) {
845b39c5158Smillert        eval { File::Path::mkpath( $dir, 0, 0777 ) };
846b39c5158Smillert        if( $@ ) {
847b39c5158Smillert            my $fp = $entry->full_path;
848b39c5158Smillert            $self->_error(qq[Could not create directory '$dir' for '$fp': $@]);
849b39c5158Smillert            return;
850b39c5158Smillert        }
851b39c5158Smillert
852b39c5158Smillert        ### XXX chown here? that might not be the same as in the archive
853b39c5158Smillert        ### as we're only chown'ing to the owner of the file we're extracting
854b39c5158Smillert        ### not to the owner of the directory itself, which may or may not
855b39c5158Smillert        ### be another entry in the archive
856b39c5158Smillert        ### Answer: no, gnu tar doesn't do it either, it'd be the wrong
857b39c5158Smillert        ### way to go.
858b39c5158Smillert        #if( $CHOWN && CAN_CHOWN ) {
859b39c5158Smillert        #    chown $entry->uid, $entry->gid, $dir or
860b39c5158Smillert        #        $self->_error( qq[Could not set uid/gid on '$dir'] );
861b39c5158Smillert        #}
862b39c5158Smillert    }
863b39c5158Smillert
864b39c5158Smillert    ### we're done if we just needed to create a dir ###
865b39c5158Smillert    return 1 if $entry->is_dir;
866b39c5158Smillert
867b39c5158Smillert    my $full = File::Spec->catfile( $dir, $file );
868b39c5158Smillert
869b39c5158Smillert    if( $entry->is_unknown ) {
870b39c5158Smillert        $self->_error( qq[Unknown file type for file '$full'] );
871b39c5158Smillert        return;
872b39c5158Smillert    }
873b39c5158Smillert
87426f75d2dSafresh1    ### If a file system already contains a block device with the same name as
87526f75d2dSafresh1    ### the being extracted regular file, we would write the file's content
87626f75d2dSafresh1    ### to the block device. So remove the existing file (block device) now.
87726f75d2dSafresh1    ### If an archive contains multiple same-named entries, the last one
87826f75d2dSafresh1    ### should replace the previous ones. So remove the old file now.
87926f75d2dSafresh1    ### If the old entry is a symlink to a file outside of the CWD, the new
88026f75d2dSafresh1    ### entry would create a file there. This is CVE-2018-12015
88126f75d2dSafresh1    ### <https://rt.cpan.org/Ticket/Display.html?id=125523>.
88226f75d2dSafresh1    if (-l $full || -e _) {
88326f75d2dSafresh1	if (!unlink $full) {
88426f75d2dSafresh1	    $self->_error( qq[Could not remove old file '$full': $!] );
88526f75d2dSafresh1	    return;
88626f75d2dSafresh1	}
88726f75d2dSafresh1    }
888b39c5158Smillert    if( length $entry->type && $entry->is_file ) {
889b39c5158Smillert        my $fh = IO::File->new;
8909f11ffb7Safresh1        $fh->open( $full, '>' ) or (
891b39c5158Smillert            $self->_error( qq[Could not open file '$full': $!] ),
892b39c5158Smillert            return
893b39c5158Smillert        );
894b39c5158Smillert
895b39c5158Smillert        if( $entry->size ) {
896b39c5158Smillert            binmode $fh;
897b39c5158Smillert            syswrite $fh, $entry->data or (
898b39c5158Smillert                $self->_error( qq[Could not write data to '$full'] ),
899b39c5158Smillert                return
900b39c5158Smillert            );
901b39c5158Smillert        }
902b39c5158Smillert
903b39c5158Smillert        close $fh or (
904b39c5158Smillert            $self->_error( qq[Could not close file '$full'] ),
905b39c5158Smillert            return
906b39c5158Smillert        );
907b39c5158Smillert
908b39c5158Smillert    } else {
909b39c5158Smillert        $self->_make_special_file( $entry, $full ) or return;
910b39c5158Smillert    }
911b39c5158Smillert
912b39c5158Smillert    ### only update the timestamp if it's not a symlink; that will change the
913b39c5158Smillert    ### timestamp of the original. This addresses bug #33669: Could not update
914b39c5158Smillert    ### timestamp warning on symlinks
915b39c5158Smillert    if( not -l $full ) {
916b39c5158Smillert        utime time, $entry->mtime - TIME_OFFSET, $full or
917b39c5158Smillert            $self->_error( qq[Could not update timestamp] );
918b39c5158Smillert    }
919b39c5158Smillert
92091f110e0Safresh1    if( $CHOWN && CAN_CHOWN->() and not -l $full ) {
921*eac174f2Safresh1        CORE::chown( $entry->uid, $entry->gid, $full ) or
922b39c5158Smillert            $self->_error( qq[Could not set uid/gid on '$full'] );
923b39c5158Smillert    }
924b39c5158Smillert
925b39c5158Smillert    ### only chmod if we're allowed to, but never chmod symlinks, since they'll
926b39c5158Smillert    ### change the perms on the file they're linking too...
927b39c5158Smillert    if( $CHMOD and not -l $full ) {
928b39c5158Smillert        my $mode = $entry->mode;
929b39c5158Smillert        unless ($SAME_PERMISSIONS) {
930b39c5158Smillert            $mode &= ~(oct(7000) | umask);
931b39c5158Smillert        }
932*eac174f2Safresh1        CORE::chmod( $mode, $full ) or
933b39c5158Smillert            $self->_error( qq[Could not chown '$full' to ] . $entry->mode );
934b39c5158Smillert    }
935b39c5158Smillert
936b39c5158Smillert    return 1;
937b39c5158Smillert}
938b39c5158Smillert
939b39c5158Smillertsub _make_special_file {
940b39c5158Smillert    my $self    = shift;
941b39c5158Smillert    my $entry   = shift     or return;
942b39c5158Smillert    my $file    = shift;    return unless defined $file;
943b39c5158Smillert
944b39c5158Smillert    my $err;
945b39c5158Smillert
946b39c5158Smillert    if( $entry->is_symlink ) {
947b39c5158Smillert        my $fail;
948b39c5158Smillert        if( ON_UNIX ) {
949b39c5158Smillert            symlink( $entry->linkname, $file ) or $fail++;
950b39c5158Smillert
951b39c5158Smillert        } else {
952b39c5158Smillert            $self->_extract_special_file_as_plain_file( $entry, $file )
953b39c5158Smillert                or $fail++;
954b39c5158Smillert        }
955b39c5158Smillert
956b39c5158Smillert        $err =  qq[Making symbolic link '$file' to '] .
957b39c5158Smillert                $entry->linkname .q[' failed] if $fail;
958b39c5158Smillert
959b39c5158Smillert    } elsif ( $entry->is_hardlink ) {
960b39c5158Smillert        my $fail;
961b39c5158Smillert        if( ON_UNIX ) {
962b39c5158Smillert            link( $entry->linkname, $file ) or $fail++;
963b39c5158Smillert
964b39c5158Smillert        } else {
965b39c5158Smillert            $self->_extract_special_file_as_plain_file( $entry, $file )
966b39c5158Smillert                or $fail++;
967b39c5158Smillert        }
968b39c5158Smillert
969b39c5158Smillert        $err =  qq[Making hard link from '] . $entry->linkname .
970b39c5158Smillert                qq[' to '$file' failed] if $fail;
971b39c5158Smillert
972b39c5158Smillert    } elsif ( $entry->is_fifo ) {
973b39c5158Smillert        ON_UNIX && !system('mknod', $file, 'p') or
974b39c5158Smillert            $err = qq[Making fifo ']. $entry->name .qq[' failed];
975b39c5158Smillert
976b39c5158Smillert    } elsif ( $entry->is_blockdev or $entry->is_chardev ) {
977b39c5158Smillert        my $mode = $entry->is_blockdev ? 'b' : 'c';
978b39c5158Smillert
979b39c5158Smillert        ON_UNIX && !system('mknod', $file, $mode,
980b39c5158Smillert                            $entry->devmajor, $entry->devminor) or
981b39c5158Smillert            $err =  qq[Making block device ']. $entry->name .qq[' (maj=] .
982b39c5158Smillert                    $entry->devmajor . qq[ min=] . $entry->devminor .
983b39c5158Smillert                    qq[) failed.];
984b39c5158Smillert
985b39c5158Smillert    } elsif ( $entry->is_socket ) {
986b39c5158Smillert        ### the original doesn't do anything special for sockets.... ###
987b39c5158Smillert        1;
988b39c5158Smillert    }
989b39c5158Smillert
990b39c5158Smillert    return $err ? $self->_error( $err ) : 1;
991b39c5158Smillert}
992b39c5158Smillert
993b39c5158Smillert### don't know how to make symlinks, let's just extract the file as
994b39c5158Smillert### a plain file
995b39c5158Smillertsub _extract_special_file_as_plain_file {
996b39c5158Smillert    my $self    = shift;
997b39c5158Smillert    my $entry   = shift     or return;
998b39c5158Smillert    my $file    = shift;    return unless defined $file;
999b39c5158Smillert
1000b39c5158Smillert    my $err;
1001b39c5158Smillert    TRY: {
10026fb12b70Safresh1        my $orig = $self->_find_entry( $entry->linkname, $entry );
1003b39c5158Smillert
1004b39c5158Smillert        unless( $orig ) {
1005b39c5158Smillert            $err =  qq[Could not find file '] . $entry->linkname .
1006b39c5158Smillert                    qq[' in memory.];
1007b39c5158Smillert            last TRY;
1008b39c5158Smillert        }
1009b39c5158Smillert
1010b39c5158Smillert        ### clone the entry, make it appear as a normal file ###
10116fb12b70Safresh1        my $clone = $orig->clone;
1012b39c5158Smillert        $clone->_downgrade_to_plainfile;
1013b39c5158Smillert        $self->_extract_file( $clone, $file ) or last TRY;
1014b39c5158Smillert
1015b39c5158Smillert        return 1;
1016b39c5158Smillert    }
1017b39c5158Smillert
1018b39c5158Smillert    return $self->_error($err);
1019b39c5158Smillert}
1020b39c5158Smillert
1021b39c5158Smillert=head2 $tar->list_files( [\@properties] )
1022b39c5158Smillert
1023b39c5158SmillertReturns a list of the names of all the files in the archive.
1024b39c5158Smillert
1025b39c5158SmillertIf C<list_files()> is passed an array reference as its first argument
1026b39c5158Smillertit returns a list of hash references containing the requested
1027b39c5158Smillertproperties of each file.  The following list of properties is
1028b39c5158Smillertsupported: name, size, mtime (last modified date), mode, uid, gid,
1029b39c5158Smillertlinkname, uname, gname, devmajor, devminor, prefix.
1030b39c5158Smillert
1031b39c5158SmillertPassing an array reference containing only one element, 'name', is
1032b39c5158Smillertspecial cased to return a list of names rather than a list of hash
1033b39c5158Smillertreferences, making it equivalent to calling C<list_files> without
1034b39c5158Smillertarguments.
1035b39c5158Smillert
1036b39c5158Smillert=cut
1037b39c5158Smillert
1038b39c5158Smillertsub list_files {
1039b39c5158Smillert    my $self = shift;
1040b39c5158Smillert    my $aref = shift || [ ];
1041b39c5158Smillert
1042b39c5158Smillert    unless( $self->_data ) {
1043b39c5158Smillert        $self->read() or return;
1044b39c5158Smillert    }
1045b39c5158Smillert
1046b39c5158Smillert    if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) {
1047b39c5158Smillert        return map { $_->full_path } @{$self->_data};
1048b39c5158Smillert    } else {
1049b39c5158Smillert
1050b39c5158Smillert        #my @rv;
1051b39c5158Smillert        #for my $obj ( @{$self->_data} ) {
1052b39c5158Smillert        #    push @rv, { map { $_ => $obj->$_() } @$aref };
1053b39c5158Smillert        #}
1054b39c5158Smillert        #return @rv;
1055b39c5158Smillert
1056b39c5158Smillert        ### this does the same as the above.. just needs a +{ }
1057b39c5158Smillert        ### to make sure perl doesn't confuse it for a block
1058b39c5158Smillert        return map {    my $o=$_;
1059b39c5158Smillert                        +{ map { $_ => $o->$_() } @$aref }
1060b39c5158Smillert                    } @{$self->_data};
1061b39c5158Smillert    }
1062b39c5158Smillert}
1063b39c5158Smillert
1064b39c5158Smillertsub _find_entry {
1065b39c5158Smillert    my $self = shift;
1066b39c5158Smillert    my $file = shift;
1067b39c5158Smillert
1068b39c5158Smillert    unless( defined $file ) {
1069b39c5158Smillert        $self->_error( qq[No file specified] );
1070b39c5158Smillert        return;
1071b39c5158Smillert    }
1072b39c5158Smillert
1073b39c5158Smillert    ### it's an object already
1074b39c5158Smillert    return $file if UNIVERSAL::isa( $file, 'Archive::Tar::File' );
1075b39c5158Smillert
10766fb12b70Safresh1seach_entry:
10776fb12b70Safresh1		if($self->_data){
1078b39c5158Smillert			for my $entry ( @{$self->_data} ) {
1079b39c5158Smillert					my $path = $entry->full_path;
1080b39c5158Smillert					return $entry if $path eq $file;
1081b39c5158Smillert			}
10826fb12b70Safresh1		}
10836fb12b70Safresh1
10846fb12b70Safresh1		if($Archive::Tar::RESOLVE_SYMLINK!~/none/){
10856fb12b70Safresh1			if(my $link_entry = shift()){#fallback mode when symlinks are using relative notations ( ../a/./b/text.bin )
10866fb12b70Safresh1				$file = _symlinks_resolver( $link_entry->name, $file );
10876fb12b70Safresh1				goto seach_entry if $self->_data;
10886fb12b70Safresh1
10896fb12b70Safresh1				#this will be slower than never, but won't failed!
10906fb12b70Safresh1
10916fb12b70Safresh1				my $iterargs = $link_entry->{'_archive'};
10926fb12b70Safresh1				if($Archive::Tar::RESOLVE_SYMLINK=~/speed/ && @$iterargs==3){
10936fb12b70Safresh1				#faster	but whole archive will be read in memory
10946fb12b70Safresh1					#read whole archive and share data
10956fb12b70Safresh1					my $archive = Archive::Tar->new;
10966fb12b70Safresh1					$archive->read( @$iterargs );
10976fb12b70Safresh1					push @$iterargs, $archive; #take a trace for destruction
10986fb12b70Safresh1					if($archive->_data){
10996fb12b70Safresh1						$self->_data( $archive->_data );
11006fb12b70Safresh1						goto seach_entry;
11016fb12b70Safresh1					}
11026fb12b70Safresh1				}#faster
11036fb12b70Safresh1
11046fb12b70Safresh1				{#slower but lower memory usage
11056fb12b70Safresh1					# $iterargs = [$filename, $compressed, $opts];
11066fb12b70Safresh1					my $next = Archive::Tar->iter( @$iterargs );
11076fb12b70Safresh1					while(my $e = $next->()){
11086fb12b70Safresh1						if($e->full_path eq $file){
11096fb12b70Safresh1							undef $next;
11106fb12b70Safresh1							return $e;
11116fb12b70Safresh1						}
11126fb12b70Safresh1					}
11136fb12b70Safresh1				}#slower
11146fb12b70Safresh1			}
11156fb12b70Safresh1		}
1116b39c5158Smillert
1117b39c5158Smillert    $self->_error( qq[No such file in archive: '$file'] );
1118b39c5158Smillert    return;
1119b39c5158Smillert}
1120b39c5158Smillert
1121b39c5158Smillert=head2 $tar->get_files( [@filenames] )
1122b39c5158Smillert
1123b39c5158SmillertReturns the C<Archive::Tar::File> objects matching the filenames
1124b39c5158Smillertprovided. If no filename list was passed, all C<Archive::Tar::File>
1125b39c5158Smillertobjects in the current Tar object are returned.
1126b39c5158Smillert
1127b39c5158SmillertPlease refer to the C<Archive::Tar::File> documentation on how to
1128b39c5158Smillerthandle these objects.
1129b39c5158Smillert
1130b39c5158Smillert=cut
1131b39c5158Smillert
1132b39c5158Smillertsub get_files {
1133b39c5158Smillert    my $self = shift;
1134b39c5158Smillert
1135b39c5158Smillert    return @{ $self->_data } unless @_;
1136b39c5158Smillert
1137b39c5158Smillert    my @list;
1138b39c5158Smillert    for my $file ( @_ ) {
1139b39c5158Smillert        push @list, grep { defined } $self->_find_entry( $file );
1140b39c5158Smillert    }
1141b39c5158Smillert
1142b39c5158Smillert    return @list;
1143b39c5158Smillert}
1144b39c5158Smillert
1145b39c5158Smillert=head2 $tar->get_content( $file )
1146b39c5158Smillert
1147b39c5158SmillertReturn the content of the named file.
1148b39c5158Smillert
1149b39c5158Smillert=cut
1150b39c5158Smillert
1151b39c5158Smillertsub get_content {
1152b39c5158Smillert    my $self = shift;
1153b39c5158Smillert    my $entry = $self->_find_entry( shift ) or return;
1154b39c5158Smillert
1155b39c5158Smillert    return $entry->data;
1156b39c5158Smillert}
1157b39c5158Smillert
1158b39c5158Smillert=head2 $tar->replace_content( $file, $content )
1159b39c5158Smillert
1160b39c5158SmillertMake the string $content be the content for the file named $file.
1161b39c5158Smillert
1162b39c5158Smillert=cut
1163b39c5158Smillert
1164b39c5158Smillertsub replace_content {
1165b39c5158Smillert    my $self = shift;
1166b39c5158Smillert    my $entry = $self->_find_entry( shift ) or return;
1167b39c5158Smillert
1168b39c5158Smillert    return $entry->replace_content( shift );
1169b39c5158Smillert}
1170b39c5158Smillert
1171b39c5158Smillert=head2 $tar->rename( $file, $new_name )
1172b39c5158Smillert
1173b39c5158SmillertRename the file of the in-memory archive to $new_name.
1174b39c5158Smillert
1175b39c5158SmillertNote that you must specify a Unix path for $new_name, since per tar
1176b39c5158Smillertstandard, all files in the archive must be Unix paths.
1177b39c5158Smillert
1178b39c5158SmillertReturns true on success and false on failure.
1179b39c5158Smillert
1180b39c5158Smillert=cut
1181b39c5158Smillert
1182b39c5158Smillertsub rename {
1183b39c5158Smillert    my $self = shift;
1184b39c5158Smillert    my $file = shift; return unless defined $file;
1185b39c5158Smillert    my $new  = shift; return unless defined $new;
1186b39c5158Smillert
1187b39c5158Smillert    my $entry = $self->_find_entry( $file ) or return;
1188b39c5158Smillert
1189b39c5158Smillert    return $entry->rename( $new );
1190b39c5158Smillert}
1191b39c5158Smillert
1192898184e3Ssthen=head2 $tar->chmod( $file, $mode )
1193898184e3Ssthen
1194898184e3SsthenChange mode of $file to $mode.
1195898184e3Ssthen
1196898184e3SsthenReturns true on success and false on failure.
1197898184e3Ssthen
1198898184e3Ssthen=cut
1199898184e3Ssthen
1200898184e3Ssthensub chmod {
1201898184e3Ssthen    my $self = shift;
1202898184e3Ssthen    my $file = shift; return unless defined $file;
1203898184e3Ssthen    my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/;
1204898184e3Ssthen    my @args = ("$mode");
1205898184e3Ssthen
1206898184e3Ssthen    my $entry = $self->_find_entry( $file ) or return;
1207898184e3Ssthen    my $x = $entry->chmod( @args );
1208898184e3Ssthen    return $x;
1209898184e3Ssthen}
1210898184e3Ssthen
1211898184e3Ssthen=head2 $tar->chown( $file, $uname [, $gname] )
1212898184e3Ssthen
1213898184e3SsthenChange owner $file to $uname and $gname.
1214898184e3Ssthen
1215898184e3SsthenReturns true on success and false on failure.
1216898184e3Ssthen
1217898184e3Ssthen=cut
1218898184e3Ssthen
1219898184e3Ssthensub chown {
1220898184e3Ssthen    my $self = shift;
1221898184e3Ssthen    my $file = shift; return unless defined $file;
1222898184e3Ssthen    my $uname  = shift; return unless defined $uname;
1223898184e3Ssthen    my @args   = ($uname);
1224898184e3Ssthen    push(@args, shift);
1225898184e3Ssthen
1226898184e3Ssthen    my $entry = $self->_find_entry( $file ) or return;
1227898184e3Ssthen    my $x = $entry->chown( @args );
1228898184e3Ssthen    return $x;
1229898184e3Ssthen}
1230898184e3Ssthen
1231b39c5158Smillert=head2 $tar->remove (@filenamelist)
1232b39c5158Smillert
1233b39c5158SmillertRemoves any entries with names matching any of the given filenames
1234b39c5158Smillertfrom the in-memory archive. Returns a list of C<Archive::Tar::File>
1235b39c5158Smillertobjects that remain.
1236b39c5158Smillert
1237b39c5158Smillert=cut
1238b39c5158Smillert
1239b39c5158Smillertsub remove {
1240b39c5158Smillert    my $self = shift;
1241b39c5158Smillert    my @list = @_;
1242b39c5158Smillert
1243b39c5158Smillert    my %seen = map { $_->full_path => $_ } @{$self->_data};
1244b39c5158Smillert    delete $seen{ $_ } for @list;
1245b39c5158Smillert
1246b39c5158Smillert    $self->_data( [values %seen] );
1247b39c5158Smillert
1248b39c5158Smillert    return values %seen;
1249b39c5158Smillert}
1250b39c5158Smillert
1251b39c5158Smillert=head2 $tar->clear
1252b39c5158Smillert
1253b39c5158SmillertC<clear> clears the current in-memory archive. This effectively gives
1254b39c5158Smillertyou a 'blank' object, ready to be filled again. Note that C<clear>
1255b39c5158Smillertonly has effect on the object, not the underlying tarfile.
1256b39c5158Smillert
1257b39c5158Smillert=cut
1258b39c5158Smillert
1259b39c5158Smillertsub clear {
1260b39c5158Smillert    my $self = shift or return;
1261b39c5158Smillert
1262b39c5158Smillert    $self->_data( [] );
1263b39c5158Smillert    $self->_file( '' );
1264b39c5158Smillert
1265b39c5158Smillert    return 1;
1266b39c5158Smillert}
1267b39c5158Smillert
1268b39c5158Smillert
1269b39c5158Smillert=head2 $tar->write ( [$file, $compressed, $prefix] )
1270b39c5158Smillert
1271b39c5158SmillertWrite the in-memory archive to disk.  The first argument can either
1272b39c5158Smillertbe the name of a file or a reference to an already open filehandle (a
1273b39c5158SmillertGLOB reference).
1274b39c5158Smillert
127556d68f1eSafresh1The second argument is used to indicate compression. You can
127656d68f1eSafresh1compress using C<gzip>, C<bzip2> or C<xz>. If you pass a digit, it's assumed
1277b39c5158Smillertto be the C<gzip> compression level (between 1 and 9), but the use of
1278898184e3Ssthenconstants is preferred:
1279b39c5158Smillert
1280b39c5158Smillert  # write a gzip compressed file
1281b39c5158Smillert  $tar->write( 'out.tgz', COMPRESS_GZIP );
1282b39c5158Smillert
1283b39c5158Smillert  # write a bzip compressed file
1284b39c5158Smillert  $tar->write( 'out.tbz', COMPRESS_BZIP );
1285b39c5158Smillert
128656d68f1eSafresh1  # write a xz compressed file
128756d68f1eSafresh1  $tar->write( 'out.txz', COMPRESS_XZ );
128856d68f1eSafresh1
1289b39c5158SmillertNote that when you pass in a filehandle, the compression argument
1290b39c5158Smillertis ignored, as all files are printed verbatim to your filehandle.
1291b39c5158SmillertIf you wish to enable compression with filehandles, use an
129256d68f1eSafresh1C<IO::Zlib>, C<IO::Compress::Bzip2> or C<IO::Compress::Xz> filehandle instead.
1293b39c5158Smillert
1294b39c5158SmillertThe third argument is an optional prefix. All files will be tucked
1295b39c5158Smillertaway in the directory you specify as prefix. So if you have files
1296b39c5158Smillert'a' and 'b' in your archive, and you specify 'foo' as prefix, they
1297b39c5158Smillertwill be written to the archive as 'foo/a' and 'foo/b'.
1298b39c5158Smillert
1299b39c5158SmillertIf no arguments are given, C<write> returns the entire formatted
1300b39c5158Smillertarchive as a string, which could be useful if you'd like to stuff the
1301b39c5158Smillertarchive into a socket or a pipe to gzip or something.
1302b39c5158Smillert
1303b39c5158Smillert
1304b39c5158Smillert=cut
1305b39c5158Smillert
1306b39c5158Smillertsub write {
1307b39c5158Smillert    my $self        = shift;
1308b39c5158Smillert    my $file        = shift; $file = '' unless defined $file;
1309b39c5158Smillert    my $gzip        = shift || 0;
1310b39c5158Smillert    my $ext_prefix  = shift; $ext_prefix = '' unless defined $ext_prefix;
1311b39c5158Smillert    my $dummy       = '';
1312b39c5158Smillert
1313b39c5158Smillert    ### only need a handle if we have a file to print to ###
1314b39c5158Smillert    my $handle = length($file)
1315b39c5158Smillert                    ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) )
1316b39c5158Smillert                        or return )
1317b39c5158Smillert                    : $HAS_PERLIO    ? do { open my $h, '>', \$dummy; $h }
1318b39c5158Smillert                    : $HAS_IO_STRING ? IO::String->new
1319b39c5158Smillert                    : __PACKAGE__->no_string_support();
1320b39c5158Smillert
1321b39c5158Smillert    ### Addresses: #41798: Nonempty $\ when writing a TAR file produces a
1322b39c5158Smillert    ### corrupt TAR file. Must clear out $\ to make sure no garbage is
1323b39c5158Smillert    ### printed to the archive
1324b39c5158Smillert    local $\;
1325b39c5158Smillert
1326b39c5158Smillert    for my $entry ( @{$self->_data} ) {
1327b39c5158Smillert        ### entries to be written to the tarfile ###
1328b39c5158Smillert        my @write_me;
1329b39c5158Smillert
1330b39c5158Smillert        ### only now will we change the object to reflect the current state
1331b39c5158Smillert        ### of the name and prefix fields -- this needs to be limited to
1332b39c5158Smillert        ### write() only!
1333b39c5158Smillert        my $clone = $entry->clone;
1334b39c5158Smillert
1335b39c5158Smillert
1336b39c5158Smillert        ### so, if you don't want use to use the prefix, we'll stuff
1337b39c5158Smillert        ### everything in the name field instead
1338b39c5158Smillert        if( $DO_NOT_USE_PREFIX ) {
1339b39c5158Smillert
1340b39c5158Smillert            ### you might have an extended prefix, if so, set it in the clone
1341b39c5158Smillert            ### XXX is ::Unix right?
1342b39c5158Smillert            $clone->name( length $ext_prefix
1343b39c5158Smillert                            ? File::Spec::Unix->catdir( $ext_prefix,
1344b39c5158Smillert                                                        $clone->full_path)
1345b39c5158Smillert                            : $clone->full_path );
1346b39c5158Smillert            $clone->prefix( '' );
1347b39c5158Smillert
1348b39c5158Smillert        ### otherwise, we'll have to set it properly -- prefix part in the
1349b39c5158Smillert        ### prefix and name part in the name field.
1350b39c5158Smillert        } else {
1351b39c5158Smillert
1352b39c5158Smillert            ### split them here, not before!
1353b39c5158Smillert            my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path );
1354b39c5158Smillert
1355b39c5158Smillert            ### you might have an extended prefix, if so, set it in the clone
1356b39c5158Smillert            ### XXX is ::Unix right?
1357b39c5158Smillert            $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix )
1358b39c5158Smillert                if length $ext_prefix;
1359b39c5158Smillert
1360b39c5158Smillert            $clone->prefix( $prefix );
1361b39c5158Smillert            $clone->name( $name );
1362b39c5158Smillert        }
1363b39c5158Smillert
1364b39c5158Smillert        ### names are too long, and will get truncated if we don't add a
1365b39c5158Smillert        ### '@LongLink' file...
1366b39c5158Smillert        my $make_longlink = (   length($clone->name)    > NAME_LENGTH or
1367b39c5158Smillert                                length($clone->prefix)  > PREFIX_LENGTH
1368b39c5158Smillert                            ) || 0;
1369b39c5158Smillert
1370b39c5158Smillert        ### perhaps we need to make a longlink file?
1371b39c5158Smillert        if( $make_longlink ) {
1372b39c5158Smillert            my $longlink = Archive::Tar::File->new(
1373b39c5158Smillert                            data => LONGLINK_NAME,
1374b39c5158Smillert                            $clone->full_path,
1375b39c5158Smillert                            { type => LONGLINK }
1376b39c5158Smillert                        );
1377b39c5158Smillert
1378b39c5158Smillert            unless( $longlink ) {
1379b39c5158Smillert                $self->_error(  qq[Could not create 'LongLink' entry for ] .
1380b39c5158Smillert                                qq[oversize file '] . $clone->full_path ."'" );
1381b39c5158Smillert                return;
1382b39c5158Smillert            };
1383b39c5158Smillert
1384b39c5158Smillert            push @write_me, $longlink;
1385b39c5158Smillert        }
1386b39c5158Smillert
1387b39c5158Smillert        push @write_me, $clone;
1388b39c5158Smillert
1389b39c5158Smillert        ### write the one, optionally 2 a::t::file objects to the handle
1390b39c5158Smillert        for my $clone (@write_me) {
1391b39c5158Smillert
1392b39c5158Smillert            ### if the file is a symlink, there are 2 options:
1393b39c5158Smillert            ### either we leave the symlink intact, but then we don't write any
1394b39c5158Smillert            ### data OR we follow the symlink, which means we actually make a
1395b39c5158Smillert            ### copy. if we do the latter, we have to change the TYPE of the
1396b39c5158Smillert            ### clone to 'FILE'
1397b39c5158Smillert            my $link_ok =  $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK;
1398b39c5158Smillert            my $data_ok = !$clone->is_symlink && $clone->has_content;
1399b39c5158Smillert
1400b39c5158Smillert            ### downgrade to a 'normal' file if it's a symlink we're going to
1401b39c5158Smillert            ### treat as a regular file
1402b39c5158Smillert            $clone->_downgrade_to_plainfile if $link_ok;
1403b39c5158Smillert
1404b39c5158Smillert            ### get the header for this block
1405b39c5158Smillert            my $header = $self->_format_tar_entry( $clone );
1406b39c5158Smillert            unless( $header ) {
1407b39c5158Smillert                $self->_error(q[Could not format header for: ] .
1408b39c5158Smillert                                    $clone->full_path );
1409b39c5158Smillert                return;
1410b39c5158Smillert            }
1411b39c5158Smillert
1412b39c5158Smillert            unless( print $handle $header ) {
1413b39c5158Smillert                $self->_error(q[Could not write header for: ] .
1414b39c5158Smillert                                    $clone->full_path);
1415b39c5158Smillert                return;
1416b39c5158Smillert            }
1417b39c5158Smillert
1418b39c5158Smillert            if( $link_ok or $data_ok ) {
1419b39c5158Smillert                unless( print $handle $clone->data ) {
1420b39c5158Smillert                    $self->_error(q[Could not write data for: ] .
1421b39c5158Smillert                                    $clone->full_path);
1422b39c5158Smillert                    return;
1423b39c5158Smillert                }
1424b39c5158Smillert
1425b39c5158Smillert                ### pad the end of the clone if required ###
1426b39c5158Smillert                print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK
1427b39c5158Smillert            }
1428b39c5158Smillert
1429b39c5158Smillert        } ### done writing these entries
1430b39c5158Smillert    }
1431b39c5158Smillert
1432b39c5158Smillert    ### write the end markers ###
1433b39c5158Smillert    print $handle TAR_END x 2 or
1434b39c5158Smillert            return $self->_error( qq[Could not write tar end markers] );
1435b39c5158Smillert
1436b39c5158Smillert    ### did you want it written to a file, or returned as a string? ###
1437b39c5158Smillert    my $rv =  length($file) ? 1
1438b39c5158Smillert                        : $HAS_PERLIO ? $dummy
1439b39c5158Smillert                        : do { seek $handle, 0, 0; local $/; <$handle> };
1440b39c5158Smillert
1441898184e3Ssthen    ### make sure to close the handle if we created it
1442898184e3Ssthen    if ( $file ne $handle ) {
1443898184e3Ssthen	unless( close $handle ) {
1444898184e3Ssthen	    $self->_error( qq[Could not write tar] );
1445898184e3Ssthen	    return;
1446898184e3Ssthen	}
1447898184e3Ssthen    }
1448b39c5158Smillert
1449b39c5158Smillert    return $rv;
1450b39c5158Smillert}
1451b39c5158Smillert
1452b39c5158Smillertsub _format_tar_entry {
1453b39c5158Smillert    my $self        = shift;
1454b39c5158Smillert    my $entry       = shift or return;
1455b39c5158Smillert    my $ext_prefix  = shift; $ext_prefix = '' unless defined $ext_prefix;
1456b39c5158Smillert    my $no_prefix   = shift || 0;
1457b39c5158Smillert
1458b39c5158Smillert    my $file    = $entry->name;
1459b39c5158Smillert    my $prefix  = $entry->prefix; $prefix = '' unless defined $prefix;
1460b39c5158Smillert
1461b39c5158Smillert    ### remove the prefix from the file name
1462898184e3Ssthen    ### not sure if this is still needed --kane
1463b39c5158Smillert    ### no it's not -- Archive::Tar::File->_new_from_file will take care of
1464b39c5158Smillert    ### this for us. Even worse, this would break if we tried to add a file
1465b39c5158Smillert    ### like x/x.
1466b39c5158Smillert    #if( length $prefix ) {
1467b39c5158Smillert    #    $file =~ s/^$match//;
1468b39c5158Smillert    #}
1469b39c5158Smillert
1470b39c5158Smillert    $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix)
1471b39c5158Smillert                if length $ext_prefix;
1472b39c5158Smillert
1473b39c5158Smillert    ### not sure why this is... ###
1474b39c5158Smillert    my $l = PREFIX_LENGTH; # is ambiguous otherwise...
1475b39c5158Smillert    substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH;
1476b39c5158Smillert
1477898184e3Ssthen    my $f1 = "%06o"; my $f2  = $ZERO_PAD_NUMBERS ? "%011o" : "%11o";
1478b39c5158Smillert
1479b39c5158Smillert    ### this might be optimizable with a 'changed' flag in the file objects ###
1480b39c5158Smillert    my $tar = pack (
1481b39c5158Smillert                PACK,
1482b39c5158Smillert                $file,
1483b39c5158Smillert
1484b39c5158Smillert                (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]),
1485b39c5158Smillert                (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]),
1486b39c5158Smillert
1487b39c5158Smillert                "",  # checksum field - space padded a bit down
1488b39c5158Smillert
1489b39c5158Smillert                (map { $entry->$_() }                 qw[type linkname magic]),
1490b39c5158Smillert
1491b39c5158Smillert                $entry->version || TAR_VERSION,
1492b39c5158Smillert
1493b39c5158Smillert                (map { $entry->$_() }                 qw[uname gname]),
1494b39c5158Smillert                (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]),
1495b39c5158Smillert
1496b39c5158Smillert                ($no_prefix ? '' : $prefix)
1497b39c5158Smillert    );
1498b39c5158Smillert
1499b39c5158Smillert    ### add the checksum ###
1500898184e3Ssthen    my $checksum_fmt = $ZERO_PAD_NUMBERS ? "%06o\0" : "%06o\0";
1501b39c5158Smillert    substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar));
1502b39c5158Smillert
1503b39c5158Smillert    return $tar;
1504b39c5158Smillert}
1505b39c5158Smillert
1506b39c5158Smillert=head2 $tar->add_files( @filenamelist )
1507b39c5158Smillert
1508b39c5158SmillertTakes a list of filenames and adds them to the in-memory archive.
1509b39c5158Smillert
1510b39c5158SmillertThe path to the file is automatically converted to a Unix like
1511b39c5158Smillertequivalent for use in the archive, and, if on MacOS, the file's
1512b39c5158Smillertmodification time is converted from the MacOS epoch to the Unix epoch.
1513b39c5158SmillertSo tar archives created on MacOS with B<Archive::Tar> can be read
1514b39c5158Smillertboth with I<tar> on Unix and applications like I<suntar> or
1515b39c5158SmillertI<Stuffit Expander> on MacOS.
1516b39c5158Smillert
1517b39c5158SmillertBe aware that the file's type/creator and resource fork will be lost,
1518b39c5158Smillertwhich is usually what you want in cross-platform archives.
1519b39c5158Smillert
1520b39c5158SmillertInstead of a filename, you can also pass it an existing C<Archive::Tar::File>
1521b39c5158Smillertobject from, for example, another archive. The object will be clone, and
1522b39c5158Smillerteffectively be a copy of the original, not an alias.
1523b39c5158Smillert
1524b39c5158SmillertReturns a list of C<Archive::Tar::File> objects that were just added.
1525b39c5158Smillert
1526b39c5158Smillert=cut
1527b39c5158Smillert
1528b39c5158Smillertsub add_files {
1529b39c5158Smillert    my $self    = shift;
1530b39c5158Smillert    my @files   = @_ or return;
1531b39c5158Smillert
1532b39c5158Smillert    my @rv;
1533b39c5158Smillert    for my $file ( @files ) {
1534b39c5158Smillert
1535b39c5158Smillert        ### you passed an Archive::Tar::File object
1536b39c5158Smillert        ### clone it so we don't accidentally have a reference to
1537b39c5158Smillert        ### an object from another archive
1538b39c5158Smillert        if( UNIVERSAL::isa( $file,'Archive::Tar::File' ) ) {
1539b39c5158Smillert            push @rv, $file->clone;
1540b39c5158Smillert            next;
1541b39c5158Smillert        }
1542b39c5158Smillert
154391f110e0Safresh1        eval {
154491f110e0Safresh1            if( utf8::is_utf8( $file )) {
154591f110e0Safresh1              utf8::encode( $file );
154691f110e0Safresh1            }
154791f110e0Safresh1        };
154891f110e0Safresh1
1549b39c5158Smillert        unless( -e $file || -l $file ) {
1550b39c5158Smillert            $self->_error( qq[No such file: '$file'] );
1551b39c5158Smillert            next;
1552b39c5158Smillert        }
1553b39c5158Smillert
1554b39c5158Smillert        my $obj = Archive::Tar::File->new( file => $file );
1555b39c5158Smillert        unless( $obj ) {
1556b39c5158Smillert            $self->_error( qq[Unable to add file: '$file'] );
1557b39c5158Smillert            next;
1558b39c5158Smillert        }
1559b39c5158Smillert
1560b39c5158Smillert        push @rv, $obj;
1561b39c5158Smillert    }
1562b39c5158Smillert
1563b39c5158Smillert    push @{$self->{_data}}, @rv;
1564b39c5158Smillert
1565b39c5158Smillert    return @rv;
1566b39c5158Smillert}
1567b39c5158Smillert
1568b39c5158Smillert=head2 $tar->add_data ( $filename, $data, [$opthashref] )
1569b39c5158Smillert
1570b39c5158SmillertTakes a filename, a scalar full of data and optionally a reference to
1571b39c5158Smillerta hash with specific options.
1572b39c5158Smillert
1573b39c5158SmillertWill add a file to the in-memory archive, with name C<$filename> and
1574b39c5158Smillertcontent C<$data>. Specific properties can be set using C<$opthashref>.
1575b39c5158SmillertThe following list of properties is supported: name, size, mtime
1576b39c5158Smillert(last modified date), mode, uid, gid, linkname, uname, gname,
1577b39c5158Smillertdevmajor, devminor, prefix, type.  (On MacOS, the file's path and
1578b39c5158Smillertmodification times are converted to Unix equivalents.)
1579b39c5158Smillert
158091f110e0Safresh1Valid values for the file type are the following constants defined by
158191f110e0Safresh1Archive::Tar::Constant:
1582b39c5158Smillert
1583b39c5158Smillert=over 4
1584b39c5158Smillert
1585b39c5158Smillert=item FILE
1586b39c5158Smillert
1587b39c5158SmillertRegular file.
1588b39c5158Smillert
1589b39c5158Smillert=item HARDLINK
1590b39c5158Smillert
1591b39c5158Smillert=item SYMLINK
1592b39c5158Smillert
1593b39c5158SmillertHard and symbolic ("soft") links; linkname should specify target.
1594b39c5158Smillert
1595b39c5158Smillert=item CHARDEV
1596b39c5158Smillert
1597b39c5158Smillert=item BLOCKDEV
1598b39c5158Smillert
1599b39c5158SmillertCharacter and block devices. devmajor and devminor should specify the major
1600b39c5158Smillertand minor device numbers.
1601b39c5158Smillert
1602b39c5158Smillert=item DIR
1603b39c5158Smillert
1604b39c5158SmillertDirectory.
1605b39c5158Smillert
1606b39c5158Smillert=item FIFO
1607b39c5158Smillert
1608b39c5158SmillertFIFO (named pipe).
1609b39c5158Smillert
1610b39c5158Smillert=item SOCKET
1611b39c5158Smillert
1612b39c5158SmillertSocket.
1613b39c5158Smillert
1614b39c5158Smillert=back
1615b39c5158Smillert
1616b39c5158SmillertReturns the C<Archive::Tar::File> object that was just added, or
1617b39c5158SmillertC<undef> on failure.
1618b39c5158Smillert
1619b39c5158Smillert=cut
1620b39c5158Smillert
1621b39c5158Smillertsub add_data {
1622b39c5158Smillert    my $self    = shift;
1623b39c5158Smillert    my ($file, $data, $opt) = @_;
1624b39c5158Smillert
1625b39c5158Smillert    my $obj = Archive::Tar::File->new( data => $file, $data, $opt );
1626b39c5158Smillert    unless( $obj ) {
1627b39c5158Smillert        $self->_error( qq[Unable to add file: '$file'] );
1628b39c5158Smillert        return;
1629b39c5158Smillert    }
1630b39c5158Smillert
1631b39c5158Smillert    push @{$self->{_data}}, $obj;
1632b39c5158Smillert
1633b39c5158Smillert    return $obj;
1634b39c5158Smillert}
1635b39c5158Smillert
1636b39c5158Smillert=head2 $tar->error( [$BOOL] )
1637b39c5158Smillert
1638b39c5158SmillertReturns the current error string (usually, the last error reported).
1639b39c5158SmillertIf a true value was specified, it will give the C<Carp::longmess>
1640b39c5158Smillertequivalent of the error, in effect giving you a stacktrace.
1641b39c5158Smillert
1642b39c5158SmillertFor backwards compatibility, this error is also available as
1643b39c5158SmillertC<$Archive::Tar::error> although it is much recommended you use the
1644b39c5158Smillertmethod call instead.
1645b39c5158Smillert
1646b39c5158Smillert=cut
1647b39c5158Smillert
1648b39c5158Smillert{
1649b39c5158Smillert    $error = '';
1650b39c5158Smillert    my $longmess;
1651b39c5158Smillert
1652b39c5158Smillert    sub _error {
1653b39c5158Smillert        my $self    = shift;
1654b39c5158Smillert        my $msg     = $error = shift;
1655b39c5158Smillert        $longmess   = Carp::longmess($error);
1656b39c5158Smillert        if (ref $self) {
1657b39c5158Smillert            $self->{_error} = $error;
1658b39c5158Smillert            $self->{_longmess} = $longmess;
1659b39c5158Smillert        }
1660b39c5158Smillert
1661b39c5158Smillert        ### set Archive::Tar::WARN to 0 to disable printing
1662b39c5158Smillert        ### of errors
1663b39c5158Smillert        if( $WARN ) {
1664b39c5158Smillert            carp $DEBUG ? $longmess : $msg;
1665b39c5158Smillert        }
1666b39c5158Smillert
1667b39c5158Smillert        return;
1668b39c5158Smillert    }
1669b39c5158Smillert
1670b39c5158Smillert    sub error {
1671b39c5158Smillert        my $self = shift;
1672b39c5158Smillert        if (ref $self) {
1673b39c5158Smillert            return shift() ? $self->{_longmess} : $self->{_error};
1674b39c5158Smillert        } else {
1675b39c5158Smillert            return shift() ? $longmess : $error;
1676b39c5158Smillert        }
1677b39c5158Smillert    }
1678b39c5158Smillert}
1679b39c5158Smillert
1680b39c5158Smillert=head2 $tar->setcwd( $cwd );
1681b39c5158Smillert
1682b39c5158SmillertC<Archive::Tar> needs to know the current directory, and it will run
1683b39c5158SmillertC<Cwd::cwd()> I<every> time it extracts a I<relative> entry from the
1684b39c5158Smillerttarfile and saves it in the file system. (As of version 1.30, however,
1685b39c5158SmillertC<Archive::Tar> will use the speed optimization described below
1686b39c5158Smillertautomatically, so it's only relevant if you're using C<extract_file()>).
1687b39c5158Smillert
1688b39c5158SmillertSince C<Archive::Tar> doesn't change the current directory internally
1689b39c5158Smillertwhile it is extracting the items in a tarball, all calls to C<Cwd::cwd()>
1690b39c5158Smillertcan be avoided if we can guarantee that the current directory doesn't
1691b39c5158Smillertget changed externally.
1692b39c5158Smillert
1693b39c5158SmillertTo use this performance boost, set the current directory via
1694b39c5158Smillert
1695b39c5158Smillert    use Cwd;
1696b39c5158Smillert    $tar->setcwd( cwd() );
1697b39c5158Smillert
1698b39c5158Smillertonce before calling a function like C<extract_file> and
1699b39c5158SmillertC<Archive::Tar> will use the current directory setting from then on
1700b39c5158Smillertand won't call C<Cwd::cwd()> internally.
1701b39c5158Smillert
1702b39c5158SmillertTo switch back to the default behaviour, use
1703b39c5158Smillert
1704b39c5158Smillert    $tar->setcwd( undef );
1705b39c5158Smillert
1706b39c5158Smillertand C<Archive::Tar> will call C<Cwd::cwd()> internally again.
1707b39c5158Smillert
1708898184e3SsthenIf you're using C<Archive::Tar>'s C<extract()> method, C<setcwd()> will
1709b39c5158Smillertbe called for you.
1710b39c5158Smillert
1711b39c5158Smillert=cut
1712b39c5158Smillert
1713b39c5158Smillertsub setcwd {
1714b39c5158Smillert    my $self     = shift;
1715b39c5158Smillert    my $cwd      = shift;
1716b39c5158Smillert
1717b39c5158Smillert    $self->{cwd} = $cwd;
1718b39c5158Smillert}
1719b39c5158Smillert
1720b39c5158Smillert=head1 Class Methods
1721b39c5158Smillert
1722b39c5158Smillert=head2 Archive::Tar->create_archive($file, $compressed, @filelist)
1723b39c5158Smillert
1724b39c5158SmillertCreates a tar file from the list of files provided.  The first
1725b39c5158Smillertargument can either be the name of the tar file to create or a
1726b39c5158Smillertreference to an open file handle (e.g. a GLOB reference).
1727b39c5158Smillert
172856d68f1eSafresh1The second argument is used to indicate compression. You can
172956d68f1eSafresh1compress using C<gzip>, C<bzip2> or C<xz>. If you pass a digit, it's assumed
1730b39c5158Smillertto be the C<gzip> compression level (between 1 and 9), but the use of
1731898184e3Ssthenconstants is preferred:
1732b39c5158Smillert
1733b39c5158Smillert  # write a gzip compressed file
1734b39c5158Smillert  Archive::Tar->create_archive( 'out.tgz', COMPRESS_GZIP, @filelist );
1735b39c5158Smillert
1736b39c5158Smillert  # write a bzip compressed file
1737b39c5158Smillert  Archive::Tar->create_archive( 'out.tbz', COMPRESS_BZIP, @filelist );
1738b39c5158Smillert
173956d68f1eSafresh1  # write a xz compressed file
174056d68f1eSafresh1  Archive::Tar->create_archive( 'out.txz', COMPRESS_XZ, @filelist );
174156d68f1eSafresh1
1742b39c5158SmillertNote that when you pass in a filehandle, the compression argument
1743b39c5158Smillertis ignored, as all files are printed verbatim to your filehandle.
1744b39c5158SmillertIf you wish to enable compression with filehandles, use an
174556d68f1eSafresh1C<IO::Zlib>, C<IO::Compress::Bzip2> or C<IO::Compress::Xz> filehandle instead.
1746b39c5158Smillert
1747b39c5158SmillertThe remaining arguments list the files to be included in the tar file.
1748b39c5158SmillertThese files must all exist. Any files which don't exist or can't be
1749b39c5158Smillertread are silently ignored.
1750b39c5158Smillert
1751b39c5158SmillertIf the archive creation fails for any reason, C<create_archive> will
1752b39c5158Smillertreturn false. Please use the C<error> method to find the cause of the
1753b39c5158Smillertfailure.
1754b39c5158Smillert
1755b39c5158SmillertNote that this method does not write C<on the fly> as it were; it
1756b39c5158Smillertstill reads all the files into memory before writing out the archive.
1757b39c5158SmillertConsult the FAQ below if this is a problem.
1758b39c5158Smillert
1759b39c5158Smillert=cut
1760b39c5158Smillert
1761b39c5158Smillertsub create_archive {
1762b39c5158Smillert    my $class = shift;
1763b39c5158Smillert
1764b39c5158Smillert    my $file    = shift; return unless defined $file;
1765b39c5158Smillert    my $gzip    = shift || 0;
1766b39c5158Smillert    my @files   = @_;
1767b39c5158Smillert
1768b39c5158Smillert    unless( @files ) {
1769b39c5158Smillert        return $class->_error( qq[Cowardly refusing to create empty archive!] );
1770b39c5158Smillert    }
1771b39c5158Smillert
1772b39c5158Smillert    my $tar = $class->new;
1773b39c5158Smillert    $tar->add_files( @files );
1774b39c5158Smillert    return $tar->write( $file, $gzip );
1775b39c5158Smillert}
1776b39c5158Smillert
1777b39c5158Smillert=head2 Archive::Tar->iter( $filename, [ $compressed, {opt => $val} ] )
1778b39c5158Smillert
1779b39c5158SmillertReturns an iterator function that reads the tar file without loading
1780b39c5158Smillertit all in memory.  Each time the function is called it will return the
1781b39c5158Smillertnext file in the tarball. The files are returned as
1782b39c5158SmillertC<Archive::Tar::File> objects. The iterator function returns the
1783b39c5158Smillertempty list once it has exhausted the files contained.
1784b39c5158Smillert
1785b39c5158SmillertThe second argument can be a hash reference with options, which are
1786b39c5158Smillertidentical to the arguments passed to C<read()>.
1787b39c5158Smillert
1788b39c5158SmillertExample usage:
1789b39c5158Smillert
1790b39c5158Smillert    my $next = Archive::Tar->iter( "example.tar.gz", 1, {filter => qr/\.pm$/} );
1791b39c5158Smillert
1792b39c5158Smillert    while( my $f = $next->() ) {
1793b39c5158Smillert        print $f->name, "\n";
1794b39c5158Smillert
1795b39c5158Smillert        $f->extract or warn "Extraction failed";
1796b39c5158Smillert
1797b39c5158Smillert        # ....
1798b39c5158Smillert    }
1799b39c5158Smillert
1800b39c5158Smillert=cut
1801b39c5158Smillert
1802b39c5158Smillert
1803b39c5158Smillertsub iter {
1804b39c5158Smillert    my $class       = shift;
18059f11ffb7Safresh1    my $filename    = shift;
18069f11ffb7Safresh1    return unless defined $filename;
1807898184e3Ssthen    my $compressed  = shift || 0;
1808b39c5158Smillert    my $opts        = shift || {};
1809b39c5158Smillert
1810b39c5158Smillert    ### get a handle to read from.
1811b39c5158Smillert    my $handle = $class->_get_handle(
1812b39c5158Smillert        $filename,
1813b39c5158Smillert        $compressed,
1814b39c5158Smillert        READ_ONLY->( ZLIB )
1815b39c5158Smillert    ) or return;
1816b39c5158Smillert
1817b39c5158Smillert    my @data;
18186fb12b70Safresh1		my $CONSTRUCT_ARGS = [ $filename, $compressed, $opts ];
1819b39c5158Smillert    return sub {
1820b39c5158Smillert        return shift(@data)     if @data;       # more than one file returned?
1821b39c5158Smillert        return                  unless $handle; # handle exhausted?
1822b39c5158Smillert
1823b39c5158Smillert        ### read data, should only return file
1824b39c5158Smillert        my $tarfile = $class->_read_tar($handle, { %$opts, limit => 1 });
1825b39c5158Smillert        @data = @$tarfile if ref $tarfile && ref $tarfile eq 'ARRAY';
18266fb12b70Safresh1				if($Archive::Tar::RESOLVE_SYMLINK!~/none/){
18276fb12b70Safresh1					foreach(@data){
18286fb12b70Safresh1						#may refine this heuristic for ON_UNIX?
18296fb12b70Safresh1						if($_->linkname){
18306fb12b70Safresh1							#is there a better slot to store/share it ?
18316fb12b70Safresh1							$_->{'_archive'} = $CONSTRUCT_ARGS;
18326fb12b70Safresh1						}
18336fb12b70Safresh1					}
18346fb12b70Safresh1				}
1835b39c5158Smillert
1836b39c5158Smillert        ### return one piece of data
1837b39c5158Smillert        return shift(@data)     if @data;
1838b39c5158Smillert
1839b39c5158Smillert        ### data is exhausted, free the filehandle
1840b39c5158Smillert        undef $handle;
18416fb12b70Safresh1				if(@$CONSTRUCT_ARGS == 4){
18426fb12b70Safresh1					#free archive in memory
18436fb12b70Safresh1					undef $CONSTRUCT_ARGS->[-1];
18446fb12b70Safresh1				}
1845b39c5158Smillert        return;
1846b39c5158Smillert    };
1847b39c5158Smillert}
1848b39c5158Smillert
1849b39c5158Smillert=head2 Archive::Tar->list_archive($file, $compressed, [\@properties])
1850b39c5158Smillert
1851b39c5158SmillertReturns a list of the names of all the files in the archive.  The
1852b39c5158Smillertfirst argument can either be the name of the tar file to list or a
1853b39c5158Smillertreference to an open file handle (e.g. a GLOB reference).
1854b39c5158Smillert
1855b39c5158SmillertIf C<list_archive()> is passed an array reference as its third
1856b39c5158Smillertargument it returns a list of hash references containing the requested
1857b39c5158Smillertproperties of each file.  The following list of properties is
1858b39c5158Smillertsupported: full_path, name, size, mtime (last modified date), mode,
185991f110e0Safresh1uid, gid, linkname, uname, gname, devmajor, devminor, prefix, type.
1860b39c5158Smillert
1861b39c5158SmillertSee C<Archive::Tar::File> for details about supported properties.
1862b39c5158Smillert
1863b39c5158SmillertPassing an array reference containing only one element, 'name', is
1864b39c5158Smillertspecial cased to return a list of names rather than a list of hash
1865b39c5158Smillertreferences.
1866b39c5158Smillert
1867b39c5158Smillert=cut
1868b39c5158Smillert
1869b39c5158Smillertsub list_archive {
1870b39c5158Smillert    my $class   = shift;
1871b39c5158Smillert    my $file    = shift; return unless defined $file;
1872b39c5158Smillert    my $gzip    = shift || 0;
1873b39c5158Smillert
1874b39c5158Smillert    my $tar = $class->new($file, $gzip);
1875b39c5158Smillert    return unless $tar;
1876b39c5158Smillert
1877b39c5158Smillert    return $tar->list_files( @_ );
1878b39c5158Smillert}
1879b39c5158Smillert
1880b39c5158Smillert=head2 Archive::Tar->extract_archive($file, $compressed)
1881b39c5158Smillert
1882b39c5158SmillertExtracts the contents of the tar file.  The first argument can either
1883b39c5158Smillertbe the name of the tar file to create or a reference to an open file
1884b39c5158Smillerthandle (e.g. a GLOB reference).  All relative paths in the tar file will
1885b39c5158Smillertbe created underneath the current working directory.
1886b39c5158Smillert
1887b39c5158SmillertC<extract_archive> will return a list of files it extracted.
1888b39c5158SmillertIf the archive extraction fails for any reason, C<extract_archive>
1889b39c5158Smillertwill return false.  Please use the C<error> method to find the cause
1890b39c5158Smillertof the failure.
1891b39c5158Smillert
1892b39c5158Smillert=cut
1893b39c5158Smillert
1894b39c5158Smillertsub extract_archive {
1895b39c5158Smillert    my $class   = shift;
1896b39c5158Smillert    my $file    = shift; return unless defined $file;
1897b39c5158Smillert    my $gzip    = shift || 0;
1898b39c5158Smillert
1899b39c5158Smillert    my $tar = $class->new( ) or return;
1900b39c5158Smillert
1901b39c5158Smillert    return $tar->read( $file, $gzip, { extract => 1 } );
1902b39c5158Smillert}
1903b39c5158Smillert
1904b39c5158Smillert=head2 $bool = Archive::Tar->has_io_string
1905b39c5158Smillert
1906b39c5158SmillertReturns true if we currently have C<IO::String> support loaded.
1907b39c5158Smillert
1908b39c5158SmillertEither C<IO::String> or C<perlio> support is needed to support writing
1909b39c5158Smillertstringified archives. Currently, C<perlio> is the preferred method, if
1910b39c5158Smillertavailable.
1911b39c5158Smillert
1912b39c5158SmillertSee the C<GLOBAL VARIABLES> section to see how to change this preference.
1913b39c5158Smillert
1914b39c5158Smillert=cut
1915b39c5158Smillert
1916b39c5158Smillertsub has_io_string { return $HAS_IO_STRING; }
1917b39c5158Smillert
1918b39c5158Smillert=head2 $bool = Archive::Tar->has_perlio
1919b39c5158Smillert
1920b39c5158SmillertReturns true if we currently have C<perlio> support loaded.
1921b39c5158Smillert
1922b39c5158SmillertThis requires C<perl-5.8> or higher, compiled with C<perlio>
1923b39c5158Smillert
1924b39c5158SmillertEither C<IO::String> or C<perlio> support is needed to support writing
1925b39c5158Smillertstringified archives. Currently, C<perlio> is the preferred method, if
1926b39c5158Smillertavailable.
1927b39c5158Smillert
1928b39c5158SmillertSee the C<GLOBAL VARIABLES> section to see how to change this preference.
1929b39c5158Smillert
1930b39c5158Smillert=cut
1931b39c5158Smillert
1932b39c5158Smillertsub has_perlio { return $HAS_PERLIO; }
1933b39c5158Smillert
1934b39c5158Smillert=head2 $bool = Archive::Tar->has_zlib_support
1935b39c5158Smillert
1936b39c5158SmillertReturns true if C<Archive::Tar> can extract C<zlib> compressed archives
1937b39c5158Smillert
1938b39c5158Smillert=cut
1939b39c5158Smillert
1940b39c5158Smillertsub has_zlib_support { return ZLIB }
1941b39c5158Smillert
1942b39c5158Smillert=head2 $bool = Archive::Tar->has_bzip2_support
1943b39c5158Smillert
1944b39c5158SmillertReturns true if C<Archive::Tar> can extract C<bzip2> compressed archives
1945b39c5158Smillert
1946b39c5158Smillert=cut
1947b39c5158Smillert
1948b39c5158Smillertsub has_bzip2_support { return BZIP }
1949b39c5158Smillert
195056d68f1eSafresh1=head2 $bool = Archive::Tar->has_xz_support
195156d68f1eSafresh1
195256d68f1eSafresh1Returns true if C<Archive::Tar> can extract C<xz> compressed archives
195356d68f1eSafresh1
195456d68f1eSafresh1=cut
195556d68f1eSafresh1
195656d68f1eSafresh1sub has_xz_support { return XZ }
195756d68f1eSafresh1
1958b39c5158Smillert=head2 Archive::Tar->can_handle_compressed_files
1959b39c5158Smillert
1960b39c5158SmillertA simple checking routine, which will return true if C<Archive::Tar>
196156d68f1eSafresh1is able to uncompress compressed archives on the fly with C<IO::Zlib>,
196256d68f1eSafresh1C<IO::Compress::Bzip2> and C<IO::Compress::Xz> or false if not both are installed.
1963b39c5158Smillert
1964b39c5158SmillertYou can use this as a shortcut to determine whether C<Archive::Tar>
1965b39c5158Smillertwill do what you think before passing compressed archives to its
1966b39c5158SmillertC<read> method.
1967b39c5158Smillert
1968b39c5158Smillert=cut
1969b39c5158Smillert
1970b39c5158Smillertsub can_handle_compressed_files { return ZLIB && BZIP ? 1 : 0 }
1971b39c5158Smillert
1972b39c5158Smillertsub no_string_support {
1973b39c5158Smillert    croak("You have to install IO::String to support writing archives to strings");
1974b39c5158Smillert}
1975b39c5158Smillert
19766fb12b70Safresh1sub _symlinks_resolver{
19776fb12b70Safresh1  my ($src, $trg) = @_;
19786fb12b70Safresh1  my @src = split /[\/\\]/, $src;
19796fb12b70Safresh1  my @trg = split /[\/\\]/, $trg;
19806fb12b70Safresh1  pop @src; #strip out current object name
19816fb12b70Safresh1  if(@trg and $trg[0] eq ''){
19826fb12b70Safresh1    shift @trg;
19836fb12b70Safresh1    #restart path from scratch
19846fb12b70Safresh1    @src = ( );
19856fb12b70Safresh1  }
19866fb12b70Safresh1  foreach my $part ( @trg ){
19876fb12b70Safresh1    next if $part eq '.'; #ignore current
19886fb12b70Safresh1    if($part eq '..'){
19896fb12b70Safresh1      #got to parent
19906fb12b70Safresh1      pop @src;
19916fb12b70Safresh1    }
19926fb12b70Safresh1    else{
19936fb12b70Safresh1      #append it
19946fb12b70Safresh1      push @src, $part;
19956fb12b70Safresh1    }
19966fb12b70Safresh1  }
19976fb12b70Safresh1  my $path = join('/', @src);
19986fb12b70Safresh1  warn "_symlinks_resolver('$src','$trg') = $path" if $DEBUG;
19996fb12b70Safresh1  return $path;
20006fb12b70Safresh1}
20016fb12b70Safresh1
2002b39c5158Smillert1;
2003b39c5158Smillert
2004b39c5158Smillert__END__
2005b39c5158Smillert
2006b39c5158Smillert=head1 GLOBAL VARIABLES
2007b39c5158Smillert
2008b39c5158Smillert=head2 $Archive::Tar::FOLLOW_SYMLINK
2009b39c5158Smillert
2010b39c5158SmillertSet this variable to C<1> to make C<Archive::Tar> effectively make a
2011b39c5158Smillertcopy of the file when extracting. Default is C<0>, which
2012b39c5158Smillertmeans the symlink stays intact. Of course, you will have to pack the
2013b39c5158Smillertfile linked to as well.
2014b39c5158Smillert
2015b39c5158SmillertThis option is checked when you write out the tarfile using C<write>
2016b39c5158Smillertor C<create_archive>.
2017b39c5158Smillert
2018b39c5158SmillertThis works just like C</bin/tar>'s C<-h> option.
2019b39c5158Smillert
2020b39c5158Smillert=head2 $Archive::Tar::CHOWN
2021b39c5158Smillert
2022b39c5158SmillertBy default, C<Archive::Tar> will try to C<chown> your files if it is
2023b39c5158Smillertable to. In some cases, this may not be desired. In that case, set
2024b39c5158Smillertthis variable to C<0> to disable C<chown>-ing, even if it were
2025b39c5158Smillertpossible.
2026b39c5158Smillert
2027b39c5158SmillertThe default is C<1>.
2028b39c5158Smillert
2029b39c5158Smillert=head2 $Archive::Tar::CHMOD
2030b39c5158Smillert
2031b39c5158SmillertBy default, C<Archive::Tar> will try to C<chmod> your files to
2032b39c5158Smillertwhatever mode was specified for the particular file in the archive.
2033b39c5158SmillertIn some cases, this may not be desired. In that case, set this
2034b39c5158Smillertvariable to C<0> to disable C<chmod>-ing.
2035b39c5158Smillert
2036b39c5158SmillertThe default is C<1>.
2037b39c5158Smillert
2038b39c5158Smillert=head2 $Archive::Tar::SAME_PERMISSIONS
2039b39c5158Smillert
2040b39c5158SmillertWhen, C<$Archive::Tar::CHMOD> is enabled, this setting controls whether
2041b39c5158Smillertthe permissions on files from the archive are used without modification
2042b39c5158Smillertof if they are filtered by removing any setid bits and applying the
2043b39c5158Smillertcurrent umask.
2044b39c5158Smillert
2045b39c5158SmillertThe default is C<1> for the root user and C<0> for normal users.
2046b39c5158Smillert
2047b39c5158Smillert=head2 $Archive::Tar::DO_NOT_USE_PREFIX
2048b39c5158Smillert
2049b39c5158SmillertBy default, C<Archive::Tar> will try to put paths that are over
2050b39c5158Smillert100 characters in the C<prefix> field of your tar header, as
2051b39c5158Smillertdefined per POSIX-standard. However, some (older) tar programs
2052b39c5158Smillertdo not implement this spec. To retain compatibility with these older
2053b39c5158Smillertor non-POSIX compliant versions, you can set the C<$DO_NOT_USE_PREFIX>
2054b39c5158Smillertvariable to a true value, and C<Archive::Tar> will use an alternate
2055b39c5158Smillertway of dealing with paths over 100 characters by using the
2056b39c5158SmillertC<GNU Extended Header> feature.
2057b39c5158Smillert
2058b39c5158SmillertNote that clients who do not support the C<GNU Extended Header>
2059b39c5158Smillertfeature will not be able to read these archives. Such clients include
2060b39c5158Smillerttars on C<Solaris>, C<Irix> and C<AIX>.
2061b39c5158Smillert
2062b39c5158SmillertThe default is C<0>.
2063b39c5158Smillert
2064b39c5158Smillert=head2 $Archive::Tar::DEBUG
2065b39c5158Smillert
2066b39c5158SmillertSet this variable to C<1> to always get the C<Carp::longmess> output
2067b39c5158Smillertof the warnings, instead of the regular C<carp>. This is the same
2068b39c5158Smillertmessage you would get by doing:
2069b39c5158Smillert
2070b39c5158Smillert    $tar->error(1);
2071b39c5158Smillert
2072b39c5158SmillertDefaults to C<0>.
2073b39c5158Smillert
2074b39c5158Smillert=head2 $Archive::Tar::WARN
2075b39c5158Smillert
2076b39c5158SmillertSet this variable to C<0> if you do not want any warnings printed.
2077b39c5158SmillertPersonally I recommend against doing this, but people asked for the
2078b39c5158Smillertoption. Also, be advised that this is of course not threadsafe.
2079b39c5158Smillert
2080b39c5158SmillertDefaults to C<1>.
2081b39c5158Smillert
2082b39c5158Smillert=head2 $Archive::Tar::error
2083b39c5158Smillert
2084b39c5158SmillertHolds the last reported error. Kept for historical reasons, but its
2085b39c5158Smillertuse is very much discouraged. Use the C<error()> method instead:
2086b39c5158Smillert
2087b39c5158Smillert    warn $tar->error unless $tar->extract;
2088b39c5158Smillert
2089b39c5158SmillertNote that in older versions of this module, the C<error()> method
2090b39c5158Smillertwould return an effectively global value even when called an instance
2091b39c5158Smillertmethod as above. This has since been fixed, and multiple instances of
2092b39c5158SmillertC<Archive::Tar> now have separate error strings.
2093b39c5158Smillert
2094b39c5158Smillert=head2 $Archive::Tar::INSECURE_EXTRACT_MODE
2095b39c5158Smillert
2096b39c5158SmillertThis variable indicates whether C<Archive::Tar> should allow
2097b39c5158Smillertfiles to be extracted outside their current working directory.
2098b39c5158Smillert
2099b39c5158SmillertAllowing this could have security implications, as a malicious
2100b39c5158Smillerttar archive could alter or replace any file the extracting user
2101b39c5158Smillerthas permissions to. Therefor, the default is to not allow
2102b39c5158Smillertinsecure extractions.
2103b39c5158Smillert
2104b39c5158SmillertIf you trust the archive, or have other reasons to allow the
2105b39c5158Smillertarchive to write files outside your current working directory,
2106b39c5158Smillertset this variable to C<true>.
2107b39c5158Smillert
2108b39c5158SmillertNote that this is a backwards incompatible change from version
2109b39c5158SmillertC<1.36> and before.
2110b39c5158Smillert
2111b39c5158Smillert=head2 $Archive::Tar::HAS_PERLIO
2112b39c5158Smillert
2113b39c5158SmillertThis variable holds a boolean indicating if we currently have
2114b39c5158SmillertC<perlio> support loaded. This will be enabled for any perl
2115b39c5158Smillertgreater than C<5.8> compiled with C<perlio>.
2116b39c5158Smillert
2117b39c5158SmillertIf you feel strongly about disabling it, set this variable to
2118b39c5158SmillertC<false>. Note that you will then need C<IO::String> installed
2119b39c5158Smillertto support writing stringified archives.
2120b39c5158Smillert
2121b39c5158SmillertDon't change this variable unless you B<really> know what you're
2122b39c5158Smillertdoing.
2123b39c5158Smillert
2124b39c5158Smillert=head2 $Archive::Tar::HAS_IO_STRING
2125b39c5158Smillert
2126b39c5158SmillertThis variable holds a boolean indicating if we currently have
2127b39c5158SmillertC<IO::String> support loaded. This will be enabled for any perl
2128b39c5158Smillertthat has a loadable C<IO::String> module.
2129b39c5158Smillert
2130b39c5158SmillertIf you feel strongly about disabling it, set this variable to
2131b39c5158SmillertC<false>. Note that you will then need C<perlio> support from
2132b39c5158Smillertyour perl to be able to  write stringified archives.
2133b39c5158Smillert
2134b39c5158SmillertDon't change this variable unless you B<really> know what you're
2135b39c5158Smillertdoing.
2136b39c5158Smillert
2137898184e3Ssthen=head2 $Archive::Tar::ZERO_PAD_NUMBERS
2138898184e3Ssthen
2139898184e3SsthenThis variable holds a boolean indicating if we will create
2140898184e3Ssthenzero padded numbers for C<size>, C<mtime> and C<checksum>.
2141898184e3SsthenThe default is C<0>, indicating that we will create space padded
2142898184e3Ssthennumbers. Added for compatibility with C<busybox> implementations.
2143898184e3Ssthen
21446fb12b70Safresh1=head2 Tuning the way RESOLVE_SYMLINK will works
21456fb12b70Safresh1
21466fb12b70Safresh1	You can tune the behaviour by setting the $Archive::Tar::RESOLVE_SYMLINK variable,
21476fb12b70Safresh1	or $ENV{PERL5_AT_RESOLVE_SYMLINK} before loading the module Archive::Tar.
21486fb12b70Safresh1
21496fb12b70Safresh1  Values can be one of the following:
21506fb12b70Safresh1
21516fb12b70Safresh1		none
21526fb12b70Safresh1           Disable this mechanism and failed as it was in previous version (<1.88)
21536fb12b70Safresh1
21546fb12b70Safresh1		speed (default)
21556fb12b70Safresh1           If you prefer speed
21566fb12b70Safresh1           this will read again the whole archive using read() so all entries
21576fb12b70Safresh1           will be available
21586fb12b70Safresh1
21596fb12b70Safresh1    memory
21606fb12b70Safresh1           If you prefer memory
21616fb12b70Safresh1
21626fb12b70Safresh1	Limitation
21636fb12b70Safresh1
21646fb12b70Safresh1		It won't work for terminal, pipe or sockets or every non seekable source.
21656fb12b70Safresh1
21666fb12b70Safresh1=cut
21676fb12b70Safresh1
2168b39c5158Smillert=head1 FAQ
2169b39c5158Smillert
2170b39c5158Smillert=over 4
2171b39c5158Smillert
2172b39c5158Smillert=item What's the minimum perl version required to run Archive::Tar?
2173b39c5158Smillert
2174b39c5158SmillertYou will need perl version 5.005_03 or newer.
2175b39c5158Smillert
2176b39c5158Smillert=item Isn't Archive::Tar slow?
2177b39c5158Smillert
2178b39c5158SmillertYes it is. It's pure perl, so it's a lot slower then your C</bin/tar>
2179b39c5158SmillertHowever, it's very portable. If speed is an issue, consider using
2180b39c5158SmillertC</bin/tar> instead.
2181b39c5158Smillert
2182b39c5158Smillert=item Isn't Archive::Tar heavier on memory than /bin/tar?
2183b39c5158Smillert
2184b39c5158SmillertYes it is, see previous answer. Since C<Compress::Zlib> and therefore
2185b39c5158SmillertC<IO::Zlib> doesn't support C<seek> on their filehandles, there is little
2186b39c5158Smillertchoice but to read the archive into memory.
2187b39c5158SmillertThis is ok if you want to do in-memory manipulation of the archive.
2188b39c5158Smillert
2189b39c5158SmillertIf you just want to extract, use the C<extract_archive> class method
2190b39c5158Smillertinstead. It will optimize and write to disk immediately.
2191b39c5158Smillert
2192b39c5158SmillertAnother option is to use the C<iter> class method to iterate over
2193b39c5158Smillertthe files in the tarball without reading them all in memory at once.
2194b39c5158Smillert
2195b39c5158Smillert=item Can you lazy-load data instead?
2196b39c5158Smillert
2197b39c5158SmillertIn some cases, yes. You can use the C<iter> class method to iterate
2198b39c5158Smillertover the files in the tarball without reading them all in memory at once.
2199b39c5158Smillert
2200b39c5158Smillert=item How much memory will an X kb tar file need?
2201b39c5158Smillert
2202b39c5158SmillertProbably more than X kb, since it will all be read into memory. If
2203b39c5158Smillertthis is a problem, and you don't need to do in memory manipulation
2204b39c5158Smillertof the archive, consider using the C<iter> class method, or C</bin/tar>
2205b39c5158Smillertinstead.
2206b39c5158Smillert
2207b39c5158Smillert=item What do you do with unsupported filetypes in an archive?
2208b39c5158Smillert
2209b39c5158SmillertC<Unix> has a few filetypes that aren't supported on other platforms,
2210b39c5158Smillertlike C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just
2211b39c5158Smillerttry to make a copy of the original file, rather than throwing an error.
2212b39c5158Smillert
2213b39c5158SmillertThis does require you to read the entire archive in to memory first,
2214b39c5158Smillertsince otherwise we wouldn't know what data to fill the copy with.
2215b39c5158Smillert(This means that you cannot use the class methods, including C<iter>
2216b39c5158Smillerton archives that have incompatible filetypes and still expect things
2217b39c5158Smillertto work).
2218b39c5158Smillert
2219b39c5158SmillertFor other filetypes, like C<chardevs> and C<blockdevs> we'll warn that
2220b39c5158Smillertthe extraction of this particular item didn't work.
2221b39c5158Smillert
2222b39c5158Smillert=item I'm using WinZip, or some other non-POSIX client, and files are not being extracted properly!
2223b39c5158Smillert
2224b39c5158SmillertBy default, C<Archive::Tar> is in a completely POSIX-compatible
2225b39c5158Smillertmode, which uses the POSIX-specification of C<tar> to store files.
2226898184e3SsthenFor paths greater than 100 characters, this is done using the
2227b39c5158SmillertC<POSIX header prefix>. Non-POSIX-compatible clients may not support
2228b39c5158Smillertthis part of the specification, and may only support the C<GNU Extended
2229b39c5158SmillertHeader> functionality. To facilitate those clients, you can set the
2230b39c5158SmillertC<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. See the
2231b39c5158SmillertC<GLOBAL VARIABLES> section for details on this variable.
2232b39c5158Smillert
2233b39c5158SmillertNote that GNU tar earlier than version 1.14 does not cope well with
2234b39c5158Smillertthe C<POSIX header prefix>. If you use such a version, consider setting
2235b39c5158Smillertthe C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>.
2236b39c5158Smillert
2237b39c5158Smillert=item How do I extract only files that have property X from an archive?
2238b39c5158Smillert
2239b39c5158SmillertSometimes, you might not wish to extract a complete archive, just
2240b39c5158Smillertthe files that are relevant to you, based on some criteria.
2241b39c5158Smillert
2242b39c5158SmillertYou can do this by filtering a list of C<Archive::Tar::File> objects
2243b39c5158Smillertbased on your criteria. For example, to extract only files that have
2244b39c5158Smillertthe string C<foo> in their title, you would use:
2245b39c5158Smillert
2246b39c5158Smillert    $tar->extract(
2247b39c5158Smillert        grep { $_->full_path =~ /foo/ } $tar->get_files
2248b39c5158Smillert    );
2249b39c5158Smillert
2250b39c5158SmillertThis way, you can filter on any attribute of the files in the archive.
2251b39c5158SmillertConsult the C<Archive::Tar::File> documentation on how to use these
2252b39c5158Smillertobjects.
2253b39c5158Smillert
2254b39c5158Smillert=item How do I access .tar.Z files?
2255b39c5158Smillert
2256b39c5158SmillertThe C<Archive::Tar> module can optionally use C<Compress::Zlib> (via
2257b39c5158Smillertthe C<IO::Zlib> module) to access tar files that have been compressed
2258b39c5158Smillertwith C<gzip>. Unfortunately tar files compressed with the Unix C<compress>
2259b39c5158Smillertutility cannot be read by C<Compress::Zlib> and so cannot be directly
2260b39c5158Smillertaccesses by C<Archive::Tar>.
2261b39c5158Smillert
2262b39c5158SmillertIf the C<uncompress> or C<gunzip> programs are available, you can use
2263b39c5158Smillertone of these workarounds to read C<.tar.Z> files from C<Archive::Tar>
2264b39c5158Smillert
2265b39c5158SmillertFirstly with C<uncompress>
2266b39c5158Smillert
2267b39c5158Smillert    use Archive::Tar;
2268b39c5158Smillert
2269b39c5158Smillert    open F, "uncompress -c $filename |";
2270b39c5158Smillert    my $tar = Archive::Tar->new(*F);
2271b39c5158Smillert    ...
2272b39c5158Smillert
2273b39c5158Smillertand this with C<gunzip>
2274b39c5158Smillert
2275b39c5158Smillert    use Archive::Tar;
2276b39c5158Smillert
2277b39c5158Smillert    open F, "gunzip -c $filename |";
2278b39c5158Smillert    my $tar = Archive::Tar->new(*F);
2279b39c5158Smillert    ...
2280b39c5158Smillert
2281b39c5158SmillertSimilarly, if the C<compress> program is available, you can use this to
2282b39c5158Smillertwrite a C<.tar.Z> file
2283b39c5158Smillert
2284b39c5158Smillert    use Archive::Tar;
2285b39c5158Smillert    use IO::File;
2286b39c5158Smillert
2287*eac174f2Safresh1    my $fh = IO::File->new( "| compress -c >$filename" );
2288b39c5158Smillert    my $tar = Archive::Tar->new();
2289b39c5158Smillert    ...
2290b39c5158Smillert    $tar->write($fh);
2291b39c5158Smillert    $fh->close ;
2292b39c5158Smillert
2293b39c5158Smillert=item How do I handle Unicode strings?
2294b39c5158Smillert
2295b39c5158SmillertC<Archive::Tar> uses byte semantics for any files it reads from or writes
2296b39c5158Smillertto disk. This is not a problem if you only deal with files and never
2297b39c5158Smillertlook at their content or work solely with byte strings. But if you use
2298b39c5158SmillertUnicode strings with character semantics, some additional steps need
2299b39c5158Smillertto be taken.
2300b39c5158Smillert
2301b39c5158SmillertFor example, if you add a Unicode string like
2302b39c5158Smillert
2303b39c5158Smillert    # Problem
2304b39c5158Smillert    $tar->add_data('file.txt', "Euro: \x{20AC}");
2305b39c5158Smillert
2306b39c5158Smillertthen there will be a problem later when the tarfile gets written out
23079f11ffb7Safresh1to disk via C<< $tar->write() >>:
2308b39c5158Smillert
2309b39c5158Smillert    Wide character in print at .../Archive/Tar.pm line 1014.
2310b39c5158Smillert
2311b39c5158SmillertThe data was added as a Unicode string and when writing it out to disk,
2312b39c5158Smillertthe C<:utf8> line discipline wasn't set by C<Archive::Tar>, so Perl
2313b39c5158Smillerttried to convert the string to ISO-8859 and failed. The written file
2314b39c5158Smillertnow contains garbage.
2315b39c5158Smillert
2316b39c5158SmillertFor this reason, Unicode strings need to be converted to UTF-8-encoded
2317b39c5158Smillertbytestrings before they are handed off to C<add_data()>:
2318b39c5158Smillert
2319b39c5158Smillert    use Encode;
2320b39c5158Smillert    my $data = "Accented character: \x{20AC}";
2321b39c5158Smillert    $data = encode('utf8', $data);
2322b39c5158Smillert
2323b39c5158Smillert    $tar->add_data('file.txt', $data);
2324b39c5158Smillert
2325b39c5158SmillertA opposite problem occurs if you extract a UTF8-encoded file from a
2326b39c5158Smillerttarball. Using C<get_content()> on the C<Archive::Tar::File> object
2327b39c5158Smillertwill return its content as a bytestring, not as a Unicode string.
2328b39c5158Smillert
2329b39c5158SmillertIf you want it to be a Unicode string (because you want character
2330b39c5158Smillertsemantics with operations like regular expression matching), you need
2331b39c5158Smillertto decode the UTF8-encoded content and have Perl convert it into
2332b39c5158Smillerta Unicode string:
2333b39c5158Smillert
2334b39c5158Smillert    use Encode;
2335b39c5158Smillert    my $data = $tar->get_content();
2336b39c5158Smillert
2337b39c5158Smillert    # Make it a Unicode string
2338b39c5158Smillert    $data = decode('utf8', $data);
2339b39c5158Smillert
2340b39c5158SmillertThere is no easy way to provide this functionality in C<Archive::Tar>,
2341b39c5158Smillertbecause a tarball can contain many files, and each of which could be
2342b39c5158Smillertencoded in a different way.
2343b39c5158Smillert
2344b39c5158Smillert=back
2345b39c5158Smillert
2346b39c5158Smillert=head1 CAVEATS
2347b39c5158Smillert
2348b39c5158SmillertThe AIX tar does not fill all unused space in the tar archive with 0x00.
2349b39c5158SmillertThis sometimes leads to warning messages from C<Archive::Tar>.
2350b39c5158Smillert
2351b39c5158Smillert  Invalid header block at offset nnn
2352b39c5158Smillert
2353b39c5158SmillertA fix for that problem is scheduled to be released in the following levels
2354b39c5158Smillertof AIX, all of which should be coming out in the 4th quarter of 2009:
2355b39c5158Smillert
2356b39c5158Smillert AIX 5.3 TL7 SP10
2357b39c5158Smillert AIX 5.3 TL8 SP8
2358b39c5158Smillert AIX 5.3 TL9 SP5
2359b39c5158Smillert AIX 5.3 TL10 SP2
2360b39c5158Smillert
2361b39c5158Smillert AIX 6.1 TL0 SP11
2362b39c5158Smillert AIX 6.1 TL1 SP7
2363b39c5158Smillert AIX 6.1 TL2 SP6
2364b39c5158Smillert AIX 6.1 TL3 SP3
2365b39c5158Smillert
2366b39c5158SmillertThe IBM APAR number for this problem is IZ50240 (Reported component ID:
2367b39c5158Smillert5765G0300 / AIX 5.3). It is possible to get an ifix for that problem.
2368b39c5158SmillertIf you need an ifix please contact your local IBM AIX support.
2369b39c5158Smillert
2370b39c5158Smillert=head1 TODO
2371b39c5158Smillert
2372b39c5158Smillert=over 4
2373b39c5158Smillert
2374b39c5158Smillert=item Check if passed in handles are open for read/write
2375b39c5158Smillert
2376b39c5158SmillertCurrently I don't know of any portable pure perl way to do this.
2377b39c5158SmillertSuggestions welcome.
2378b39c5158Smillert
2379b39c5158Smillert=item Allow archives to be passed in as string
2380b39c5158Smillert
2381b39c5158SmillertCurrently, we only allow opened filehandles or filenames, but
2382b39c5158Smillertnot strings. The internals would need some reworking to facilitate
2383b39c5158Smillertstringified archives.
2384b39c5158Smillert
2385b39c5158Smillert=item Facilitate processing an opened filehandle of a compressed archive
2386b39c5158Smillert
2387b39c5158SmillertCurrently, we only support this if the filehandle is an IO::Zlib object.
2388b39c5158SmillertEnvironments, like apache, will present you with an opened filehandle
2389b39c5158Smillertto an uploaded file, which might be a compressed archive.
2390b39c5158Smillert
2391b39c5158Smillert=back
2392b39c5158Smillert
2393b39c5158Smillert=head1 SEE ALSO
2394b39c5158Smillert
2395b39c5158Smillert=over 4
2396b39c5158Smillert
2397b39c5158Smillert=item The GNU tar specification
2398b39c5158Smillert
2399b39c5158SmillertC<http://www.gnu.org/software/tar/manual/tar.html>
2400b39c5158Smillert
2401898184e3Ssthen=item The PAX format specification
2402b39c5158Smillert
2403898184e3SsthenThe specification which tar derives from; C< http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html>
2404b39c5158Smillert
2405b39c5158Smillert=item A comparison of GNU and POSIX tar standards; C<http://www.delorie.com/gnu/docs/tar/tar_114.html>
2406b39c5158Smillert
2407b39c5158Smillert=item GNU tar intends to switch to POSIX compatibility
2408b39c5158Smillert
2409b39c5158SmillertGNU Tar authors have expressed their intention to become completely
2410b39c5158SmillertPOSIX-compatible; C<http://www.gnu.org/software/tar/manual/html_node/Formats.html>
2411b39c5158Smillert
2412b39c5158Smillert=item A Comparison between various tar implementations
2413b39c5158Smillert
2414b39c5158SmillertLists known issues and incompatibilities; C<http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs>
2415b39c5158Smillert
2416b39c5158Smillert=back
2417b39c5158Smillert
2418b39c5158Smillert=head1 AUTHOR
2419b39c5158Smillert
2420b39c5158SmillertThis module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
2421b39c5158Smillert
2422b39c5158SmillertPlease reports bugs to E<lt>bug-archive-tar@rt.cpan.orgE<gt>.
2423b39c5158Smillert
2424b39c5158Smillert=head1 ACKNOWLEDGEMENTS
2425b39c5158Smillert
2426b39c5158SmillertThanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney, Gisle Aas,
2427b39c5158SmillertRainer Tammer and especially Andrew Savige for their help and suggestions.
2428b39c5158Smillert
2429b39c5158Smillert=head1 COPYRIGHT
2430b39c5158Smillert
2431b39c5158SmillertThis module is copyright (c) 2002 - 2009 Jos Boumans
2432b39c5158SmillertE<lt>kane@cpan.orgE<gt>. All rights reserved.
2433b39c5158Smillert
2434b39c5158SmillertThis library is free software; you may redistribute and/or modify
2435b39c5158Smillertit under the same terms as Perl itself.
2436b39c5158Smillert
2437b39c5158Smillert=cut
2438