1b39c5158Smillert### the gnu tar specification:
2b39c5158Smillert### http://www.gnu.org/software/tar/manual/tar.html
3b39c5158Smillert###
4b39c5158Smillert### and the pax format spec, which tar derives from:
5b39c5158Smillert### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html
6b39c5158Smillert
7b39c5158Smillertpackage Archive::Tar;
8b39c5158Smillertrequire 5.005_03;
9b39c5158Smillert
10b39c5158Smillertuse Cwd;
11b39c5158Smillertuse IO::Zlib;
12b39c5158Smillertuse IO::File;
13b39c5158Smillertuse Carp                qw(carp croak);
14b39c5158Smillertuse File::Spec          ();
15b39c5158Smillertuse File::Spec::Unix    ();
16b39c5158Smillertuse File::Path          ();
17b39c5158Smillert
18b39c5158Smillertuse Archive::Tar::File;
19b39c5158Smillertuse Archive::Tar::Constant;
20b39c5158Smillert
21b39c5158Smillertrequire Exporter;
22b39c5158Smillert
23b39c5158Smillertuse strict;
24b39c5158Smillertuse vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD
25b39c5158Smillert            $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING $SAME_PERMISSIONS
266fb12b70Safresh1            $INSECURE_EXTRACT_MODE $ZERO_PAD_NUMBERS @ISA @EXPORT $RESOLVE_SYMLINK
27b39c5158Smillert         ];
28b39c5158Smillert
29b39c5158Smillert@ISA                    = qw[Exporter];
30b39c5158Smillert@EXPORT                 = qw[ COMPRESS_GZIP COMPRESS_BZIP ];
31b39c5158Smillert$DEBUG                  = 0;
32b39c5158Smillert$WARN                   = 1;
33b39c5158Smillert$FOLLOW_SYMLINK         = 0;
34*9f11ffb7Safresh1$VERSION                = "2.30";
35b39c5158Smillert$CHOWN                  = 1;
36b39c5158Smillert$CHMOD                  = 1;
37b39c5158Smillert$SAME_PERMISSIONS       = $> == 0 ? 1 : 0;
38b39c5158Smillert$DO_NOT_USE_PREFIX      = 0;
39b39c5158Smillert$INSECURE_EXTRACT_MODE  = 0;
40898184e3Ssthen$ZERO_PAD_NUMBERS       = 0;
416fb12b70Safresh1$RESOLVE_SYMLINK        = $ENV{'PERL5_AT_RESOLVE_SYMLINK'} || 'speed';
42b39c5158Smillert
43b39c5158SmillertBEGIN {
44b39c5158Smillert    use Config;
45b39c5158Smillert    $HAS_PERLIO = $Config::Config{useperlio};
46b39c5158Smillert
47b39c5158Smillert    ### try and load IO::String anyway, so you can dynamically
48b39c5158Smillert    ### switch between perlio and IO::String
49b39c5158Smillert    $HAS_IO_STRING = eval {
50b39c5158Smillert        require IO::String;
51b39c5158Smillert        import IO::String;
52b39c5158Smillert        1;
53b39c5158Smillert    } || 0;
54b39c5158Smillert}
55b39c5158Smillert
56b39c5158Smillert=head1 NAME
57b39c5158Smillert
58b39c5158SmillertArchive::Tar - module for manipulations of tar archives
59b39c5158Smillert
60b39c5158Smillert=head1 SYNOPSIS
61b39c5158Smillert
62b39c5158Smillert    use Archive::Tar;
63b39c5158Smillert    my $tar = Archive::Tar->new;
64b39c5158Smillert
65b39c5158Smillert    $tar->read('origin.tgz');
66b39c5158Smillert    $tar->extract();
67b39c5158Smillert
68b39c5158Smillert    $tar->add_files('file/foo.pl', 'docs/README');
69b39c5158Smillert    $tar->add_data('file/baz.txt', 'This is the contents now');
70b39c5158Smillert
71b39c5158Smillert    $tar->rename('oldname', 'new/file/name');
72898184e3Ssthen    $tar->chown('/', 'root');
73898184e3Ssthen    $tar->chown('/', 'root:root');
74898184e3Ssthen    $tar->chmod('/tmp', '1777');
75b39c5158Smillert
76b39c5158Smillert    $tar->write('files.tar');                   # plain tar
77b39c5158Smillert    $tar->write('files.tgz', COMPRESS_GZIP);    # gzip compressed
78b39c5158Smillert    $tar->write('files.tbz', COMPRESS_BZIP);    # bzip2 compressed
79b39c5158Smillert
80b39c5158Smillert=head1 DESCRIPTION
81b39c5158Smillert
82b39c5158SmillertArchive::Tar provides an object oriented mechanism for handling tar
83b39c5158Smillertfiles.  It provides class methods for quick and easy files handling
84b39c5158Smillertwhile also allowing for the creation of tar file objects for custom
85b39c5158Smillertmanipulation.  If you have the IO::Zlib module installed,
86b39c5158SmillertArchive::Tar will also support compressed or gzipped tar files.
87b39c5158Smillert
88b39c5158SmillertAn object of class Archive::Tar represents a .tar(.gz) archive full
89b39c5158Smillertof files and things.
90b39c5158Smillert
91b39c5158Smillert=head1 Object Methods
92b39c5158Smillert
93b39c5158Smillert=head2 Archive::Tar->new( [$file, $compressed] )
94b39c5158Smillert
95b39c5158SmillertReturns a new Tar object. If given any arguments, C<new()> calls the
96b39c5158SmillertC<read()> method automatically, passing on the arguments provided to
97b39c5158Smillertthe C<read()> method.
98b39c5158Smillert
99b39c5158SmillertIf C<new()> is invoked with arguments and the C<read()> method fails
100b39c5158Smillertfor any reason, C<new()> returns undef.
101b39c5158Smillert
102b39c5158Smillert=cut
103b39c5158Smillert
104b39c5158Smillertmy $tmpl = {
105b39c5158Smillert    _data   => [ ],
106b39c5158Smillert    _file   => 'Unknown',
107b39c5158Smillert};
108b39c5158Smillert
109b39c5158Smillert### install get/set accessors for this object.
110b39c5158Smillertfor my $key ( keys %$tmpl ) {
111b39c5158Smillert    no strict 'refs';
112b39c5158Smillert    *{__PACKAGE__."::$key"} = sub {
113b39c5158Smillert        my $self = shift;
114b39c5158Smillert        $self->{$key} = $_[0] if @_;
115b39c5158Smillert        return $self->{$key};
116b39c5158Smillert    }
117b39c5158Smillert}
118b39c5158Smillert
119b39c5158Smillertsub new {
120b39c5158Smillert    my $class = shift;
121b39c5158Smillert    $class = ref $class if ref $class;
122b39c5158Smillert
123b39c5158Smillert    ### copying $tmpl here since a shallow copy makes it use the
124b39c5158Smillert    ### same aref, causing for files to remain in memory always.
125b39c5158Smillert    my $obj = bless { _data => [ ], _file => 'Unknown', _error => '' }, $class;
126b39c5158Smillert
127b39c5158Smillert    if (@_) {
128b39c5158Smillert        unless ( $obj->read( @_ ) ) {
129b39c5158Smillert            $obj->_error(qq[No data could be read from file]);
130b39c5158Smillert            return;
131b39c5158Smillert        }
132b39c5158Smillert    }
133b39c5158Smillert
134b39c5158Smillert    return $obj;
135b39c5158Smillert}
136b39c5158Smillert
137b39c5158Smillert=head2 $tar->read ( $filename|$handle, [$compressed, {opt => 'val'}] )
138b39c5158Smillert
139b39c5158SmillertRead the given tar file into memory.
140b39c5158SmillertThe first argument can either be the name of a file or a reference to
141b39c5158Smillertan already open filehandle (or an IO::Zlib object if it's compressed)
142b39c5158Smillert
143b39c5158SmillertThe C<read> will I<replace> any previous content in C<$tar>!
144b39c5158Smillert
145b39c5158SmillertThe second argument may be considered optional, but remains for
146b39c5158Smillertbackwards compatibility. Archive::Tar now looks at the file
147b39c5158Smillertmagic to determine what class should be used to open the file
148b39c5158Smillertand will transparently Do The Right Thing.
149b39c5158Smillert
150b39c5158SmillertArchive::Tar will warn if you try to pass a bzip2 compressed file and the
151b39c5158SmillertIO::Zlib / IO::Uncompress::Bunzip2 modules are not available and simply return.
152b39c5158Smillert
153b39c5158SmillertNote that you can currently B<not> pass a C<gzip> compressed
154b39c5158Smillertfilehandle, which is not opened with C<IO::Zlib>, a C<bzip2> compressed
155b39c5158Smillertfilehandle, which is not opened with C<IO::Uncompress::Bunzip2>, nor a string
156b39c5158Smillertcontaining the full archive information (either compressed or
157b39c5158Smillertuncompressed). These are worth while features, but not currently
158b39c5158Smillertimplemented. See the C<TODO> section.
159b39c5158Smillert
160b39c5158SmillertThe third argument can be a hash reference with options. Note that
161b39c5158Smillertall options are case-sensitive.
162b39c5158Smillert
163b39c5158Smillert=over 4
164b39c5158Smillert
165b39c5158Smillert=item limit
166b39c5158Smillert
167b39c5158SmillertDo not read more than C<limit> files. This is useful if you have
168b39c5158Smillertvery big archives, and are only interested in the first few files.
169b39c5158Smillert
170b39c5158Smillert=item filter
171b39c5158Smillert
172b39c5158SmillertCan be set to a regular expression.  Only files with names that match
173b39c5158Smillertthe expression will be read.
174b39c5158Smillert
175898184e3Ssthen=item md5
176898184e3Ssthen
177898184e3SsthenSet to 1 and the md5sum of files will be returned (instead of file data)
178898184e3Ssthen    my $iter = Archive::Tar->iter( $file,  1, {md5 => 1} );
179898184e3Ssthen    while( my $f = $iter->() ) {
180898184e3Ssthen        print $f->data . "\t" . $f->full_path . $/;
181898184e3Ssthen    }
182898184e3Ssthen
183b39c5158Smillert=item extract
184b39c5158Smillert
185b39c5158SmillertIf set to true, immediately extract entries when reading them. This
186b39c5158Smillertgives you the same memory break as the C<extract_archive> function.
187b39c5158SmillertNote however that entries will not be read into memory, but written
188b39c5158Smillertstraight to disk. This means no C<Archive::Tar::File> objects are
189b39c5158Smillertcreated for you to inspect.
190b39c5158Smillert
191b39c5158Smillert=back
192b39c5158Smillert
193b39c5158SmillertAll files are stored internally as C<Archive::Tar::File> objects.
194b39c5158SmillertPlease consult the L<Archive::Tar::File> documentation for details.
195b39c5158Smillert
196b39c5158SmillertReturns the number of files read in scalar context, and a list of
197b39c5158SmillertC<Archive::Tar::File> objects in list context.
198b39c5158Smillert
199b39c5158Smillert=cut
200b39c5158Smillert
201b39c5158Smillertsub read {
202b39c5158Smillert    my $self = shift;
203b39c5158Smillert    my $file = shift;
204b39c5158Smillert    my $gzip = shift || 0;
205b39c5158Smillert    my $opts = shift || {};
206b39c5158Smillert
207b39c5158Smillert    unless( defined $file ) {
208b39c5158Smillert        $self->_error( qq[No file to read from!] );
209b39c5158Smillert        return;
210b39c5158Smillert    } else {
211b39c5158Smillert        $self->_file( $file );
212b39c5158Smillert    }
213b39c5158Smillert
214b39c5158Smillert    my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) )
215b39c5158Smillert                    or return;
216b39c5158Smillert
217b39c5158Smillert    my $data = $self->_read_tar( $handle, $opts ) or return;
218b39c5158Smillert
219b39c5158Smillert    $self->_data( $data );
220b39c5158Smillert
221b39c5158Smillert    return wantarray ? @$data : scalar @$data;
222b39c5158Smillert}
223b39c5158Smillert
224b39c5158Smillertsub _get_handle {
225b39c5158Smillert    my $self     = shift;
226b39c5158Smillert    my $file     = shift;   return unless defined $file;
227b39c5158Smillert    my $compress = shift || 0;
228b39c5158Smillert    my $mode     = shift || READ_ONLY->( ZLIB ); # default to read only
229b39c5158Smillert
230898184e3Ssthen    ### Check if file is a file handle or IO glob
231898184e3Ssthen    if ( ref $file ) {
232898184e3Ssthen	return $file if eval{ *$file{IO} };
233898184e3Ssthen	return $file if eval{ $file->isa(q{IO::Handle}) };
234898184e3Ssthen	$file = q{}.$file;
235898184e3Ssthen    }
236b39c5158Smillert
237b39c5158Smillert    ### get a FH opened to the right class, so we can use it transparently
238b39c5158Smillert    ### throughout the program
239b39c5158Smillert    my $fh;
240b39c5158Smillert    {   ### reading magic only makes sense if we're opening a file for
241b39c5158Smillert        ### reading. otherwise, just use what the user requested.
242b39c5158Smillert        my $magic = '';
243b39c5158Smillert        if( MODE_READ->($mode) ) {
244b39c5158Smillert            open my $tmp, $file or do {
245b39c5158Smillert                $self->_error( qq[Could not open '$file' for reading: $!] );
246b39c5158Smillert                return;
247b39c5158Smillert            };
248b39c5158Smillert
249b39c5158Smillert            ### read the first 4 bites of the file to figure out which class to
250b39c5158Smillert            ### use to open the file.
251b39c5158Smillert            sysread( $tmp, $magic, 4 );
252b39c5158Smillert            close $tmp;
253b39c5158Smillert        }
254b39c5158Smillert
255b39c5158Smillert        ### is it bzip?
256b39c5158Smillert        ### if you asked specifically for bzip compression, or if we're in
257b39c5158Smillert        ### read mode and the magic numbers add up, use bzip
258b39c5158Smillert        if( BZIP and (
259b39c5158Smillert                ($compress eq COMPRESS_BZIP) or
260b39c5158Smillert                ( MODE_READ->($mode) and $magic =~ BZIP_MAGIC_NUM )
261b39c5158Smillert            )
262b39c5158Smillert        ) {
263b39c5158Smillert
264b39c5158Smillert            ### different reader/writer modules, different error vars... sigh
265b39c5158Smillert            if( MODE_READ->($mode) ) {
266*9f11ffb7Safresh1                $fh = IO::Uncompress::Bunzip2->new( $file, MultiStream => 1 ) or do {
267b39c5158Smillert                    $self->_error( qq[Could not read '$file': ] .
268b39c5158Smillert                        $IO::Uncompress::Bunzip2::Bunzip2Error
269b39c5158Smillert                    );
270b39c5158Smillert                    return;
271b39c5158Smillert                };
272b39c5158Smillert
273b39c5158Smillert            } else {
274b39c5158Smillert                $fh = IO::Compress::Bzip2->new( $file ) or do {
275b39c5158Smillert                    $self->_error( qq[Could not write to '$file': ] .
276b39c5158Smillert                        $IO::Compress::Bzip2::Bzip2Error
277b39c5158Smillert                    );
278b39c5158Smillert                    return;
279b39c5158Smillert                };
280b39c5158Smillert            }
281b39c5158Smillert
282b39c5158Smillert        ### is it gzip?
283b39c5158Smillert        ### if you asked for compression, if you wanted to read or the gzip
284b39c5158Smillert        ### magic number is present (redundant with read)
285b39c5158Smillert        } elsif( ZLIB and (
286b39c5158Smillert                    $compress or MODE_READ->($mode) or $magic =~ GZIP_MAGIC_NUM
287b39c5158Smillert                 )
288b39c5158Smillert        ) {
289b39c5158Smillert            $fh = IO::Zlib->new;
290b39c5158Smillert
291b39c5158Smillert            unless( $fh->open( $file, $mode ) ) {
292b39c5158Smillert                $self->_error(qq[Could not create filehandle for '$file': $!]);
293b39c5158Smillert                return;
294b39c5158Smillert            }
295b39c5158Smillert
296b39c5158Smillert        ### is it plain tar?
297b39c5158Smillert        } else {
298b39c5158Smillert            $fh = IO::File->new;
299b39c5158Smillert
300b39c5158Smillert            unless( $fh->open( $file, $mode ) ) {
301b39c5158Smillert                $self->_error(qq[Could not create filehandle for '$file': $!]);
302b39c5158Smillert                return;
303b39c5158Smillert            }
304b39c5158Smillert
305b39c5158Smillert            ### enable bin mode on tar archives
306b39c5158Smillert            binmode $fh;
307b39c5158Smillert        }
308b39c5158Smillert    }
309b39c5158Smillert
310b39c5158Smillert    return $fh;
311b39c5158Smillert}
312b39c5158Smillert
313b39c5158Smillert
314b39c5158Smillertsub _read_tar {
315b39c5158Smillert    my $self    = shift;
316b39c5158Smillert    my $handle  = shift or return;
317b39c5158Smillert    my $opts    = shift || {};
318b39c5158Smillert
319b39c5158Smillert    my $count   = $opts->{limit}    || 0;
320b39c5158Smillert    my $filter  = $opts->{filter};
321898184e3Ssthen    my $md5  = $opts->{md5} || 0;	# cdrake
322898184e3Ssthen    my $filter_cb = $opts->{filter_cb};
323b39c5158Smillert    my $extract = $opts->{extract}  || 0;
324b39c5158Smillert
325b39c5158Smillert    ### set a cap on the amount of files to extract ###
326b39c5158Smillert    my $limit   = 0;
327b39c5158Smillert    $limit = 1 if $count > 0;
328b39c5158Smillert
329b39c5158Smillert    my $tarfile = [ ];
330b39c5158Smillert    my $chunk;
331b39c5158Smillert    my $read = 0;
332b39c5158Smillert    my $real_name;  # to set the name of a file when
333b39c5158Smillert                    # we're encountering @longlink
334b39c5158Smillert    my $data;
335b39c5158Smillert
336b39c5158Smillert    LOOP:
337b39c5158Smillert    while( $handle->read( $chunk, HEAD ) ) {
338b39c5158Smillert        ### IO::Zlib doesn't support this yet
33991f110e0Safresh1        my $offset;
34091f110e0Safresh1        if ( ref($handle) ne 'IO::Zlib' ) {
34191f110e0Safresh1            local $@;
34291f110e0Safresh1            $offset = eval { tell $handle } || 'unknown';
343898184e3Ssthen            $@ = '';
34491f110e0Safresh1        }
34591f110e0Safresh1        else {
34691f110e0Safresh1            $offset = 'unknown';
34791f110e0Safresh1        }
348b39c5158Smillert
349b39c5158Smillert        unless( $read++ ) {
350b39c5158Smillert            my $gzip = GZIP_MAGIC_NUM;
351b39c5158Smillert            if( $chunk =~ /$gzip/ ) {
352b39c5158Smillert                $self->_error( qq[Cannot read compressed format in tar-mode] );
353b39c5158Smillert                return;
354b39c5158Smillert            }
355b39c5158Smillert
356b39c5158Smillert            ### size is < HEAD, which means a corrupted file, as the minimum
357b39c5158Smillert            ### length is _at least_ HEAD
358b39c5158Smillert            if (length $chunk != HEAD) {
359b39c5158Smillert                $self->_error( qq[Cannot read enough bytes from the tarfile] );
360b39c5158Smillert                return;
361b39c5158Smillert            }
362b39c5158Smillert        }
363b39c5158Smillert
364b39c5158Smillert        ### if we can't read in all bytes... ###
365b39c5158Smillert        last if length $chunk != HEAD;
366b39c5158Smillert
367b39c5158Smillert        ### Apparently this should really be two blocks of 512 zeroes,
368b39c5158Smillert        ### but GNU tar sometimes gets it wrong. See comment in the
369b39c5158Smillert        ### source code (tar.c) to GNU cpio.
370b39c5158Smillert        next if $chunk eq TAR_END;
371b39c5158Smillert
372b39c5158Smillert        ### according to the posix spec, the last 12 bytes of the header are
373b39c5158Smillert        ### null bytes, to pad it to a 512 byte block. That means if these
374898184e3Ssthen        ### bytes are NOT null bytes, it's a corrupt header. See:
375b39c5158Smillert        ### www.koders.com/c/fidCE473AD3D9F835D690259D60AD5654591D91D5BA.aspx
376b39c5158Smillert        ### line 111
377b39c5158Smillert        {   my $nulls = join '', "\0" x 12;
378b39c5158Smillert            unless( $nulls eq substr( $chunk, 500, 12 ) ) {
379b39c5158Smillert                $self->_error( qq[Invalid header block at offset $offset] );
380b39c5158Smillert                next LOOP;
381b39c5158Smillert            }
382b39c5158Smillert        }
383b39c5158Smillert
384b39c5158Smillert        ### pass the realname, so we can set it 'proper' right away
385b39c5158Smillert        ### some of the heuristics are done on the name, so important
386b39c5158Smillert        ### to set it ASAP
387b39c5158Smillert        my $entry;
388b39c5158Smillert        {   my %extra_args = ();
389b39c5158Smillert            $extra_args{'name'} = $$real_name if defined $real_name;
390b39c5158Smillert
391b39c5158Smillert            unless( $entry = Archive::Tar::File->new(   chunk => $chunk,
392b39c5158Smillert                                                        %extra_args )
393b39c5158Smillert            ) {
394b39c5158Smillert                $self->_error( qq[Couldn't read chunk at offset $offset] );
395b39c5158Smillert                next LOOP;
396b39c5158Smillert            }
397b39c5158Smillert        }
398b39c5158Smillert
399b39c5158Smillert        ### ignore labels:
400898184e3Ssthen        ### http://www.gnu.org/software/tar/manual/html_chapter/Media.html#SEC159
401b39c5158Smillert        next if $entry->is_label;
402b39c5158Smillert
403b39c5158Smillert        if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) {
404b39c5158Smillert
405b39c5158Smillert            if ( $entry->is_file && !$entry->validate ) {
406b39c5158Smillert                ### sometimes the chunk is rather fux0r3d and a whole 512
407b39c5158Smillert                ### bytes ends up in the ->name area.
408b39c5158Smillert                ### clean it up, if need be
409b39c5158Smillert                my $name = $entry->name;
410b39c5158Smillert                $name = substr($name, 0, 100) if length $name > 100;
411b39c5158Smillert                $name =~ s/\n/ /g;
412b39c5158Smillert
413b39c5158Smillert                $self->_error( $name . qq[: checksum error] );
414b39c5158Smillert                next LOOP;
415b39c5158Smillert            }
416b39c5158Smillert
417b39c5158Smillert            my $block = BLOCK_SIZE->( $entry->size );
418b39c5158Smillert
419b39c5158Smillert            $data = $entry->get_content_by_ref;
420b39c5158Smillert
421898184e3Ssthen	    my $skip = 0;
422898184e3Ssthen	    my $ctx;			# cdrake
423898184e3Ssthen	    ### skip this entry if we're filtering
424898184e3Ssthen
425898184e3Ssthen	    if($md5) {			# cdrake
426898184e3Ssthen	      $ctx = Digest::MD5->new;	# cdrake
427898184e3Ssthen	        $skip=5;		# cdrake
428898184e3Ssthen
429898184e3Ssthen	    } elsif ($filter && $entry->name !~ $filter) {
430898184e3Ssthen		$skip = 1;
431898184e3Ssthen
432b8851fccSafresh1	    } elsif ($filter_cb && ! $filter_cb->($entry)) {
433b8851fccSafresh1		$skip = 2;
434b8851fccSafresh1
435898184e3Ssthen	    ### skip this entry if it's a pax header. This is a special file added
436898184e3Ssthen	    ### by, among others, git-generated tarballs. It holds comments and is
437898184e3Ssthen	    ### not meant for extracting. See #38932: pax_global_header extracted
438898184e3Ssthen	    } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) {
439898184e3Ssthen		$skip = 3;
440898184e3Ssthen	    }
441898184e3Ssthen
442898184e3Ssthen	    if ($skip) {
443898184e3Ssthen		#
444898184e3Ssthen		# Since we're skipping, do not allocate memory for the
445898184e3Ssthen		# whole file.  Read it 64 BLOCKS at a time.  Do not
446898184e3Ssthen		# complete the skip yet because maybe what we read is a
447898184e3Ssthen		# longlink and it won't get skipped after all
448898184e3Ssthen		#
449898184e3Ssthen		my $amt = $block;
450898184e3Ssthen		my $fsz=$entry->size;	# cdrake
451898184e3Ssthen		while ($amt > 0) {
452898184e3Ssthen		    $$data = '';
453898184e3Ssthen		    my $this = 64 * BLOCK;
454898184e3Ssthen		    $this = $amt if $this > $amt;
455898184e3Ssthen		    if( $handle->read( $$data, $this ) < $this ) {
456898184e3Ssthen			$self->_error( qq[Read error on tarfile (missing data) '].
457898184e3Ssthen					    $entry->full_path ."' at offset $offset" );
458898184e3Ssthen			next LOOP;
459898184e3Ssthen		    }
460898184e3Ssthen		    $amt -= $this;
461898184e3Ssthen		    $fsz -= $this;	# cdrake
462898184e3Ssthen		substr ($$data, $fsz) = "" if ($fsz<0);	# remove external junk prior to md5	# cdrake
463898184e3Ssthen		$ctx->add($$data) if($skip==5);	# cdrake
464898184e3Ssthen		}
465898184e3Ssthen		$$data = $ctx->hexdigest if($skip==5 && !$entry->is_longlink && !$entry->is_unknown && !$entry->is_label ) ;	# cdrake
466898184e3Ssthen            } else {
467898184e3Ssthen
468b39c5158Smillert		### just read everything into memory
469b39c5158Smillert		### can't do lazy loading since IO::Zlib doesn't support 'seek'
470b39c5158Smillert		### this is because Compress::Zlib doesn't support it =/
471b39c5158Smillert		### this reads in the whole data in one read() call.
472b39c5158Smillert		if ( $handle->read( $$data, $block ) < $block ) {
473b39c5158Smillert		    $self->_error( qq[Read error on tarfile (missing data) '].
474b39c5158Smillert                                    $entry->full_path ."' at offset $offset" );
475b39c5158Smillert		    next LOOP;
476b39c5158Smillert		}
477b39c5158Smillert		### throw away trailing garbage ###
478b39c5158Smillert		substr ($$data, $entry->size) = "" if defined $$data;
479898184e3Ssthen            }
480b39c5158Smillert
481b39c5158Smillert            ### part II of the @LongLink munging -- need to do /after/
482b39c5158Smillert            ### the checksum check.
483b39c5158Smillert            if( $entry->is_longlink ) {
484b39c5158Smillert                ### weird thing in tarfiles -- if the file is actually a
485b39c5158Smillert                ### @LongLink, the data part seems to have a trailing ^@
486b39c5158Smillert                ### (unprintable) char. to display, pipe output through less.
487b39c5158Smillert                ### but that doesn't *always* happen.. so check if the last
488b39c5158Smillert                ### character is a control character, and if so remove it
489b39c5158Smillert                ### at any rate, we better remove that character here, or tests
490b39c5158Smillert                ### like 'eq' and hash lookups based on names will SO not work
491b39c5158Smillert                ### remove it by calculating the proper size, and then
492b39c5158Smillert                ### tossing out everything that's longer than that size.
493b39c5158Smillert
494b39c5158Smillert                ### count number of nulls
495b39c5158Smillert                my $nulls = $$data =~ tr/\0/\0/;
496b39c5158Smillert
497b39c5158Smillert                ### cut data + size by that many bytes
498b39c5158Smillert                $entry->size( $entry->size - $nulls );
499b39c5158Smillert                substr ($$data, $entry->size) = "";
500b39c5158Smillert            }
501b39c5158Smillert        }
502b39c5158Smillert
503b39c5158Smillert        ### clean up of the entries.. posix tar /apparently/ has some
504b39c5158Smillert        ### weird 'feature' that allows for filenames > 255 characters
505b39c5158Smillert        ### they'll put a header in with as name '././@LongLink' and the
506b39c5158Smillert        ### contents will be the name of the /next/ file in the archive
507b39c5158Smillert        ### pretty crappy and kludgy if you ask me
508b39c5158Smillert
509b39c5158Smillert        ### set the name for the next entry if this is a @LongLink;
510b39c5158Smillert        ### this is one ugly hack =/ but needed for direct extraction
511b39c5158Smillert        if( $entry->is_longlink ) {
512b39c5158Smillert            $real_name = $data;
513b39c5158Smillert            next LOOP;
514b39c5158Smillert        } elsif ( defined $real_name ) {
515b39c5158Smillert            $entry->name( $$real_name );
516b39c5158Smillert            $entry->prefix('');
517b39c5158Smillert            undef $real_name;
518b39c5158Smillert        }
519b39c5158Smillert
520b39c5158Smillert	if ($filter && $entry->name !~ $filter) {
521b39c5158Smillert	    next LOOP;
522b39c5158Smillert
523b8851fccSafresh1	} elsif ($filter_cb && ! $filter_cb->($entry)) {
524b8851fccSafresh1	    next LOOP;
525b8851fccSafresh1
526b39c5158Smillert	### skip this entry if it's a pax header. This is a special file added
527b39c5158Smillert	### by, among others, git-generated tarballs. It holds comments and is
528b39c5158Smillert	### not meant for extracting. See #38932: pax_global_header extracted
529898184e3Ssthen	} elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) {
530898184e3Ssthen	    next LOOP;
531b39c5158Smillert	}
532b39c5158Smillert
533898184e3Ssthen        if ( $extract && !$entry->is_longlink
534b39c5158Smillert                      && !$entry->is_unknown
535898184e3Ssthen                      && !$entry->is_label ) {
536898184e3Ssthen            $self->_extract_file( $entry ) or return;
537898184e3Ssthen        }
538b39c5158Smillert
539b39c5158Smillert        ### Guard against tarfiles with garbage at the end
540b39c5158Smillert	    last LOOP if $entry->name eq '';
541b39c5158Smillert
542b39c5158Smillert        ### push only the name on the rv if we're extracting
543b39c5158Smillert        ### -- for extract_archive
544b39c5158Smillert        push @$tarfile, ($extract ? $entry->name : $entry);
545b39c5158Smillert
546b39c5158Smillert        if( $limit ) {
547b39c5158Smillert            $count-- unless $entry->is_longlink || $entry->is_dir;
548b39c5158Smillert            last LOOP unless $count;
549b39c5158Smillert        }
550b39c5158Smillert    } continue {
551b39c5158Smillert        undef $data;
552b39c5158Smillert    }
553b39c5158Smillert
554b39c5158Smillert    return $tarfile;
555b39c5158Smillert}
556b39c5158Smillert
557b39c5158Smillert=head2 $tar->contains_file( $filename )
558b39c5158Smillert
559b39c5158SmillertCheck if the archive contains a certain file.
560b39c5158SmillertIt will return true if the file is in the archive, false otherwise.
561b39c5158Smillert
562b39c5158SmillertNote however, that this function does an exact match using C<eq>
563b39c5158Smillerton the full path. So it cannot compensate for case-insensitive file-
564b39c5158Smillertsystems or compare 2 paths to see if they would point to the same
565b39c5158Smillertunderlying file.
566b39c5158Smillert
567b39c5158Smillert=cut
568b39c5158Smillert
569b39c5158Smillertsub contains_file {
570b39c5158Smillert    my $self = shift;
571b39c5158Smillert    my $full = shift;
572b39c5158Smillert
573b39c5158Smillert    return unless defined $full;
574b39c5158Smillert
575b39c5158Smillert    ### don't warn if the entry isn't there.. that's what this function
576b39c5158Smillert    ### is for after all.
577b39c5158Smillert    local $WARN = 0;
578b39c5158Smillert    return 1 if $self->_find_entry($full);
579b39c5158Smillert    return;
580b39c5158Smillert}
581b39c5158Smillert
582b39c5158Smillert=head2 $tar->extract( [@filenames] )
583b39c5158Smillert
584b39c5158SmillertWrite files whose names are equivalent to any of the names in
585b39c5158SmillertC<@filenames> to disk, creating subdirectories as necessary. This
586b39c5158Smillertmight not work too well under VMS.
587b39c5158SmillertUnder MacPerl, the file's modification time will be converted to the
588b39c5158SmillertMacOS zero of time, and appropriate conversions will be done to the
589b39c5158Smillertpath.  However, the length of each element of the path is not
590b39c5158Smillertinspected to see whether it's longer than MacOS currently allows (32
591b39c5158Smillertcharacters).
592b39c5158Smillert
593b39c5158SmillertIf C<extract> is called without a list of file names, the entire
594b39c5158Smillertcontents of the archive are extracted.
595b39c5158Smillert
596b39c5158SmillertReturns a list of filenames extracted.
597b39c5158Smillert
598b39c5158Smillert=cut
599b39c5158Smillert
600b39c5158Smillertsub extract {
601b39c5158Smillert    my $self    = shift;
602b39c5158Smillert    my @args    = @_;
603b39c5158Smillert    my @files;
604*9f11ffb7Safresh1    my $hashmap;
605b39c5158Smillert
606b39c5158Smillert    # use the speed optimization for all extracted files
607b39c5158Smillert    local($self->{cwd}) = cwd() unless $self->{cwd};
608b39c5158Smillert
609898184e3Ssthen    ### you requested the extraction of only certain files
610b39c5158Smillert    if( @args ) {
611b39c5158Smillert        for my $file ( @args ) {
612b39c5158Smillert
613b39c5158Smillert            ### it's already an object?
614b39c5158Smillert            if( UNIVERSAL::isa( $file, 'Archive::Tar::File' ) ) {
615b39c5158Smillert                push @files, $file;
616b39c5158Smillert                next;
617b39c5158Smillert
618b39c5158Smillert            ### go find it then
619b39c5158Smillert            } else {
620b39c5158Smillert
621*9f11ffb7Safresh1                # create hash-map once to speed up lookup
622*9f11ffb7Safresh1                $hashmap = $hashmap || {
623*9f11ffb7Safresh1                    map { $_->full_path, $_ } @{$self->_data}
624*9f11ffb7Safresh1                };
625b39c5158Smillert
626*9f11ffb7Safresh1                if (exists $hashmap->{$file}) {
627b39c5158Smillert                    ### we found the file you're looking for
628*9f11ffb7Safresh1                    push @files, $hashmap->{$file};
629*9f11ffb7Safresh1                } else {
630b39c5158Smillert                    return $self->_error(
631b39c5158Smillert                        qq[Could not find '$file' in archive] );
632b39c5158Smillert                }
633b39c5158Smillert            }
634b39c5158Smillert        }
635b39c5158Smillert
636b39c5158Smillert    ### just grab all the file items
637b39c5158Smillert    } else {
638b39c5158Smillert        @files = $self->get_files;
639b39c5158Smillert    }
640b39c5158Smillert
641b39c5158Smillert    ### nothing found? that's an error
642b39c5158Smillert    unless( scalar @files ) {
643b39c5158Smillert        $self->_error( qq[No files found for ] . $self->_file );
644b39c5158Smillert        return;
645b39c5158Smillert    }
646b39c5158Smillert
647b39c5158Smillert    ### now extract them
648b39c5158Smillert    for my $entry ( @files ) {
649b39c5158Smillert        unless( $self->_extract_file( $entry ) ) {
650b39c5158Smillert            $self->_error(q[Could not extract ']. $entry->full_path .q['] );
651b39c5158Smillert            return;
652b39c5158Smillert        }
653b39c5158Smillert    }
654b39c5158Smillert
655b39c5158Smillert    return @files;
656b39c5158Smillert}
657b39c5158Smillert
658b39c5158Smillert=head2 $tar->extract_file( $file, [$extract_path] )
659b39c5158Smillert
660b39c5158SmillertWrite an entry, whose name is equivalent to the file name provided to
661b39c5158Smillertdisk. Optionally takes a second parameter, which is the full native
662b39c5158Smillertpath (including filename) the entry will be written to.
663b39c5158Smillert
664b39c5158SmillertFor example:
665b39c5158Smillert
666b39c5158Smillert    $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' );
667b39c5158Smillert
668b39c5158Smillert    $tar->extract_file( $at_file_object,   'name/i/want/to/give/it' );
669b39c5158Smillert
670b39c5158SmillertReturns true on success, false on failure.
671b39c5158Smillert
672b39c5158Smillert=cut
673b39c5158Smillert
674b39c5158Smillertsub extract_file {
675b39c5158Smillert    my $self = shift;
676b39c5158Smillert    my $file = shift;   return unless defined $file;
677b39c5158Smillert    my $alt  = shift;
678b39c5158Smillert
679b39c5158Smillert    my $entry = $self->_find_entry( $file )
680b39c5158Smillert        or $self->_error( qq[Could not find an entry for '$file'] ), return;
681b39c5158Smillert
682b39c5158Smillert    return $self->_extract_file( $entry, $alt );
683b39c5158Smillert}
684b39c5158Smillert
685b39c5158Smillertsub _extract_file {
686b39c5158Smillert    my $self    = shift;
687b39c5158Smillert    my $entry   = shift or return;
688b39c5158Smillert    my $alt     = shift;
689b39c5158Smillert
690b39c5158Smillert    ### you wanted an alternate extraction location ###
691b39c5158Smillert    my $name = defined $alt ? $alt : $entry->full_path;
692b39c5158Smillert
693b39c5158Smillert                            ### splitpath takes a bool at the end to indicate
694b39c5158Smillert                            ### that it's splitting a dir
695b39c5158Smillert    my ($vol,$dirs,$file);
696b39c5158Smillert    if ( defined $alt ) { # It's a local-OS path
697b39c5158Smillert        ($vol,$dirs,$file) = File::Spec->splitpath(       $alt,
698b39c5158Smillert                                                          $entry->is_dir );
699b39c5158Smillert    } else {
700b39c5158Smillert        ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name,
701b39c5158Smillert                                                          $entry->is_dir );
702b39c5158Smillert    }
703b39c5158Smillert
704b39c5158Smillert    my $dir;
705b39c5158Smillert    ### is $name an absolute path? ###
706b39c5158Smillert    if( $vol || File::Spec->file_name_is_absolute( $dirs ) ) {
707b39c5158Smillert
708b39c5158Smillert        ### absolute names are not allowed to be in tarballs under
709b39c5158Smillert        ### strict mode, so only allow it if a user tells us to do it
710b39c5158Smillert        if( not defined $alt and not $INSECURE_EXTRACT_MODE ) {
711b39c5158Smillert            $self->_error(
712b39c5158Smillert                q[Entry ']. $entry->full_path .q[' is an absolute path. ].
713b39c5158Smillert                q[Not extracting absolute paths under SECURE EXTRACT MODE]
714b39c5158Smillert            );
715b39c5158Smillert            return;
716b39c5158Smillert        }
717b39c5158Smillert
718b39c5158Smillert        ### user asked us to, it's fine.
719b39c5158Smillert        $dir = File::Spec->catpath( $vol, $dirs, "" );
720b39c5158Smillert
721b39c5158Smillert    ### it's a relative path ###
722b39c5158Smillert    } else {
723b39c5158Smillert        my $cwd     = (ref $self and defined $self->{cwd})
724b39c5158Smillert                        ? $self->{cwd}
725b39c5158Smillert                        : cwd();
726b39c5158Smillert
727b39c5158Smillert        my @dirs = defined $alt
728b39c5158Smillert            ? File::Spec->splitdir( $dirs )         # It's a local-OS path
729b39c5158Smillert            : File::Spec::Unix->splitdir( $dirs );  # it's UNIX-style, likely
730b39c5158Smillert                                                    # straight from the tarball
731b39c5158Smillert
732b39c5158Smillert        if( not defined $alt            and
733b39c5158Smillert            not $INSECURE_EXTRACT_MODE
734b39c5158Smillert        ) {
735b39c5158Smillert
736b39c5158Smillert            ### paths that leave the current directory are not allowed under
737b39c5158Smillert            ### strict mode, so only allow it if a user tells us to do this.
738b39c5158Smillert            if( grep { $_ eq '..' } @dirs ) {
739b39c5158Smillert
740b39c5158Smillert                $self->_error(
741b39c5158Smillert                    q[Entry ']. $entry->full_path .q[' is attempting to leave ].
742b39c5158Smillert                    q[the current working directory. Not extracting under ].
743b39c5158Smillert                    q[SECURE EXTRACT MODE]
744b39c5158Smillert                );
745b39c5158Smillert                return;
746b39c5158Smillert            }
747b39c5158Smillert
748b39c5158Smillert            ### the archive may be asking us to extract into a symlink. This
749b39c5158Smillert            ### is not sane and a possible security issue, as outlined here:
750b39c5158Smillert            ### https://rt.cpan.org/Ticket/Display.html?id=30380
751b39c5158Smillert            ### https://bugzilla.redhat.com/show_bug.cgi?id=295021
752b39c5158Smillert            ### https://issues.rpath.com/browse/RPL-1716
753b39c5158Smillert            my $full_path = $cwd;
754b39c5158Smillert            for my $d ( @dirs ) {
755b39c5158Smillert                $full_path = File::Spec->catdir( $full_path, $d );
756b39c5158Smillert
757b39c5158Smillert                ### we've already checked this one, and it's safe. Move on.
758b39c5158Smillert                next if ref $self and $self->{_link_cache}->{$full_path};
759b39c5158Smillert
760b39c5158Smillert                if( -l $full_path ) {
761b39c5158Smillert                    my $to   = readlink $full_path;
762b39c5158Smillert                    my $diag = "symlinked directory ($full_path => $to)";
763b39c5158Smillert
764b39c5158Smillert                    $self->_error(
765b39c5158Smillert                        q[Entry ']. $entry->full_path .q[' is attempting to ].
766b39c5158Smillert                        qq[extract to a $diag. This is considered a security ].
767b39c5158Smillert                        q[vulnerability and not allowed under SECURE EXTRACT ].
768b39c5158Smillert                        q[MODE]
769b39c5158Smillert                    );
770b39c5158Smillert                    return;
771b39c5158Smillert                }
772b39c5158Smillert
773b39c5158Smillert                ### XXX keep a cache if possible, so the stats become cheaper:
774b39c5158Smillert                $self->{_link_cache}->{$full_path} = 1 if ref $self;
775b39c5158Smillert            }
776b39c5158Smillert        }
777b39c5158Smillert
778b39c5158Smillert        ### '.' is the directory delimiter on VMS, which has to be escaped
779b39c5158Smillert        ### or changed to '_' on vms.  vmsify is used, because older versions
780b39c5158Smillert        ### of vmspath do not handle this properly.
781b39c5158Smillert        ### Must not add a '/' to an empty directory though.
782b39c5158Smillert        map { length() ? VMS::Filespec::vmsify($_.'/') : $_ } @dirs if ON_VMS;
783b39c5158Smillert
784b39c5158Smillert        my ($cwd_vol,$cwd_dir,$cwd_file)
785b39c5158Smillert                    = File::Spec->splitpath( $cwd );
786b39c5158Smillert        my @cwd     = File::Spec->splitdir( $cwd_dir );
787b39c5158Smillert        push @cwd, $cwd_file if length $cwd_file;
788b39c5158Smillert
789898184e3Ssthen        ### We need to pass '' as the last element to catpath. Craig Berry
790b39c5158Smillert        ### explains why (msgid <p0624083dc311ae541393@[172.16.52.1]>):
791b39c5158Smillert        ### The root problem is that splitpath on UNIX always returns the
792b39c5158Smillert        ### final path element as a file even if it is a directory, and of
793b39c5158Smillert        ### course there is no way it can know the difference without checking
794b39c5158Smillert        ### against the filesystem, which it is documented as not doing.  When
795b39c5158Smillert        ### you turn around and call catpath, on VMS you have to know which bits
796b39c5158Smillert        ### are directory bits and which bits are file bits.  In this case we
797b39c5158Smillert        ### know the result should be a directory.  I had thought you could omit
798b39c5158Smillert        ### the file argument to catpath in such a case, but apparently on UNIX
799b39c5158Smillert        ### you can't.
800b39c5158Smillert        $dir        = File::Spec->catpath(
801b39c5158Smillert                            $cwd_vol, File::Spec->catdir( @cwd, @dirs ), ''
802b39c5158Smillert                        );
803b39c5158Smillert
804b39c5158Smillert        ### catdir() returns undef if the path is longer than 255 chars on
805b39c5158Smillert        ### older VMS systems.
806b39c5158Smillert        unless ( defined $dir ) {
807b39c5158Smillert            $^W && $self->_error( qq[Could not compose a path for '$dirs'\n] );
808b39c5158Smillert            return;
809b39c5158Smillert        }
810b39c5158Smillert
811b39c5158Smillert    }
812b39c5158Smillert
813b39c5158Smillert    if( -e $dir && !-d _ ) {
814b39c5158Smillert        $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] );
815b39c5158Smillert        return;
816b39c5158Smillert    }
817b39c5158Smillert
818b39c5158Smillert    unless ( -d _ ) {
819b39c5158Smillert        eval { File::Path::mkpath( $dir, 0, 0777 ) };
820b39c5158Smillert        if( $@ ) {
821b39c5158Smillert            my $fp = $entry->full_path;
822b39c5158Smillert            $self->_error(qq[Could not create directory '$dir' for '$fp': $@]);
823b39c5158Smillert            return;
824b39c5158Smillert        }
825b39c5158Smillert
826b39c5158Smillert        ### XXX chown here? that might not be the same as in the archive
827b39c5158Smillert        ### as we're only chown'ing to the owner of the file we're extracting
828b39c5158Smillert        ### not to the owner of the directory itself, which may or may not
829b39c5158Smillert        ### be another entry in the archive
830b39c5158Smillert        ### Answer: no, gnu tar doesn't do it either, it'd be the wrong
831b39c5158Smillert        ### way to go.
832b39c5158Smillert        #if( $CHOWN && CAN_CHOWN ) {
833b39c5158Smillert        #    chown $entry->uid, $entry->gid, $dir or
834b39c5158Smillert        #        $self->_error( qq[Could not set uid/gid on '$dir'] );
835b39c5158Smillert        #}
836b39c5158Smillert    }
837b39c5158Smillert
838b39c5158Smillert    ### we're done if we just needed to create a dir ###
839b39c5158Smillert    return 1 if $entry->is_dir;
840b39c5158Smillert
841b39c5158Smillert    my $full = File::Spec->catfile( $dir, $file );
842b39c5158Smillert
843b39c5158Smillert    if( $entry->is_unknown ) {
844b39c5158Smillert        $self->_error( qq[Unknown file type for file '$full'] );
845b39c5158Smillert        return;
846b39c5158Smillert    }
847b39c5158Smillert
84826f75d2dSafresh1    ### If a file system already contains a block device with the same name as
84926f75d2dSafresh1    ### the being extracted regular file, we would write the file's content
85026f75d2dSafresh1    ### to the block device. So remove the existing file (block device) now.
85126f75d2dSafresh1    ### If an archive contains multiple same-named entries, the last one
85226f75d2dSafresh1    ### should replace the previous ones. So remove the old file now.
85326f75d2dSafresh1    ### If the old entry is a symlink to a file outside of the CWD, the new
85426f75d2dSafresh1    ### entry would create a file there. This is CVE-2018-12015
85526f75d2dSafresh1    ### <https://rt.cpan.org/Ticket/Display.html?id=125523>.
85626f75d2dSafresh1    if (-l $full || -e _) {
85726f75d2dSafresh1	if (!unlink $full) {
85826f75d2dSafresh1	    $self->_error( qq[Could not remove old file '$full': $!] );
85926f75d2dSafresh1	    return;
86026f75d2dSafresh1	}
86126f75d2dSafresh1    }
862b39c5158Smillert    if( length $entry->type && $entry->is_file ) {
863b39c5158Smillert        my $fh = IO::File->new;
864*9f11ffb7Safresh1        $fh->open( $full, '>' ) or (
865b39c5158Smillert            $self->_error( qq[Could not open file '$full': $!] ),
866b39c5158Smillert            return
867b39c5158Smillert        );
868b39c5158Smillert
869b39c5158Smillert        if( $entry->size ) {
870b39c5158Smillert            binmode $fh;
871b39c5158Smillert            syswrite $fh, $entry->data or (
872b39c5158Smillert                $self->_error( qq[Could not write data to '$full'] ),
873b39c5158Smillert                return
874b39c5158Smillert            );
875b39c5158Smillert        }
876b39c5158Smillert
877b39c5158Smillert        close $fh or (
878b39c5158Smillert            $self->_error( qq[Could not close file '$full'] ),
879b39c5158Smillert            return
880b39c5158Smillert        );
881b39c5158Smillert
882b39c5158Smillert    } else {
883b39c5158Smillert        $self->_make_special_file( $entry, $full ) or return;
884b39c5158Smillert    }
885b39c5158Smillert
886b39c5158Smillert    ### only update the timestamp if it's not a symlink; that will change the
887b39c5158Smillert    ### timestamp of the original. This addresses bug #33669: Could not update
888b39c5158Smillert    ### timestamp warning on symlinks
889b39c5158Smillert    if( not -l $full ) {
890b39c5158Smillert        utime time, $entry->mtime - TIME_OFFSET, $full or
891b39c5158Smillert            $self->_error( qq[Could not update timestamp] );
892b39c5158Smillert    }
893b39c5158Smillert
89491f110e0Safresh1    if( $CHOWN && CAN_CHOWN->() and not -l $full ) {
895b39c5158Smillert        chown $entry->uid, $entry->gid, $full or
896b39c5158Smillert            $self->_error( qq[Could not set uid/gid on '$full'] );
897b39c5158Smillert    }
898b39c5158Smillert
899b39c5158Smillert    ### only chmod if we're allowed to, but never chmod symlinks, since they'll
900b39c5158Smillert    ### change the perms on the file they're linking too...
901b39c5158Smillert    if( $CHMOD and not -l $full ) {
902b39c5158Smillert        my $mode = $entry->mode;
903b39c5158Smillert        unless ($SAME_PERMISSIONS) {
904b39c5158Smillert            $mode &= ~(oct(7000) | umask);
905b39c5158Smillert        }
906b39c5158Smillert        chmod $mode, $full or
907b39c5158Smillert            $self->_error( qq[Could not chown '$full' to ] . $entry->mode );
908b39c5158Smillert    }
909b39c5158Smillert
910b39c5158Smillert    return 1;
911b39c5158Smillert}
912b39c5158Smillert
913b39c5158Smillertsub _make_special_file {
914b39c5158Smillert    my $self    = shift;
915b39c5158Smillert    my $entry   = shift     or return;
916b39c5158Smillert    my $file    = shift;    return unless defined $file;
917b39c5158Smillert
918b39c5158Smillert    my $err;
919b39c5158Smillert
920b39c5158Smillert    if( $entry->is_symlink ) {
921b39c5158Smillert        my $fail;
922b39c5158Smillert        if( ON_UNIX ) {
923b39c5158Smillert            symlink( $entry->linkname, $file ) or $fail++;
924b39c5158Smillert
925b39c5158Smillert        } else {
926b39c5158Smillert            $self->_extract_special_file_as_plain_file( $entry, $file )
927b39c5158Smillert                or $fail++;
928b39c5158Smillert        }
929b39c5158Smillert
930b39c5158Smillert        $err =  qq[Making symbolic link '$file' to '] .
931b39c5158Smillert                $entry->linkname .q[' failed] if $fail;
932b39c5158Smillert
933b39c5158Smillert    } elsif ( $entry->is_hardlink ) {
934b39c5158Smillert        my $fail;
935b39c5158Smillert        if( ON_UNIX ) {
936b39c5158Smillert            link( $entry->linkname, $file ) or $fail++;
937b39c5158Smillert
938b39c5158Smillert        } else {
939b39c5158Smillert            $self->_extract_special_file_as_plain_file( $entry, $file )
940b39c5158Smillert                or $fail++;
941b39c5158Smillert        }
942b39c5158Smillert
943b39c5158Smillert        $err =  qq[Making hard link from '] . $entry->linkname .
944b39c5158Smillert                qq[' to '$file' failed] if $fail;
945b39c5158Smillert
946b39c5158Smillert    } elsif ( $entry->is_fifo ) {
947b39c5158Smillert        ON_UNIX && !system('mknod', $file, 'p') or
948b39c5158Smillert            $err = qq[Making fifo ']. $entry->name .qq[' failed];
949b39c5158Smillert
950b39c5158Smillert    } elsif ( $entry->is_blockdev or $entry->is_chardev ) {
951b39c5158Smillert        my $mode = $entry->is_blockdev ? 'b' : 'c';
952b39c5158Smillert
953b39c5158Smillert        ON_UNIX && !system('mknod', $file, $mode,
954b39c5158Smillert                            $entry->devmajor, $entry->devminor) or
955b39c5158Smillert            $err =  qq[Making block device ']. $entry->name .qq[' (maj=] .
956b39c5158Smillert                    $entry->devmajor . qq[ min=] . $entry->devminor .
957b39c5158Smillert                    qq[) failed.];
958b39c5158Smillert
959b39c5158Smillert    } elsif ( $entry->is_socket ) {
960b39c5158Smillert        ### the original doesn't do anything special for sockets.... ###
961b39c5158Smillert        1;
962b39c5158Smillert    }
963b39c5158Smillert
964b39c5158Smillert    return $err ? $self->_error( $err ) : 1;
965b39c5158Smillert}
966b39c5158Smillert
967b39c5158Smillert### don't know how to make symlinks, let's just extract the file as
968b39c5158Smillert### a plain file
969b39c5158Smillertsub _extract_special_file_as_plain_file {
970b39c5158Smillert    my $self    = shift;
971b39c5158Smillert    my $entry   = shift     or return;
972b39c5158Smillert    my $file    = shift;    return unless defined $file;
973b39c5158Smillert
974b39c5158Smillert    my $err;
975b39c5158Smillert    TRY: {
9766fb12b70Safresh1        my $orig = $self->_find_entry( $entry->linkname, $entry );
977b39c5158Smillert
978b39c5158Smillert        unless( $orig ) {
979b39c5158Smillert            $err =  qq[Could not find file '] . $entry->linkname .
980b39c5158Smillert                    qq[' in memory.];
981b39c5158Smillert            last TRY;
982b39c5158Smillert        }
983b39c5158Smillert
984b39c5158Smillert        ### clone the entry, make it appear as a normal file ###
9856fb12b70Safresh1        my $clone = $orig->clone;
986b39c5158Smillert        $clone->_downgrade_to_plainfile;
987b39c5158Smillert        $self->_extract_file( $clone, $file ) or last TRY;
988b39c5158Smillert
989b39c5158Smillert        return 1;
990b39c5158Smillert    }
991b39c5158Smillert
992b39c5158Smillert    return $self->_error($err);
993b39c5158Smillert}
994b39c5158Smillert
995b39c5158Smillert=head2 $tar->list_files( [\@properties] )
996b39c5158Smillert
997b39c5158SmillertReturns a list of the names of all the files in the archive.
998b39c5158Smillert
999b39c5158SmillertIf C<list_files()> is passed an array reference as its first argument
1000b39c5158Smillertit returns a list of hash references containing the requested
1001b39c5158Smillertproperties of each file.  The following list of properties is
1002b39c5158Smillertsupported: name, size, mtime (last modified date), mode, uid, gid,
1003b39c5158Smillertlinkname, uname, gname, devmajor, devminor, prefix.
1004b39c5158Smillert
1005b39c5158SmillertPassing an array reference containing only one element, 'name', is
1006b39c5158Smillertspecial cased to return a list of names rather than a list of hash
1007b39c5158Smillertreferences, making it equivalent to calling C<list_files> without
1008b39c5158Smillertarguments.
1009b39c5158Smillert
1010b39c5158Smillert=cut
1011b39c5158Smillert
1012b39c5158Smillertsub list_files {
1013b39c5158Smillert    my $self = shift;
1014b39c5158Smillert    my $aref = shift || [ ];
1015b39c5158Smillert
1016b39c5158Smillert    unless( $self->_data ) {
1017b39c5158Smillert        $self->read() or return;
1018b39c5158Smillert    }
1019b39c5158Smillert
1020b39c5158Smillert    if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) {
1021b39c5158Smillert        return map { $_->full_path } @{$self->_data};
1022b39c5158Smillert    } else {
1023b39c5158Smillert
1024b39c5158Smillert        #my @rv;
1025b39c5158Smillert        #for my $obj ( @{$self->_data} ) {
1026b39c5158Smillert        #    push @rv, { map { $_ => $obj->$_() } @$aref };
1027b39c5158Smillert        #}
1028b39c5158Smillert        #return @rv;
1029b39c5158Smillert
1030b39c5158Smillert        ### this does the same as the above.. just needs a +{ }
1031b39c5158Smillert        ### to make sure perl doesn't confuse it for a block
1032b39c5158Smillert        return map {    my $o=$_;
1033b39c5158Smillert                        +{ map { $_ => $o->$_() } @$aref }
1034b39c5158Smillert                    } @{$self->_data};
1035b39c5158Smillert    }
1036b39c5158Smillert}
1037b39c5158Smillert
1038b39c5158Smillertsub _find_entry {
1039b39c5158Smillert    my $self = shift;
1040b39c5158Smillert    my $file = shift;
1041b39c5158Smillert
1042b39c5158Smillert    unless( defined $file ) {
1043b39c5158Smillert        $self->_error( qq[No file specified] );
1044b39c5158Smillert        return;
1045b39c5158Smillert    }
1046b39c5158Smillert
1047b39c5158Smillert    ### it's an object already
1048b39c5158Smillert    return $file if UNIVERSAL::isa( $file, 'Archive::Tar::File' );
1049b39c5158Smillert
10506fb12b70Safresh1seach_entry:
10516fb12b70Safresh1		if($self->_data){
1052b39c5158Smillert			for my $entry ( @{$self->_data} ) {
1053b39c5158Smillert					my $path = $entry->full_path;
1054b39c5158Smillert					return $entry if $path eq $file;
1055b39c5158Smillert			}
10566fb12b70Safresh1		}
10576fb12b70Safresh1
10586fb12b70Safresh1		if($Archive::Tar::RESOLVE_SYMLINK!~/none/){
10596fb12b70Safresh1			if(my $link_entry = shift()){#fallback mode when symlinks are using relative notations ( ../a/./b/text.bin )
10606fb12b70Safresh1				$file = _symlinks_resolver( $link_entry->name, $file );
10616fb12b70Safresh1				goto seach_entry if $self->_data;
10626fb12b70Safresh1
10636fb12b70Safresh1				#this will be slower than never, but won't failed!
10646fb12b70Safresh1
10656fb12b70Safresh1				my $iterargs = $link_entry->{'_archive'};
10666fb12b70Safresh1				if($Archive::Tar::RESOLVE_SYMLINK=~/speed/ && @$iterargs==3){
10676fb12b70Safresh1				#faster	but whole archive will be read in memory
10686fb12b70Safresh1					#read whole archive and share data
10696fb12b70Safresh1					my $archive = Archive::Tar->new;
10706fb12b70Safresh1					$archive->read( @$iterargs );
10716fb12b70Safresh1					push @$iterargs, $archive; #take a trace for destruction
10726fb12b70Safresh1					if($archive->_data){
10736fb12b70Safresh1						$self->_data( $archive->_data );
10746fb12b70Safresh1						goto seach_entry;
10756fb12b70Safresh1					}
10766fb12b70Safresh1				}#faster
10776fb12b70Safresh1
10786fb12b70Safresh1				{#slower but lower memory usage
10796fb12b70Safresh1					# $iterargs = [$filename, $compressed, $opts];
10806fb12b70Safresh1					my $next = Archive::Tar->iter( @$iterargs );
10816fb12b70Safresh1					while(my $e = $next->()){
10826fb12b70Safresh1						if($e->full_path eq $file){
10836fb12b70Safresh1							undef $next;
10846fb12b70Safresh1							return $e;
10856fb12b70Safresh1						}
10866fb12b70Safresh1					}
10876fb12b70Safresh1				}#slower
10886fb12b70Safresh1			}
10896fb12b70Safresh1		}
1090b39c5158Smillert
1091b39c5158Smillert    $self->_error( qq[No such file in archive: '$file'] );
1092b39c5158Smillert    return;
1093b39c5158Smillert}
1094b39c5158Smillert
1095b39c5158Smillert=head2 $tar->get_files( [@filenames] )
1096b39c5158Smillert
1097b39c5158SmillertReturns the C<Archive::Tar::File> objects matching the filenames
1098b39c5158Smillertprovided. If no filename list was passed, all C<Archive::Tar::File>
1099b39c5158Smillertobjects in the current Tar object are returned.
1100b39c5158Smillert
1101b39c5158SmillertPlease refer to the C<Archive::Tar::File> documentation on how to
1102b39c5158Smillerthandle these objects.
1103b39c5158Smillert
1104b39c5158Smillert=cut
1105b39c5158Smillert
1106b39c5158Smillertsub get_files {
1107b39c5158Smillert    my $self = shift;
1108b39c5158Smillert
1109b39c5158Smillert    return @{ $self->_data } unless @_;
1110b39c5158Smillert
1111b39c5158Smillert    my @list;
1112b39c5158Smillert    for my $file ( @_ ) {
1113b39c5158Smillert        push @list, grep { defined } $self->_find_entry( $file );
1114b39c5158Smillert    }
1115b39c5158Smillert
1116b39c5158Smillert    return @list;
1117b39c5158Smillert}
1118b39c5158Smillert
1119b39c5158Smillert=head2 $tar->get_content( $file )
1120b39c5158Smillert
1121b39c5158SmillertReturn the content of the named file.
1122b39c5158Smillert
1123b39c5158Smillert=cut
1124b39c5158Smillert
1125b39c5158Smillertsub get_content {
1126b39c5158Smillert    my $self = shift;
1127b39c5158Smillert    my $entry = $self->_find_entry( shift ) or return;
1128b39c5158Smillert
1129b39c5158Smillert    return $entry->data;
1130b39c5158Smillert}
1131b39c5158Smillert
1132b39c5158Smillert=head2 $tar->replace_content( $file, $content )
1133b39c5158Smillert
1134b39c5158SmillertMake the string $content be the content for the file named $file.
1135b39c5158Smillert
1136b39c5158Smillert=cut
1137b39c5158Smillert
1138b39c5158Smillertsub replace_content {
1139b39c5158Smillert    my $self = shift;
1140b39c5158Smillert    my $entry = $self->_find_entry( shift ) or return;
1141b39c5158Smillert
1142b39c5158Smillert    return $entry->replace_content( shift );
1143b39c5158Smillert}
1144b39c5158Smillert
1145b39c5158Smillert=head2 $tar->rename( $file, $new_name )
1146b39c5158Smillert
1147b39c5158SmillertRename the file of the in-memory archive to $new_name.
1148b39c5158Smillert
1149b39c5158SmillertNote that you must specify a Unix path for $new_name, since per tar
1150b39c5158Smillertstandard, all files in the archive must be Unix paths.
1151b39c5158Smillert
1152b39c5158SmillertReturns true on success and false on failure.
1153b39c5158Smillert
1154b39c5158Smillert=cut
1155b39c5158Smillert
1156b39c5158Smillertsub rename {
1157b39c5158Smillert    my $self = shift;
1158b39c5158Smillert    my $file = shift; return unless defined $file;
1159b39c5158Smillert    my $new  = shift; return unless defined $new;
1160b39c5158Smillert
1161b39c5158Smillert    my $entry = $self->_find_entry( $file ) or return;
1162b39c5158Smillert
1163b39c5158Smillert    return $entry->rename( $new );
1164b39c5158Smillert}
1165b39c5158Smillert
1166898184e3Ssthen=head2 $tar->chmod( $file, $mode )
1167898184e3Ssthen
1168898184e3SsthenChange mode of $file to $mode.
1169898184e3Ssthen
1170898184e3SsthenReturns true on success and false on failure.
1171898184e3Ssthen
1172898184e3Ssthen=cut
1173898184e3Ssthen
1174898184e3Ssthensub chmod {
1175898184e3Ssthen    my $self = shift;
1176898184e3Ssthen    my $file = shift; return unless defined $file;
1177898184e3Ssthen    my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/;
1178898184e3Ssthen    my @args = ("$mode");
1179898184e3Ssthen
1180898184e3Ssthen    my $entry = $self->_find_entry( $file ) or return;
1181898184e3Ssthen    my $x = $entry->chmod( @args );
1182898184e3Ssthen    return $x;
1183898184e3Ssthen}
1184898184e3Ssthen
1185898184e3Ssthen=head2 $tar->chown( $file, $uname [, $gname] )
1186898184e3Ssthen
1187898184e3SsthenChange owner $file to $uname and $gname.
1188898184e3Ssthen
1189898184e3SsthenReturns true on success and false on failure.
1190898184e3Ssthen
1191898184e3Ssthen=cut
1192898184e3Ssthen
1193898184e3Ssthensub chown {
1194898184e3Ssthen    my $self = shift;
1195898184e3Ssthen    my $file = shift; return unless defined $file;
1196898184e3Ssthen    my $uname  = shift; return unless defined $uname;
1197898184e3Ssthen    my @args   = ($uname);
1198898184e3Ssthen    push(@args, shift);
1199898184e3Ssthen
1200898184e3Ssthen    my $entry = $self->_find_entry( $file ) or return;
1201898184e3Ssthen    my $x = $entry->chown( @args );
1202898184e3Ssthen    return $x;
1203898184e3Ssthen}
1204898184e3Ssthen
1205b39c5158Smillert=head2 $tar->remove (@filenamelist)
1206b39c5158Smillert
1207b39c5158SmillertRemoves any entries with names matching any of the given filenames
1208b39c5158Smillertfrom the in-memory archive. Returns a list of C<Archive::Tar::File>
1209b39c5158Smillertobjects that remain.
1210b39c5158Smillert
1211b39c5158Smillert=cut
1212b39c5158Smillert
1213b39c5158Smillertsub remove {
1214b39c5158Smillert    my $self = shift;
1215b39c5158Smillert    my @list = @_;
1216b39c5158Smillert
1217b39c5158Smillert    my %seen = map { $_->full_path => $_ } @{$self->_data};
1218b39c5158Smillert    delete $seen{ $_ } for @list;
1219b39c5158Smillert
1220b39c5158Smillert    $self->_data( [values %seen] );
1221b39c5158Smillert
1222b39c5158Smillert    return values %seen;
1223b39c5158Smillert}
1224b39c5158Smillert
1225b39c5158Smillert=head2 $tar->clear
1226b39c5158Smillert
1227b39c5158SmillertC<clear> clears the current in-memory archive. This effectively gives
1228b39c5158Smillertyou a 'blank' object, ready to be filled again. Note that C<clear>
1229b39c5158Smillertonly has effect on the object, not the underlying tarfile.
1230b39c5158Smillert
1231b39c5158Smillert=cut
1232b39c5158Smillert
1233b39c5158Smillertsub clear {
1234b39c5158Smillert    my $self = shift or return;
1235b39c5158Smillert
1236b39c5158Smillert    $self->_data( [] );
1237b39c5158Smillert    $self->_file( '' );
1238b39c5158Smillert
1239b39c5158Smillert    return 1;
1240b39c5158Smillert}
1241b39c5158Smillert
1242b39c5158Smillert
1243b39c5158Smillert=head2 $tar->write ( [$file, $compressed, $prefix] )
1244b39c5158Smillert
1245b39c5158SmillertWrite the in-memory archive to disk.  The first argument can either
1246b39c5158Smillertbe the name of a file or a reference to an already open filehandle (a
1247b39c5158SmillertGLOB reference).
1248b39c5158Smillert
1249b39c5158SmillertThe second argument is used to indicate compression. You can either
1250b39c5158Smillertcompress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed
1251b39c5158Smillertto be the C<gzip> compression level (between 1 and 9), but the use of
1252898184e3Ssthenconstants is preferred:
1253b39c5158Smillert
1254b39c5158Smillert  # write a gzip compressed file
1255b39c5158Smillert  $tar->write( 'out.tgz', COMPRESS_GZIP );
1256b39c5158Smillert
1257b39c5158Smillert  # write a bzip compressed file
1258b39c5158Smillert  $tar->write( 'out.tbz', COMPRESS_BZIP );
1259b39c5158Smillert
1260b39c5158SmillertNote that when you pass in a filehandle, the compression argument
1261b39c5158Smillertis ignored, as all files are printed verbatim to your filehandle.
1262b39c5158SmillertIf you wish to enable compression with filehandles, use an
1263b39c5158SmillertC<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead.
1264b39c5158Smillert
1265b39c5158SmillertThe third argument is an optional prefix. All files will be tucked
1266b39c5158Smillertaway in the directory you specify as prefix. So if you have files
1267b39c5158Smillert'a' and 'b' in your archive, and you specify 'foo' as prefix, they
1268b39c5158Smillertwill be written to the archive as 'foo/a' and 'foo/b'.
1269b39c5158Smillert
1270b39c5158SmillertIf no arguments are given, C<write> returns the entire formatted
1271b39c5158Smillertarchive as a string, which could be useful if you'd like to stuff the
1272b39c5158Smillertarchive into a socket or a pipe to gzip or something.
1273b39c5158Smillert
1274b39c5158Smillert
1275b39c5158Smillert=cut
1276b39c5158Smillert
1277b39c5158Smillertsub write {
1278b39c5158Smillert    my $self        = shift;
1279b39c5158Smillert    my $file        = shift; $file = '' unless defined $file;
1280b39c5158Smillert    my $gzip        = shift || 0;
1281b39c5158Smillert    my $ext_prefix  = shift; $ext_prefix = '' unless defined $ext_prefix;
1282b39c5158Smillert    my $dummy       = '';
1283b39c5158Smillert
1284b39c5158Smillert    ### only need a handle if we have a file to print to ###
1285b39c5158Smillert    my $handle = length($file)
1286b39c5158Smillert                    ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) )
1287b39c5158Smillert                        or return )
1288b39c5158Smillert                    : $HAS_PERLIO    ? do { open my $h, '>', \$dummy; $h }
1289b39c5158Smillert                    : $HAS_IO_STRING ? IO::String->new
1290b39c5158Smillert                    : __PACKAGE__->no_string_support();
1291b39c5158Smillert
1292b39c5158Smillert    ### Addresses: #41798: Nonempty $\ when writing a TAR file produces a
1293b39c5158Smillert    ### corrupt TAR file. Must clear out $\ to make sure no garbage is
1294b39c5158Smillert    ### printed to the archive
1295b39c5158Smillert    local $\;
1296b39c5158Smillert
1297b39c5158Smillert    for my $entry ( @{$self->_data} ) {
1298b39c5158Smillert        ### entries to be written to the tarfile ###
1299b39c5158Smillert        my @write_me;
1300b39c5158Smillert
1301b39c5158Smillert        ### only now will we change the object to reflect the current state
1302b39c5158Smillert        ### of the name and prefix fields -- this needs to be limited to
1303b39c5158Smillert        ### write() only!
1304b39c5158Smillert        my $clone = $entry->clone;
1305b39c5158Smillert
1306b39c5158Smillert
1307b39c5158Smillert        ### so, if you don't want use to use the prefix, we'll stuff
1308b39c5158Smillert        ### everything in the name field instead
1309b39c5158Smillert        if( $DO_NOT_USE_PREFIX ) {
1310b39c5158Smillert
1311b39c5158Smillert            ### you might have an extended prefix, if so, set it in the clone
1312b39c5158Smillert            ### XXX is ::Unix right?
1313b39c5158Smillert            $clone->name( length $ext_prefix
1314b39c5158Smillert                            ? File::Spec::Unix->catdir( $ext_prefix,
1315b39c5158Smillert                                                        $clone->full_path)
1316b39c5158Smillert                            : $clone->full_path );
1317b39c5158Smillert            $clone->prefix( '' );
1318b39c5158Smillert
1319b39c5158Smillert        ### otherwise, we'll have to set it properly -- prefix part in the
1320b39c5158Smillert        ### prefix and name part in the name field.
1321b39c5158Smillert        } else {
1322b39c5158Smillert
1323b39c5158Smillert            ### split them here, not before!
1324b39c5158Smillert            my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path );
1325b39c5158Smillert
1326b39c5158Smillert            ### you might have an extended prefix, if so, set it in the clone
1327b39c5158Smillert            ### XXX is ::Unix right?
1328b39c5158Smillert            $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix )
1329b39c5158Smillert                if length $ext_prefix;
1330b39c5158Smillert
1331b39c5158Smillert            $clone->prefix( $prefix );
1332b39c5158Smillert            $clone->name( $name );
1333b39c5158Smillert        }
1334b39c5158Smillert
1335b39c5158Smillert        ### names are too long, and will get truncated if we don't add a
1336b39c5158Smillert        ### '@LongLink' file...
1337b39c5158Smillert        my $make_longlink = (   length($clone->name)    > NAME_LENGTH or
1338b39c5158Smillert                                length($clone->prefix)  > PREFIX_LENGTH
1339b39c5158Smillert                            ) || 0;
1340b39c5158Smillert
1341b39c5158Smillert        ### perhaps we need to make a longlink file?
1342b39c5158Smillert        if( $make_longlink ) {
1343b39c5158Smillert            my $longlink = Archive::Tar::File->new(
1344b39c5158Smillert                            data => LONGLINK_NAME,
1345b39c5158Smillert                            $clone->full_path,
1346b39c5158Smillert                            { type => LONGLINK }
1347b39c5158Smillert                        );
1348b39c5158Smillert
1349b39c5158Smillert            unless( $longlink ) {
1350b39c5158Smillert                $self->_error(  qq[Could not create 'LongLink' entry for ] .
1351b39c5158Smillert                                qq[oversize file '] . $clone->full_path ."'" );
1352b39c5158Smillert                return;
1353b39c5158Smillert            };
1354b39c5158Smillert
1355b39c5158Smillert            push @write_me, $longlink;
1356b39c5158Smillert        }
1357b39c5158Smillert
1358b39c5158Smillert        push @write_me, $clone;
1359b39c5158Smillert
1360b39c5158Smillert        ### write the one, optionally 2 a::t::file objects to the handle
1361b39c5158Smillert        for my $clone (@write_me) {
1362b39c5158Smillert
1363b39c5158Smillert            ### if the file is a symlink, there are 2 options:
1364b39c5158Smillert            ### either we leave the symlink intact, but then we don't write any
1365b39c5158Smillert            ### data OR we follow the symlink, which means we actually make a
1366b39c5158Smillert            ### copy. if we do the latter, we have to change the TYPE of the
1367b39c5158Smillert            ### clone to 'FILE'
1368b39c5158Smillert            my $link_ok =  $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK;
1369b39c5158Smillert            my $data_ok = !$clone->is_symlink && $clone->has_content;
1370b39c5158Smillert
1371b39c5158Smillert            ### downgrade to a 'normal' file if it's a symlink we're going to
1372b39c5158Smillert            ### treat as a regular file
1373b39c5158Smillert            $clone->_downgrade_to_plainfile if $link_ok;
1374b39c5158Smillert
1375b39c5158Smillert            ### get the header for this block
1376b39c5158Smillert            my $header = $self->_format_tar_entry( $clone );
1377b39c5158Smillert            unless( $header ) {
1378b39c5158Smillert                $self->_error(q[Could not format header for: ] .
1379b39c5158Smillert                                    $clone->full_path );
1380b39c5158Smillert                return;
1381b39c5158Smillert            }
1382b39c5158Smillert
1383b39c5158Smillert            unless( print $handle $header ) {
1384b39c5158Smillert                $self->_error(q[Could not write header for: ] .
1385b39c5158Smillert                                    $clone->full_path);
1386b39c5158Smillert                return;
1387b39c5158Smillert            }
1388b39c5158Smillert
1389b39c5158Smillert            if( $link_ok or $data_ok ) {
1390b39c5158Smillert                unless( print $handle $clone->data ) {
1391b39c5158Smillert                    $self->_error(q[Could not write data for: ] .
1392b39c5158Smillert                                    $clone->full_path);
1393b39c5158Smillert                    return;
1394b39c5158Smillert                }
1395b39c5158Smillert
1396b39c5158Smillert                ### pad the end of the clone if required ###
1397b39c5158Smillert                print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK
1398b39c5158Smillert            }
1399b39c5158Smillert
1400b39c5158Smillert        } ### done writing these entries
1401b39c5158Smillert    }
1402b39c5158Smillert
1403b39c5158Smillert    ### write the end markers ###
1404b39c5158Smillert    print $handle TAR_END x 2 or
1405b39c5158Smillert            return $self->_error( qq[Could not write tar end markers] );
1406b39c5158Smillert
1407b39c5158Smillert    ### did you want it written to a file, or returned as a string? ###
1408b39c5158Smillert    my $rv =  length($file) ? 1
1409b39c5158Smillert                        : $HAS_PERLIO ? $dummy
1410b39c5158Smillert                        : do { seek $handle, 0, 0; local $/; <$handle> };
1411b39c5158Smillert
1412898184e3Ssthen    ### make sure to close the handle if we created it
1413898184e3Ssthen    if ( $file ne $handle ) {
1414898184e3Ssthen	unless( close $handle ) {
1415898184e3Ssthen	    $self->_error( qq[Could not write tar] );
1416898184e3Ssthen	    return;
1417898184e3Ssthen	}
1418898184e3Ssthen    }
1419b39c5158Smillert
1420b39c5158Smillert    return $rv;
1421b39c5158Smillert}
1422b39c5158Smillert
1423b39c5158Smillertsub _format_tar_entry {
1424b39c5158Smillert    my $self        = shift;
1425b39c5158Smillert    my $entry       = shift or return;
1426b39c5158Smillert    my $ext_prefix  = shift; $ext_prefix = '' unless defined $ext_prefix;
1427b39c5158Smillert    my $no_prefix   = shift || 0;
1428b39c5158Smillert
1429b39c5158Smillert    my $file    = $entry->name;
1430b39c5158Smillert    my $prefix  = $entry->prefix; $prefix = '' unless defined $prefix;
1431b39c5158Smillert
1432b39c5158Smillert    ### remove the prefix from the file name
1433898184e3Ssthen    ### not sure if this is still needed --kane
1434b39c5158Smillert    ### no it's not -- Archive::Tar::File->_new_from_file will take care of
1435b39c5158Smillert    ### this for us. Even worse, this would break if we tried to add a file
1436b39c5158Smillert    ### like x/x.
1437b39c5158Smillert    #if( length $prefix ) {
1438b39c5158Smillert    #    $file =~ s/^$match//;
1439b39c5158Smillert    #}
1440b39c5158Smillert
1441b39c5158Smillert    $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix)
1442b39c5158Smillert                if length $ext_prefix;
1443b39c5158Smillert
1444b39c5158Smillert    ### not sure why this is... ###
1445b39c5158Smillert    my $l = PREFIX_LENGTH; # is ambiguous otherwise...
1446b39c5158Smillert    substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH;
1447b39c5158Smillert
1448898184e3Ssthen    my $f1 = "%06o"; my $f2  = $ZERO_PAD_NUMBERS ? "%011o" : "%11o";
1449b39c5158Smillert
1450b39c5158Smillert    ### this might be optimizable with a 'changed' flag in the file objects ###
1451b39c5158Smillert    my $tar = pack (
1452b39c5158Smillert                PACK,
1453b39c5158Smillert                $file,
1454b39c5158Smillert
1455b39c5158Smillert                (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]),
1456b39c5158Smillert                (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]),
1457b39c5158Smillert
1458b39c5158Smillert                "",  # checksum field - space padded a bit down
1459b39c5158Smillert
1460b39c5158Smillert                (map { $entry->$_() }                 qw[type linkname magic]),
1461b39c5158Smillert
1462b39c5158Smillert                $entry->version || TAR_VERSION,
1463b39c5158Smillert
1464b39c5158Smillert                (map { $entry->$_() }                 qw[uname gname]),
1465b39c5158Smillert                (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]),
1466b39c5158Smillert
1467b39c5158Smillert                ($no_prefix ? '' : $prefix)
1468b39c5158Smillert    );
1469b39c5158Smillert
1470b39c5158Smillert    ### add the checksum ###
1471898184e3Ssthen    my $checksum_fmt = $ZERO_PAD_NUMBERS ? "%06o\0" : "%06o\0";
1472b39c5158Smillert    substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar));
1473b39c5158Smillert
1474b39c5158Smillert    return $tar;
1475b39c5158Smillert}
1476b39c5158Smillert
1477b39c5158Smillert=head2 $tar->add_files( @filenamelist )
1478b39c5158Smillert
1479b39c5158SmillertTakes a list of filenames and adds them to the in-memory archive.
1480b39c5158Smillert
1481b39c5158SmillertThe path to the file is automatically converted to a Unix like
1482b39c5158Smillertequivalent for use in the archive, and, if on MacOS, the file's
1483b39c5158Smillertmodification time is converted from the MacOS epoch to the Unix epoch.
1484b39c5158SmillertSo tar archives created on MacOS with B<Archive::Tar> can be read
1485b39c5158Smillertboth with I<tar> on Unix and applications like I<suntar> or
1486b39c5158SmillertI<Stuffit Expander> on MacOS.
1487b39c5158Smillert
1488b39c5158SmillertBe aware that the file's type/creator and resource fork will be lost,
1489b39c5158Smillertwhich is usually what you want in cross-platform archives.
1490b39c5158Smillert
1491b39c5158SmillertInstead of a filename, you can also pass it an existing C<Archive::Tar::File>
1492b39c5158Smillertobject from, for example, another archive. The object will be clone, and
1493b39c5158Smillerteffectively be a copy of the original, not an alias.
1494b39c5158Smillert
1495b39c5158SmillertReturns a list of C<Archive::Tar::File> objects that were just added.
1496b39c5158Smillert
1497b39c5158Smillert=cut
1498b39c5158Smillert
1499b39c5158Smillertsub add_files {
1500b39c5158Smillert    my $self    = shift;
1501b39c5158Smillert    my @files   = @_ or return;
1502b39c5158Smillert
1503b39c5158Smillert    my @rv;
1504b39c5158Smillert    for my $file ( @files ) {
1505b39c5158Smillert
1506b39c5158Smillert        ### you passed an Archive::Tar::File object
1507b39c5158Smillert        ### clone it so we don't accidentally have a reference to
1508b39c5158Smillert        ### an object from another archive
1509b39c5158Smillert        if( UNIVERSAL::isa( $file,'Archive::Tar::File' ) ) {
1510b39c5158Smillert            push @rv, $file->clone;
1511b39c5158Smillert            next;
1512b39c5158Smillert        }
1513b39c5158Smillert
151491f110e0Safresh1        eval {
151591f110e0Safresh1            if( utf8::is_utf8( $file )) {
151691f110e0Safresh1              utf8::encode( $file );
151791f110e0Safresh1            }
151891f110e0Safresh1        };
151991f110e0Safresh1
1520b39c5158Smillert        unless( -e $file || -l $file ) {
1521b39c5158Smillert            $self->_error( qq[No such file: '$file'] );
1522b39c5158Smillert            next;
1523b39c5158Smillert        }
1524b39c5158Smillert
1525b39c5158Smillert        my $obj = Archive::Tar::File->new( file => $file );
1526b39c5158Smillert        unless( $obj ) {
1527b39c5158Smillert            $self->_error( qq[Unable to add file: '$file'] );
1528b39c5158Smillert            next;
1529b39c5158Smillert        }
1530b39c5158Smillert
1531b39c5158Smillert        push @rv, $obj;
1532b39c5158Smillert    }
1533b39c5158Smillert
1534b39c5158Smillert    push @{$self->{_data}}, @rv;
1535b39c5158Smillert
1536b39c5158Smillert    return @rv;
1537b39c5158Smillert}
1538b39c5158Smillert
1539b39c5158Smillert=head2 $tar->add_data ( $filename, $data, [$opthashref] )
1540b39c5158Smillert
1541b39c5158SmillertTakes a filename, a scalar full of data and optionally a reference to
1542b39c5158Smillerta hash with specific options.
1543b39c5158Smillert
1544b39c5158SmillertWill add a file to the in-memory archive, with name C<$filename> and
1545b39c5158Smillertcontent C<$data>. Specific properties can be set using C<$opthashref>.
1546b39c5158SmillertThe following list of properties is supported: name, size, mtime
1547b39c5158Smillert(last modified date), mode, uid, gid, linkname, uname, gname,
1548b39c5158Smillertdevmajor, devminor, prefix, type.  (On MacOS, the file's path and
1549b39c5158Smillertmodification times are converted to Unix equivalents.)
1550b39c5158Smillert
155191f110e0Safresh1Valid values for the file type are the following constants defined by
155291f110e0Safresh1Archive::Tar::Constant:
1553b39c5158Smillert
1554b39c5158Smillert=over 4
1555b39c5158Smillert
1556b39c5158Smillert=item FILE
1557b39c5158Smillert
1558b39c5158SmillertRegular file.
1559b39c5158Smillert
1560b39c5158Smillert=item HARDLINK
1561b39c5158Smillert
1562b39c5158Smillert=item SYMLINK
1563b39c5158Smillert
1564b39c5158SmillertHard and symbolic ("soft") links; linkname should specify target.
1565b39c5158Smillert
1566b39c5158Smillert=item CHARDEV
1567b39c5158Smillert
1568b39c5158Smillert=item BLOCKDEV
1569b39c5158Smillert
1570b39c5158SmillertCharacter and block devices. devmajor and devminor should specify the major
1571b39c5158Smillertand minor device numbers.
1572b39c5158Smillert
1573b39c5158Smillert=item DIR
1574b39c5158Smillert
1575b39c5158SmillertDirectory.
1576b39c5158Smillert
1577b39c5158Smillert=item FIFO
1578b39c5158Smillert
1579b39c5158SmillertFIFO (named pipe).
1580b39c5158Smillert
1581b39c5158Smillert=item SOCKET
1582b39c5158Smillert
1583b39c5158SmillertSocket.
1584b39c5158Smillert
1585b39c5158Smillert=back
1586b39c5158Smillert
1587b39c5158SmillertReturns the C<Archive::Tar::File> object that was just added, or
1588b39c5158SmillertC<undef> on failure.
1589b39c5158Smillert
1590b39c5158Smillert=cut
1591b39c5158Smillert
1592b39c5158Smillertsub add_data {
1593b39c5158Smillert    my $self    = shift;
1594b39c5158Smillert    my ($file, $data, $opt) = @_;
1595b39c5158Smillert
1596b39c5158Smillert    my $obj = Archive::Tar::File->new( data => $file, $data, $opt );
1597b39c5158Smillert    unless( $obj ) {
1598b39c5158Smillert        $self->_error( qq[Unable to add file: '$file'] );
1599b39c5158Smillert        return;
1600b39c5158Smillert    }
1601b39c5158Smillert
1602b39c5158Smillert    push @{$self->{_data}}, $obj;
1603b39c5158Smillert
1604b39c5158Smillert    return $obj;
1605b39c5158Smillert}
1606b39c5158Smillert
1607b39c5158Smillert=head2 $tar->error( [$BOOL] )
1608b39c5158Smillert
1609b39c5158SmillertReturns the current error string (usually, the last error reported).
1610b39c5158SmillertIf a true value was specified, it will give the C<Carp::longmess>
1611b39c5158Smillertequivalent of the error, in effect giving you a stacktrace.
1612b39c5158Smillert
1613b39c5158SmillertFor backwards compatibility, this error is also available as
1614b39c5158SmillertC<$Archive::Tar::error> although it is much recommended you use the
1615b39c5158Smillertmethod call instead.
1616b39c5158Smillert
1617b39c5158Smillert=cut
1618b39c5158Smillert
1619b39c5158Smillert{
1620b39c5158Smillert    $error = '';
1621b39c5158Smillert    my $longmess;
1622b39c5158Smillert
1623b39c5158Smillert    sub _error {
1624b39c5158Smillert        my $self    = shift;
1625b39c5158Smillert        my $msg     = $error = shift;
1626b39c5158Smillert        $longmess   = Carp::longmess($error);
1627b39c5158Smillert        if (ref $self) {
1628b39c5158Smillert            $self->{_error} = $error;
1629b39c5158Smillert            $self->{_longmess} = $longmess;
1630b39c5158Smillert        }
1631b39c5158Smillert
1632b39c5158Smillert        ### set Archive::Tar::WARN to 0 to disable printing
1633b39c5158Smillert        ### of errors
1634b39c5158Smillert        if( $WARN ) {
1635b39c5158Smillert            carp $DEBUG ? $longmess : $msg;
1636b39c5158Smillert        }
1637b39c5158Smillert
1638b39c5158Smillert        return;
1639b39c5158Smillert    }
1640b39c5158Smillert
1641b39c5158Smillert    sub error {
1642b39c5158Smillert        my $self = shift;
1643b39c5158Smillert        if (ref $self) {
1644b39c5158Smillert            return shift() ? $self->{_longmess} : $self->{_error};
1645b39c5158Smillert        } else {
1646b39c5158Smillert            return shift() ? $longmess : $error;
1647b39c5158Smillert        }
1648b39c5158Smillert    }
1649b39c5158Smillert}
1650b39c5158Smillert
1651b39c5158Smillert=head2 $tar->setcwd( $cwd );
1652b39c5158Smillert
1653b39c5158SmillertC<Archive::Tar> needs to know the current directory, and it will run
1654b39c5158SmillertC<Cwd::cwd()> I<every> time it extracts a I<relative> entry from the
1655b39c5158Smillerttarfile and saves it in the file system. (As of version 1.30, however,
1656b39c5158SmillertC<Archive::Tar> will use the speed optimization described below
1657b39c5158Smillertautomatically, so it's only relevant if you're using C<extract_file()>).
1658b39c5158Smillert
1659b39c5158SmillertSince C<Archive::Tar> doesn't change the current directory internally
1660b39c5158Smillertwhile it is extracting the items in a tarball, all calls to C<Cwd::cwd()>
1661b39c5158Smillertcan be avoided if we can guarantee that the current directory doesn't
1662b39c5158Smillertget changed externally.
1663b39c5158Smillert
1664b39c5158SmillertTo use this performance boost, set the current directory via
1665b39c5158Smillert
1666b39c5158Smillert    use Cwd;
1667b39c5158Smillert    $tar->setcwd( cwd() );
1668b39c5158Smillert
1669b39c5158Smillertonce before calling a function like C<extract_file> and
1670b39c5158SmillertC<Archive::Tar> will use the current directory setting from then on
1671b39c5158Smillertand won't call C<Cwd::cwd()> internally.
1672b39c5158Smillert
1673b39c5158SmillertTo switch back to the default behaviour, use
1674b39c5158Smillert
1675b39c5158Smillert    $tar->setcwd( undef );
1676b39c5158Smillert
1677b39c5158Smillertand C<Archive::Tar> will call C<Cwd::cwd()> internally again.
1678b39c5158Smillert
1679898184e3SsthenIf you're using C<Archive::Tar>'s C<extract()> method, C<setcwd()> will
1680b39c5158Smillertbe called for you.
1681b39c5158Smillert
1682b39c5158Smillert=cut
1683b39c5158Smillert
1684b39c5158Smillertsub setcwd {
1685b39c5158Smillert    my $self     = shift;
1686b39c5158Smillert    my $cwd      = shift;
1687b39c5158Smillert
1688b39c5158Smillert    $self->{cwd} = $cwd;
1689b39c5158Smillert}
1690b39c5158Smillert
1691b39c5158Smillert=head1 Class Methods
1692b39c5158Smillert
1693b39c5158Smillert=head2 Archive::Tar->create_archive($file, $compressed, @filelist)
1694b39c5158Smillert
1695b39c5158SmillertCreates a tar file from the list of files provided.  The first
1696b39c5158Smillertargument can either be the name of the tar file to create or a
1697b39c5158Smillertreference to an open file handle (e.g. a GLOB reference).
1698b39c5158Smillert
1699b39c5158SmillertThe second argument is used to indicate compression. You can either
1700b39c5158Smillertcompress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed
1701b39c5158Smillertto be the C<gzip> compression level (between 1 and 9), but the use of
1702898184e3Ssthenconstants is preferred:
1703b39c5158Smillert
1704b39c5158Smillert  # write a gzip compressed file
1705b39c5158Smillert  Archive::Tar->create_archive( 'out.tgz', COMPRESS_GZIP, @filelist );
1706b39c5158Smillert
1707b39c5158Smillert  # write a bzip compressed file
1708b39c5158Smillert  Archive::Tar->create_archive( 'out.tbz', COMPRESS_BZIP, @filelist );
1709b39c5158Smillert
1710b39c5158SmillertNote that when you pass in a filehandle, the compression argument
1711b39c5158Smillertis ignored, as all files are printed verbatim to your filehandle.
1712b39c5158SmillertIf you wish to enable compression with filehandles, use an
1713b39c5158SmillertC<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead.
1714b39c5158Smillert
1715b39c5158SmillertThe remaining arguments list the files to be included in the tar file.
1716b39c5158SmillertThese files must all exist. Any files which don't exist or can't be
1717b39c5158Smillertread are silently ignored.
1718b39c5158Smillert
1719b39c5158SmillertIf the archive creation fails for any reason, C<create_archive> will
1720b39c5158Smillertreturn false. Please use the C<error> method to find the cause of the
1721b39c5158Smillertfailure.
1722b39c5158Smillert
1723b39c5158SmillertNote that this method does not write C<on the fly> as it were; it
1724b39c5158Smillertstill reads all the files into memory before writing out the archive.
1725b39c5158SmillertConsult the FAQ below if this is a problem.
1726b39c5158Smillert
1727b39c5158Smillert=cut
1728b39c5158Smillert
1729b39c5158Smillertsub create_archive {
1730b39c5158Smillert    my $class = shift;
1731b39c5158Smillert
1732b39c5158Smillert    my $file    = shift; return unless defined $file;
1733b39c5158Smillert    my $gzip    = shift || 0;
1734b39c5158Smillert    my @files   = @_;
1735b39c5158Smillert
1736b39c5158Smillert    unless( @files ) {
1737b39c5158Smillert        return $class->_error( qq[Cowardly refusing to create empty archive!] );
1738b39c5158Smillert    }
1739b39c5158Smillert
1740b39c5158Smillert    my $tar = $class->new;
1741b39c5158Smillert    $tar->add_files( @files );
1742b39c5158Smillert    return $tar->write( $file, $gzip );
1743b39c5158Smillert}
1744b39c5158Smillert
1745b39c5158Smillert=head2 Archive::Tar->iter( $filename, [ $compressed, {opt => $val} ] )
1746b39c5158Smillert
1747b39c5158SmillertReturns an iterator function that reads the tar file without loading
1748b39c5158Smillertit all in memory.  Each time the function is called it will return the
1749b39c5158Smillertnext file in the tarball. The files are returned as
1750b39c5158SmillertC<Archive::Tar::File> objects. The iterator function returns the
1751b39c5158Smillertempty list once it has exhausted the files contained.
1752b39c5158Smillert
1753b39c5158SmillertThe second argument can be a hash reference with options, which are
1754b39c5158Smillertidentical to the arguments passed to C<read()>.
1755b39c5158Smillert
1756b39c5158SmillertExample usage:
1757b39c5158Smillert
1758b39c5158Smillert    my $next = Archive::Tar->iter( "example.tar.gz", 1, {filter => qr/\.pm$/} );
1759b39c5158Smillert
1760b39c5158Smillert    while( my $f = $next->() ) {
1761b39c5158Smillert        print $f->name, "\n";
1762b39c5158Smillert
1763b39c5158Smillert        $f->extract or warn "Extraction failed";
1764b39c5158Smillert
1765b39c5158Smillert        # ....
1766b39c5158Smillert    }
1767b39c5158Smillert
1768b39c5158Smillert=cut
1769b39c5158Smillert
1770b39c5158Smillert
1771b39c5158Smillertsub iter {
1772b39c5158Smillert    my $class       = shift;
1773*9f11ffb7Safresh1    my $filename    = shift;
1774*9f11ffb7Safresh1    return unless defined $filename;
1775898184e3Ssthen    my $compressed  = shift || 0;
1776b39c5158Smillert    my $opts        = shift || {};
1777b39c5158Smillert
1778b39c5158Smillert    ### get a handle to read from.
1779b39c5158Smillert    my $handle = $class->_get_handle(
1780b39c5158Smillert        $filename,
1781b39c5158Smillert        $compressed,
1782b39c5158Smillert        READ_ONLY->( ZLIB )
1783b39c5158Smillert    ) or return;
1784b39c5158Smillert
1785b39c5158Smillert    my @data;
17866fb12b70Safresh1		my $CONSTRUCT_ARGS = [ $filename, $compressed, $opts ];
1787b39c5158Smillert    return sub {
1788b39c5158Smillert        return shift(@data)     if @data;       # more than one file returned?
1789b39c5158Smillert        return                  unless $handle; # handle exhausted?
1790b39c5158Smillert
1791b39c5158Smillert        ### read data, should only return file
1792b39c5158Smillert        my $tarfile = $class->_read_tar($handle, { %$opts, limit => 1 });
1793b39c5158Smillert        @data = @$tarfile if ref $tarfile && ref $tarfile eq 'ARRAY';
17946fb12b70Safresh1				if($Archive::Tar::RESOLVE_SYMLINK!~/none/){
17956fb12b70Safresh1					foreach(@data){
17966fb12b70Safresh1						#may refine this heuristic for ON_UNIX?
17976fb12b70Safresh1						if($_->linkname){
17986fb12b70Safresh1							#is there a better slot to store/share it ?
17996fb12b70Safresh1							$_->{'_archive'} = $CONSTRUCT_ARGS;
18006fb12b70Safresh1						}
18016fb12b70Safresh1					}
18026fb12b70Safresh1				}
1803b39c5158Smillert
1804b39c5158Smillert        ### return one piece of data
1805b39c5158Smillert        return shift(@data)     if @data;
1806b39c5158Smillert
1807b39c5158Smillert        ### data is exhausted, free the filehandle
1808b39c5158Smillert        undef $handle;
18096fb12b70Safresh1				if(@$CONSTRUCT_ARGS == 4){
18106fb12b70Safresh1					#free archive in memory
18116fb12b70Safresh1					undef $CONSTRUCT_ARGS->[-1];
18126fb12b70Safresh1				}
1813b39c5158Smillert        return;
1814b39c5158Smillert    };
1815b39c5158Smillert}
1816b39c5158Smillert
1817b39c5158Smillert=head2 Archive::Tar->list_archive($file, $compressed, [\@properties])
1818b39c5158Smillert
1819b39c5158SmillertReturns a list of the names of all the files in the archive.  The
1820b39c5158Smillertfirst argument can either be the name of the tar file to list or a
1821b39c5158Smillertreference to an open file handle (e.g. a GLOB reference).
1822b39c5158Smillert
1823b39c5158SmillertIf C<list_archive()> is passed an array reference as its third
1824b39c5158Smillertargument it returns a list of hash references containing the requested
1825b39c5158Smillertproperties of each file.  The following list of properties is
1826b39c5158Smillertsupported: full_path, name, size, mtime (last modified date), mode,
182791f110e0Safresh1uid, gid, linkname, uname, gname, devmajor, devminor, prefix, type.
1828b39c5158Smillert
1829b39c5158SmillertSee C<Archive::Tar::File> for details about supported properties.
1830b39c5158Smillert
1831b39c5158SmillertPassing an array reference containing only one element, 'name', is
1832b39c5158Smillertspecial cased to return a list of names rather than a list of hash
1833b39c5158Smillertreferences.
1834b39c5158Smillert
1835b39c5158Smillert=cut
1836b39c5158Smillert
1837b39c5158Smillertsub list_archive {
1838b39c5158Smillert    my $class   = shift;
1839b39c5158Smillert    my $file    = shift; return unless defined $file;
1840b39c5158Smillert    my $gzip    = shift || 0;
1841b39c5158Smillert
1842b39c5158Smillert    my $tar = $class->new($file, $gzip);
1843b39c5158Smillert    return unless $tar;
1844b39c5158Smillert
1845b39c5158Smillert    return $tar->list_files( @_ );
1846b39c5158Smillert}
1847b39c5158Smillert
1848b39c5158Smillert=head2 Archive::Tar->extract_archive($file, $compressed)
1849b39c5158Smillert
1850b39c5158SmillertExtracts the contents of the tar file.  The first argument can either
1851b39c5158Smillertbe the name of the tar file to create or a reference to an open file
1852b39c5158Smillerthandle (e.g. a GLOB reference).  All relative paths in the tar file will
1853b39c5158Smillertbe created underneath the current working directory.
1854b39c5158Smillert
1855b39c5158SmillertC<extract_archive> will return a list of files it extracted.
1856b39c5158SmillertIf the archive extraction fails for any reason, C<extract_archive>
1857b39c5158Smillertwill return false.  Please use the C<error> method to find the cause
1858b39c5158Smillertof the failure.
1859b39c5158Smillert
1860b39c5158Smillert=cut
1861b39c5158Smillert
1862b39c5158Smillertsub extract_archive {
1863b39c5158Smillert    my $class   = shift;
1864b39c5158Smillert    my $file    = shift; return unless defined $file;
1865b39c5158Smillert    my $gzip    = shift || 0;
1866b39c5158Smillert
1867b39c5158Smillert    my $tar = $class->new( ) or return;
1868b39c5158Smillert
1869b39c5158Smillert    return $tar->read( $file, $gzip, { extract => 1 } );
1870b39c5158Smillert}
1871b39c5158Smillert
1872b39c5158Smillert=head2 $bool = Archive::Tar->has_io_string
1873b39c5158Smillert
1874b39c5158SmillertReturns true if we currently have C<IO::String> support loaded.
1875b39c5158Smillert
1876b39c5158SmillertEither C<IO::String> or C<perlio> support is needed to support writing
1877b39c5158Smillertstringified archives. Currently, C<perlio> is the preferred method, if
1878b39c5158Smillertavailable.
1879b39c5158Smillert
1880b39c5158SmillertSee the C<GLOBAL VARIABLES> section to see how to change this preference.
1881b39c5158Smillert
1882b39c5158Smillert=cut
1883b39c5158Smillert
1884b39c5158Smillertsub has_io_string { return $HAS_IO_STRING; }
1885b39c5158Smillert
1886b39c5158Smillert=head2 $bool = Archive::Tar->has_perlio
1887b39c5158Smillert
1888b39c5158SmillertReturns true if we currently have C<perlio> support loaded.
1889b39c5158Smillert
1890b39c5158SmillertThis requires C<perl-5.8> or higher, compiled with C<perlio>
1891b39c5158Smillert
1892b39c5158SmillertEither C<IO::String> or C<perlio> support is needed to support writing
1893b39c5158Smillertstringified archives. Currently, C<perlio> is the preferred method, if
1894b39c5158Smillertavailable.
1895b39c5158Smillert
1896b39c5158SmillertSee the C<GLOBAL VARIABLES> section to see how to change this preference.
1897b39c5158Smillert
1898b39c5158Smillert=cut
1899b39c5158Smillert
1900b39c5158Smillertsub has_perlio { return $HAS_PERLIO; }
1901b39c5158Smillert
1902b39c5158Smillert=head2 $bool = Archive::Tar->has_zlib_support
1903b39c5158Smillert
1904b39c5158SmillertReturns true if C<Archive::Tar> can extract C<zlib> compressed archives
1905b39c5158Smillert
1906b39c5158Smillert=cut
1907b39c5158Smillert
1908b39c5158Smillertsub has_zlib_support { return ZLIB }
1909b39c5158Smillert
1910b39c5158Smillert=head2 $bool = Archive::Tar->has_bzip2_support
1911b39c5158Smillert
1912b39c5158SmillertReturns true if C<Archive::Tar> can extract C<bzip2> compressed archives
1913b39c5158Smillert
1914b39c5158Smillert=cut
1915b39c5158Smillert
1916b39c5158Smillertsub has_bzip2_support { return BZIP }
1917b39c5158Smillert
1918b39c5158Smillert=head2 Archive::Tar->can_handle_compressed_files
1919b39c5158Smillert
1920b39c5158SmillertA simple checking routine, which will return true if C<Archive::Tar>
1921b39c5158Smillertis able to uncompress compressed archives on the fly with C<IO::Zlib>
1922b39c5158Smillertand C<IO::Compress::Bzip2> or false if not both are installed.
1923b39c5158Smillert
1924b39c5158SmillertYou can use this as a shortcut to determine whether C<Archive::Tar>
1925b39c5158Smillertwill do what you think before passing compressed archives to its
1926b39c5158SmillertC<read> method.
1927b39c5158Smillert
1928b39c5158Smillert=cut
1929b39c5158Smillert
1930b39c5158Smillertsub can_handle_compressed_files { return ZLIB && BZIP ? 1 : 0 }
1931b39c5158Smillert
1932b39c5158Smillertsub no_string_support {
1933b39c5158Smillert    croak("You have to install IO::String to support writing archives to strings");
1934b39c5158Smillert}
1935b39c5158Smillert
19366fb12b70Safresh1sub _symlinks_resolver{
19376fb12b70Safresh1  my ($src, $trg) = @_;
19386fb12b70Safresh1  my @src = split /[\/\\]/, $src;
19396fb12b70Safresh1  my @trg = split /[\/\\]/, $trg;
19406fb12b70Safresh1  pop @src; #strip out current object name
19416fb12b70Safresh1  if(@trg and $trg[0] eq ''){
19426fb12b70Safresh1    shift @trg;
19436fb12b70Safresh1    #restart path from scratch
19446fb12b70Safresh1    @src = ( );
19456fb12b70Safresh1  }
19466fb12b70Safresh1  foreach my $part ( @trg ){
19476fb12b70Safresh1    next if $part eq '.'; #ignore current
19486fb12b70Safresh1    if($part eq '..'){
19496fb12b70Safresh1      #got to parent
19506fb12b70Safresh1      pop @src;
19516fb12b70Safresh1    }
19526fb12b70Safresh1    else{
19536fb12b70Safresh1      #append it
19546fb12b70Safresh1      push @src, $part;
19556fb12b70Safresh1    }
19566fb12b70Safresh1  }
19576fb12b70Safresh1  my $path = join('/', @src);
19586fb12b70Safresh1  warn "_symlinks_resolver('$src','$trg') = $path" if $DEBUG;
19596fb12b70Safresh1  return $path;
19606fb12b70Safresh1}
19616fb12b70Safresh1
1962b39c5158Smillert1;
1963b39c5158Smillert
1964b39c5158Smillert__END__
1965b39c5158Smillert
1966b39c5158Smillert=head1 GLOBAL VARIABLES
1967b39c5158Smillert
1968b39c5158Smillert=head2 $Archive::Tar::FOLLOW_SYMLINK
1969b39c5158Smillert
1970b39c5158SmillertSet this variable to C<1> to make C<Archive::Tar> effectively make a
1971b39c5158Smillertcopy of the file when extracting. Default is C<0>, which
1972b39c5158Smillertmeans the symlink stays intact. Of course, you will have to pack the
1973b39c5158Smillertfile linked to as well.
1974b39c5158Smillert
1975b39c5158SmillertThis option is checked when you write out the tarfile using C<write>
1976b39c5158Smillertor C<create_archive>.
1977b39c5158Smillert
1978b39c5158SmillertThis works just like C</bin/tar>'s C<-h> option.
1979b39c5158Smillert
1980b39c5158Smillert=head2 $Archive::Tar::CHOWN
1981b39c5158Smillert
1982b39c5158SmillertBy default, C<Archive::Tar> will try to C<chown> your files if it is
1983b39c5158Smillertable to. In some cases, this may not be desired. In that case, set
1984b39c5158Smillertthis variable to C<0> to disable C<chown>-ing, even if it were
1985b39c5158Smillertpossible.
1986b39c5158Smillert
1987b39c5158SmillertThe default is C<1>.
1988b39c5158Smillert
1989b39c5158Smillert=head2 $Archive::Tar::CHMOD
1990b39c5158Smillert
1991b39c5158SmillertBy default, C<Archive::Tar> will try to C<chmod> your files to
1992b39c5158Smillertwhatever mode was specified for the particular file in the archive.
1993b39c5158SmillertIn some cases, this may not be desired. In that case, set this
1994b39c5158Smillertvariable to C<0> to disable C<chmod>-ing.
1995b39c5158Smillert
1996b39c5158SmillertThe default is C<1>.
1997b39c5158Smillert
1998b39c5158Smillert=head2 $Archive::Tar::SAME_PERMISSIONS
1999b39c5158Smillert
2000b39c5158SmillertWhen, C<$Archive::Tar::CHMOD> is enabled, this setting controls whether
2001b39c5158Smillertthe permissions on files from the archive are used without modification
2002b39c5158Smillertof if they are filtered by removing any setid bits and applying the
2003b39c5158Smillertcurrent umask.
2004b39c5158Smillert
2005b39c5158SmillertThe default is C<1> for the root user and C<0> for normal users.
2006b39c5158Smillert
2007b39c5158Smillert=head2 $Archive::Tar::DO_NOT_USE_PREFIX
2008b39c5158Smillert
2009b39c5158SmillertBy default, C<Archive::Tar> will try to put paths that are over
2010b39c5158Smillert100 characters in the C<prefix> field of your tar header, as
2011b39c5158Smillertdefined per POSIX-standard. However, some (older) tar programs
2012b39c5158Smillertdo not implement this spec. To retain compatibility with these older
2013b39c5158Smillertor non-POSIX compliant versions, you can set the C<$DO_NOT_USE_PREFIX>
2014b39c5158Smillertvariable to a true value, and C<Archive::Tar> will use an alternate
2015b39c5158Smillertway of dealing with paths over 100 characters by using the
2016b39c5158SmillertC<GNU Extended Header> feature.
2017b39c5158Smillert
2018b39c5158SmillertNote that clients who do not support the C<GNU Extended Header>
2019b39c5158Smillertfeature will not be able to read these archives. Such clients include
2020b39c5158Smillerttars on C<Solaris>, C<Irix> and C<AIX>.
2021b39c5158Smillert
2022b39c5158SmillertThe default is C<0>.
2023b39c5158Smillert
2024b39c5158Smillert=head2 $Archive::Tar::DEBUG
2025b39c5158Smillert
2026b39c5158SmillertSet this variable to C<1> to always get the C<Carp::longmess> output
2027b39c5158Smillertof the warnings, instead of the regular C<carp>. This is the same
2028b39c5158Smillertmessage you would get by doing:
2029b39c5158Smillert
2030b39c5158Smillert    $tar->error(1);
2031b39c5158Smillert
2032b39c5158SmillertDefaults to C<0>.
2033b39c5158Smillert
2034b39c5158Smillert=head2 $Archive::Tar::WARN
2035b39c5158Smillert
2036b39c5158SmillertSet this variable to C<0> if you do not want any warnings printed.
2037b39c5158SmillertPersonally I recommend against doing this, but people asked for the
2038b39c5158Smillertoption. Also, be advised that this is of course not threadsafe.
2039b39c5158Smillert
2040b39c5158SmillertDefaults to C<1>.
2041b39c5158Smillert
2042b39c5158Smillert=head2 $Archive::Tar::error
2043b39c5158Smillert
2044b39c5158SmillertHolds the last reported error. Kept for historical reasons, but its
2045b39c5158Smillertuse is very much discouraged. Use the C<error()> method instead:
2046b39c5158Smillert
2047b39c5158Smillert    warn $tar->error unless $tar->extract;
2048b39c5158Smillert
2049b39c5158SmillertNote that in older versions of this module, the C<error()> method
2050b39c5158Smillertwould return an effectively global value even when called an instance
2051b39c5158Smillertmethod as above. This has since been fixed, and multiple instances of
2052b39c5158SmillertC<Archive::Tar> now have separate error strings.
2053b39c5158Smillert
2054b39c5158Smillert=head2 $Archive::Tar::INSECURE_EXTRACT_MODE
2055b39c5158Smillert
2056b39c5158SmillertThis variable indicates whether C<Archive::Tar> should allow
2057b39c5158Smillertfiles to be extracted outside their current working directory.
2058b39c5158Smillert
2059b39c5158SmillertAllowing this could have security implications, as a malicious
2060b39c5158Smillerttar archive could alter or replace any file the extracting user
2061b39c5158Smillerthas permissions to. Therefor, the default is to not allow
2062b39c5158Smillertinsecure extractions.
2063b39c5158Smillert
2064b39c5158SmillertIf you trust the archive, or have other reasons to allow the
2065b39c5158Smillertarchive to write files outside your current working directory,
2066b39c5158Smillertset this variable to C<true>.
2067b39c5158Smillert
2068b39c5158SmillertNote that this is a backwards incompatible change from version
2069b39c5158SmillertC<1.36> and before.
2070b39c5158Smillert
2071b39c5158Smillert=head2 $Archive::Tar::HAS_PERLIO
2072b39c5158Smillert
2073b39c5158SmillertThis variable holds a boolean indicating if we currently have
2074b39c5158SmillertC<perlio> support loaded. This will be enabled for any perl
2075b39c5158Smillertgreater than C<5.8> compiled with C<perlio>.
2076b39c5158Smillert
2077b39c5158SmillertIf you feel strongly about disabling it, set this variable to
2078b39c5158SmillertC<false>. Note that you will then need C<IO::String> installed
2079b39c5158Smillertto support writing stringified archives.
2080b39c5158Smillert
2081b39c5158SmillertDon't change this variable unless you B<really> know what you're
2082b39c5158Smillertdoing.
2083b39c5158Smillert
2084b39c5158Smillert=head2 $Archive::Tar::HAS_IO_STRING
2085b39c5158Smillert
2086b39c5158SmillertThis variable holds a boolean indicating if we currently have
2087b39c5158SmillertC<IO::String> support loaded. This will be enabled for any perl
2088b39c5158Smillertthat has a loadable C<IO::String> module.
2089b39c5158Smillert
2090b39c5158SmillertIf you feel strongly about disabling it, set this variable to
2091b39c5158SmillertC<false>. Note that you will then need C<perlio> support from
2092b39c5158Smillertyour perl to be able to  write stringified archives.
2093b39c5158Smillert
2094b39c5158SmillertDon't change this variable unless you B<really> know what you're
2095b39c5158Smillertdoing.
2096b39c5158Smillert
2097898184e3Ssthen=head2 $Archive::Tar::ZERO_PAD_NUMBERS
2098898184e3Ssthen
2099898184e3SsthenThis variable holds a boolean indicating if we will create
2100898184e3Ssthenzero padded numbers for C<size>, C<mtime> and C<checksum>.
2101898184e3SsthenThe default is C<0>, indicating that we will create space padded
2102898184e3Ssthennumbers. Added for compatibility with C<busybox> implementations.
2103898184e3Ssthen
21046fb12b70Safresh1=head2 Tuning the way RESOLVE_SYMLINK will works
21056fb12b70Safresh1
21066fb12b70Safresh1	You can tune the behaviour by setting the $Archive::Tar::RESOLVE_SYMLINK variable,
21076fb12b70Safresh1	or $ENV{PERL5_AT_RESOLVE_SYMLINK} before loading the module Archive::Tar.
21086fb12b70Safresh1
21096fb12b70Safresh1  Values can be one of the following:
21106fb12b70Safresh1
21116fb12b70Safresh1		none
21126fb12b70Safresh1           Disable this mechanism and failed as it was in previous version (<1.88)
21136fb12b70Safresh1
21146fb12b70Safresh1		speed (default)
21156fb12b70Safresh1           If you prefer speed
21166fb12b70Safresh1           this will read again the whole archive using read() so all entries
21176fb12b70Safresh1           will be available
21186fb12b70Safresh1
21196fb12b70Safresh1    memory
21206fb12b70Safresh1           If you prefer memory
21216fb12b70Safresh1
21226fb12b70Safresh1	Limitation
21236fb12b70Safresh1
21246fb12b70Safresh1		It won't work for terminal, pipe or sockets or every non seekable source.
21256fb12b70Safresh1
21266fb12b70Safresh1=cut
21276fb12b70Safresh1
2128b39c5158Smillert=head1 FAQ
2129b39c5158Smillert
2130b39c5158Smillert=over 4
2131b39c5158Smillert
2132b39c5158Smillert=item What's the minimum perl version required to run Archive::Tar?
2133b39c5158Smillert
2134b39c5158SmillertYou will need perl version 5.005_03 or newer.
2135b39c5158Smillert
2136b39c5158Smillert=item Isn't Archive::Tar slow?
2137b39c5158Smillert
2138b39c5158SmillertYes it is. It's pure perl, so it's a lot slower then your C</bin/tar>
2139b39c5158SmillertHowever, it's very portable. If speed is an issue, consider using
2140b39c5158SmillertC</bin/tar> instead.
2141b39c5158Smillert
2142b39c5158Smillert=item Isn't Archive::Tar heavier on memory than /bin/tar?
2143b39c5158Smillert
2144b39c5158SmillertYes it is, see previous answer. Since C<Compress::Zlib> and therefore
2145b39c5158SmillertC<IO::Zlib> doesn't support C<seek> on their filehandles, there is little
2146b39c5158Smillertchoice but to read the archive into memory.
2147b39c5158SmillertThis is ok if you want to do in-memory manipulation of the archive.
2148b39c5158Smillert
2149b39c5158SmillertIf you just want to extract, use the C<extract_archive> class method
2150b39c5158Smillertinstead. It will optimize and write to disk immediately.
2151b39c5158Smillert
2152b39c5158SmillertAnother option is to use the C<iter> class method to iterate over
2153b39c5158Smillertthe files in the tarball without reading them all in memory at once.
2154b39c5158Smillert
2155b39c5158Smillert=item Can you lazy-load data instead?
2156b39c5158Smillert
2157b39c5158SmillertIn some cases, yes. You can use the C<iter> class method to iterate
2158b39c5158Smillertover the files in the tarball without reading them all in memory at once.
2159b39c5158Smillert
2160b39c5158Smillert=item How much memory will an X kb tar file need?
2161b39c5158Smillert
2162b39c5158SmillertProbably more than X kb, since it will all be read into memory. If
2163b39c5158Smillertthis is a problem, and you don't need to do in memory manipulation
2164b39c5158Smillertof the archive, consider using the C<iter> class method, or C</bin/tar>
2165b39c5158Smillertinstead.
2166b39c5158Smillert
2167b39c5158Smillert=item What do you do with unsupported filetypes in an archive?
2168b39c5158Smillert
2169b39c5158SmillertC<Unix> has a few filetypes that aren't supported on other platforms,
2170b39c5158Smillertlike C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just
2171b39c5158Smillerttry to make a copy of the original file, rather than throwing an error.
2172b39c5158Smillert
2173b39c5158SmillertThis does require you to read the entire archive in to memory first,
2174b39c5158Smillertsince otherwise we wouldn't know what data to fill the copy with.
2175b39c5158Smillert(This means that you cannot use the class methods, including C<iter>
2176b39c5158Smillerton archives that have incompatible filetypes and still expect things
2177b39c5158Smillertto work).
2178b39c5158Smillert
2179b39c5158SmillertFor other filetypes, like C<chardevs> and C<blockdevs> we'll warn that
2180b39c5158Smillertthe extraction of this particular item didn't work.
2181b39c5158Smillert
2182b39c5158Smillert=item I'm using WinZip, or some other non-POSIX client, and files are not being extracted properly!
2183b39c5158Smillert
2184b39c5158SmillertBy default, C<Archive::Tar> is in a completely POSIX-compatible
2185b39c5158Smillertmode, which uses the POSIX-specification of C<tar> to store files.
2186898184e3SsthenFor paths greater than 100 characters, this is done using the
2187b39c5158SmillertC<POSIX header prefix>. Non-POSIX-compatible clients may not support
2188b39c5158Smillertthis part of the specification, and may only support the C<GNU Extended
2189b39c5158SmillertHeader> functionality. To facilitate those clients, you can set the
2190b39c5158SmillertC<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. See the
2191b39c5158SmillertC<GLOBAL VARIABLES> section for details on this variable.
2192b39c5158Smillert
2193b39c5158SmillertNote that GNU tar earlier than version 1.14 does not cope well with
2194b39c5158Smillertthe C<POSIX header prefix>. If you use such a version, consider setting
2195b39c5158Smillertthe C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>.
2196b39c5158Smillert
2197b39c5158Smillert=item How do I extract only files that have property X from an archive?
2198b39c5158Smillert
2199b39c5158SmillertSometimes, you might not wish to extract a complete archive, just
2200b39c5158Smillertthe files that are relevant to you, based on some criteria.
2201b39c5158Smillert
2202b39c5158SmillertYou can do this by filtering a list of C<Archive::Tar::File> objects
2203b39c5158Smillertbased on your criteria. For example, to extract only files that have
2204b39c5158Smillertthe string C<foo> in their title, you would use:
2205b39c5158Smillert
2206b39c5158Smillert    $tar->extract(
2207b39c5158Smillert        grep { $_->full_path =~ /foo/ } $tar->get_files
2208b39c5158Smillert    );
2209b39c5158Smillert
2210b39c5158SmillertThis way, you can filter on any attribute of the files in the archive.
2211b39c5158SmillertConsult the C<Archive::Tar::File> documentation on how to use these
2212b39c5158Smillertobjects.
2213b39c5158Smillert
2214b39c5158Smillert=item How do I access .tar.Z files?
2215b39c5158Smillert
2216b39c5158SmillertThe C<Archive::Tar> module can optionally use C<Compress::Zlib> (via
2217b39c5158Smillertthe C<IO::Zlib> module) to access tar files that have been compressed
2218b39c5158Smillertwith C<gzip>. Unfortunately tar files compressed with the Unix C<compress>
2219b39c5158Smillertutility cannot be read by C<Compress::Zlib> and so cannot be directly
2220b39c5158Smillertaccesses by C<Archive::Tar>.
2221b39c5158Smillert
2222b39c5158SmillertIf the C<uncompress> or C<gunzip> programs are available, you can use
2223b39c5158Smillertone of these workarounds to read C<.tar.Z> files from C<Archive::Tar>
2224b39c5158Smillert
2225b39c5158SmillertFirstly with C<uncompress>
2226b39c5158Smillert
2227b39c5158Smillert    use Archive::Tar;
2228b39c5158Smillert
2229b39c5158Smillert    open F, "uncompress -c $filename |";
2230b39c5158Smillert    my $tar = Archive::Tar->new(*F);
2231b39c5158Smillert    ...
2232b39c5158Smillert
2233b39c5158Smillertand this with C<gunzip>
2234b39c5158Smillert
2235b39c5158Smillert    use Archive::Tar;
2236b39c5158Smillert
2237b39c5158Smillert    open F, "gunzip -c $filename |";
2238b39c5158Smillert    my $tar = Archive::Tar->new(*F);
2239b39c5158Smillert    ...
2240b39c5158Smillert
2241b39c5158SmillertSimilarly, if the C<compress> program is available, you can use this to
2242b39c5158Smillertwrite a C<.tar.Z> file
2243b39c5158Smillert
2244b39c5158Smillert    use Archive::Tar;
2245b39c5158Smillert    use IO::File;
2246b39c5158Smillert
2247b39c5158Smillert    my $fh = new IO::File "| compress -c >$filename";
2248b39c5158Smillert    my $tar = Archive::Tar->new();
2249b39c5158Smillert    ...
2250b39c5158Smillert    $tar->write($fh);
2251b39c5158Smillert    $fh->close ;
2252b39c5158Smillert
2253b39c5158Smillert=item How do I handle Unicode strings?
2254b39c5158Smillert
2255b39c5158SmillertC<Archive::Tar> uses byte semantics for any files it reads from or writes
2256b39c5158Smillertto disk. This is not a problem if you only deal with files and never
2257b39c5158Smillertlook at their content or work solely with byte strings. But if you use
2258b39c5158SmillertUnicode strings with character semantics, some additional steps need
2259b39c5158Smillertto be taken.
2260b39c5158Smillert
2261b39c5158SmillertFor example, if you add a Unicode string like
2262b39c5158Smillert
2263b39c5158Smillert    # Problem
2264b39c5158Smillert    $tar->add_data('file.txt', "Euro: \x{20AC}");
2265b39c5158Smillert
2266b39c5158Smillertthen there will be a problem later when the tarfile gets written out
2267*9f11ffb7Safresh1to disk via C<< $tar->write() >>:
2268b39c5158Smillert
2269b39c5158Smillert    Wide character in print at .../Archive/Tar.pm line 1014.
2270b39c5158Smillert
2271b39c5158SmillertThe data was added as a Unicode string and when writing it out to disk,
2272b39c5158Smillertthe C<:utf8> line discipline wasn't set by C<Archive::Tar>, so Perl
2273b39c5158Smillerttried to convert the string to ISO-8859 and failed. The written file
2274b39c5158Smillertnow contains garbage.
2275b39c5158Smillert
2276b39c5158SmillertFor this reason, Unicode strings need to be converted to UTF-8-encoded
2277b39c5158Smillertbytestrings before they are handed off to C<add_data()>:
2278b39c5158Smillert
2279b39c5158Smillert    use Encode;
2280b39c5158Smillert    my $data = "Accented character: \x{20AC}";
2281b39c5158Smillert    $data = encode('utf8', $data);
2282b39c5158Smillert
2283b39c5158Smillert    $tar->add_data('file.txt', $data);
2284b39c5158Smillert
2285b39c5158SmillertA opposite problem occurs if you extract a UTF8-encoded file from a
2286b39c5158Smillerttarball. Using C<get_content()> on the C<Archive::Tar::File> object
2287b39c5158Smillertwill return its content as a bytestring, not as a Unicode string.
2288b39c5158Smillert
2289b39c5158SmillertIf you want it to be a Unicode string (because you want character
2290b39c5158Smillertsemantics with operations like regular expression matching), you need
2291b39c5158Smillertto decode the UTF8-encoded content and have Perl convert it into
2292b39c5158Smillerta Unicode string:
2293b39c5158Smillert
2294b39c5158Smillert    use Encode;
2295b39c5158Smillert    my $data = $tar->get_content();
2296b39c5158Smillert
2297b39c5158Smillert    # Make it a Unicode string
2298b39c5158Smillert    $data = decode('utf8', $data);
2299b39c5158Smillert
2300b39c5158SmillertThere is no easy way to provide this functionality in C<Archive::Tar>,
2301b39c5158Smillertbecause a tarball can contain many files, and each of which could be
2302b39c5158Smillertencoded in a different way.
2303b39c5158Smillert
2304b39c5158Smillert=back
2305b39c5158Smillert
2306b39c5158Smillert=head1 CAVEATS
2307b39c5158Smillert
2308b39c5158SmillertThe AIX tar does not fill all unused space in the tar archive with 0x00.
2309b39c5158SmillertThis sometimes leads to warning messages from C<Archive::Tar>.
2310b39c5158Smillert
2311b39c5158Smillert  Invalid header block at offset nnn
2312b39c5158Smillert
2313b39c5158SmillertA fix for that problem is scheduled to be released in the following levels
2314b39c5158Smillertof AIX, all of which should be coming out in the 4th quarter of 2009:
2315b39c5158Smillert
2316b39c5158Smillert AIX 5.3 TL7 SP10
2317b39c5158Smillert AIX 5.3 TL8 SP8
2318b39c5158Smillert AIX 5.3 TL9 SP5
2319b39c5158Smillert AIX 5.3 TL10 SP2
2320b39c5158Smillert
2321b39c5158Smillert AIX 6.1 TL0 SP11
2322b39c5158Smillert AIX 6.1 TL1 SP7
2323b39c5158Smillert AIX 6.1 TL2 SP6
2324b39c5158Smillert AIX 6.1 TL3 SP3
2325b39c5158Smillert
2326b39c5158SmillertThe IBM APAR number for this problem is IZ50240 (Reported component ID:
2327b39c5158Smillert5765G0300 / AIX 5.3). It is possible to get an ifix for that problem.
2328b39c5158SmillertIf you need an ifix please contact your local IBM AIX support.
2329b39c5158Smillert
2330b39c5158Smillert=head1 TODO
2331b39c5158Smillert
2332b39c5158Smillert=over 4
2333b39c5158Smillert
2334b39c5158Smillert=item Check if passed in handles are open for read/write
2335b39c5158Smillert
2336b39c5158SmillertCurrently I don't know of any portable pure perl way to do this.
2337b39c5158SmillertSuggestions welcome.
2338b39c5158Smillert
2339b39c5158Smillert=item Allow archives to be passed in as string
2340b39c5158Smillert
2341b39c5158SmillertCurrently, we only allow opened filehandles or filenames, but
2342b39c5158Smillertnot strings. The internals would need some reworking to facilitate
2343b39c5158Smillertstringified archives.
2344b39c5158Smillert
2345b39c5158Smillert=item Facilitate processing an opened filehandle of a compressed archive
2346b39c5158Smillert
2347b39c5158SmillertCurrently, we only support this if the filehandle is an IO::Zlib object.
2348b39c5158SmillertEnvironments, like apache, will present you with an opened filehandle
2349b39c5158Smillertto an uploaded file, which might be a compressed archive.
2350b39c5158Smillert
2351b39c5158Smillert=back
2352b39c5158Smillert
2353b39c5158Smillert=head1 SEE ALSO
2354b39c5158Smillert
2355b39c5158Smillert=over 4
2356b39c5158Smillert
2357b39c5158Smillert=item The GNU tar specification
2358b39c5158Smillert
2359b39c5158SmillertC<http://www.gnu.org/software/tar/manual/tar.html>
2360b39c5158Smillert
2361898184e3Ssthen=item The PAX format specification
2362b39c5158Smillert
2363898184e3SsthenThe specification which tar derives from; C< http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html>
2364b39c5158Smillert
2365b39c5158Smillert=item A comparison of GNU and POSIX tar standards; C<http://www.delorie.com/gnu/docs/tar/tar_114.html>
2366b39c5158Smillert
2367b39c5158Smillert=item GNU tar intends to switch to POSIX compatibility
2368b39c5158Smillert
2369b39c5158SmillertGNU Tar authors have expressed their intention to become completely
2370b39c5158SmillertPOSIX-compatible; C<http://www.gnu.org/software/tar/manual/html_node/Formats.html>
2371b39c5158Smillert
2372b39c5158Smillert=item A Comparison between various tar implementations
2373b39c5158Smillert
2374b39c5158SmillertLists known issues and incompatibilities; C<http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs>
2375b39c5158Smillert
2376b39c5158Smillert=back
2377b39c5158Smillert
2378b39c5158Smillert=head1 AUTHOR
2379b39c5158Smillert
2380b39c5158SmillertThis module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
2381b39c5158Smillert
2382b39c5158SmillertPlease reports bugs to E<lt>bug-archive-tar@rt.cpan.orgE<gt>.
2383b39c5158Smillert
2384b39c5158Smillert=head1 ACKNOWLEDGEMENTS
2385b39c5158Smillert
2386b39c5158SmillertThanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney, Gisle Aas,
2387b39c5158SmillertRainer Tammer and especially Andrew Savige for their help and suggestions.
2388b39c5158Smillert
2389b39c5158Smillert=head1 COPYRIGHT
2390b39c5158Smillert
2391b39c5158SmillertThis module is copyright (c) 2002 - 2009 Jos Boumans
2392b39c5158SmillertE<lt>kane@cpan.orgE<gt>. All rights reserved.
2393b39c5158Smillert
2394b39c5158SmillertThis library is free software; you may redistribute and/or modify
2395b39c5158Smillertit under the same terms as Perl itself.
2396b39c5158Smillert
2397b39c5158Smillert=cut
2398