1### the gnu tar specification:
2### http://www.gnu.org/software/tar/manual/tar.html
3###
4### and the pax format spec, which tar derives from:
5### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html
6
7package Archive::Tar;
8require 5.005_03;
9
10use Cwd;
11use IO::Zlib;
12use IO::File;
13use Carp                qw(carp croak);
14use File::Spec          ();
15use File::Spec::Unix    ();
16use File::Path          ();
17
18use Archive::Tar::File;
19use Archive::Tar::Constant;
20
21require Exporter;
22
23use strict;
24use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD
25            $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING $SAME_PERMISSIONS
26            $INSECURE_EXTRACT_MODE $ZERO_PAD_NUMBERS @ISA @EXPORT $RESOLVE_SYMLINK
27         ];
28
29@ISA                    = qw[Exporter];
30@EXPORT                 = qw[ COMPRESS_GZIP COMPRESS_BZIP ];
31$DEBUG                  = 0;
32$WARN                   = 1;
33$FOLLOW_SYMLINK         = 0;
34$VERSION                = "2.32";
35$CHOWN                  = 1;
36$CHMOD                  = 1;
37$SAME_PERMISSIONS       = $> == 0 ? 1 : 0;
38$DO_NOT_USE_PREFIX      = 0;
39$INSECURE_EXTRACT_MODE  = 0;
40$ZERO_PAD_NUMBERS       = 0;
41$RESOLVE_SYMLINK        = $ENV{'PERL5_AT_RESOLVE_SYMLINK'} || 'speed';
42
43BEGIN {
44    use Config;
45    $HAS_PERLIO = $Config::Config{useperlio};
46
47    ### try and load IO::String anyway, so you can dynamically
48    ### switch between perlio and IO::String
49    $HAS_IO_STRING = eval {
50        require IO::String;
51        import IO::String;
52        1;
53    } || 0;
54}
55
56=head1 NAME
57
58Archive::Tar - module for manipulations of tar archives
59
60=head1 SYNOPSIS
61
62    use Archive::Tar;
63    my $tar = Archive::Tar->new;
64
65    $tar->read('origin.tgz');
66    $tar->extract();
67
68    $tar->add_files('file/foo.pl', 'docs/README');
69    $tar->add_data('file/baz.txt', 'This is the contents now');
70
71    $tar->rename('oldname', 'new/file/name');
72    $tar->chown('/', 'root');
73    $tar->chown('/', 'root:root');
74    $tar->chmod('/tmp', '1777');
75
76    $tar->write('files.tar');                   # plain tar
77    $tar->write('files.tgz', COMPRESS_GZIP);    # gzip compressed
78    $tar->write('files.tbz', COMPRESS_BZIP);    # bzip2 compressed
79
80=head1 DESCRIPTION
81
82Archive::Tar provides an object oriented mechanism for handling tar
83files.  It provides class methods for quick and easy files handling
84while also allowing for the creation of tar file objects for custom
85manipulation.  If you have the IO::Zlib module installed,
86Archive::Tar will also support compressed or gzipped tar files.
87
88An object of class Archive::Tar represents a .tar(.gz) archive full
89of files and things.
90
91=head1 Object Methods
92
93=head2 Archive::Tar->new( [$file, $compressed] )
94
95Returns a new Tar object. If given any arguments, C<new()> calls the
96C<read()> method automatically, passing on the arguments provided to
97the C<read()> method.
98
99If C<new()> is invoked with arguments and the C<read()> method fails
100for any reason, C<new()> returns undef.
101
102=cut
103
104my $tmpl = {
105    _data   => [ ],
106    _file   => 'Unknown',
107};
108
109### install get/set accessors for this object.
110for my $key ( keys %$tmpl ) {
111    no strict 'refs';
112    *{__PACKAGE__."::$key"} = sub {
113        my $self = shift;
114        $self->{$key} = $_[0] if @_;
115        return $self->{$key};
116    }
117}
118
119sub new {
120    my $class = shift;
121    $class = ref $class if ref $class;
122
123    ### copying $tmpl here since a shallow copy makes it use the
124    ### same aref, causing for files to remain in memory always.
125    my $obj = bless { _data => [ ], _file => 'Unknown', _error => '' }, $class;
126
127    if (@_) {
128        unless ( $obj->read( @_ ) ) {
129            $obj->_error(qq[No data could be read from file]);
130            return;
131        }
132    }
133
134    return $obj;
135}
136
137=head2 $tar->read ( $filename|$handle, [$compressed, {opt => 'val'}] )
138
139Read the given tar file into memory.
140The first argument can either be the name of a file or a reference to
141an already open filehandle (or an IO::Zlib object if it's compressed)
142
143The C<read> will I<replace> any previous content in C<$tar>!
144
145The second argument may be considered optional, but remains for
146backwards compatibility. Archive::Tar now looks at the file
147magic to determine what class should be used to open the file
148and will transparently Do The Right Thing.
149
150Archive::Tar will warn if you try to pass a bzip2 compressed file and the
151IO::Zlib / IO::Uncompress::Bunzip2 modules are not available and simply return.
152
153Note that you can currently B<not> pass a C<gzip> compressed
154filehandle, which is not opened with C<IO::Zlib>, a C<bzip2> compressed
155filehandle, which is not opened with C<IO::Uncompress::Bunzip2>, nor a string
156containing the full archive information (either compressed or
157uncompressed). These are worth while features, but not currently
158implemented. See the C<TODO> section.
159
160The third argument can be a hash reference with options. Note that
161all options are case-sensitive.
162
163=over 4
164
165=item limit
166
167Do not read more than C<limit> files. This is useful if you have
168very big archives, and are only interested in the first few files.
169
170=item filter
171
172Can be set to a regular expression.  Only files with names that match
173the expression will be read.
174
175=item md5
176
177Set to 1 and the md5sum of files will be returned (instead of file data)
178    my $iter = Archive::Tar->iter( $file,  1, {md5 => 1} );
179    while( my $f = $iter->() ) {
180        print $f->data . "\t" . $f->full_path . $/;
181    }
182
183=item extract
184
185If set to true, immediately extract entries when reading them. This
186gives you the same memory break as the C<extract_archive> function.
187Note however that entries will not be read into memory, but written
188straight to disk. This means no C<Archive::Tar::File> objects are
189created for you to inspect.
190
191=back
192
193All files are stored internally as C<Archive::Tar::File> objects.
194Please consult the L<Archive::Tar::File> documentation for details.
195
196Returns the number of files read in scalar context, and a list of
197C<Archive::Tar::File> objects in list context.
198
199=cut
200
201sub read {
202    my $self = shift;
203    my $file = shift;
204    my $gzip = shift || 0;
205    my $opts = shift || {};
206
207    unless( defined $file ) {
208        $self->_error( qq[No file to read from!] );
209        return;
210    } else {
211        $self->_file( $file );
212    }
213
214    my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) )
215                    or return;
216
217    my $data = $self->_read_tar( $handle, $opts ) or return;
218
219    $self->_data( $data );
220
221    return wantarray ? @$data : scalar @$data;
222}
223
224sub _get_handle {
225    my $self     = shift;
226    my $file     = shift;   return unless defined $file;
227    my $compress = shift || 0;
228    my $mode     = shift || READ_ONLY->( ZLIB ); # default to read only
229
230    ### Check if file is a file handle or IO glob
231    if ( ref $file ) {
232	return $file if eval{ *$file{IO} };
233	return $file if eval{ $file->isa(q{IO::Handle}) };
234	$file = q{}.$file;
235    }
236
237    ### get a FH opened to the right class, so we can use it transparently
238    ### throughout the program
239    my $fh;
240    {   ### reading magic only makes sense if we're opening a file for
241        ### reading. otherwise, just use what the user requested.
242        my $magic = '';
243        if( MODE_READ->($mode) ) {
244            open my $tmp, $file or do {
245                $self->_error( qq[Could not open '$file' for reading: $!] );
246                return;
247            };
248
249            ### read the first 4 bites of the file to figure out which class to
250            ### use to open the file.
251            sysread( $tmp, $magic, 4 );
252            close $tmp;
253        }
254
255        ### is it bzip?
256        ### if you asked specifically for bzip compression, or if we're in
257        ### read mode and the magic numbers add up, use bzip
258        if( BZIP and (
259                ($compress eq COMPRESS_BZIP) or
260                ( MODE_READ->($mode) and $magic =~ BZIP_MAGIC_NUM )
261            )
262        ) {
263
264            ### different reader/writer modules, different error vars... sigh
265            if( MODE_READ->($mode) ) {
266                $fh = IO::Uncompress::Bunzip2->new( $file, MultiStream => 1 ) or do {
267                    $self->_error( qq[Could not read '$file': ] .
268                        $IO::Uncompress::Bunzip2::Bunzip2Error
269                    );
270                    return;
271                };
272
273            } else {
274                $fh = IO::Compress::Bzip2->new( $file ) or do {
275                    $self->_error( qq[Could not write to '$file': ] .
276                        $IO::Compress::Bzip2::Bzip2Error
277                    );
278                    return;
279                };
280            }
281
282        ### is it gzip?
283        ### if you asked for compression, if you wanted to read or the gzip
284        ### magic number is present (redundant with read)
285        } elsif( ZLIB and (
286                    $compress or MODE_READ->($mode) or $magic =~ GZIP_MAGIC_NUM
287                 )
288        ) {
289            $fh = IO::Zlib->new;
290
291            unless( $fh->open( $file, $mode ) ) {
292                $self->_error(qq[Could not create filehandle for '$file': $!]);
293                return;
294            }
295
296        ### is it plain tar?
297        } else {
298            $fh = IO::File->new;
299
300            unless( $fh->open( $file, $mode ) ) {
301                $self->_error(qq[Could not create filehandle for '$file': $!]);
302                return;
303            }
304
305            ### enable bin mode on tar archives
306            binmode $fh;
307        }
308    }
309
310    return $fh;
311}
312
313
314sub _read_tar {
315    my $self    = shift;
316    my $handle  = shift or return;
317    my $opts    = shift || {};
318
319    my $count   = $opts->{limit}    || 0;
320    my $filter  = $opts->{filter};
321    my $md5  = $opts->{md5} || 0;	# cdrake
322    my $filter_cb = $opts->{filter_cb};
323    my $extract = $opts->{extract}  || 0;
324
325    ### set a cap on the amount of files to extract ###
326    my $limit   = 0;
327    $limit = 1 if $count > 0;
328
329    my $tarfile = [ ];
330    my $chunk;
331    my $read = 0;
332    my $real_name;  # to set the name of a file when
333                    # we're encountering @longlink
334    my $data;
335
336    LOOP:
337    while( $handle->read( $chunk, HEAD ) ) {
338        ### IO::Zlib doesn't support this yet
339        my $offset;
340        if ( ref($handle) ne 'IO::Zlib' ) {
341            local $@;
342            $offset = eval { tell $handle } || 'unknown';
343            $@ = '';
344        }
345        else {
346            $offset = 'unknown';
347        }
348
349        unless( $read++ ) {
350            my $gzip = GZIP_MAGIC_NUM;
351            if( $chunk =~ /$gzip/ ) {
352                $self->_error( qq[Cannot read compressed format in tar-mode] );
353                return;
354            }
355
356            ### size is < HEAD, which means a corrupted file, as the minimum
357            ### length is _at least_ HEAD
358            if (length $chunk != HEAD) {
359                $self->_error( qq[Cannot read enough bytes from the tarfile] );
360                return;
361            }
362        }
363
364        ### if we can't read in all bytes... ###
365        last if length $chunk != HEAD;
366
367        ### Apparently this should really be two blocks of 512 zeroes,
368        ### but GNU tar sometimes gets it wrong. See comment in the
369        ### source code (tar.c) to GNU cpio.
370        next if $chunk eq TAR_END;
371
372        ### according to the posix spec, the last 12 bytes of the header are
373        ### null bytes, to pad it to a 512 byte block. That means if these
374        ### bytes are NOT null bytes, it's a corrupt header. See:
375        ### www.koders.com/c/fidCE473AD3D9F835D690259D60AD5654591D91D5BA.aspx
376        ### line 111
377        {   my $nulls = join '', "\0" x 12;
378            unless( $nulls eq substr( $chunk, 500, 12 ) ) {
379                $self->_error( qq[Invalid header block at offset $offset] );
380                next LOOP;
381            }
382        }
383
384        ### pass the realname, so we can set it 'proper' right away
385        ### some of the heuristics are done on the name, so important
386        ### to set it ASAP
387        my $entry;
388        {   my %extra_args = ();
389            $extra_args{'name'} = $$real_name if defined $real_name;
390
391            unless( $entry = Archive::Tar::File->new(   chunk => $chunk,
392                                                        %extra_args )
393            ) {
394                $self->_error( qq[Couldn't read chunk at offset $offset] );
395                next LOOP;
396            }
397        }
398
399        ### ignore labels:
400        ### http://www.gnu.org/software/tar/manual/html_chapter/Media.html#SEC159
401        next if $entry->is_label;
402
403        if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) {
404
405            if ( $entry->is_file && !$entry->validate ) {
406                ### sometimes the chunk is rather fux0r3d and a whole 512
407                ### bytes ends up in the ->name area.
408                ### clean it up, if need be
409                my $name = $entry->name;
410                $name = substr($name, 0, 100) if length $name > 100;
411                $name =~ s/\n/ /g;
412
413                $self->_error( $name . qq[: checksum error] );
414                next LOOP;
415            }
416
417            my $block = BLOCK_SIZE->( $entry->size );
418
419            $data = $entry->get_content_by_ref;
420
421	    my $skip = 0;
422	    my $ctx;			# cdrake
423	    ### skip this entry if we're filtering
424
425	    if($md5) {			# cdrake
426	      $ctx = Digest::MD5->new;	# cdrake
427	        $skip=5;		# cdrake
428
429	    } elsif ($filter && $entry->name !~ $filter) {
430		$skip = 1;
431
432	    } elsif ($filter_cb && ! $filter_cb->($entry)) {
433		$skip = 2;
434
435	    ### skip this entry if it's a pax header. This is a special file added
436	    ### by, among others, git-generated tarballs. It holds comments and is
437	    ### not meant for extracting. See #38932: pax_global_header extracted
438	    } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) {
439		$skip = 3;
440	    }
441
442	    if ($skip) {
443		#
444		# Since we're skipping, do not allocate memory for the
445		# whole file.  Read it 64 BLOCKS at a time.  Do not
446		# complete the skip yet because maybe what we read is a
447		# longlink and it won't get skipped after all
448		#
449		my $amt = $block;
450		my $fsz=$entry->size;	# cdrake
451		while ($amt > 0) {
452		    $$data = '';
453		    my $this = 64 * BLOCK;
454		    $this = $amt if $this > $amt;
455		    if( $handle->read( $$data, $this ) < $this ) {
456			$self->_error( qq[Read error on tarfile (missing data) '].
457					    $entry->full_path ."' at offset $offset" );
458			next LOOP;
459		    }
460		    $amt -= $this;
461		    $fsz -= $this;	# cdrake
462		substr ($$data, $fsz) = "" if ($fsz<0);	# remove external junk prior to md5	# cdrake
463		$ctx->add($$data) if($skip==5);	# cdrake
464		}
465		$$data = $ctx->hexdigest if($skip==5 && !$entry->is_longlink && !$entry->is_unknown && !$entry->is_label ) ;	# cdrake
466            } else {
467
468		### just read everything into memory
469		### can't do lazy loading since IO::Zlib doesn't support 'seek'
470		### this is because Compress::Zlib doesn't support it =/
471		### this reads in the whole data in one read() call.
472		if ( $handle->read( $$data, $block ) < $block ) {
473		    $self->_error( qq[Read error on tarfile (missing data) '].
474                                    $entry->full_path ."' at offset $offset" );
475		    next LOOP;
476		}
477		### throw away trailing garbage ###
478		substr ($$data, $entry->size) = "" if defined $$data;
479            }
480
481            ### part II of the @LongLink munging -- need to do /after/
482            ### the checksum check.
483            if( $entry->is_longlink ) {
484                ### weird thing in tarfiles -- if the file is actually a
485                ### @LongLink, the data part seems to have a trailing ^@
486                ### (unprintable) char. to display, pipe output through less.
487                ### but that doesn't *always* happen.. so check if the last
488                ### character is a control character, and if so remove it
489                ### at any rate, we better remove that character here, or tests
490                ### like 'eq' and hash lookups based on names will SO not work
491                ### remove it by calculating the proper size, and then
492                ### tossing out everything that's longer than that size.
493
494                ### count number of nulls
495                my $nulls = $$data =~ tr/\0/\0/;
496
497                ### cut data + size by that many bytes
498                $entry->size( $entry->size - $nulls );
499                substr ($$data, $entry->size) = "";
500            }
501        }
502
503        ### clean up of the entries.. posix tar /apparently/ has some
504        ### weird 'feature' that allows for filenames > 255 characters
505        ### they'll put a header in with as name '././@LongLink' and the
506        ### contents will be the name of the /next/ file in the archive
507        ### pretty crappy and kludgy if you ask me
508
509        ### set the name for the next entry if this is a @LongLink;
510        ### this is one ugly hack =/ but needed for direct extraction
511        if( $entry->is_longlink ) {
512            $real_name = $data;
513            next LOOP;
514        } elsif ( defined $real_name ) {
515            $entry->name( $$real_name );
516            $entry->prefix('');
517            undef $real_name;
518        }
519
520	if ($filter && $entry->name !~ $filter) {
521	    next LOOP;
522
523	} elsif ($filter_cb && ! $filter_cb->($entry)) {
524	    next LOOP;
525
526	### skip this entry if it's a pax header. This is a special file added
527	### by, among others, git-generated tarballs. It holds comments and is
528	### not meant for extracting. See #38932: pax_global_header extracted
529	} elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) {
530	    next LOOP;
531	}
532
533        if ( $extract && !$entry->is_longlink
534                      && !$entry->is_unknown
535                      && !$entry->is_label ) {
536            $self->_extract_file( $entry ) or return;
537        }
538
539        ### Guard against tarfiles with garbage at the end
540	    last LOOP if $entry->name eq '';
541
542        ### push only the name on the rv if we're extracting
543        ### -- for extract_archive
544        push @$tarfile, ($extract ? $entry->name : $entry);
545
546        if( $limit ) {
547            $count-- unless $entry->is_longlink || $entry->is_dir;
548            last LOOP unless $count;
549        }
550    } continue {
551        undef $data;
552    }
553
554    return $tarfile;
555}
556
557=head2 $tar->contains_file( $filename )
558
559Check if the archive contains a certain file.
560It will return true if the file is in the archive, false otherwise.
561
562Note however, that this function does an exact match using C<eq>
563on the full path. So it cannot compensate for case-insensitive file-
564systems or compare 2 paths to see if they would point to the same
565underlying file.
566
567=cut
568
569sub contains_file {
570    my $self = shift;
571    my $full = shift;
572
573    return unless defined $full;
574
575    ### don't warn if the entry isn't there.. that's what this function
576    ### is for after all.
577    local $WARN = 0;
578    return 1 if $self->_find_entry($full);
579    return;
580}
581
582=head2 $tar->extract( [@filenames] )
583
584Write files whose names are equivalent to any of the names in
585C<@filenames> to disk, creating subdirectories as necessary. This
586might not work too well under VMS.
587Under MacPerl, the file's modification time will be converted to the
588MacOS zero of time, and appropriate conversions will be done to the
589path.  However, the length of each element of the path is not
590inspected to see whether it's longer than MacOS currently allows (32
591characters).
592
593If C<extract> is called without a list of file names, the entire
594contents of the archive are extracted.
595
596Returns a list of filenames extracted.
597
598=cut
599
600sub extract {
601    my $self    = shift;
602    my @args    = @_;
603    my @files;
604    my $hashmap;
605
606    # use the speed optimization for all extracted files
607    local($self->{cwd}) = cwd() unless $self->{cwd};
608
609    ### you requested the extraction of only certain files
610    if( @args ) {
611        for my $file ( @args ) {
612
613            ### it's already an object?
614            if( UNIVERSAL::isa( $file, 'Archive::Tar::File' ) ) {
615                push @files, $file;
616                next;
617
618            ### go find it then
619            } else {
620
621                # create hash-map once to speed up lookup
622                $hashmap = $hashmap || {
623                    map { $_->full_path, $_ } @{$self->_data}
624                };
625
626                if (exists $hashmap->{$file}) {
627                    ### we found the file you're looking for
628                    push @files, $hashmap->{$file};
629                } else {
630                    return $self->_error(
631                        qq[Could not find '$file' in archive] );
632                }
633            }
634        }
635
636    ### just grab all the file items
637    } else {
638        @files = $self->get_files;
639    }
640
641    ### nothing found? that's an error
642    unless( scalar @files ) {
643        $self->_error( qq[No files found for ] . $self->_file );
644        return;
645    }
646
647    ### now extract them
648    for my $entry ( @files ) {
649        unless( $self->_extract_file( $entry ) ) {
650            $self->_error(q[Could not extract ']. $entry->full_path .q['] );
651            return;
652        }
653    }
654
655    return @files;
656}
657
658=head2 $tar->extract_file( $file, [$extract_path] )
659
660Write an entry, whose name is equivalent to the file name provided to
661disk. Optionally takes a second parameter, which is the full native
662path (including filename) the entry will be written to.
663
664For example:
665
666    $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' );
667
668    $tar->extract_file( $at_file_object,   'name/i/want/to/give/it' );
669
670Returns true on success, false on failure.
671
672=cut
673
674sub extract_file {
675    my $self = shift;
676    my $file = shift;   return unless defined $file;
677    my $alt  = shift;
678
679    my $entry = $self->_find_entry( $file )
680        or $self->_error( qq[Could not find an entry for '$file'] ), return;
681
682    return $self->_extract_file( $entry, $alt );
683}
684
685sub _extract_file {
686    my $self    = shift;
687    my $entry   = shift or return;
688    my $alt     = shift;
689
690    ### you wanted an alternate extraction location ###
691    my $name = defined $alt ? $alt : $entry->full_path;
692
693                            ### splitpath takes a bool at the end to indicate
694                            ### that it's splitting a dir
695    my ($vol,$dirs,$file);
696    if ( defined $alt ) { # It's a local-OS path
697        ($vol,$dirs,$file) = File::Spec->splitpath(       $alt,
698                                                          $entry->is_dir );
699    } else {
700        ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name,
701                                                          $entry->is_dir );
702    }
703
704    my $dir;
705    ### is $name an absolute path? ###
706    if( $vol || File::Spec->file_name_is_absolute( $dirs ) ) {
707
708        ### absolute names are not allowed to be in tarballs under
709        ### strict mode, so only allow it if a user tells us to do it
710        if( not defined $alt and not $INSECURE_EXTRACT_MODE ) {
711            $self->_error(
712                q[Entry ']. $entry->full_path .q[' is an absolute path. ].
713                q[Not extracting absolute paths under SECURE EXTRACT MODE]
714            );
715            return;
716        }
717
718        ### user asked us to, it's fine.
719        $dir = File::Spec->catpath( $vol, $dirs, "" );
720
721    ### it's a relative path ###
722    } else {
723        my $cwd     = (ref $self and defined $self->{cwd})
724                        ? $self->{cwd}
725                        : cwd();
726
727        my @dirs = defined $alt
728            ? File::Spec->splitdir( $dirs )         # It's a local-OS path
729            : File::Spec::Unix->splitdir( $dirs );  # it's UNIX-style, likely
730                                                    # straight from the tarball
731
732        if( not defined $alt            and
733            not $INSECURE_EXTRACT_MODE
734        ) {
735
736            ### paths that leave the current directory are not allowed under
737            ### strict mode, so only allow it if a user tells us to do this.
738            if( grep { $_ eq '..' } @dirs ) {
739
740                $self->_error(
741                    q[Entry ']. $entry->full_path .q[' is attempting to leave ].
742                    q[the current working directory. Not extracting under ].
743                    q[SECURE EXTRACT MODE]
744                );
745                return;
746            }
747
748            ### the archive may be asking us to extract into a symlink. This
749            ### is not sane and a possible security issue, as outlined here:
750            ### https://rt.cpan.org/Ticket/Display.html?id=30380
751            ### https://bugzilla.redhat.com/show_bug.cgi?id=295021
752            ### https://issues.rpath.com/browse/RPL-1716
753            my $full_path = $cwd;
754            for my $d ( @dirs ) {
755                $full_path = File::Spec->catdir( $full_path, $d );
756
757                ### we've already checked this one, and it's safe. Move on.
758                next if ref $self and $self->{_link_cache}->{$full_path};
759
760                if( -l $full_path ) {
761                    my $to   = readlink $full_path;
762                    my $diag = "symlinked directory ($full_path => $to)";
763
764                    $self->_error(
765                        q[Entry ']. $entry->full_path .q[' is attempting to ].
766                        qq[extract to a $diag. This is considered a security ].
767                        q[vulnerability and not allowed under SECURE EXTRACT ].
768                        q[MODE]
769                    );
770                    return;
771                }
772
773                ### XXX keep a cache if possible, so the stats become cheaper:
774                $self->{_link_cache}->{$full_path} = 1 if ref $self;
775            }
776        }
777
778        ### '.' is the directory delimiter on VMS, which has to be escaped
779        ### or changed to '_' on vms.  vmsify is used, because older versions
780        ### of vmspath do not handle this properly.
781        ### Must not add a '/' to an empty directory though.
782        map { length() ? VMS::Filespec::vmsify($_.'/') : $_ } @dirs if ON_VMS;
783
784        my ($cwd_vol,$cwd_dir,$cwd_file)
785                    = File::Spec->splitpath( $cwd );
786        my @cwd     = File::Spec->splitdir( $cwd_dir );
787        push @cwd, $cwd_file if length $cwd_file;
788
789        ### We need to pass '' as the last element to catpath. Craig Berry
790        ### explains why (msgid <p0624083dc311ae541393@[172.16.52.1]>):
791        ### The root problem is that splitpath on UNIX always returns the
792        ### final path element as a file even if it is a directory, and of
793        ### course there is no way it can know the difference without checking
794        ### against the filesystem, which it is documented as not doing.  When
795        ### you turn around and call catpath, on VMS you have to know which bits
796        ### are directory bits and which bits are file bits.  In this case we
797        ### know the result should be a directory.  I had thought you could omit
798        ### the file argument to catpath in such a case, but apparently on UNIX
799        ### you can't.
800        $dir        = File::Spec->catpath(
801                            $cwd_vol, File::Spec->catdir( @cwd, @dirs ), ''
802                        );
803
804        ### catdir() returns undef if the path is longer than 255 chars on
805        ### older VMS systems.
806        unless ( defined $dir ) {
807            $^W && $self->_error( qq[Could not compose a path for '$dirs'\n] );
808            return;
809        }
810
811    }
812
813    if( -e $dir && !-d _ ) {
814        $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] );
815        return;
816    }
817
818    unless ( -d _ ) {
819        eval { File::Path::mkpath( $dir, 0, 0777 ) };
820        if( $@ ) {
821            my $fp = $entry->full_path;
822            $self->_error(qq[Could not create directory '$dir' for '$fp': $@]);
823            return;
824        }
825
826        ### XXX chown here? that might not be the same as in the archive
827        ### as we're only chown'ing to the owner of the file we're extracting
828        ### not to the owner of the directory itself, which may or may not
829        ### be another entry in the archive
830        ### Answer: no, gnu tar doesn't do it either, it'd be the wrong
831        ### way to go.
832        #if( $CHOWN && CAN_CHOWN ) {
833        #    chown $entry->uid, $entry->gid, $dir or
834        #        $self->_error( qq[Could not set uid/gid on '$dir'] );
835        #}
836    }
837
838    ### we're done if we just needed to create a dir ###
839    return 1 if $entry->is_dir;
840
841    my $full = File::Spec->catfile( $dir, $file );
842
843    if( $entry->is_unknown ) {
844        $self->_error( qq[Unknown file type for file '$full'] );
845        return;
846    }
847
848    ### If a file system already contains a block device with the same name as
849    ### the being extracted regular file, we would write the file's content
850    ### to the block device. So remove the existing file (block device) now.
851    ### If an archive contains multiple same-named entries, the last one
852    ### should replace the previous ones. So remove the old file now.
853    ### If the old entry is a symlink to a file outside of the CWD, the new
854    ### entry would create a file there. This is CVE-2018-12015
855    ### <https://rt.cpan.org/Ticket/Display.html?id=125523>.
856    if (-l $full || -e _) {
857	if (!unlink $full) {
858	    $self->_error( qq[Could not remove old file '$full': $!] );
859	    return;
860	}
861    }
862    if( length $entry->type && $entry->is_file ) {
863        my $fh = IO::File->new;
864        $fh->open( $full, '>' ) or (
865            $self->_error( qq[Could not open file '$full': $!] ),
866            return
867        );
868
869        if( $entry->size ) {
870            binmode $fh;
871            syswrite $fh, $entry->data or (
872                $self->_error( qq[Could not write data to '$full'] ),
873                return
874            );
875        }
876
877        close $fh or (
878            $self->_error( qq[Could not close file '$full'] ),
879            return
880        );
881
882    } else {
883        $self->_make_special_file( $entry, $full ) or return;
884    }
885
886    ### only update the timestamp if it's not a symlink; that will change the
887    ### timestamp of the original. This addresses bug #33669: Could not update
888    ### timestamp warning on symlinks
889    if( not -l $full ) {
890        utime time, $entry->mtime - TIME_OFFSET, $full or
891            $self->_error( qq[Could not update timestamp] );
892    }
893
894    if( $CHOWN && CAN_CHOWN->() and not -l $full ) {
895        chown $entry->uid, $entry->gid, $full or
896            $self->_error( qq[Could not set uid/gid on '$full'] );
897    }
898
899    ### only chmod if we're allowed to, but never chmod symlinks, since they'll
900    ### change the perms on the file they're linking too...
901    if( $CHMOD and not -l $full ) {
902        my $mode = $entry->mode;
903        unless ($SAME_PERMISSIONS) {
904            $mode &= ~(oct(7000) | umask);
905        }
906        chmod $mode, $full or
907            $self->_error( qq[Could not chown '$full' to ] . $entry->mode );
908    }
909
910    return 1;
911}
912
913sub _make_special_file {
914    my $self    = shift;
915    my $entry   = shift     or return;
916    my $file    = shift;    return unless defined $file;
917
918    my $err;
919
920    if( $entry->is_symlink ) {
921        my $fail;
922        if( ON_UNIX ) {
923            symlink( $entry->linkname, $file ) or $fail++;
924
925        } else {
926            $self->_extract_special_file_as_plain_file( $entry, $file )
927                or $fail++;
928        }
929
930        $err =  qq[Making symbolic link '$file' to '] .
931                $entry->linkname .q[' failed] if $fail;
932
933    } elsif ( $entry->is_hardlink ) {
934        my $fail;
935        if( ON_UNIX ) {
936            link( $entry->linkname, $file ) or $fail++;
937
938        } else {
939            $self->_extract_special_file_as_plain_file( $entry, $file )
940                or $fail++;
941        }
942
943        $err =  qq[Making hard link from '] . $entry->linkname .
944                qq[' to '$file' failed] if $fail;
945
946    } elsif ( $entry->is_fifo ) {
947        ON_UNIX && !system('mknod', $file, 'p') or
948            $err = qq[Making fifo ']. $entry->name .qq[' failed];
949
950    } elsif ( $entry->is_blockdev or $entry->is_chardev ) {
951        my $mode = $entry->is_blockdev ? 'b' : 'c';
952
953        ON_UNIX && !system('mknod', $file, $mode,
954                            $entry->devmajor, $entry->devminor) or
955            $err =  qq[Making block device ']. $entry->name .qq[' (maj=] .
956                    $entry->devmajor . qq[ min=] . $entry->devminor .
957                    qq[) failed.];
958
959    } elsif ( $entry->is_socket ) {
960        ### the original doesn't do anything special for sockets.... ###
961        1;
962    }
963
964    return $err ? $self->_error( $err ) : 1;
965}
966
967### don't know how to make symlinks, let's just extract the file as
968### a plain file
969sub _extract_special_file_as_plain_file {
970    my $self    = shift;
971    my $entry   = shift     or return;
972    my $file    = shift;    return unless defined $file;
973
974    my $err;
975    TRY: {
976        my $orig = $self->_find_entry( $entry->linkname, $entry );
977
978        unless( $orig ) {
979            $err =  qq[Could not find file '] . $entry->linkname .
980                    qq[' in memory.];
981            last TRY;
982        }
983
984        ### clone the entry, make it appear as a normal file ###
985        my $clone = $orig->clone;
986        $clone->_downgrade_to_plainfile;
987        $self->_extract_file( $clone, $file ) or last TRY;
988
989        return 1;
990    }
991
992    return $self->_error($err);
993}
994
995=head2 $tar->list_files( [\@properties] )
996
997Returns a list of the names of all the files in the archive.
998
999If C<list_files()> is passed an array reference as its first argument
1000it returns a list of hash references containing the requested
1001properties of each file.  The following list of properties is
1002supported: name, size, mtime (last modified date), mode, uid, gid,
1003linkname, uname, gname, devmajor, devminor, prefix.
1004
1005Passing an array reference containing only one element, 'name', is
1006special cased to return a list of names rather than a list of hash
1007references, making it equivalent to calling C<list_files> without
1008arguments.
1009
1010=cut
1011
1012sub list_files {
1013    my $self = shift;
1014    my $aref = shift || [ ];
1015
1016    unless( $self->_data ) {
1017        $self->read() or return;
1018    }
1019
1020    if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) {
1021        return map { $_->full_path } @{$self->_data};
1022    } else {
1023
1024        #my @rv;
1025        #for my $obj ( @{$self->_data} ) {
1026        #    push @rv, { map { $_ => $obj->$_() } @$aref };
1027        #}
1028        #return @rv;
1029
1030        ### this does the same as the above.. just needs a +{ }
1031        ### to make sure perl doesn't confuse it for a block
1032        return map {    my $o=$_;
1033                        +{ map { $_ => $o->$_() } @$aref }
1034                    } @{$self->_data};
1035    }
1036}
1037
1038sub _find_entry {
1039    my $self = shift;
1040    my $file = shift;
1041
1042    unless( defined $file ) {
1043        $self->_error( qq[No file specified] );
1044        return;
1045    }
1046
1047    ### it's an object already
1048    return $file if UNIVERSAL::isa( $file, 'Archive::Tar::File' );
1049
1050seach_entry:
1051		if($self->_data){
1052			for my $entry ( @{$self->_data} ) {
1053					my $path = $entry->full_path;
1054					return $entry if $path eq $file;
1055			}
1056		}
1057
1058		if($Archive::Tar::RESOLVE_SYMLINK!~/none/){
1059			if(my $link_entry = shift()){#fallback mode when symlinks are using relative notations ( ../a/./b/text.bin )
1060				$file = _symlinks_resolver( $link_entry->name, $file );
1061				goto seach_entry if $self->_data;
1062
1063				#this will be slower than never, but won't failed!
1064
1065				my $iterargs = $link_entry->{'_archive'};
1066				if($Archive::Tar::RESOLVE_SYMLINK=~/speed/ && @$iterargs==3){
1067				#faster	but whole archive will be read in memory
1068					#read whole archive and share data
1069					my $archive = Archive::Tar->new;
1070					$archive->read( @$iterargs );
1071					push @$iterargs, $archive; #take a trace for destruction
1072					if($archive->_data){
1073						$self->_data( $archive->_data );
1074						goto seach_entry;
1075					}
1076				}#faster
1077
1078				{#slower but lower memory usage
1079					# $iterargs = [$filename, $compressed, $opts];
1080					my $next = Archive::Tar->iter( @$iterargs );
1081					while(my $e = $next->()){
1082						if($e->full_path eq $file){
1083							undef $next;
1084							return $e;
1085						}
1086					}
1087				}#slower
1088			}
1089		}
1090
1091    $self->_error( qq[No such file in archive: '$file'] );
1092    return;
1093}
1094
1095=head2 $tar->get_files( [@filenames] )
1096
1097Returns the C<Archive::Tar::File> objects matching the filenames
1098provided. If no filename list was passed, all C<Archive::Tar::File>
1099objects in the current Tar object are returned.
1100
1101Please refer to the C<Archive::Tar::File> documentation on how to
1102handle these objects.
1103
1104=cut
1105
1106sub get_files {
1107    my $self = shift;
1108
1109    return @{ $self->_data } unless @_;
1110
1111    my @list;
1112    for my $file ( @_ ) {
1113        push @list, grep { defined } $self->_find_entry( $file );
1114    }
1115
1116    return @list;
1117}
1118
1119=head2 $tar->get_content( $file )
1120
1121Return the content of the named file.
1122
1123=cut
1124
1125sub get_content {
1126    my $self = shift;
1127    my $entry = $self->_find_entry( shift ) or return;
1128
1129    return $entry->data;
1130}
1131
1132=head2 $tar->replace_content( $file, $content )
1133
1134Make the string $content be the content for the file named $file.
1135
1136=cut
1137
1138sub replace_content {
1139    my $self = shift;
1140    my $entry = $self->_find_entry( shift ) or return;
1141
1142    return $entry->replace_content( shift );
1143}
1144
1145=head2 $tar->rename( $file, $new_name )
1146
1147Rename the file of the in-memory archive to $new_name.
1148
1149Note that you must specify a Unix path for $new_name, since per tar
1150standard, all files in the archive must be Unix paths.
1151
1152Returns true on success and false on failure.
1153
1154=cut
1155
1156sub rename {
1157    my $self = shift;
1158    my $file = shift; return unless defined $file;
1159    my $new  = shift; return unless defined $new;
1160
1161    my $entry = $self->_find_entry( $file ) or return;
1162
1163    return $entry->rename( $new );
1164}
1165
1166=head2 $tar->chmod( $file, $mode )
1167
1168Change mode of $file to $mode.
1169
1170Returns true on success and false on failure.
1171
1172=cut
1173
1174sub chmod {
1175    my $self = shift;
1176    my $file = shift; return unless defined $file;
1177    my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/;
1178    my @args = ("$mode");
1179
1180    my $entry = $self->_find_entry( $file ) or return;
1181    my $x = $entry->chmod( @args );
1182    return $x;
1183}
1184
1185=head2 $tar->chown( $file, $uname [, $gname] )
1186
1187Change owner $file to $uname and $gname.
1188
1189Returns true on success and false on failure.
1190
1191=cut
1192
1193sub chown {
1194    my $self = shift;
1195    my $file = shift; return unless defined $file;
1196    my $uname  = shift; return unless defined $uname;
1197    my @args   = ($uname);
1198    push(@args, shift);
1199
1200    my $entry = $self->_find_entry( $file ) or return;
1201    my $x = $entry->chown( @args );
1202    return $x;
1203}
1204
1205=head2 $tar->remove (@filenamelist)
1206
1207Removes any entries with names matching any of the given filenames
1208from the in-memory archive. Returns a list of C<Archive::Tar::File>
1209objects that remain.
1210
1211=cut
1212
1213sub remove {
1214    my $self = shift;
1215    my @list = @_;
1216
1217    my %seen = map { $_->full_path => $_ } @{$self->_data};
1218    delete $seen{ $_ } for @list;
1219
1220    $self->_data( [values %seen] );
1221
1222    return values %seen;
1223}
1224
1225=head2 $tar->clear
1226
1227C<clear> clears the current in-memory archive. This effectively gives
1228you a 'blank' object, ready to be filled again. Note that C<clear>
1229only has effect on the object, not the underlying tarfile.
1230
1231=cut
1232
1233sub clear {
1234    my $self = shift or return;
1235
1236    $self->_data( [] );
1237    $self->_file( '' );
1238
1239    return 1;
1240}
1241
1242
1243=head2 $tar->write ( [$file, $compressed, $prefix] )
1244
1245Write the in-memory archive to disk.  The first argument can either
1246be the name of a file or a reference to an already open filehandle (a
1247GLOB reference).
1248
1249The second argument is used to indicate compression. You can either
1250compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed
1251to be the C<gzip> compression level (between 1 and 9), but the use of
1252constants is preferred:
1253
1254  # write a gzip compressed file
1255  $tar->write( 'out.tgz', COMPRESS_GZIP );
1256
1257  # write a bzip compressed file
1258  $tar->write( 'out.tbz', COMPRESS_BZIP );
1259
1260Note that when you pass in a filehandle, the compression argument
1261is ignored, as all files are printed verbatim to your filehandle.
1262If you wish to enable compression with filehandles, use an
1263C<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead.
1264
1265The third argument is an optional prefix. All files will be tucked
1266away in the directory you specify as prefix. So if you have files
1267'a' and 'b' in your archive, and you specify 'foo' as prefix, they
1268will be written to the archive as 'foo/a' and 'foo/b'.
1269
1270If no arguments are given, C<write> returns the entire formatted
1271archive as a string, which could be useful if you'd like to stuff the
1272archive into a socket or a pipe to gzip or something.
1273
1274
1275=cut
1276
1277sub write {
1278    my $self        = shift;
1279    my $file        = shift; $file = '' unless defined $file;
1280    my $gzip        = shift || 0;
1281    my $ext_prefix  = shift; $ext_prefix = '' unless defined $ext_prefix;
1282    my $dummy       = '';
1283
1284    ### only need a handle if we have a file to print to ###
1285    my $handle = length($file)
1286                    ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) )
1287                        or return )
1288                    : $HAS_PERLIO    ? do { open my $h, '>', \$dummy; $h }
1289                    : $HAS_IO_STRING ? IO::String->new
1290                    : __PACKAGE__->no_string_support();
1291
1292    ### Addresses: #41798: Nonempty $\ when writing a TAR file produces a
1293    ### corrupt TAR file. Must clear out $\ to make sure no garbage is
1294    ### printed to the archive
1295    local $\;
1296
1297    for my $entry ( @{$self->_data} ) {
1298        ### entries to be written to the tarfile ###
1299        my @write_me;
1300
1301        ### only now will we change the object to reflect the current state
1302        ### of the name and prefix fields -- this needs to be limited to
1303        ### write() only!
1304        my $clone = $entry->clone;
1305
1306
1307        ### so, if you don't want use to use the prefix, we'll stuff
1308        ### everything in the name field instead
1309        if( $DO_NOT_USE_PREFIX ) {
1310
1311            ### you might have an extended prefix, if so, set it in the clone
1312            ### XXX is ::Unix right?
1313            $clone->name( length $ext_prefix
1314                            ? File::Spec::Unix->catdir( $ext_prefix,
1315                                                        $clone->full_path)
1316                            : $clone->full_path );
1317            $clone->prefix( '' );
1318
1319        ### otherwise, we'll have to set it properly -- prefix part in the
1320        ### prefix and name part in the name field.
1321        } else {
1322
1323            ### split them here, not before!
1324            my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path );
1325
1326            ### you might have an extended prefix, if so, set it in the clone
1327            ### XXX is ::Unix right?
1328            $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix )
1329                if length $ext_prefix;
1330
1331            $clone->prefix( $prefix );
1332            $clone->name( $name );
1333        }
1334
1335        ### names are too long, and will get truncated if we don't add a
1336        ### '@LongLink' file...
1337        my $make_longlink = (   length($clone->name)    > NAME_LENGTH or
1338                                length($clone->prefix)  > PREFIX_LENGTH
1339                            ) || 0;
1340
1341        ### perhaps we need to make a longlink file?
1342        if( $make_longlink ) {
1343            my $longlink = Archive::Tar::File->new(
1344                            data => LONGLINK_NAME,
1345                            $clone->full_path,
1346                            { type => LONGLINK }
1347                        );
1348
1349            unless( $longlink ) {
1350                $self->_error(  qq[Could not create 'LongLink' entry for ] .
1351                                qq[oversize file '] . $clone->full_path ."'" );
1352                return;
1353            };
1354
1355            push @write_me, $longlink;
1356        }
1357
1358        push @write_me, $clone;
1359
1360        ### write the one, optionally 2 a::t::file objects to the handle
1361        for my $clone (@write_me) {
1362
1363            ### if the file is a symlink, there are 2 options:
1364            ### either we leave the symlink intact, but then we don't write any
1365            ### data OR we follow the symlink, which means we actually make a
1366            ### copy. if we do the latter, we have to change the TYPE of the
1367            ### clone to 'FILE'
1368            my $link_ok =  $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK;
1369            my $data_ok = !$clone->is_symlink && $clone->has_content;
1370
1371            ### downgrade to a 'normal' file if it's a symlink we're going to
1372            ### treat as a regular file
1373            $clone->_downgrade_to_plainfile if $link_ok;
1374
1375            ### get the header for this block
1376            my $header = $self->_format_tar_entry( $clone );
1377            unless( $header ) {
1378                $self->_error(q[Could not format header for: ] .
1379                                    $clone->full_path );
1380                return;
1381            }
1382
1383            unless( print $handle $header ) {
1384                $self->_error(q[Could not write header for: ] .
1385                                    $clone->full_path);
1386                return;
1387            }
1388
1389            if( $link_ok or $data_ok ) {
1390                unless( print $handle $clone->data ) {
1391                    $self->_error(q[Could not write data for: ] .
1392                                    $clone->full_path);
1393                    return;
1394                }
1395
1396                ### pad the end of the clone if required ###
1397                print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK
1398            }
1399
1400        } ### done writing these entries
1401    }
1402
1403    ### write the end markers ###
1404    print $handle TAR_END x 2 or
1405            return $self->_error( qq[Could not write tar end markers] );
1406
1407    ### did you want it written to a file, or returned as a string? ###
1408    my $rv =  length($file) ? 1
1409                        : $HAS_PERLIO ? $dummy
1410                        : do { seek $handle, 0, 0; local $/; <$handle> };
1411
1412    ### make sure to close the handle if we created it
1413    if ( $file ne $handle ) {
1414	unless( close $handle ) {
1415	    $self->_error( qq[Could not write tar] );
1416	    return;
1417	}
1418    }
1419
1420    return $rv;
1421}
1422
1423sub _format_tar_entry {
1424    my $self        = shift;
1425    my $entry       = shift or return;
1426    my $ext_prefix  = shift; $ext_prefix = '' unless defined $ext_prefix;
1427    my $no_prefix   = shift || 0;
1428
1429    my $file    = $entry->name;
1430    my $prefix  = $entry->prefix; $prefix = '' unless defined $prefix;
1431
1432    ### remove the prefix from the file name
1433    ### not sure if this is still needed --kane
1434    ### no it's not -- Archive::Tar::File->_new_from_file will take care of
1435    ### this for us. Even worse, this would break if we tried to add a file
1436    ### like x/x.
1437    #if( length $prefix ) {
1438    #    $file =~ s/^$match//;
1439    #}
1440
1441    $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix)
1442                if length $ext_prefix;
1443
1444    ### not sure why this is... ###
1445    my $l = PREFIX_LENGTH; # is ambiguous otherwise...
1446    substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH;
1447
1448    my $f1 = "%06o"; my $f2  = $ZERO_PAD_NUMBERS ? "%011o" : "%11o";
1449
1450    ### this might be optimizable with a 'changed' flag in the file objects ###
1451    my $tar = pack (
1452                PACK,
1453                $file,
1454
1455                (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]),
1456                (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]),
1457
1458                "",  # checksum field - space padded a bit down
1459
1460                (map { $entry->$_() }                 qw[type linkname magic]),
1461
1462                $entry->version || TAR_VERSION,
1463
1464                (map { $entry->$_() }                 qw[uname gname]),
1465                (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]),
1466
1467                ($no_prefix ? '' : $prefix)
1468    );
1469
1470    ### add the checksum ###
1471    my $checksum_fmt = $ZERO_PAD_NUMBERS ? "%06o\0" : "%06o\0";
1472    substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar));
1473
1474    return $tar;
1475}
1476
1477=head2 $tar->add_files( @filenamelist )
1478
1479Takes a list of filenames and adds them to the in-memory archive.
1480
1481The path to the file is automatically converted to a Unix like
1482equivalent for use in the archive, and, if on MacOS, the file's
1483modification time is converted from the MacOS epoch to the Unix epoch.
1484So tar archives created on MacOS with B<Archive::Tar> can be read
1485both with I<tar> on Unix and applications like I<suntar> or
1486I<Stuffit Expander> on MacOS.
1487
1488Be aware that the file's type/creator and resource fork will be lost,
1489which is usually what you want in cross-platform archives.
1490
1491Instead of a filename, you can also pass it an existing C<Archive::Tar::File>
1492object from, for example, another archive. The object will be clone, and
1493effectively be a copy of the original, not an alias.
1494
1495Returns a list of C<Archive::Tar::File> objects that were just added.
1496
1497=cut
1498
1499sub add_files {
1500    my $self    = shift;
1501    my @files   = @_ or return;
1502
1503    my @rv;
1504    for my $file ( @files ) {
1505
1506        ### you passed an Archive::Tar::File object
1507        ### clone it so we don't accidentally have a reference to
1508        ### an object from another archive
1509        if( UNIVERSAL::isa( $file,'Archive::Tar::File' ) ) {
1510            push @rv, $file->clone;
1511            next;
1512        }
1513
1514        eval {
1515            if( utf8::is_utf8( $file )) {
1516              utf8::encode( $file );
1517            }
1518        };
1519
1520        unless( -e $file || -l $file ) {
1521            $self->_error( qq[No such file: '$file'] );
1522            next;
1523        }
1524
1525        my $obj = Archive::Tar::File->new( file => $file );
1526        unless( $obj ) {
1527            $self->_error( qq[Unable to add file: '$file'] );
1528            next;
1529        }
1530
1531        push @rv, $obj;
1532    }
1533
1534    push @{$self->{_data}}, @rv;
1535
1536    return @rv;
1537}
1538
1539=head2 $tar->add_data ( $filename, $data, [$opthashref] )
1540
1541Takes a filename, a scalar full of data and optionally a reference to
1542a hash with specific options.
1543
1544Will add a file to the in-memory archive, with name C<$filename> and
1545content C<$data>. Specific properties can be set using C<$opthashref>.
1546The following list of properties is supported: name, size, mtime
1547(last modified date), mode, uid, gid, linkname, uname, gname,
1548devmajor, devminor, prefix, type.  (On MacOS, the file's path and
1549modification times are converted to Unix equivalents.)
1550
1551Valid values for the file type are the following constants defined by
1552Archive::Tar::Constant:
1553
1554=over 4
1555
1556=item FILE
1557
1558Regular file.
1559
1560=item HARDLINK
1561
1562=item SYMLINK
1563
1564Hard and symbolic ("soft") links; linkname should specify target.
1565
1566=item CHARDEV
1567
1568=item BLOCKDEV
1569
1570Character and block devices. devmajor and devminor should specify the major
1571and minor device numbers.
1572
1573=item DIR
1574
1575Directory.
1576
1577=item FIFO
1578
1579FIFO (named pipe).
1580
1581=item SOCKET
1582
1583Socket.
1584
1585=back
1586
1587Returns the C<Archive::Tar::File> object that was just added, or
1588C<undef> on failure.
1589
1590=cut
1591
1592sub add_data {
1593    my $self    = shift;
1594    my ($file, $data, $opt) = @_;
1595
1596    my $obj = Archive::Tar::File->new( data => $file, $data, $opt );
1597    unless( $obj ) {
1598        $self->_error( qq[Unable to add file: '$file'] );
1599        return;
1600    }
1601
1602    push @{$self->{_data}}, $obj;
1603
1604    return $obj;
1605}
1606
1607=head2 $tar->error( [$BOOL] )
1608
1609Returns the current error string (usually, the last error reported).
1610If a true value was specified, it will give the C<Carp::longmess>
1611equivalent of the error, in effect giving you a stacktrace.
1612
1613For backwards compatibility, this error is also available as
1614C<$Archive::Tar::error> although it is much recommended you use the
1615method call instead.
1616
1617=cut
1618
1619{
1620    $error = '';
1621    my $longmess;
1622
1623    sub _error {
1624        my $self    = shift;
1625        my $msg     = $error = shift;
1626        $longmess   = Carp::longmess($error);
1627        if (ref $self) {
1628            $self->{_error} = $error;
1629            $self->{_longmess} = $longmess;
1630        }
1631
1632        ### set Archive::Tar::WARN to 0 to disable printing
1633        ### of errors
1634        if( $WARN ) {
1635            carp $DEBUG ? $longmess : $msg;
1636        }
1637
1638        return;
1639    }
1640
1641    sub error {
1642        my $self = shift;
1643        if (ref $self) {
1644            return shift() ? $self->{_longmess} : $self->{_error};
1645        } else {
1646            return shift() ? $longmess : $error;
1647        }
1648    }
1649}
1650
1651=head2 $tar->setcwd( $cwd );
1652
1653C<Archive::Tar> needs to know the current directory, and it will run
1654C<Cwd::cwd()> I<every> time it extracts a I<relative> entry from the
1655tarfile and saves it in the file system. (As of version 1.30, however,
1656C<Archive::Tar> will use the speed optimization described below
1657automatically, so it's only relevant if you're using C<extract_file()>).
1658
1659Since C<Archive::Tar> doesn't change the current directory internally
1660while it is extracting the items in a tarball, all calls to C<Cwd::cwd()>
1661can be avoided if we can guarantee that the current directory doesn't
1662get changed externally.
1663
1664To use this performance boost, set the current directory via
1665
1666    use Cwd;
1667    $tar->setcwd( cwd() );
1668
1669once before calling a function like C<extract_file> and
1670C<Archive::Tar> will use the current directory setting from then on
1671and won't call C<Cwd::cwd()> internally.
1672
1673To switch back to the default behaviour, use
1674
1675    $tar->setcwd( undef );
1676
1677and C<Archive::Tar> will call C<Cwd::cwd()> internally again.
1678
1679If you're using C<Archive::Tar>'s C<extract()> method, C<setcwd()> will
1680be called for you.
1681
1682=cut
1683
1684sub setcwd {
1685    my $self     = shift;
1686    my $cwd      = shift;
1687
1688    $self->{cwd} = $cwd;
1689}
1690
1691=head1 Class Methods
1692
1693=head2 Archive::Tar->create_archive($file, $compressed, @filelist)
1694
1695Creates a tar file from the list of files provided.  The first
1696argument can either be the name of the tar file to create or a
1697reference to an open file handle (e.g. a GLOB reference).
1698
1699The second argument is used to indicate compression. You can either
1700compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed
1701to be the C<gzip> compression level (between 1 and 9), but the use of
1702constants is preferred:
1703
1704  # write a gzip compressed file
1705  Archive::Tar->create_archive( 'out.tgz', COMPRESS_GZIP, @filelist );
1706
1707  # write a bzip compressed file
1708  Archive::Tar->create_archive( 'out.tbz', COMPRESS_BZIP, @filelist );
1709
1710Note that when you pass in a filehandle, the compression argument
1711is ignored, as all files are printed verbatim to your filehandle.
1712If you wish to enable compression with filehandles, use an
1713C<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead.
1714
1715The remaining arguments list the files to be included in the tar file.
1716These files must all exist. Any files which don't exist or can't be
1717read are silently ignored.
1718
1719If the archive creation fails for any reason, C<create_archive> will
1720return false. Please use the C<error> method to find the cause of the
1721failure.
1722
1723Note that this method does not write C<on the fly> as it were; it
1724still reads all the files into memory before writing out the archive.
1725Consult the FAQ below if this is a problem.
1726
1727=cut
1728
1729sub create_archive {
1730    my $class = shift;
1731
1732    my $file    = shift; return unless defined $file;
1733    my $gzip    = shift || 0;
1734    my @files   = @_;
1735
1736    unless( @files ) {
1737        return $class->_error( qq[Cowardly refusing to create empty archive!] );
1738    }
1739
1740    my $tar = $class->new;
1741    $tar->add_files( @files );
1742    return $tar->write( $file, $gzip );
1743}
1744
1745=head2 Archive::Tar->iter( $filename, [ $compressed, {opt => $val} ] )
1746
1747Returns an iterator function that reads the tar file without loading
1748it all in memory.  Each time the function is called it will return the
1749next file in the tarball. The files are returned as
1750C<Archive::Tar::File> objects. The iterator function returns the
1751empty list once it has exhausted the files contained.
1752
1753The second argument can be a hash reference with options, which are
1754identical to the arguments passed to C<read()>.
1755
1756Example usage:
1757
1758    my $next = Archive::Tar->iter( "example.tar.gz", 1, {filter => qr/\.pm$/} );
1759
1760    while( my $f = $next->() ) {
1761        print $f->name, "\n";
1762
1763        $f->extract or warn "Extraction failed";
1764
1765        # ....
1766    }
1767
1768=cut
1769
1770
1771sub iter {
1772    my $class       = shift;
1773    my $filename    = shift;
1774    return unless defined $filename;
1775    my $compressed  = shift || 0;
1776    my $opts        = shift || {};
1777
1778    ### get a handle to read from.
1779    my $handle = $class->_get_handle(
1780        $filename,
1781        $compressed,
1782        READ_ONLY->( ZLIB )
1783    ) or return;
1784
1785    my @data;
1786		my $CONSTRUCT_ARGS = [ $filename, $compressed, $opts ];
1787    return sub {
1788        return shift(@data)     if @data;       # more than one file returned?
1789        return                  unless $handle; # handle exhausted?
1790
1791        ### read data, should only return file
1792        my $tarfile = $class->_read_tar($handle, { %$opts, limit => 1 });
1793        @data = @$tarfile if ref $tarfile && ref $tarfile eq 'ARRAY';
1794				if($Archive::Tar::RESOLVE_SYMLINK!~/none/){
1795					foreach(@data){
1796						#may refine this heuristic for ON_UNIX?
1797						if($_->linkname){
1798							#is there a better slot to store/share it ?
1799							$_->{'_archive'} = $CONSTRUCT_ARGS;
1800						}
1801					}
1802				}
1803
1804        ### return one piece of data
1805        return shift(@data)     if @data;
1806
1807        ### data is exhausted, free the filehandle
1808        undef $handle;
1809				if(@$CONSTRUCT_ARGS == 4){
1810					#free archive in memory
1811					undef $CONSTRUCT_ARGS->[-1];
1812				}
1813        return;
1814    };
1815}
1816
1817=head2 Archive::Tar->list_archive($file, $compressed, [\@properties])
1818
1819Returns a list of the names of all the files in the archive.  The
1820first argument can either be the name of the tar file to list or a
1821reference to an open file handle (e.g. a GLOB reference).
1822
1823If C<list_archive()> is passed an array reference as its third
1824argument it returns a list of hash references containing the requested
1825properties of each file.  The following list of properties is
1826supported: full_path, name, size, mtime (last modified date), mode,
1827uid, gid, linkname, uname, gname, devmajor, devminor, prefix, type.
1828
1829See C<Archive::Tar::File> for details about supported properties.
1830
1831Passing an array reference containing only one element, 'name', is
1832special cased to return a list of names rather than a list of hash
1833references.
1834
1835=cut
1836
1837sub list_archive {
1838    my $class   = shift;
1839    my $file    = shift; return unless defined $file;
1840    my $gzip    = shift || 0;
1841
1842    my $tar = $class->new($file, $gzip);
1843    return unless $tar;
1844
1845    return $tar->list_files( @_ );
1846}
1847
1848=head2 Archive::Tar->extract_archive($file, $compressed)
1849
1850Extracts the contents of the tar file.  The first argument can either
1851be the name of the tar file to create or a reference to an open file
1852handle (e.g. a GLOB reference).  All relative paths in the tar file will
1853be created underneath the current working directory.
1854
1855C<extract_archive> will return a list of files it extracted.
1856If the archive extraction fails for any reason, C<extract_archive>
1857will return false.  Please use the C<error> method to find the cause
1858of the failure.
1859
1860=cut
1861
1862sub extract_archive {
1863    my $class   = shift;
1864    my $file    = shift; return unless defined $file;
1865    my $gzip    = shift || 0;
1866
1867    my $tar = $class->new( ) or return;
1868
1869    return $tar->read( $file, $gzip, { extract => 1 } );
1870}
1871
1872=head2 $bool = Archive::Tar->has_io_string
1873
1874Returns true if we currently have C<IO::String> support loaded.
1875
1876Either C<IO::String> or C<perlio> support is needed to support writing
1877stringified archives. Currently, C<perlio> is the preferred method, if
1878available.
1879
1880See the C<GLOBAL VARIABLES> section to see how to change this preference.
1881
1882=cut
1883
1884sub has_io_string { return $HAS_IO_STRING; }
1885
1886=head2 $bool = Archive::Tar->has_perlio
1887
1888Returns true if we currently have C<perlio> support loaded.
1889
1890This requires C<perl-5.8> or higher, compiled with C<perlio>
1891
1892Either C<IO::String> or C<perlio> support is needed to support writing
1893stringified archives. Currently, C<perlio> is the preferred method, if
1894available.
1895
1896See the C<GLOBAL VARIABLES> section to see how to change this preference.
1897
1898=cut
1899
1900sub has_perlio { return $HAS_PERLIO; }
1901
1902=head2 $bool = Archive::Tar->has_zlib_support
1903
1904Returns true if C<Archive::Tar> can extract C<zlib> compressed archives
1905
1906=cut
1907
1908sub has_zlib_support { return ZLIB }
1909
1910=head2 $bool = Archive::Tar->has_bzip2_support
1911
1912Returns true if C<Archive::Tar> can extract C<bzip2> compressed archives
1913
1914=cut
1915
1916sub has_bzip2_support { return BZIP }
1917
1918=head2 Archive::Tar->can_handle_compressed_files
1919
1920A simple checking routine, which will return true if C<Archive::Tar>
1921is able to uncompress compressed archives on the fly with C<IO::Zlib>
1922and C<IO::Compress::Bzip2> or false if not both are installed.
1923
1924You can use this as a shortcut to determine whether C<Archive::Tar>
1925will do what you think before passing compressed archives to its
1926C<read> method.
1927
1928=cut
1929
1930sub can_handle_compressed_files { return ZLIB && BZIP ? 1 : 0 }
1931
1932sub no_string_support {
1933    croak("You have to install IO::String to support writing archives to strings");
1934}
1935
1936sub _symlinks_resolver{
1937  my ($src, $trg) = @_;
1938  my @src = split /[\/\\]/, $src;
1939  my @trg = split /[\/\\]/, $trg;
1940  pop @src; #strip out current object name
1941  if(@trg and $trg[0] eq ''){
1942    shift @trg;
1943    #restart path from scratch
1944    @src = ( );
1945  }
1946  foreach my $part ( @trg ){
1947    next if $part eq '.'; #ignore current
1948    if($part eq '..'){
1949      #got to parent
1950      pop @src;
1951    }
1952    else{
1953      #append it
1954      push @src, $part;
1955    }
1956  }
1957  my $path = join('/', @src);
1958  warn "_symlinks_resolver('$src','$trg') = $path" if $DEBUG;
1959  return $path;
1960}
1961
19621;
1963
1964__END__
1965
1966=head1 GLOBAL VARIABLES
1967
1968=head2 $Archive::Tar::FOLLOW_SYMLINK
1969
1970Set this variable to C<1> to make C<Archive::Tar> effectively make a
1971copy of the file when extracting. Default is C<0>, which
1972means the symlink stays intact. Of course, you will have to pack the
1973file linked to as well.
1974
1975This option is checked when you write out the tarfile using C<write>
1976or C<create_archive>.
1977
1978This works just like C</bin/tar>'s C<-h> option.
1979
1980=head2 $Archive::Tar::CHOWN
1981
1982By default, C<Archive::Tar> will try to C<chown> your files if it is
1983able to. In some cases, this may not be desired. In that case, set
1984this variable to C<0> to disable C<chown>-ing, even if it were
1985possible.
1986
1987The default is C<1>.
1988
1989=head2 $Archive::Tar::CHMOD
1990
1991By default, C<Archive::Tar> will try to C<chmod> your files to
1992whatever mode was specified for the particular file in the archive.
1993In some cases, this may not be desired. In that case, set this
1994variable to C<0> to disable C<chmod>-ing.
1995
1996The default is C<1>.
1997
1998=head2 $Archive::Tar::SAME_PERMISSIONS
1999
2000When, C<$Archive::Tar::CHMOD> is enabled, this setting controls whether
2001the permissions on files from the archive are used without modification
2002of if they are filtered by removing any setid bits and applying the
2003current umask.
2004
2005The default is C<1> for the root user and C<0> for normal users.
2006
2007=head2 $Archive::Tar::DO_NOT_USE_PREFIX
2008
2009By default, C<Archive::Tar> will try to put paths that are over
2010100 characters in the C<prefix> field of your tar header, as
2011defined per POSIX-standard. However, some (older) tar programs
2012do not implement this spec. To retain compatibility with these older
2013or non-POSIX compliant versions, you can set the C<$DO_NOT_USE_PREFIX>
2014variable to a true value, and C<Archive::Tar> will use an alternate
2015way of dealing with paths over 100 characters by using the
2016C<GNU Extended Header> feature.
2017
2018Note that clients who do not support the C<GNU Extended Header>
2019feature will not be able to read these archives. Such clients include
2020tars on C<Solaris>, C<Irix> and C<AIX>.
2021
2022The default is C<0>.
2023
2024=head2 $Archive::Tar::DEBUG
2025
2026Set this variable to C<1> to always get the C<Carp::longmess> output
2027of the warnings, instead of the regular C<carp>. This is the same
2028message you would get by doing:
2029
2030    $tar->error(1);
2031
2032Defaults to C<0>.
2033
2034=head2 $Archive::Tar::WARN
2035
2036Set this variable to C<0> if you do not want any warnings printed.
2037Personally I recommend against doing this, but people asked for the
2038option. Also, be advised that this is of course not threadsafe.
2039
2040Defaults to C<1>.
2041
2042=head2 $Archive::Tar::error
2043
2044Holds the last reported error. Kept for historical reasons, but its
2045use is very much discouraged. Use the C<error()> method instead:
2046
2047    warn $tar->error unless $tar->extract;
2048
2049Note that in older versions of this module, the C<error()> method
2050would return an effectively global value even when called an instance
2051method as above. This has since been fixed, and multiple instances of
2052C<Archive::Tar> now have separate error strings.
2053
2054=head2 $Archive::Tar::INSECURE_EXTRACT_MODE
2055
2056This variable indicates whether C<Archive::Tar> should allow
2057files to be extracted outside their current working directory.
2058
2059Allowing this could have security implications, as a malicious
2060tar archive could alter or replace any file the extracting user
2061has permissions to. Therefor, the default is to not allow
2062insecure extractions.
2063
2064If you trust the archive, or have other reasons to allow the
2065archive to write files outside your current working directory,
2066set this variable to C<true>.
2067
2068Note that this is a backwards incompatible change from version
2069C<1.36> and before.
2070
2071=head2 $Archive::Tar::HAS_PERLIO
2072
2073This variable holds a boolean indicating if we currently have
2074C<perlio> support loaded. This will be enabled for any perl
2075greater than C<5.8> compiled with C<perlio>.
2076
2077If you feel strongly about disabling it, set this variable to
2078C<false>. Note that you will then need C<IO::String> installed
2079to support writing stringified archives.
2080
2081Don't change this variable unless you B<really> know what you're
2082doing.
2083
2084=head2 $Archive::Tar::HAS_IO_STRING
2085
2086This variable holds a boolean indicating if we currently have
2087C<IO::String> support loaded. This will be enabled for any perl
2088that has a loadable C<IO::String> module.
2089
2090If you feel strongly about disabling it, set this variable to
2091C<false>. Note that you will then need C<perlio> support from
2092your perl to be able to  write stringified archives.
2093
2094Don't change this variable unless you B<really> know what you're
2095doing.
2096
2097=head2 $Archive::Tar::ZERO_PAD_NUMBERS
2098
2099This variable holds a boolean indicating if we will create
2100zero padded numbers for C<size>, C<mtime> and C<checksum>.
2101The default is C<0>, indicating that we will create space padded
2102numbers. Added for compatibility with C<busybox> implementations.
2103
2104=head2 Tuning the way RESOLVE_SYMLINK will works
2105
2106	You can tune the behaviour by setting the $Archive::Tar::RESOLVE_SYMLINK variable,
2107	or $ENV{PERL5_AT_RESOLVE_SYMLINK} before loading the module Archive::Tar.
2108
2109  Values can be one of the following:
2110
2111		none
2112           Disable this mechanism and failed as it was in previous version (<1.88)
2113
2114		speed (default)
2115           If you prefer speed
2116           this will read again the whole archive using read() so all entries
2117           will be available
2118
2119    memory
2120           If you prefer memory
2121
2122	Limitation
2123
2124		It won't work for terminal, pipe or sockets or every non seekable source.
2125
2126=cut
2127
2128=head1 FAQ
2129
2130=over 4
2131
2132=item What's the minimum perl version required to run Archive::Tar?
2133
2134You will need perl version 5.005_03 or newer.
2135
2136=item Isn't Archive::Tar slow?
2137
2138Yes it is. It's pure perl, so it's a lot slower then your C</bin/tar>
2139However, it's very portable. If speed is an issue, consider using
2140C</bin/tar> instead.
2141
2142=item Isn't Archive::Tar heavier on memory than /bin/tar?
2143
2144Yes it is, see previous answer. Since C<Compress::Zlib> and therefore
2145C<IO::Zlib> doesn't support C<seek> on their filehandles, there is little
2146choice but to read the archive into memory.
2147This is ok if you want to do in-memory manipulation of the archive.
2148
2149If you just want to extract, use the C<extract_archive> class method
2150instead. It will optimize and write to disk immediately.
2151
2152Another option is to use the C<iter> class method to iterate over
2153the files in the tarball without reading them all in memory at once.
2154
2155=item Can you lazy-load data instead?
2156
2157In some cases, yes. You can use the C<iter> class method to iterate
2158over the files in the tarball without reading them all in memory at once.
2159
2160=item How much memory will an X kb tar file need?
2161
2162Probably more than X kb, since it will all be read into memory. If
2163this is a problem, and you don't need to do in memory manipulation
2164of the archive, consider using the C<iter> class method, or C</bin/tar>
2165instead.
2166
2167=item What do you do with unsupported filetypes in an archive?
2168
2169C<Unix> has a few filetypes that aren't supported on other platforms,
2170like C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just
2171try to make a copy of the original file, rather than throwing an error.
2172
2173This does require you to read the entire archive in to memory first,
2174since otherwise we wouldn't know what data to fill the copy with.
2175(This means that you cannot use the class methods, including C<iter>
2176on archives that have incompatible filetypes and still expect things
2177to work).
2178
2179For other filetypes, like C<chardevs> and C<blockdevs> we'll warn that
2180the extraction of this particular item didn't work.
2181
2182=item I'm using WinZip, or some other non-POSIX client, and files are not being extracted properly!
2183
2184By default, C<Archive::Tar> is in a completely POSIX-compatible
2185mode, which uses the POSIX-specification of C<tar> to store files.
2186For paths greater than 100 characters, this is done using the
2187C<POSIX header prefix>. Non-POSIX-compatible clients may not support
2188this part of the specification, and may only support the C<GNU Extended
2189Header> functionality. To facilitate those clients, you can set the
2190C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. See the
2191C<GLOBAL VARIABLES> section for details on this variable.
2192
2193Note that GNU tar earlier than version 1.14 does not cope well with
2194the C<POSIX header prefix>. If you use such a version, consider setting
2195the C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>.
2196
2197=item How do I extract only files that have property X from an archive?
2198
2199Sometimes, you might not wish to extract a complete archive, just
2200the files that are relevant to you, based on some criteria.
2201
2202You can do this by filtering a list of C<Archive::Tar::File> objects
2203based on your criteria. For example, to extract only files that have
2204the string C<foo> in their title, you would use:
2205
2206    $tar->extract(
2207        grep { $_->full_path =~ /foo/ } $tar->get_files
2208    );
2209
2210This way, you can filter on any attribute of the files in the archive.
2211Consult the C<Archive::Tar::File> documentation on how to use these
2212objects.
2213
2214=item How do I access .tar.Z files?
2215
2216The C<Archive::Tar> module can optionally use C<Compress::Zlib> (via
2217the C<IO::Zlib> module) to access tar files that have been compressed
2218with C<gzip>. Unfortunately tar files compressed with the Unix C<compress>
2219utility cannot be read by C<Compress::Zlib> and so cannot be directly
2220accesses by C<Archive::Tar>.
2221
2222If the C<uncompress> or C<gunzip> programs are available, you can use
2223one of these workarounds to read C<.tar.Z> files from C<Archive::Tar>
2224
2225Firstly with C<uncompress>
2226
2227    use Archive::Tar;
2228
2229    open F, "uncompress -c $filename |";
2230    my $tar = Archive::Tar->new(*F);
2231    ...
2232
2233and this with C<gunzip>
2234
2235    use Archive::Tar;
2236
2237    open F, "gunzip -c $filename |";
2238    my $tar = Archive::Tar->new(*F);
2239    ...
2240
2241Similarly, if the C<compress> program is available, you can use this to
2242write a C<.tar.Z> file
2243
2244    use Archive::Tar;
2245    use IO::File;
2246
2247    my $fh = new IO::File "| compress -c >$filename";
2248    my $tar = Archive::Tar->new();
2249    ...
2250    $tar->write($fh);
2251    $fh->close ;
2252
2253=item How do I handle Unicode strings?
2254
2255C<Archive::Tar> uses byte semantics for any files it reads from or writes
2256to disk. This is not a problem if you only deal with files and never
2257look at their content or work solely with byte strings. But if you use
2258Unicode strings with character semantics, some additional steps need
2259to be taken.
2260
2261For example, if you add a Unicode string like
2262
2263    # Problem
2264    $tar->add_data('file.txt', "Euro: \x{20AC}");
2265
2266then there will be a problem later when the tarfile gets written out
2267to disk via C<< $tar->write() >>:
2268
2269    Wide character in print at .../Archive/Tar.pm line 1014.
2270
2271The data was added as a Unicode string and when writing it out to disk,
2272the C<:utf8> line discipline wasn't set by C<Archive::Tar>, so Perl
2273tried to convert the string to ISO-8859 and failed. The written file
2274now contains garbage.
2275
2276For this reason, Unicode strings need to be converted to UTF-8-encoded
2277bytestrings before they are handed off to C<add_data()>:
2278
2279    use Encode;
2280    my $data = "Accented character: \x{20AC}";
2281    $data = encode('utf8', $data);
2282
2283    $tar->add_data('file.txt', $data);
2284
2285A opposite problem occurs if you extract a UTF8-encoded file from a
2286tarball. Using C<get_content()> on the C<Archive::Tar::File> object
2287will return its content as a bytestring, not as a Unicode string.
2288
2289If you want it to be a Unicode string (because you want character
2290semantics with operations like regular expression matching), you need
2291to decode the UTF8-encoded content and have Perl convert it into
2292a Unicode string:
2293
2294    use Encode;
2295    my $data = $tar->get_content();
2296
2297    # Make it a Unicode string
2298    $data = decode('utf8', $data);
2299
2300There is no easy way to provide this functionality in C<Archive::Tar>,
2301because a tarball can contain many files, and each of which could be
2302encoded in a different way.
2303
2304=back
2305
2306=head1 CAVEATS
2307
2308The AIX tar does not fill all unused space in the tar archive with 0x00.
2309This sometimes leads to warning messages from C<Archive::Tar>.
2310
2311  Invalid header block at offset nnn
2312
2313A fix for that problem is scheduled to be released in the following levels
2314of AIX, all of which should be coming out in the 4th quarter of 2009:
2315
2316 AIX 5.3 TL7 SP10
2317 AIX 5.3 TL8 SP8
2318 AIX 5.3 TL9 SP5
2319 AIX 5.3 TL10 SP2
2320
2321 AIX 6.1 TL0 SP11
2322 AIX 6.1 TL1 SP7
2323 AIX 6.1 TL2 SP6
2324 AIX 6.1 TL3 SP3
2325
2326The IBM APAR number for this problem is IZ50240 (Reported component ID:
23275765G0300 / AIX 5.3). It is possible to get an ifix for that problem.
2328If you need an ifix please contact your local IBM AIX support.
2329
2330=head1 TODO
2331
2332=over 4
2333
2334=item Check if passed in handles are open for read/write
2335
2336Currently I don't know of any portable pure perl way to do this.
2337Suggestions welcome.
2338
2339=item Allow archives to be passed in as string
2340
2341Currently, we only allow opened filehandles or filenames, but
2342not strings. The internals would need some reworking to facilitate
2343stringified archives.
2344
2345=item Facilitate processing an opened filehandle of a compressed archive
2346
2347Currently, we only support this if the filehandle is an IO::Zlib object.
2348Environments, like apache, will present you with an opened filehandle
2349to an uploaded file, which might be a compressed archive.
2350
2351=back
2352
2353=head1 SEE ALSO
2354
2355=over 4
2356
2357=item The GNU tar specification
2358
2359C<http://www.gnu.org/software/tar/manual/tar.html>
2360
2361=item The PAX format specification
2362
2363The specification which tar derives from; C< http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html>
2364
2365=item A comparison of GNU and POSIX tar standards; C<http://www.delorie.com/gnu/docs/tar/tar_114.html>
2366
2367=item GNU tar intends to switch to POSIX compatibility
2368
2369GNU Tar authors have expressed their intention to become completely
2370POSIX-compatible; C<http://www.gnu.org/software/tar/manual/html_node/Formats.html>
2371
2372=item A Comparison between various tar implementations
2373
2374Lists known issues and incompatibilities; C<http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs>
2375
2376=back
2377
2378=head1 AUTHOR
2379
2380This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
2381
2382Please reports bugs to E<lt>bug-archive-tar@rt.cpan.orgE<gt>.
2383
2384=head1 ACKNOWLEDGEMENTS
2385
2386Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney, Gisle Aas,
2387Rainer Tammer and especially Andrew Savige for their help and suggestions.
2388
2389=head1 COPYRIGHT
2390
2391This module is copyright (c) 2002 - 2009 Jos Boumans
2392E<lt>kane@cpan.orgE<gt>. All rights reserved.
2393
2394This library is free software; you may redistribute and/or modify
2395it under the same terms as Perl itself.
2396
2397=cut
2398