1### the gnu tar specification:
2### https://www.gnu.org/software/tar/manual/tar.html
3###
4### and the pax format spec, which tar derives from:
5### https://www.opengroup.org/onlinepubs/007904975/utilities/pax.html
6
7package Archive::Tar;
8require 5.005_03;
9
10use Cwd;
11use IO::Zlib;
12use IO::File;
13use Carp                qw(carp croak);
14use File::Spec          ();
15use File::Spec::Unix    ();
16use File::Path          ();
17
18use Archive::Tar::File;
19use Archive::Tar::Constant;
20
21require Exporter;
22
23use strict;
24use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD
25            $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING $SAME_PERMISSIONS
26            $INSECURE_EXTRACT_MODE $ZERO_PAD_NUMBERS @ISA @EXPORT $RESOLVE_SYMLINK
27            $EXTRACT_BLOCK_SIZE
28         ];
29
30@ISA                    = qw[Exporter];
31@EXPORT                 = qw[ COMPRESS_GZIP COMPRESS_BZIP COMPRESS_XZ ];
32$DEBUG                  = 0;
33$WARN                   = 1;
34$FOLLOW_SYMLINK         = 0;
35$VERSION                = "3.02_001";
36$CHOWN                  = 1;
37$CHMOD                  = 1;
38$SAME_PERMISSIONS       = $> == 0 ? 1 : 0;
39$DO_NOT_USE_PREFIX      = 0;
40$INSECURE_EXTRACT_MODE  = 0;
41$ZERO_PAD_NUMBERS       = 0;
42$RESOLVE_SYMLINK        = $ENV{'PERL5_AT_RESOLVE_SYMLINK'} || 'speed';
43$EXTRACT_BLOCK_SIZE     = 1024 * 1024 * 1024;
44
45BEGIN {
46    use Config;
47    $HAS_PERLIO = $Config::Config{useperlio};
48
49    ### try and load IO::String anyway, so you can dynamically
50    ### switch between perlio and IO::String
51    $HAS_IO_STRING = eval {
52        require IO::String;
53        IO::String->import;
54        1;
55    } || 0;
56}
57
58=head1 NAME
59
60Archive::Tar - module for manipulations of tar archives
61
62=head1 SYNOPSIS
63
64    use Archive::Tar;
65    my $tar = Archive::Tar->new;
66
67    $tar->read('origin.tgz');
68    $tar->extract();
69
70    $tar->add_files('file/foo.pl', 'docs/README');
71    $tar->add_data('file/baz.txt', 'This is the contents now');
72
73    $tar->rename('oldname', 'new/file/name');
74    $tar->chown('/', 'root');
75    $tar->chown('/', 'root:root');
76    $tar->chmod('/tmp', '1777');
77
78    $tar->write('files.tar');                   # plain tar
79    $tar->write('files.tgz', COMPRESS_GZIP);    # gzip compressed
80    $tar->write('files.tbz', COMPRESS_BZIP);    # bzip2 compressed
81    $tar->write('files.txz', COMPRESS_XZ);      # xz compressed
82
83=head1 DESCRIPTION
84
85Archive::Tar provides an object oriented mechanism for handling tar
86files.  It provides class methods for quick and easy files handling
87while also allowing for the creation of tar file objects for custom
88manipulation.  If you have the IO::Zlib module installed,
89Archive::Tar will also support compressed or gzipped tar files.
90
91An object of class Archive::Tar represents a .tar(.gz) archive full
92of files and things.
93
94=head1 Object Methods
95
96=head2 Archive::Tar->new( [$file, $compressed] )
97
98Returns a new Tar object. If given any arguments, C<new()> calls the
99C<read()> method automatically, passing on the arguments provided to
100the C<read()> method.
101
102If C<new()> is invoked with arguments and the C<read()> method fails
103for any reason, C<new()> returns undef.
104
105=cut
106
107my $tmpl = {
108    _data   => [ ],
109    _file   => 'Unknown',
110};
111
112### install get/set accessors for this object.
113for my $key ( keys %$tmpl ) {
114    no strict 'refs';
115    *{__PACKAGE__."::$key"} = sub {
116        my $self = shift;
117        $self->{$key} = $_[0] if @_;
118        return $self->{$key};
119    }
120}
121
122sub new {
123    my $class = shift;
124    $class = ref $class if ref $class;
125
126    ### copying $tmpl here since a shallow copy makes it use the
127    ### same aref, causing for files to remain in memory always.
128    my $obj = bless { _data => [ ], _file => 'Unknown', _error => '' }, $class;
129
130    if (@_) {
131        unless ( $obj->read( @_ ) ) {
132            $obj->_error(qq[No data could be read from file]);
133            return;
134        }
135    }
136
137    return $obj;
138}
139
140=head2 $tar->read ( $filename|$handle, [$compressed, {opt => 'val'}] )
141
142Read the given tar file into memory.
143The first argument can either be the name of a file or a reference to
144an already open filehandle (or an IO::Zlib object if it's compressed)
145
146The C<read> will I<replace> any previous content in C<$tar>!
147
148The second argument may be considered optional, but remains for
149backwards compatibility. Archive::Tar now looks at the file
150magic to determine what class should be used to open the file
151and will transparently Do The Right Thing.
152
153Archive::Tar will warn if you try to pass a bzip2 / xz compressed file and the
154IO::Uncompress::Bunzip2 / IO::Uncompress::UnXz are not available and simply return.
155
156Note that you can currently B<not> pass a C<gzip> compressed
157filehandle, which is not opened with C<IO::Zlib>, a C<bzip2> compressed
158filehandle, which is not opened with C<IO::Uncompress::Bunzip2>, a C<xz> compressed
159filehandle, which is not opened with C<IO::Uncompress::UnXz>, nor a string
160containing the full archive information (either compressed or
161uncompressed). These are worth while features, but not currently
162implemented. See the C<TODO> section.
163
164The third argument can be a hash reference with options. Note that
165all options are case-sensitive.
166
167=over 4
168
169=item limit
170
171Do not read more than C<limit> files. This is useful if you have
172very big archives, and are only interested in the first few files.
173
174=item filter
175
176Can be set to a regular expression.  Only files with names that match
177the expression will be read.
178
179=item md5
180
181Set to 1 and the md5sum of files will be returned (instead of file data)
182    my $iter = Archive::Tar->iter( $file,  1, {md5 => 1} );
183    while( my $f = $iter->() ) {
184        print $f->data . "\t" . $f->full_path . $/;
185    }
186
187=item extract
188
189If set to true, immediately extract entries when reading them. This
190gives you the same memory break as the C<extract_archive> function.
191Note however that entries will not be read into memory, but written
192straight to disk. This means no C<Archive::Tar::File> objects are
193created for you to inspect.
194
195=back
196
197All files are stored internally as C<Archive::Tar::File> objects.
198Please consult the L<Archive::Tar::File> documentation for details.
199
200Returns the number of files read in scalar context, and a list of
201C<Archive::Tar::File> objects in list context.
202
203=cut
204
205sub read {
206    my $self = shift;
207    my $file = shift;
208    my $gzip = shift || 0;
209    my $opts = shift || {};
210
211    unless( defined $file ) {
212        $self->_error( qq[No file to read from!] );
213        return;
214    } else {
215        $self->_file( $file );
216    }
217
218    my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) )
219                    or return;
220
221    my $data = $self->_read_tar( $handle, $opts ) or return;
222
223    $self->_data( $data );
224
225    return wantarray ? @$data : scalar @$data;
226}
227
228sub _get_handle {
229    my $self     = shift;
230    my $file     = shift;   return unless defined $file;
231    my $compress = shift || 0;
232    my $mode     = shift || READ_ONLY->( ZLIB ); # default to read only
233
234    ### Check if file is a file handle or IO glob
235    if ( ref $file ) {
236	return $file if eval{ *$file{IO} };
237	return $file if eval{ $file->isa(q{IO::Handle}) };
238	$file = q{}.$file;
239    }
240
241    ### get a FH opened to the right class, so we can use it transparently
242    ### throughout the program
243    my $fh;
244    {   ### reading magic only makes sense if we're opening a file for
245        ### reading. otherwise, just use what the user requested.
246        my $magic = '';
247        if( MODE_READ->($mode) ) {
248            open my $tmp, $file or do {
249                $self->_error( qq[Could not open '$file' for reading: $!] );
250                return;
251            };
252
253            ### read the first 6 bytes of the file to figure out which class to
254            ### use to open the file.
255            sysread( $tmp, $magic, 6 );
256            close $tmp;
257        }
258
259        ### is it xz?
260        ### if you asked specifically for xz compression, or if we're in
261        ### read mode and the magic numbers add up, use xz
262        if( XZ and (
263               ($compress eq COMPRESS_XZ) or
264               ( MODE_READ->($mode) and $magic =~ XZ_MAGIC_NUM )
265            )
266        ) {
267            if( MODE_READ->($mode) ) {
268                $fh = IO::Uncompress::UnXz->new( $file ) or do {
269                    $self->_error( qq[Could not read '$file': ] .
270                        $IO::Uncompress::UnXz::UnXzError
271                    );
272                    return;
273                };
274            } else {
275                $fh = IO::Compress::Xz->new( $file ) or do {
276                    $self->_error( qq[Could not write to '$file': ] .
277                        $IO::Compress::Xz::XzError
278                    );
279                    return;
280                };
281            }
282
283        ### is it bzip?
284        ### if you asked specifically for bzip compression, or if we're in
285        ### read mode and the magic numbers add up, use bzip
286        } elsif( BZIP and (
287                ($compress eq COMPRESS_BZIP) or
288                ( MODE_READ->($mode) and $magic =~ BZIP_MAGIC_NUM )
289            )
290        ) {
291
292            ### different reader/writer modules, different error vars... sigh
293            if( MODE_READ->($mode) ) {
294                $fh = IO::Uncompress::Bunzip2->new( $file, MultiStream => 1 ) or do {
295                    $self->_error( qq[Could not read '$file': ] .
296                        $IO::Uncompress::Bunzip2::Bunzip2Error
297                    );
298                    return;
299                };
300
301            } else {
302                $fh = IO::Compress::Bzip2->new( $file ) or do {
303                    $self->_error( qq[Could not write to '$file': ] .
304                        $IO::Compress::Bzip2::Bzip2Error
305                    );
306                    return;
307                };
308            }
309
310        ### is it gzip?
311        ### if you asked for compression, if you wanted to read or the gzip
312        ### magic number is present (redundant with read)
313        } elsif( ZLIB and (
314                    $compress or MODE_READ->($mode) or $magic =~ GZIP_MAGIC_NUM
315                 )
316        ) {
317            $fh = IO::Zlib->new;
318
319            unless( $fh->open( $file, $mode ) ) {
320                $self->_error(qq[Could not create filehandle for '$file': $!]);
321                return;
322            }
323
324        ### is it plain tar?
325        } else {
326            $fh = IO::File->new;
327
328            unless( $fh->open( $file, $mode ) ) {
329                $self->_error(qq[Could not create filehandle for '$file': $!]);
330                return;
331            }
332
333            ### enable bin mode on tar archives
334            binmode $fh;
335        }
336    }
337
338    return $fh;
339}
340
341
342sub _read_tar {
343    my $self    = shift;
344    my $handle  = shift or return;
345    my $opts    = shift || {};
346
347    my $count   = $opts->{limit}    || 0;
348    my $filter  = $opts->{filter};
349    my $md5  = $opts->{md5} || 0;	# cdrake
350    my $filter_cb = $opts->{filter_cb};
351    my $extract = $opts->{extract}  || 0;
352
353    ### set a cap on the amount of files to extract ###
354    my $limit   = 0;
355    $limit = 1 if $count > 0;
356
357    my $tarfile = [ ];
358    my $chunk;
359    my $read = 0;
360    my $real_name;  # to set the name of a file when
361                    # we're encountering @longlink
362    my $data;
363
364    LOOP:
365    while( $handle->read( $chunk, HEAD ) ) {
366        ### IO::Zlib doesn't support this yet
367        my $offset;
368        if ( ref($handle) ne 'IO::Zlib' ) {
369            local $@;
370            $offset = eval { tell $handle } || 'unknown';
371            $@ = '';
372        }
373        else {
374            $offset = 'unknown';
375        }
376
377        unless( $read++ ) {
378            my $gzip = GZIP_MAGIC_NUM;
379            if( $chunk =~ /$gzip/ ) {
380                $self->_error( qq[Cannot read compressed format in tar-mode] );
381                return;
382            }
383
384            ### size is < HEAD, which means a corrupted file, as the minimum
385            ### length is _at least_ HEAD
386            if (length $chunk != HEAD) {
387                $self->_error( qq[Cannot read enough bytes from the tarfile] );
388                return;
389            }
390        }
391
392        ### if we can't read in all bytes... ###
393        last if length $chunk != HEAD;
394
395        ### Apparently this should really be two blocks of 512 zeroes,
396        ### but GNU tar sometimes gets it wrong. See comment in the
397        ### source code (tar.c) to GNU cpio.
398        next if $chunk eq TAR_END;
399
400        ### according to the posix spec, the last 12 bytes of the header are
401        ### null bytes, to pad it to a 512 byte block. That means if these
402        ### bytes are NOT null bytes, it's a corrupt header. See:
403        ### www.koders.com/c/fidCE473AD3D9F835D690259D60AD5654591D91D5BA.aspx
404        ### line 111
405        {   my $nulls = join '', "\0" x 12;
406            unless( $nulls eq substr( $chunk, 500, 12 ) ) {
407                $self->_error( qq[Invalid header block at offset $offset] );
408                next LOOP;
409            }
410        }
411
412        ### pass the realname, so we can set it 'proper' right away
413        ### some of the heuristics are done on the name, so important
414        ### to set it ASAP
415        my $entry;
416        {   my %extra_args = ();
417            $extra_args{'name'} = $$real_name if defined $real_name;
418
419            unless( $entry = Archive::Tar::File->new(   chunk => $chunk,
420                                                        %extra_args )
421            ) {
422                $self->_error( qq[Couldn't read chunk at offset $offset] );
423                next LOOP;
424            }
425        }
426
427        ### ignore labels:
428        ### https://www.gnu.org/software/tar/manual/html_chapter/Media.html#SEC159
429        next if $entry->is_label;
430
431        if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) {
432
433            if ( $entry->is_file && !$entry->validate ) {
434                ### sometimes the chunk is rather fux0r3d and a whole 512
435                ### bytes ends up in the ->name area.
436                ### clean it up, if need be
437                my $name = $entry->name;
438                $name = substr($name, 0, 100) if length $name > 100;
439                $name =~ s/\n/ /g;
440
441                $self->_error( $name . qq[: checksum error] );
442                next LOOP;
443            }
444
445            my $block = BLOCK_SIZE->( $entry->size );
446
447            $data = $entry->get_content_by_ref;
448
449	    my $skip = 0;
450	    my $ctx;			# cdrake
451	    ### skip this entry if we're filtering
452
453	    if($md5) {			# cdrake
454	      $ctx = Digest::MD5->new;	# cdrake
455	        $skip=5;		# cdrake
456
457	    } elsif ($filter && $entry->name !~ $filter) {
458		$skip = 1;
459
460	    } elsif ($filter_cb && ! $filter_cb->($entry)) {
461		$skip = 2;
462
463	    ### skip this entry if it's a pax header. This is a special file added
464	    ### by, among others, git-generated tarballs. It holds comments and is
465	    ### not meant for extracting. See #38932: pax_global_header extracted
466	    } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) {
467		$skip = 3;
468	    }
469
470	    if ($skip) {
471		#
472		# Since we're skipping, do not allocate memory for the
473		# whole file.  Read it 64 BLOCKS at a time.  Do not
474		# complete the skip yet because maybe what we read is a
475		# longlink and it won't get skipped after all
476		#
477		my $amt = $block;
478		my $fsz=$entry->size;	# cdrake
479		while ($amt > 0) {
480		    $$data = '';
481		    my $this = 64 * BLOCK;
482		    $this = $amt if $this > $amt;
483		    if( $handle->read( $$data, $this ) < $this ) {
484			$self->_error( qq[Read error on tarfile (missing data) '].
485					    $entry->full_path ."' at offset $offset" );
486			next LOOP;
487		    }
488		    $amt -= $this;
489		    $fsz -= $this;	# cdrake
490		substr ($$data, $fsz) = "" if ($fsz<0);	# remove external junk prior to md5	# cdrake
491		$ctx->add($$data) if($skip==5);	# cdrake
492		}
493		$$data = $ctx->hexdigest if($skip==5 && !$entry->is_longlink && !$entry->is_unknown && !$entry->is_label ) ;	# cdrake
494            } else {
495
496		### just read everything into memory
497		### can't do lazy loading since IO::Zlib doesn't support 'seek'
498		### this is because Compress::Zlib doesn't support it =/
499		### this reads in the whole data in one read() call.
500		if ( $handle->read( $$data, $block ) < $block ) {
501		    $self->_error( qq[Read error on tarfile (missing data) '].
502                                    $entry->full_path ."' at offset $offset" );
503		    next LOOP;
504		}
505		### throw away trailing garbage ###
506		substr ($$data, $entry->size) = "" if defined $$data;
507            }
508
509            ### part II of the @LongLink munging -- need to do /after/
510            ### the checksum check.
511            if( $entry->is_longlink ) {
512                ### weird thing in tarfiles -- if the file is actually a
513                ### @LongLink, the data part seems to have a trailing ^@
514                ### (unprintable) char. to display, pipe output through less.
515                ### but that doesn't *always* happen.. so check if the last
516                ### character is a control character, and if so remove it
517                ### at any rate, we better remove that character here, or tests
518                ### like 'eq' and hash lookups based on names will SO not work
519                ### remove it by calculating the proper size, and then
520                ### tossing out everything that's longer than that size.
521
522                ### count number of nulls
523                my $nulls = $$data =~ tr/\0/\0/;
524
525                ### cut data + size by that many bytes
526                $entry->size( $entry->size - $nulls );
527                substr ($$data, $entry->size) = "";
528            }
529        }
530
531        ### clean up of the entries.. posix tar /apparently/ has some
532        ### weird 'feature' that allows for filenames > 255 characters
533        ### they'll put a header in with as name '././@LongLink' and the
534        ### contents will be the name of the /next/ file in the archive
535        ### pretty crappy and kludgy if you ask me
536
537        ### set the name for the next entry if this is a @LongLink;
538        ### this is one ugly hack =/ but needed for direct extraction
539        if( $entry->is_longlink ) {
540            $real_name = $data;
541            next LOOP;
542        } elsif ( defined $real_name ) {
543            $entry->name( $$real_name );
544            $entry->prefix('');
545            undef $real_name;
546        }
547
548	if ($filter && $entry->name !~ $filter) {
549	    next LOOP;
550
551	} elsif ($filter_cb && ! $filter_cb->($entry)) {
552	    next LOOP;
553
554	### skip this entry if it's a pax header. This is a special file added
555	### by, among others, git-generated tarballs. It holds comments and is
556	### not meant for extracting. See #38932: pax_global_header extracted
557	} elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) {
558	    next LOOP;
559	}
560
561        if ( $extract && !$entry->is_longlink
562                      && !$entry->is_unknown
563                      && !$entry->is_label ) {
564            $self->_extract_file( $entry ) or return;
565        }
566
567        ### Guard against tarfiles with garbage at the end
568	    last LOOP if $entry->name eq '';
569
570        ### push only the name on the rv if we're extracting
571        ### -- for extract_archive
572        push @$tarfile, ($extract ? $entry->name : $entry);
573
574        if( $limit ) {
575            $count-- unless $entry->is_longlink || $entry->is_dir;
576            last LOOP unless $count;
577        }
578    } continue {
579        undef $data;
580    }
581
582    return $tarfile;
583}
584
585=head2 $tar->contains_file( $filename )
586
587Check if the archive contains a certain file.
588It will return true if the file is in the archive, false otherwise.
589
590Note however, that this function does an exact match using C<eq>
591on the full path. So it cannot compensate for case-insensitive file-
592systems or compare 2 paths to see if they would point to the same
593underlying file.
594
595=cut
596
597sub contains_file {
598    my $self = shift;
599    my $full = shift;
600
601    return unless defined $full;
602
603    ### don't warn if the entry isn't there.. that's what this function
604    ### is for after all.
605    local $WARN = 0;
606    return 1 if $self->_find_entry($full);
607    return;
608}
609
610=head2 $tar->extract( [@filenames] )
611
612Write files whose names are equivalent to any of the names in
613C<@filenames> to disk, creating subdirectories as necessary. This
614might not work too well under VMS.
615Under MacPerl, the file's modification time will be converted to the
616MacOS zero of time, and appropriate conversions will be done to the
617path.  However, the length of each element of the path is not
618inspected to see whether it's longer than MacOS currently allows (32
619characters).
620
621If C<extract> is called without a list of file names, the entire
622contents of the archive are extracted.
623
624Returns a list of filenames extracted.
625
626=cut
627
628sub extract {
629    my $self    = shift;
630    my @args    = @_;
631    my @files;
632    my $hashmap;
633
634    # use the speed optimization for all extracted files
635    local($self->{cwd}) = cwd() unless $self->{cwd};
636
637    ### you requested the extraction of only certain files
638    if( @args ) {
639        for my $file ( @args ) {
640
641            ### it's already an object?
642            if( UNIVERSAL::isa( $file, 'Archive::Tar::File' ) ) {
643                push @files, $file;
644                next;
645
646            ### go find it then
647            } else {
648
649                # create hash-map once to speed up lookup
650                $hashmap = $hashmap || {
651                    map { $_->full_path, $_ } @{$self->_data}
652                };
653
654                if (exists $hashmap->{$file}) {
655                    ### we found the file you're looking for
656                    push @files, $hashmap->{$file};
657                } else {
658                    return $self->_error(
659                        qq[Could not find '$file' in archive] );
660                }
661            }
662        }
663
664    ### just grab all the file items
665    } else {
666        @files = $self->get_files;
667    }
668
669    ### nothing found? that's an error
670    unless( scalar @files ) {
671        $self->_error( qq[No files found for ] . $self->_file );
672        return;
673    }
674
675    ### now extract them
676    for my $entry ( @files ) {
677        unless( $self->_extract_file( $entry ) ) {
678            $self->_error(q[Could not extract ']. $entry->full_path .q['] );
679            return;
680        }
681    }
682
683    return @files;
684}
685
686=head2 $tar->extract_file( $file, [$extract_path] )
687
688Write an entry, whose name is equivalent to the file name provided to
689disk. Optionally takes a second parameter, which is the full native
690path (including filename) the entry will be written to.
691
692For example:
693
694    $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' );
695
696    $tar->extract_file( $at_file_object,   'name/i/want/to/give/it' );
697
698Returns true on success, false on failure.
699
700=cut
701
702sub extract_file {
703    my $self = shift;
704    my $file = shift;   return unless defined $file;
705    my $alt  = shift;
706
707    my $entry = $self->_find_entry( $file )
708        or $self->_error( qq[Could not find an entry for '$file'] ), return;
709
710    return $self->_extract_file( $entry, $alt );
711}
712
713sub _extract_file {
714    my $self    = shift;
715    my $entry   = shift or return;
716    my $alt     = shift;
717
718    ### you wanted an alternate extraction location ###
719    my $name = defined $alt ? $alt : $entry->full_path;
720
721                            ### splitpath takes a bool at the end to indicate
722                            ### that it's splitting a dir
723    my ($vol,$dirs,$file);
724    if ( defined $alt ) { # It's a local-OS path
725        ($vol,$dirs,$file) = File::Spec->splitpath(       $alt,
726                                                          $entry->is_dir );
727    } else {
728        ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name,
729                                                          $entry->is_dir );
730    }
731
732    my $dir;
733    ### is $name an absolute path? ###
734    if( $vol || File::Spec->file_name_is_absolute( $dirs ) ) {
735
736        ### absolute names are not allowed to be in tarballs under
737        ### strict mode, so only allow it if a user tells us to do it
738        if( not defined $alt and not $INSECURE_EXTRACT_MODE ) {
739            $self->_error(
740                q[Entry ']. $entry->full_path .q[' is an absolute path. ].
741                q[Not extracting absolute paths under SECURE EXTRACT MODE]
742            );
743            return;
744        }
745
746        ### user asked us to, it's fine.
747        $dir = File::Spec->catpath( $vol, $dirs, "" );
748
749    ### it's a relative path ###
750    } else {
751        my $cwd     = (ref $self and defined $self->{cwd})
752                        ? $self->{cwd}
753                        : cwd();
754
755        my @dirs = defined $alt
756            ? File::Spec->splitdir( $dirs )         # It's a local-OS path
757            : File::Spec::Unix->splitdir( $dirs );  # it's UNIX-style, likely
758                                                    # straight from the tarball
759
760        if( not defined $alt            and
761            not $INSECURE_EXTRACT_MODE
762        ) {
763
764            ### paths that leave the current directory are not allowed under
765            ### strict mode, so only allow it if a user tells us to do this.
766            if( grep { $_ eq '..' } @dirs ) {
767
768                $self->_error(
769                    q[Entry ']. $entry->full_path .q[' is attempting to leave ].
770                    q[the current working directory. Not extracting under ].
771                    q[SECURE EXTRACT MODE]
772                );
773                return;
774            }
775
776            ### the archive may be asking us to extract into a symlink. This
777            ### is not sane and a possible security issue, as outlined here:
778            ### https://rt.cpan.org/Ticket/Display.html?id=30380
779            ### https://bugzilla.redhat.com/show_bug.cgi?id=295021
780            ### https://issues.rpath.com/browse/RPL-1716
781            my $full_path = $cwd;
782            for my $d ( @dirs ) {
783                $full_path = File::Spec->catdir( $full_path, $d );
784
785                ### we've already checked this one, and it's safe. Move on.
786                next if ref $self and $self->{_link_cache}->{$full_path};
787
788                if( -l $full_path ) {
789                    my $to   = readlink $full_path;
790                    my $diag = "symlinked directory ($full_path => $to)";
791
792                    $self->_error(
793                        q[Entry ']. $entry->full_path .q[' is attempting to ].
794                        qq[extract to a $diag. This is considered a security ].
795                        q[vulnerability and not allowed under SECURE EXTRACT ].
796                        q[MODE]
797                    );
798                    return;
799                }
800
801                ### XXX keep a cache if possible, so the stats become cheaper:
802                $self->{_link_cache}->{$full_path} = 1 if ref $self;
803            }
804        }
805
806        ### '.' is the directory delimiter on VMS, which has to be escaped
807        ### or changed to '_' on vms.  vmsify is used, because older versions
808        ### of vmspath do not handle this properly.
809        ### Must not add a '/' to an empty directory though.
810        map { length() ? VMS::Filespec::vmsify($_.'/') : $_ } @dirs if ON_VMS;
811
812        my ($cwd_vol,$cwd_dir,$cwd_file)
813                    = File::Spec->splitpath( $cwd );
814        my @cwd     = File::Spec->splitdir( $cwd_dir );
815        push @cwd, $cwd_file if length $cwd_file;
816
817        ### We need to pass '' as the last element to catpath. Craig Berry
818        ### explains why (msgid <p0624083dc311ae541393@[172.16.52.1]>):
819        ### The root problem is that splitpath on UNIX always returns the
820        ### final path element as a file even if it is a directory, and of
821        ### course there is no way it can know the difference without checking
822        ### against the filesystem, which it is documented as not doing.  When
823        ### you turn around and call catpath, on VMS you have to know which bits
824        ### are directory bits and which bits are file bits.  In this case we
825        ### know the result should be a directory.  I had thought you could omit
826        ### the file argument to catpath in such a case, but apparently on UNIX
827        ### you can't.
828        $dir        = File::Spec->catpath(
829                            $cwd_vol, File::Spec->catdir( @cwd, @dirs ), ''
830                        );
831
832        ### catdir() returns undef if the path is longer than 255 chars on
833        ### older VMS systems.
834        unless ( defined $dir ) {
835            $^W && $self->_error( qq[Could not compose a path for '$dirs'\n] );
836            return;
837        }
838
839    }
840
841    if( -e $dir && !-d _ ) {
842        $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] );
843        return;
844    }
845
846    unless ( -d _ ) {
847        eval { File::Path::mkpath( $dir, 0, 0777 ) };
848        if( $@ ) {
849            my $fp = $entry->full_path;
850            $self->_error(qq[Could not create directory '$dir' for '$fp': $@]);
851            return;
852        }
853
854        ### XXX chown here? that might not be the same as in the archive
855        ### as we're only chown'ing to the owner of the file we're extracting
856        ### not to the owner of the directory itself, which may or may not
857        ### be another entry in the archive
858        ### Answer: no, gnu tar doesn't do it either, it'd be the wrong
859        ### way to go.
860        #if( $CHOWN && CAN_CHOWN ) {
861        #    chown $entry->uid, $entry->gid, $dir or
862        #        $self->_error( qq[Could not set uid/gid on '$dir'] );
863        #}
864    }
865
866    ### we're done if we just needed to create a dir ###
867    return 1 if $entry->is_dir;
868
869    my $full = File::Spec->catfile( $dir, $file );
870
871    if( $entry->is_unknown ) {
872        $self->_error( qq[Unknown file type for file '$full'] );
873        return;
874    }
875
876    ### If a file system already contains a block device with the same name as
877    ### the being extracted regular file, we would write the file's content
878    ### to the block device. So remove the existing file (block device) now.
879    ### If an archive contains multiple same-named entries, the last one
880    ### should replace the previous ones. So remove the old file now.
881    ### If the old entry is a symlink to a file outside of the CWD, the new
882    ### entry would create a file there. This is CVE-2018-12015
883    ### <https://rt.cpan.org/Ticket/Display.html?id=125523>.
884    if (-l $full || -e _) {
885	if (!unlink $full) {
886	    $self->_error( qq[Could not remove old file '$full': $!] );
887	    return;
888	}
889    }
890    if( length $entry->type && $entry->is_file ) {
891        my $fh = IO::File->new;
892        $fh->open( $full, '>' ) or (
893            $self->_error( qq[Could not open file '$full': $!] ),
894            return
895        );
896
897        if( $entry->size ) {
898            binmode $fh;
899            my $offset = 0;
900            my $content = $entry->get_content_by_ref();
901            while ($offset < $entry->size) {
902                my $written
903                    = syswrite $fh, $$content, $EXTRACT_BLOCK_SIZE, $offset;
904                if (defined $written) {
905                    $offset += $written;
906                } else {
907                    $self->_error( qq[Could not write data to '$full': $!] );
908                    return;
909                }
910            }
911        }
912
913        close $fh or (
914            $self->_error( qq[Could not close file '$full'] ),
915            return
916        );
917
918    } else {
919        $self->_make_special_file( $entry, $full ) or return;
920    }
921
922    ### only update the timestamp if it's not a symlink; that will change the
923    ### timestamp of the original. This addresses bug #33669: Could not update
924    ### timestamp warning on symlinks
925    if( not -l $full ) {
926        utime time, $entry->mtime - TIME_OFFSET, $full or
927            $self->_error( qq[Could not update timestamp] );
928    }
929
930    if( $CHOWN && CAN_CHOWN->() and not -l $full ) {
931        CORE::chown( $entry->uid, $entry->gid, $full ) or
932            $self->_error( qq[Could not set uid/gid on '$full'] );
933    }
934
935    ### only chmod if we're allowed to, but never chmod symlinks, since they'll
936    ### change the perms on the file they're linking too...
937    if( $CHMOD and not -l $full ) {
938        my $mode = $entry->mode;
939        unless ($SAME_PERMISSIONS) {
940            $mode &= ~(oct(7000) | umask);
941        }
942        CORE::chmod( $mode, $full ) or
943            $self->_error( qq[Could not chown '$full' to ] . $entry->mode );
944    }
945
946    return 1;
947}
948
949sub _make_special_file {
950    my $self    = shift;
951    my $entry   = shift     or return;
952    my $file    = shift;    return unless defined $file;
953
954    my $err;
955
956    if( $entry->is_symlink ) {
957        my $fail;
958        if( ON_UNIX ) {
959            symlink( $entry->linkname, $file ) or $fail++;
960
961        } else {
962            $self->_extract_special_file_as_plain_file( $entry, $file )
963                or $fail++;
964        }
965
966        $err =  qq[Making symbolic link '$file' to '] .
967                $entry->linkname .q[' failed] if $fail;
968
969    } elsif ( $entry->is_hardlink ) {
970        my $fail;
971        if( ON_UNIX ) {
972            link( $entry->linkname, $file ) or $fail++;
973
974        } else {
975            $self->_extract_special_file_as_plain_file( $entry, $file )
976                or $fail++;
977        }
978
979        $err =  qq[Making hard link from '] . $entry->linkname .
980                qq[' to '$file' failed] if $fail;
981
982    } elsif ( $entry->is_fifo ) {
983        ON_UNIX && !system('mknod', $file, 'p') or
984            $err = qq[Making fifo ']. $entry->name .qq[' failed];
985
986    } elsif ( $entry->is_blockdev or $entry->is_chardev ) {
987        my $mode = $entry->is_blockdev ? 'b' : 'c';
988
989        ON_UNIX && !system('mknod', $file, $mode,
990                            $entry->devmajor, $entry->devminor) or
991            $err =  qq[Making block device ']. $entry->name .qq[' (maj=] .
992                    $entry->devmajor . qq[ min=] . $entry->devminor .
993                    qq[) failed.];
994
995    } elsif ( $entry->is_socket ) {
996        ### the original doesn't do anything special for sockets.... ###
997        1;
998    }
999
1000    return $err ? $self->_error( $err ) : 1;
1001}
1002
1003### don't know how to make symlinks, let's just extract the file as
1004### a plain file
1005sub _extract_special_file_as_plain_file {
1006    my $self    = shift;
1007    my $entry   = shift     or return;
1008    my $file    = shift;    return unless defined $file;
1009
1010    my $err;
1011    TRY: {
1012        my $orig = $self->_find_entry( $entry->linkname, $entry );
1013
1014        unless( $orig ) {
1015            $err =  qq[Could not find file '] . $entry->linkname .
1016                    qq[' in memory.];
1017            last TRY;
1018        }
1019
1020        ### clone the entry, make it appear as a normal file ###
1021        my $clone = $orig->clone;
1022        $clone->_downgrade_to_plainfile;
1023        $self->_extract_file( $clone, $file ) or last TRY;
1024
1025        return 1;
1026    }
1027
1028    return $self->_error($err);
1029}
1030
1031=head2 $tar->list_files( [\@properties] )
1032
1033Returns a list of the names of all the files in the archive.
1034
1035If C<list_files()> is passed an array reference as its first argument
1036it returns a list of hash references containing the requested
1037properties of each file.  The following list of properties is
1038supported: name, size, mtime (last modified date), mode, uid, gid,
1039linkname, uname, gname, devmajor, devminor, prefix.
1040
1041Passing an array reference containing only one element, 'name', is
1042special cased to return a list of names rather than a list of hash
1043references, making it equivalent to calling C<list_files> without
1044arguments.
1045
1046=cut
1047
1048sub list_files {
1049    my $self = shift;
1050    my $aref = shift || [ ];
1051
1052    unless( $self->_data ) {
1053        $self->read() or return;
1054    }
1055
1056    if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) {
1057        return map { $_->full_path } @{$self->_data};
1058    } else {
1059
1060        #my @rv;
1061        #for my $obj ( @{$self->_data} ) {
1062        #    push @rv, { map { $_ => $obj->$_() } @$aref };
1063        #}
1064        #return @rv;
1065
1066        ### this does the same as the above.. just needs a +{ }
1067        ### to make sure perl doesn't confuse it for a block
1068        return map {    my $o=$_;
1069                        +{ map { $_ => $o->$_() } @$aref }
1070                    } @{$self->_data};
1071    }
1072}
1073
1074sub _find_entry {
1075    my $self = shift;
1076    my $file = shift;
1077
1078    unless( defined $file ) {
1079        $self->_error( qq[No file specified] );
1080        return;
1081    }
1082
1083    ### it's an object already
1084    return $file if UNIVERSAL::isa( $file, 'Archive::Tar::File' );
1085
1086seach_entry:
1087		if($self->_data){
1088			for my $entry ( @{$self->_data} ) {
1089					my $path = $entry->full_path;
1090					return $entry if $path eq $file;
1091			}
1092		}
1093
1094		if($Archive::Tar::RESOLVE_SYMLINK!~/none/){
1095			if(my $link_entry = shift()){#fallback mode when symlinks are using relative notations ( ../a/./b/text.bin )
1096				$file = _symlinks_resolver( $link_entry->name, $file );
1097				goto seach_entry if $self->_data;
1098
1099				#this will be slower than never, but won't failed!
1100
1101				my $iterargs = $link_entry->{'_archive'};
1102				if($Archive::Tar::RESOLVE_SYMLINK=~/speed/ && @$iterargs==3){
1103				#faster	but whole archive will be read in memory
1104					#read whole archive and share data
1105					my $archive = Archive::Tar->new;
1106					$archive->read( @$iterargs );
1107					push @$iterargs, $archive; #take a trace for destruction
1108					if($archive->_data){
1109						$self->_data( $archive->_data );
1110						goto seach_entry;
1111					}
1112				}#faster
1113
1114				{#slower but lower memory usage
1115					# $iterargs = [$filename, $compressed, $opts];
1116					my $next = Archive::Tar->iter( @$iterargs );
1117					while(my $e = $next->()){
1118						if($e->full_path eq $file){
1119							undef $next;
1120							return $e;
1121						}
1122					}
1123				}#slower
1124			}
1125		}
1126
1127    $self->_error( qq[No such file in archive: '$file'] );
1128    return;
1129}
1130
1131=head2 $tar->get_files( [@filenames] )
1132
1133Returns the C<Archive::Tar::File> objects matching the filenames
1134provided. If no filename list was passed, all C<Archive::Tar::File>
1135objects in the current Tar object are returned.
1136
1137Please refer to the C<Archive::Tar::File> documentation on how to
1138handle these objects.
1139
1140=cut
1141
1142sub get_files {
1143    my $self = shift;
1144
1145    return @{ $self->_data } unless @_;
1146
1147    my @list;
1148    for my $file ( @_ ) {
1149        push @list, grep { defined } $self->_find_entry( $file );
1150    }
1151
1152    return @list;
1153}
1154
1155=head2 $tar->get_content( $file )
1156
1157Return the content of the named file.
1158
1159=cut
1160
1161sub get_content {
1162    my $self = shift;
1163    my $entry = $self->_find_entry( shift ) or return;
1164
1165    return $entry->data;
1166}
1167
1168=head2 $tar->replace_content( $file, $content )
1169
1170Make the string $content be the content for the file named $file.
1171
1172=cut
1173
1174sub replace_content {
1175    my $self = shift;
1176    my $entry = $self->_find_entry( shift ) or return;
1177
1178    return $entry->replace_content( shift );
1179}
1180
1181=head2 $tar->rename( $file, $new_name )
1182
1183Rename the file of the in-memory archive to $new_name.
1184
1185Note that you must specify a Unix path for $new_name, since per tar
1186standard, all files in the archive must be Unix paths.
1187
1188Returns true on success and false on failure.
1189
1190=cut
1191
1192sub rename {
1193    my $self = shift;
1194    my $file = shift; return unless defined $file;
1195    my $new  = shift; return unless defined $new;
1196
1197    my $entry = $self->_find_entry( $file ) or return;
1198
1199    return $entry->rename( $new );
1200}
1201
1202=head2 $tar->chmod( $file, $mode )
1203
1204Change mode of $file to $mode.
1205
1206Returns true on success and false on failure.
1207
1208=cut
1209
1210sub chmod {
1211    my $self = shift;
1212    my $file = shift; return unless defined $file;
1213    my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/;
1214    my @args = ("$mode");
1215
1216    my $entry = $self->_find_entry( $file ) or return;
1217    my $x = $entry->chmod( @args );
1218    return $x;
1219}
1220
1221=head2 $tar->chown( $file, $uname [, $gname] )
1222
1223Change owner $file to $uname and $gname.
1224
1225Returns true on success and false on failure.
1226
1227=cut
1228
1229sub chown {
1230    my $self = shift;
1231    my $file = shift; return unless defined $file;
1232    my $uname  = shift; return unless defined $uname;
1233    my @args   = ($uname);
1234    push(@args, shift);
1235
1236    my $entry = $self->_find_entry( $file ) or return;
1237    my $x = $entry->chown( @args );
1238    return $x;
1239}
1240
1241=head2 $tar->remove (@filenamelist)
1242
1243Removes any entries with names matching any of the given filenames
1244from the in-memory archive. Returns a list of C<Archive::Tar::File>
1245objects that remain.
1246
1247=cut
1248
1249sub remove {
1250    my $self = shift;
1251    my @list = @_;
1252
1253    my %seen = map { $_->full_path => $_ } @{$self->_data};
1254    delete $seen{ $_ } for @list;
1255
1256    $self->_data( [values %seen] );
1257
1258    return values %seen;
1259}
1260
1261=head2 $tar->clear
1262
1263C<clear> clears the current in-memory archive. This effectively gives
1264you a 'blank' object, ready to be filled again. Note that C<clear>
1265only has effect on the object, not the underlying tarfile.
1266
1267=cut
1268
1269sub clear {
1270    my $self = shift or return;
1271
1272    $self->_data( [] );
1273    $self->_file( '' );
1274
1275    return 1;
1276}
1277
1278
1279=head2 $tar->write ( [$file, $compressed, $prefix] )
1280
1281Write the in-memory archive to disk.  The first argument can either
1282be the name of a file or a reference to an already open filehandle (a
1283GLOB reference).
1284
1285The second argument is used to indicate compression. You can
1286compress using C<gzip>, C<bzip2> or C<xz>. If you pass a digit, it's assumed
1287to be the C<gzip> compression level (between 1 and 9), but the use of
1288constants is preferred:
1289
1290  # write a gzip compressed file
1291  $tar->write( 'out.tgz', COMPRESS_GZIP );
1292
1293  # write a bzip compressed file
1294  $tar->write( 'out.tbz', COMPRESS_BZIP );
1295
1296  # write a xz compressed file
1297  $tar->write( 'out.txz', COMPRESS_XZ );
1298
1299Note that when you pass in a filehandle, the compression argument
1300is ignored, as all files are printed verbatim to your filehandle.
1301If you wish to enable compression with filehandles, use an
1302C<IO::Zlib>, C<IO::Compress::Bzip2> or C<IO::Compress::Xz> filehandle instead.
1303
1304The third argument is an optional prefix. All files will be tucked
1305away in the directory you specify as prefix. So if you have files
1306'a' and 'b' in your archive, and you specify 'foo' as prefix, they
1307will be written to the archive as 'foo/a' and 'foo/b'.
1308
1309If no arguments are given, C<write> returns the entire formatted
1310archive as a string, which could be useful if you'd like to stuff the
1311archive into a socket or a pipe to gzip or something.
1312
1313
1314=cut
1315
1316sub write {
1317    my $self        = shift;
1318    my $file        = shift; $file = '' unless defined $file;
1319    my $gzip        = shift || 0;
1320    my $ext_prefix  = shift; $ext_prefix = '' unless defined $ext_prefix;
1321    my $dummy       = '';
1322
1323    ### only need a handle if we have a file to print to ###
1324    my $handle = length($file)
1325                    ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) )
1326                        or return )
1327                    : $HAS_PERLIO    ? do { open my $h, '>', \$dummy; $h }
1328                    : $HAS_IO_STRING ? IO::String->new
1329                    : __PACKAGE__->no_string_support();
1330
1331    ### Addresses: #41798: Nonempty $\ when writing a TAR file produces a
1332    ### corrupt TAR file. Must clear out $\ to make sure no garbage is
1333    ### printed to the archive
1334    local $\;
1335
1336    for my $entry ( @{$self->_data} ) {
1337        ### entries to be written to the tarfile ###
1338        my @write_me;
1339
1340        ### only now will we change the object to reflect the current state
1341        ### of the name and prefix fields -- this needs to be limited to
1342        ### write() only!
1343        my $clone = $entry->clone;
1344
1345
1346        ### so, if you don't want use to use the prefix, we'll stuff
1347        ### everything in the name field instead
1348        if( $DO_NOT_USE_PREFIX ) {
1349
1350            ### you might have an extended prefix, if so, set it in the clone
1351            ### XXX is ::Unix right?
1352            $clone->name( length $ext_prefix
1353                            ? File::Spec::Unix->catdir( $ext_prefix,
1354                                                        $clone->full_path)
1355                            : $clone->full_path );
1356            $clone->prefix( '' );
1357
1358        ### otherwise, we'll have to set it properly -- prefix part in the
1359        ### prefix and name part in the name field.
1360        } else {
1361
1362            ### split them here, not before!
1363            my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path );
1364
1365            ### you might have an extended prefix, if so, set it in the clone
1366            ### XXX is ::Unix right?
1367            $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix )
1368                if length $ext_prefix;
1369
1370            $clone->prefix( $prefix );
1371            $clone->name( $name );
1372        }
1373
1374        ### names are too long, and will get truncated if we don't add a
1375        ### '@LongLink' file...
1376        my $make_longlink = (   length($clone->name)    > NAME_LENGTH or
1377                                length($clone->prefix)  > PREFIX_LENGTH
1378                            ) || 0;
1379
1380        ### perhaps we need to make a longlink file?
1381        if( $make_longlink ) {
1382            my $longlink = Archive::Tar::File->new(
1383                            data => LONGLINK_NAME,
1384                            $clone->full_path,
1385                            { type => LONGLINK }
1386                        );
1387
1388            unless( $longlink ) {
1389                $self->_error(  qq[Could not create 'LongLink' entry for ] .
1390                                qq[oversize file '] . $clone->full_path ."'" );
1391                return;
1392            };
1393
1394            push @write_me, $longlink;
1395        }
1396
1397        push @write_me, $clone;
1398
1399        ### write the one, optionally 2 a::t::file objects to the handle
1400        for my $clone (@write_me) {
1401
1402            ### if the file is a symlink, there are 2 options:
1403            ### either we leave the symlink intact, but then we don't write any
1404            ### data OR we follow the symlink, which means we actually make a
1405            ### copy. if we do the latter, we have to change the TYPE of the
1406            ### clone to 'FILE'
1407            my $link_ok =  $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK;
1408            my $data_ok = !$clone->is_symlink && $clone->has_content;
1409
1410            ### downgrade to a 'normal' file if it's a symlink we're going to
1411            ### treat as a regular file
1412            $clone->_downgrade_to_plainfile if $link_ok;
1413
1414            ### get the header for this block
1415            my $header = $self->_format_tar_entry( $clone );
1416            unless( $header ) {
1417                $self->_error(q[Could not format header for: ] .
1418                                    $clone->full_path );
1419                return;
1420            }
1421
1422            unless( print $handle $header ) {
1423                $self->_error(q[Could not write header for: ] .
1424                                    $clone->full_path);
1425                return;
1426            }
1427
1428            if( $link_ok or $data_ok ) {
1429                unless( print $handle $clone->data ) {
1430                    $self->_error(q[Could not write data for: ] .
1431                                    $clone->full_path);
1432                    return;
1433                }
1434
1435                ### pad the end of the clone if required ###
1436                print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK
1437            }
1438
1439        } ### done writing these entries
1440    }
1441
1442    ### write the end markers ###
1443    print $handle TAR_END x 2 or
1444            return $self->_error( qq[Could not write tar end markers] );
1445
1446    ### did you want it written to a file, or returned as a string? ###
1447    my $rv =  length($file) ? 1
1448                        : $HAS_PERLIO ? $dummy
1449                        : do { seek $handle, 0, 0; local $/; <$handle> };
1450
1451    ### make sure to close the handle if we created it
1452    if ( $file ne $handle ) {
1453	unless( close $handle ) {
1454	    $self->_error( qq[Could not write tar] );
1455	    return;
1456	}
1457    }
1458
1459    return $rv;
1460}
1461
1462sub _format_tar_entry {
1463    my $self        = shift;
1464    my $entry       = shift or return;
1465    my $ext_prefix  = shift; $ext_prefix = '' unless defined $ext_prefix;
1466    my $no_prefix   = shift || 0;
1467
1468    my $file    = $entry->name;
1469    my $prefix  = $entry->prefix; $prefix = '' unless defined $prefix;
1470
1471    ### remove the prefix from the file name
1472    ### not sure if this is still needed --kane
1473    ### no it's not -- Archive::Tar::File->_new_from_file will take care of
1474    ### this for us. Even worse, this would break if we tried to add a file
1475    ### like x/x.
1476    #if( length $prefix ) {
1477    #    $file =~ s/^$match//;
1478    #}
1479
1480    $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix)
1481                if length $ext_prefix;
1482
1483    ### not sure why this is... ###
1484    my $l = PREFIX_LENGTH; # is ambiguous otherwise...
1485    substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH;
1486
1487    my $f1 = "%06o"; my $f2  = $ZERO_PAD_NUMBERS ? "%011o" : "%11o";
1488
1489    ### this might be optimizable with a 'changed' flag in the file objects ###
1490    my $tar = pack (
1491                PACK,
1492                $file,
1493
1494                (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]),
1495                (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]),
1496
1497                "",  # checksum field - space padded a bit down
1498
1499                (map { $entry->$_() }                 qw[type linkname magic]),
1500
1501                $entry->version || TAR_VERSION,
1502
1503                (map { $entry->$_() }                 qw[uname gname]),
1504                (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]),
1505
1506                ($no_prefix ? '' : $prefix)
1507    );
1508
1509    ### add the checksum ###
1510    my $checksum_fmt = $ZERO_PAD_NUMBERS ? "%06o\0" : "%06o\0";
1511    substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar));
1512
1513    return $tar;
1514}
1515
1516=head2 $tar->add_files( @filenamelist )
1517
1518Takes a list of filenames and adds them to the in-memory archive.
1519
1520The path to the file is automatically converted to a Unix like
1521equivalent for use in the archive, and, if on MacOS, the file's
1522modification time is converted from the MacOS epoch to the Unix epoch.
1523So tar archives created on MacOS with B<Archive::Tar> can be read
1524both with I<tar> on Unix and applications like I<suntar> or
1525I<Stuffit Expander> on MacOS.
1526
1527Be aware that the file's type/creator and resource fork will be lost,
1528which is usually what you want in cross-platform archives.
1529
1530Instead of a filename, you can also pass it an existing C<Archive::Tar::File>
1531object from, for example, another archive. The object will be clone, and
1532effectively be a copy of the original, not an alias.
1533
1534Returns a list of C<Archive::Tar::File> objects that were just added.
1535
1536=cut
1537
1538sub add_files {
1539    my $self    = shift;
1540    my @files   = @_ or return;
1541
1542    my @rv;
1543    for my $file ( @files ) {
1544
1545        ### you passed an Archive::Tar::File object
1546        ### clone it so we don't accidentally have a reference to
1547        ### an object from another archive
1548        if( UNIVERSAL::isa( $file,'Archive::Tar::File' ) ) {
1549            push @rv, $file->clone;
1550            next;
1551        }
1552
1553        eval {
1554            if( utf8::is_utf8( $file )) {
1555              utf8::encode( $file );
1556            }
1557        };
1558
1559        unless( -e $file || -l $file ) {
1560            $self->_error( qq[No such file: '$file'] );
1561            next;
1562        }
1563
1564        my $obj = Archive::Tar::File->new( file => $file );
1565        unless( $obj ) {
1566            $self->_error( qq[Unable to add file: '$file'] );
1567            next;
1568        }
1569
1570        push @rv, $obj;
1571    }
1572
1573    push @{$self->{_data}}, @rv;
1574
1575    return @rv;
1576}
1577
1578=head2 $tar->add_data ( $filename, $data, [$opthashref] )
1579
1580Takes a filename, a scalar full of data and optionally a reference to
1581a hash with specific options.
1582
1583Will add a file to the in-memory archive, with name C<$filename> and
1584content C<$data>. Specific properties can be set using C<$opthashref>.
1585The following list of properties is supported: name, size, mtime
1586(last modified date), mode, uid, gid, linkname, uname, gname,
1587devmajor, devminor, prefix, type.  (On MacOS, the file's path and
1588modification times are converted to Unix equivalents.)
1589
1590Valid values for the file type are the following constants defined by
1591Archive::Tar::Constant:
1592
1593=over 4
1594
1595=item FILE
1596
1597Regular file.
1598
1599=item HARDLINK
1600
1601=item SYMLINK
1602
1603Hard and symbolic ("soft") links; linkname should specify target.
1604
1605=item CHARDEV
1606
1607=item BLOCKDEV
1608
1609Character and block devices. devmajor and devminor should specify the major
1610and minor device numbers.
1611
1612=item DIR
1613
1614Directory.
1615
1616=item FIFO
1617
1618FIFO (named pipe).
1619
1620=item SOCKET
1621
1622Socket.
1623
1624=back
1625
1626Returns the C<Archive::Tar::File> object that was just added, or
1627C<undef> on failure.
1628
1629=cut
1630
1631sub add_data {
1632    my $self    = shift;
1633    my ($file, $data, $opt) = @_;
1634
1635    my $obj = Archive::Tar::File->new( data => $file, $data, $opt );
1636    unless( $obj ) {
1637        $self->_error( qq[Unable to add file: '$file'] );
1638        return;
1639    }
1640
1641    push @{$self->{_data}}, $obj;
1642
1643    return $obj;
1644}
1645
1646=head2 $tar->error( [$BOOL] )
1647
1648Returns the current error string (usually, the last error reported).
1649If a true value was specified, it will give the C<Carp::longmess>
1650equivalent of the error, in effect giving you a stacktrace.
1651
1652For backwards compatibility, this error is also available as
1653C<$Archive::Tar::error> although it is much recommended you use the
1654method call instead.
1655
1656=cut
1657
1658{
1659    $error = '';
1660    my $longmess;
1661
1662    sub _error {
1663        my $self    = shift;
1664        my $msg     = $error = shift;
1665        $longmess   = Carp::longmess($error);
1666        if (ref $self) {
1667            $self->{_error} = $error;
1668            $self->{_longmess} = $longmess;
1669        }
1670
1671        ### set Archive::Tar::WARN to 0 to disable printing
1672        ### of errors
1673        if( $WARN ) {
1674            carp $DEBUG ? $longmess : $msg;
1675        }
1676
1677        return;
1678    }
1679
1680    sub error {
1681        my $self = shift;
1682        if (ref $self) {
1683            return shift() ? $self->{_longmess} : $self->{_error};
1684        } else {
1685            return shift() ? $longmess : $error;
1686        }
1687    }
1688}
1689
1690=head2 $tar->setcwd( $cwd );
1691
1692C<Archive::Tar> needs to know the current directory, and it will run
1693C<Cwd::cwd()> I<every> time it extracts a I<relative> entry from the
1694tarfile and saves it in the file system. (As of version 1.30, however,
1695C<Archive::Tar> will use the speed optimization described below
1696automatically, so it's only relevant if you're using C<extract_file()>).
1697
1698Since C<Archive::Tar> doesn't change the current directory internally
1699while it is extracting the items in a tarball, all calls to C<Cwd::cwd()>
1700can be avoided if we can guarantee that the current directory doesn't
1701get changed externally.
1702
1703To use this performance boost, set the current directory via
1704
1705    use Cwd;
1706    $tar->setcwd( cwd() );
1707
1708once before calling a function like C<extract_file> and
1709C<Archive::Tar> will use the current directory setting from then on
1710and won't call C<Cwd::cwd()> internally.
1711
1712To switch back to the default behaviour, use
1713
1714    $tar->setcwd( undef );
1715
1716and C<Archive::Tar> will call C<Cwd::cwd()> internally again.
1717
1718If you're using C<Archive::Tar>'s C<extract()> method, C<setcwd()> will
1719be called for you.
1720
1721=cut
1722
1723sub setcwd {
1724    my $self     = shift;
1725    my $cwd      = shift;
1726
1727    $self->{cwd} = $cwd;
1728}
1729
1730=head1 Class Methods
1731
1732=head2 Archive::Tar->create_archive($file, $compressed, @filelist)
1733
1734Creates a tar file from the list of files provided.  The first
1735argument can either be the name of the tar file to create or a
1736reference to an open file handle (e.g. a GLOB reference).
1737
1738The second argument is used to indicate compression. You can
1739compress using C<gzip>, C<bzip2> or C<xz>. If you pass a digit, it's assumed
1740to be the C<gzip> compression level (between 1 and 9), but the use of
1741constants is preferred:
1742
1743  # write a gzip compressed file
1744  Archive::Tar->create_archive( 'out.tgz', COMPRESS_GZIP, @filelist );
1745
1746  # write a bzip compressed file
1747  Archive::Tar->create_archive( 'out.tbz', COMPRESS_BZIP, @filelist );
1748
1749  # write a xz compressed file
1750  Archive::Tar->create_archive( 'out.txz', COMPRESS_XZ, @filelist );
1751
1752Note that when you pass in a filehandle, the compression argument
1753is ignored, as all files are printed verbatim to your filehandle.
1754If you wish to enable compression with filehandles, use an
1755C<IO::Zlib>, C<IO::Compress::Bzip2> or C<IO::Compress::Xz> filehandle instead.
1756
1757The remaining arguments list the files to be included in the tar file.
1758These files must all exist. Any files which don't exist or can't be
1759read are silently ignored.
1760
1761If the archive creation fails for any reason, C<create_archive> will
1762return false. Please use the C<error> method to find the cause of the
1763failure.
1764
1765Note that this method does not write C<on the fly> as it were; it
1766still reads all the files into memory before writing out the archive.
1767Consult the FAQ below if this is a problem.
1768
1769=cut
1770
1771sub create_archive {
1772    my $class = shift;
1773
1774    my $file    = shift; return unless defined $file;
1775    my $gzip    = shift || 0;
1776    my @files   = @_;
1777
1778    unless( @files ) {
1779        return $class->_error( qq[Cowardly refusing to create empty archive!] );
1780    }
1781
1782    my $tar = $class->new;
1783    $tar->add_files( @files );
1784    return $tar->write( $file, $gzip );
1785}
1786
1787=head2 Archive::Tar->iter( $filename, [ $compressed, {opt => $val} ] )
1788
1789Returns an iterator function that reads the tar file without loading
1790it all in memory.  Each time the function is called it will return the
1791next file in the tarball. The files are returned as
1792C<Archive::Tar::File> objects. The iterator function returns the
1793empty list once it has exhausted the files contained.
1794
1795The second argument can be a hash reference with options, which are
1796identical to the arguments passed to C<read()>.
1797
1798Example usage:
1799
1800    my $next = Archive::Tar->iter( "example.tar.gz", 1, {filter => qr/\.pm$/} );
1801
1802    while( my $f = $next->() ) {
1803        print $f->name, "\n";
1804
1805        $f->extract or warn "Extraction failed";
1806
1807        # ....
1808    }
1809
1810=cut
1811
1812
1813sub iter {
1814    my $class       = shift;
1815    my $filename    = shift;
1816    return unless defined $filename;
1817    my $compressed  = shift || 0;
1818    my $opts        = shift || {};
1819
1820    ### get a handle to read from.
1821    my $handle = $class->_get_handle(
1822        $filename,
1823        $compressed,
1824        READ_ONLY->( ZLIB )
1825    ) or return;
1826
1827    my @data;
1828		my $CONSTRUCT_ARGS = [ $filename, $compressed, $opts ];
1829    return sub {
1830        return shift(@data)     if @data;       # more than one file returned?
1831        return                  unless $handle; # handle exhausted?
1832
1833        ### read data, should only return file
1834        my $tarfile = $class->_read_tar($handle, { %$opts, limit => 1 });
1835        @data = @$tarfile if ref $tarfile && ref $tarfile eq 'ARRAY';
1836				if($Archive::Tar::RESOLVE_SYMLINK!~/none/){
1837					foreach(@data){
1838						#may refine this heuristic for ON_UNIX?
1839						if($_->linkname){
1840							#is there a better slot to store/share it ?
1841							$_->{'_archive'} = $CONSTRUCT_ARGS;
1842						}
1843					}
1844				}
1845
1846        ### return one piece of data
1847        return shift(@data)     if @data;
1848
1849        ### data is exhausted, free the filehandle
1850        undef $handle;
1851				if(@$CONSTRUCT_ARGS == 4){
1852					#free archive in memory
1853					undef $CONSTRUCT_ARGS->[-1];
1854				}
1855        return;
1856    };
1857}
1858
1859=head2 Archive::Tar->list_archive($file, $compressed, [\@properties])
1860
1861Returns a list of the names of all the files in the archive.  The
1862first argument can either be the name of the tar file to list or a
1863reference to an open file handle (e.g. a GLOB reference).
1864
1865If C<list_archive()> is passed an array reference as its third
1866argument it returns a list of hash references containing the requested
1867properties of each file.  The following list of properties is
1868supported: full_path, name, size, mtime (last modified date), mode,
1869uid, gid, linkname, uname, gname, devmajor, devminor, prefix, type.
1870
1871See C<Archive::Tar::File> for details about supported properties.
1872
1873Passing an array reference containing only one element, 'name', is
1874special cased to return a list of names rather than a list of hash
1875references.
1876
1877=cut
1878
1879sub list_archive {
1880    my $class   = shift;
1881    my $file    = shift; return unless defined $file;
1882    my $gzip    = shift || 0;
1883
1884    my $tar = $class->new($file, $gzip);
1885    return unless $tar;
1886
1887    return $tar->list_files( @_ );
1888}
1889
1890=head2 Archive::Tar->extract_archive($file, $compressed)
1891
1892Extracts the contents of the tar file.  The first argument can either
1893be the name of the tar file to create or a reference to an open file
1894handle (e.g. a GLOB reference).  All relative paths in the tar file will
1895be created underneath the current working directory.
1896
1897C<extract_archive> will return a list of files it extracted.
1898If the archive extraction fails for any reason, C<extract_archive>
1899will return false.  Please use the C<error> method to find the cause
1900of the failure.
1901
1902=cut
1903
1904sub extract_archive {
1905    my $class   = shift;
1906    my $file    = shift; return unless defined $file;
1907    my $gzip    = shift || 0;
1908
1909    my $tar = $class->new( ) or return;
1910
1911    return $tar->read( $file, $gzip, { extract => 1 } );
1912}
1913
1914=head2 $bool = Archive::Tar->has_io_string
1915
1916Returns true if we currently have C<IO::String> support loaded.
1917
1918Either C<IO::String> or C<perlio> support is needed to support writing
1919stringified archives. Currently, C<perlio> is the preferred method, if
1920available.
1921
1922See the C<GLOBAL VARIABLES> section to see how to change this preference.
1923
1924=cut
1925
1926sub has_io_string { return $HAS_IO_STRING; }
1927
1928=head2 $bool = Archive::Tar->has_perlio
1929
1930Returns true if we currently have C<perlio> support loaded.
1931
1932This requires C<perl-5.8> or higher, compiled with C<perlio>
1933
1934Either C<IO::String> or C<perlio> support is needed to support writing
1935stringified archives. Currently, C<perlio> is the preferred method, if
1936available.
1937
1938See the C<GLOBAL VARIABLES> section to see how to change this preference.
1939
1940=cut
1941
1942sub has_perlio { return $HAS_PERLIO; }
1943
1944=head2 $bool = Archive::Tar->has_zlib_support
1945
1946Returns true if C<Archive::Tar> can extract C<zlib> compressed archives
1947
1948=cut
1949
1950sub has_zlib_support { return ZLIB }
1951
1952=head2 $bool = Archive::Tar->has_bzip2_support
1953
1954Returns true if C<Archive::Tar> can extract C<bzip2> compressed archives
1955
1956=cut
1957
1958sub has_bzip2_support { return BZIP }
1959
1960=head2 $bool = Archive::Tar->has_xz_support
1961
1962Returns true if C<Archive::Tar> can extract C<xz> compressed archives
1963
1964=cut
1965
1966sub has_xz_support { return XZ }
1967
1968=head2 Archive::Tar->can_handle_compressed_files
1969
1970A simple checking routine, which will return true if C<Archive::Tar>
1971is able to uncompress compressed archives on the fly with C<IO::Zlib>,
1972C<IO::Compress::Bzip2> and C<IO::Compress::Xz> or false if not both are installed.
1973
1974You can use this as a shortcut to determine whether C<Archive::Tar>
1975will do what you think before passing compressed archives to its
1976C<read> method.
1977
1978=cut
1979
1980sub can_handle_compressed_files { return ZLIB && BZIP ? 1 : 0 }
1981
1982sub no_string_support {
1983    croak("You have to install IO::String to support writing archives to strings");
1984}
1985
1986sub _symlinks_resolver{
1987  my ($src, $trg) = @_;
1988  my @src = split /[\/\\]/, $src;
1989  my @trg = split /[\/\\]/, $trg;
1990  pop @src; #strip out current object name
1991  if(@trg and $trg[0] eq ''){
1992    shift @trg;
1993    #restart path from scratch
1994    @src = ( );
1995  }
1996  foreach my $part ( @trg ){
1997    next if $part eq '.'; #ignore current
1998    if($part eq '..'){
1999      #got to parent
2000      pop @src;
2001    }
2002    else{
2003      #append it
2004      push @src, $part;
2005    }
2006  }
2007  my $path = join('/', @src);
2008  warn "_symlinks_resolver('$src','$trg') = $path" if $DEBUG;
2009  return $path;
2010}
2011
20121;
2013
2014__END__
2015
2016=head1 GLOBAL VARIABLES
2017
2018=head2 $Archive::Tar::FOLLOW_SYMLINK
2019
2020Set this variable to C<1> to make C<Archive::Tar> effectively make a
2021copy of the file when extracting. Default is C<0>, which
2022means the symlink stays intact. Of course, you will have to pack the
2023file linked to as well.
2024
2025This option is checked when you write out the tarfile using C<write>
2026or C<create_archive>.
2027
2028This works just like C</bin/tar>'s C<-h> option.
2029
2030=head2 $Archive::Tar::CHOWN
2031
2032By default, C<Archive::Tar> will try to C<chown> your files if it is
2033able to. In some cases, this may not be desired. In that case, set
2034this variable to C<0> to disable C<chown>-ing, even if it were
2035possible.
2036
2037The default is C<1>.
2038
2039=head2 $Archive::Tar::CHMOD
2040
2041By default, C<Archive::Tar> will try to C<chmod> your files to
2042whatever mode was specified for the particular file in the archive.
2043In some cases, this may not be desired. In that case, set this
2044variable to C<0> to disable C<chmod>-ing.
2045
2046The default is C<1>.
2047
2048=head2 $Archive::Tar::SAME_PERMISSIONS
2049
2050When, C<$Archive::Tar::CHMOD> is enabled, this setting controls whether
2051the permissions on files from the archive are used without modification
2052of if they are filtered by removing any setid bits and applying the
2053current umask.
2054
2055The default is C<1> for the root user and C<0> for normal users.
2056
2057=head2 $Archive::Tar::DO_NOT_USE_PREFIX
2058
2059By default, C<Archive::Tar> will try to put paths that are over
2060100 characters in the C<prefix> field of your tar header, as
2061defined per POSIX-standard. However, some (older) tar programs
2062do not implement this spec. To retain compatibility with these older
2063or non-POSIX compliant versions, you can set the C<$DO_NOT_USE_PREFIX>
2064variable to a true value, and C<Archive::Tar> will use an alternate
2065way of dealing with paths over 100 characters by using the
2066C<GNU Extended Header> feature.
2067
2068Note that clients who do not support the C<GNU Extended Header>
2069feature will not be able to read these archives. Such clients include
2070tars on C<Solaris>, C<Irix> and C<AIX>.
2071
2072The default is C<0>.
2073
2074=head2 $Archive::Tar::DEBUG
2075
2076Set this variable to C<1> to always get the C<Carp::longmess> output
2077of the warnings, instead of the regular C<carp>. This is the same
2078message you would get by doing:
2079
2080    $tar->error(1);
2081
2082Defaults to C<0>.
2083
2084=head2 $Archive::Tar::WARN
2085
2086Set this variable to C<0> if you do not want any warnings printed.
2087Personally I recommend against doing this, but people asked for the
2088option. Also, be advised that this is of course not threadsafe.
2089
2090Defaults to C<1>.
2091
2092=head2 $Archive::Tar::error
2093
2094Holds the last reported error. Kept for historical reasons, but its
2095use is very much discouraged. Use the C<error()> method instead:
2096
2097    warn $tar->error unless $tar->extract;
2098
2099Note that in older versions of this module, the C<error()> method
2100would return an effectively global value even when called an instance
2101method as above. This has since been fixed, and multiple instances of
2102C<Archive::Tar> now have separate error strings.
2103
2104=head2 $Archive::Tar::INSECURE_EXTRACT_MODE
2105
2106This variable indicates whether C<Archive::Tar> should allow
2107files to be extracted outside their current working directory.
2108
2109Allowing this could have security implications, as a malicious
2110tar archive could alter or replace any file the extracting user
2111has permissions to. Therefor, the default is to not allow
2112insecure extractions.
2113
2114If you trust the archive, or have other reasons to allow the
2115archive to write files outside your current working directory,
2116set this variable to C<true>.
2117
2118Note that this is a backwards incompatible change from version
2119C<1.36> and before.
2120
2121=head2 $Archive::Tar::HAS_PERLIO
2122
2123This variable holds a boolean indicating if we currently have
2124C<perlio> support loaded. This will be enabled for any perl
2125greater than C<5.8> compiled with C<perlio>.
2126
2127If you feel strongly about disabling it, set this variable to
2128C<false>. Note that you will then need C<IO::String> installed
2129to support writing stringified archives.
2130
2131Don't change this variable unless you B<really> know what you're
2132doing.
2133
2134=head2 $Archive::Tar::HAS_IO_STRING
2135
2136This variable holds a boolean indicating if we currently have
2137C<IO::String> support loaded. This will be enabled for any perl
2138that has a loadable C<IO::String> module.
2139
2140If you feel strongly about disabling it, set this variable to
2141C<false>. Note that you will then need C<perlio> support from
2142your perl to be able to  write stringified archives.
2143
2144Don't change this variable unless you B<really> know what you're
2145doing.
2146
2147=head2 $Archive::Tar::ZERO_PAD_NUMBERS
2148
2149This variable holds a boolean indicating if we will create
2150zero padded numbers for C<size>, C<mtime> and C<checksum>.
2151The default is C<0>, indicating that we will create space padded
2152numbers. Added for compatibility with C<busybox> implementations.
2153
2154=head2 Tuning the way RESOLVE_SYMLINK will works
2155
2156You can tune the behaviour by setting the $Archive::Tar::RESOLVE_SYMLINK variable,
2157or $ENV{PERL5_AT_RESOLVE_SYMLINK} before loading the module Archive::Tar.
2158
2159Values can be one of the following:
2160
2161=over 4
2162
2163=item none
2164
2165Disable this mechanism and failed as it was in previous version (<1.88)
2166
2167=item speed (default)
2168
2169If you prefer speed
2170this will read again the whole archive using read() so all entries
2171will be available
2172
2173=item memory
2174
2175If you prefer memory
2176
2177=back
2178
2179Limitation: It won't work for terminal, pipe or sockets or every non seekable
2180source.
2181
2182=head2 $Archive::Tar::EXTRACT_BLOCK_SIZE
2183
2184This variable holds an integer with the block size that should be used when
2185writing files during extraction. It defaults to 1 GiB. Please note that this
2186cannot be arbitrarily large since some operating systems limit the number of
2187bytes that can be written in one call to C<write(2)>, so if this is too large,
2188extraction may fail with an error.
2189
2190=cut
2191
2192=head1 FAQ
2193
2194=over 4
2195
2196=item What's the minimum perl version required to run Archive::Tar?
2197
2198You will need perl version 5.005_03 or newer.
2199
2200=item Isn't Archive::Tar slow?
2201
2202Yes it is. It's pure perl, so it's a lot slower then your C</bin/tar>
2203However, it's very portable. If speed is an issue, consider using
2204C</bin/tar> instead.
2205
2206=item Isn't Archive::Tar heavier on memory than /bin/tar?
2207
2208Yes it is, see previous answer. Since C<Compress::Zlib> and therefore
2209C<IO::Zlib> doesn't support C<seek> on their filehandles, there is little
2210choice but to read the archive into memory.
2211This is ok if you want to do in-memory manipulation of the archive.
2212
2213If you just want to extract, use the C<extract_archive> class method
2214instead. It will optimize and write to disk immediately.
2215
2216Another option is to use the C<iter> class method to iterate over
2217the files in the tarball without reading them all in memory at once.
2218
2219=item Can you lazy-load data instead?
2220
2221In some cases, yes. You can use the C<iter> class method to iterate
2222over the files in the tarball without reading them all in memory at once.
2223
2224=item How much memory will an X kb tar file need?
2225
2226Probably more than X kb, since it will all be read into memory. If
2227this is a problem, and you don't need to do in memory manipulation
2228of the archive, consider using the C<iter> class method, or C</bin/tar>
2229instead.
2230
2231=item What do you do with unsupported filetypes in an archive?
2232
2233C<Unix> has a few filetypes that aren't supported on other platforms,
2234like C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just
2235try to make a copy of the original file, rather than throwing an error.
2236
2237This does require you to read the entire archive in to memory first,
2238since otherwise we wouldn't know what data to fill the copy with.
2239(This means that you cannot use the class methods, including C<iter>
2240on archives that have incompatible filetypes and still expect things
2241to work).
2242
2243For other filetypes, like C<chardevs> and C<blockdevs> we'll warn that
2244the extraction of this particular item didn't work.
2245
2246=item I'm using WinZip, or some other non-POSIX client, and files are not being extracted properly!
2247
2248By default, C<Archive::Tar> is in a completely POSIX-compatible
2249mode, which uses the POSIX-specification of C<tar> to store files.
2250For paths greater than 100 characters, this is done using the
2251C<POSIX header prefix>. Non-POSIX-compatible clients may not support
2252this part of the specification, and may only support the C<GNU Extended
2253Header> functionality. To facilitate those clients, you can set the
2254C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. See the
2255C<GLOBAL VARIABLES> section for details on this variable.
2256
2257Note that GNU tar earlier than version 1.14 does not cope well with
2258the C<POSIX header prefix>. If you use such a version, consider setting
2259the C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>.
2260
2261=item How do I extract only files that have property X from an archive?
2262
2263Sometimes, you might not wish to extract a complete archive, just
2264the files that are relevant to you, based on some criteria.
2265
2266You can do this by filtering a list of C<Archive::Tar::File> objects
2267based on your criteria. For example, to extract only files that have
2268the string C<foo> in their title, you would use:
2269
2270    $tar->extract(
2271        grep { $_->full_path =~ /foo/ } $tar->get_files
2272    );
2273
2274This way, you can filter on any attribute of the files in the archive.
2275Consult the C<Archive::Tar::File> documentation on how to use these
2276objects.
2277
2278=item How do I access .tar.Z files?
2279
2280The C<Archive::Tar> module can optionally use C<Compress::Zlib> (via
2281the C<IO::Zlib> module) to access tar files that have been compressed
2282with C<gzip>. Unfortunately tar files compressed with the Unix C<compress>
2283utility cannot be read by C<Compress::Zlib> and so cannot be directly
2284accesses by C<Archive::Tar>.
2285
2286If the C<uncompress> or C<gunzip> programs are available, you can use
2287one of these workarounds to read C<.tar.Z> files from C<Archive::Tar>
2288
2289Firstly with C<uncompress>
2290
2291    use Archive::Tar;
2292
2293    open F, "uncompress -c $filename |";
2294    my $tar = Archive::Tar->new(*F);
2295    ...
2296
2297and this with C<gunzip>
2298
2299    use Archive::Tar;
2300
2301    open F, "gunzip -c $filename |";
2302    my $tar = Archive::Tar->new(*F);
2303    ...
2304
2305Similarly, if the C<compress> program is available, you can use this to
2306write a C<.tar.Z> file
2307
2308    use Archive::Tar;
2309    use IO::File;
2310
2311    my $fh = IO::File->new( "| compress -c >$filename" );
2312    my $tar = Archive::Tar->new();
2313    ...
2314    $tar->write($fh);
2315    $fh->close ;
2316
2317=item How do I handle Unicode strings?
2318
2319C<Archive::Tar> uses byte semantics for any files it reads from or writes
2320to disk. This is not a problem if you only deal with files and never
2321look at their content or work solely with byte strings. But if you use
2322Unicode strings with character semantics, some additional steps need
2323to be taken.
2324
2325For example, if you add a Unicode string like
2326
2327    # Problem
2328    $tar->add_data('file.txt', "Euro: \x{20AC}");
2329
2330then there will be a problem later when the tarfile gets written out
2331to disk via C<< $tar->write() >>:
2332
2333    Wide character in print at .../Archive/Tar.pm line 1014.
2334
2335The data was added as a Unicode string and when writing it out to disk,
2336the C<:utf8> line discipline wasn't set by C<Archive::Tar>, so Perl
2337tried to convert the string to ISO-8859 and failed. The written file
2338now contains garbage.
2339
2340For this reason, Unicode strings need to be converted to UTF-8-encoded
2341bytestrings before they are handed off to C<add_data()>:
2342
2343    use Encode;
2344    my $data = "Accented character: \x{20AC}";
2345    $data = encode('utf8', $data);
2346
2347    $tar->add_data('file.txt', $data);
2348
2349A opposite problem occurs if you extract a UTF8-encoded file from a
2350tarball. Using C<get_content()> on the C<Archive::Tar::File> object
2351will return its content as a bytestring, not as a Unicode string.
2352
2353If you want it to be a Unicode string (because you want character
2354semantics with operations like regular expression matching), you need
2355to decode the UTF8-encoded content and have Perl convert it into
2356a Unicode string:
2357
2358    use Encode;
2359    my $data = $tar->get_content();
2360
2361    # Make it a Unicode string
2362    $data = decode('utf8', $data);
2363
2364There is no easy way to provide this functionality in C<Archive::Tar>,
2365because a tarball can contain many files, and each of which could be
2366encoded in a different way.
2367
2368=back
2369
2370=head1 CAVEATS
2371
2372The AIX tar does not fill all unused space in the tar archive with 0x00.
2373This sometimes leads to warning messages from C<Archive::Tar>.
2374
2375  Invalid header block at offset nnn
2376
2377A fix for that problem is scheduled to be released in the following levels
2378of AIX, all of which should be coming out in the 4th quarter of 2009:
2379
2380 AIX 5.3 TL7 SP10
2381 AIX 5.3 TL8 SP8
2382 AIX 5.3 TL9 SP5
2383 AIX 5.3 TL10 SP2
2384
2385 AIX 6.1 TL0 SP11
2386 AIX 6.1 TL1 SP7
2387 AIX 6.1 TL2 SP6
2388 AIX 6.1 TL3 SP3
2389
2390The IBM APAR number for this problem is IZ50240 (Reported component ID:
23915765G0300 / AIX 5.3). It is possible to get an ifix for that problem.
2392If you need an ifix please contact your local IBM AIX support.
2393
2394=head1 TODO
2395
2396=over 4
2397
2398=item Check if passed in handles are open for read/write
2399
2400Currently I don't know of any portable pure perl way to do this.
2401Suggestions welcome.
2402
2403=item Allow archives to be passed in as string
2404
2405Currently, we only allow opened filehandles or filenames, but
2406not strings. The internals would need some reworking to facilitate
2407stringified archives.
2408
2409=item Facilitate processing an opened filehandle of a compressed archive
2410
2411Currently, we only support this if the filehandle is an IO::Zlib object.
2412Environments, like apache, will present you with an opened filehandle
2413to an uploaded file, which might be a compressed archive.
2414
2415=back
2416
2417=head1 SEE ALSO
2418
2419=over 4
2420
2421=item The GNU tar specification
2422
2423L<https://www.gnu.org/software/tar/manual/tar.html>
2424
2425=item The PAX format specification
2426
2427The specification which tar derives from; L<https://pubs.opengroup.org/onlinepubs/007904975/utilities/pax.html>
2428
2429=back
2430
2431=head1 AUTHOR
2432
2433This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
2434
2435Please reports bugs to E<lt>bug-archive-tar@rt.cpan.orgE<gt>.
2436
2437=head1 ACKNOWLEDGEMENTS
2438
2439Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney, Gisle Aas,
2440Rainer Tammer and especially Andrew Savige for their help and suggestions.
2441
2442=head1 COPYRIGHT
2443
2444This module is copyright (c) 2002 - 2009 Jos Boumans
2445E<lt>kane@cpan.orgE<gt>. All rights reserved.
2446
2447This library is free software; you may redistribute and/or modify
2448it under the same terms as Perl itself.
2449
2450=cut
2451