1### the gnu tar specification:
2### http://www.gnu.org/software/tar/manual/tar.html
3###
4### and the pax format spec, which tar derives from:
5### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html
6
7package Archive::Tar;
8require 5.005_03;
9
10use Cwd;
11use IO::Zlib;
12use IO::File;
13use Carp                qw(carp croak);
14use File::Spec          ();
15use File::Spec::Unix    ();
16use File::Path          ();
17
18use Archive::Tar::File;
19use Archive::Tar::Constant;
20
21require Exporter;
22
23use strict;
24use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD
25            $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING $SAME_PERMISSIONS
26            $INSECURE_EXTRACT_MODE $ZERO_PAD_NUMBERS @ISA @EXPORT $RESOLVE_SYMLINK
27         ];
28
29@ISA                    = qw[Exporter];
30@EXPORT                 = qw[ COMPRESS_GZIP COMPRESS_BZIP ];
31$DEBUG                  = 0;
32$WARN                   = 1;
33$FOLLOW_SYMLINK         = 0;
34$VERSION                = "1.96_01";
35$CHOWN                  = 1;
36$CHMOD                  = 1;
37$SAME_PERMISSIONS       = $> == 0 ? 1 : 0;
38$DO_NOT_USE_PREFIX      = 0;
39$INSECURE_EXTRACT_MODE  = 0;
40$ZERO_PAD_NUMBERS       = 0;
41$RESOLVE_SYMLINK        = $ENV{'PERL5_AT_RESOLVE_SYMLINK'} || 'speed';
42
43BEGIN {
44    use Config;
45    $HAS_PERLIO = $Config::Config{useperlio};
46
47    ### try and load IO::String anyway, so you can dynamically
48    ### switch between perlio and IO::String
49    $HAS_IO_STRING = eval {
50        require IO::String;
51        import IO::String;
52        1;
53    } || 0;
54}
55
56=head1 NAME
57
58Archive::Tar - module for manipulations of tar archives
59
60=head1 SYNOPSIS
61
62    use Archive::Tar;
63    my $tar = Archive::Tar->new;
64
65    $tar->read('origin.tgz');
66    $tar->extract();
67
68    $tar->add_files('file/foo.pl', 'docs/README');
69    $tar->add_data('file/baz.txt', 'This is the contents now');
70
71    $tar->rename('oldname', 'new/file/name');
72    $tar->chown('/', 'root');
73    $tar->chown('/', 'root:root');
74    $tar->chmod('/tmp', '1777');
75
76    $tar->write('files.tar');                   # plain tar
77    $tar->write('files.tgz', COMPRESS_GZIP);    # gzip compressed
78    $tar->write('files.tbz', COMPRESS_BZIP);    # bzip2 compressed
79
80=head1 DESCRIPTION
81
82Archive::Tar provides an object oriented mechanism for handling tar
83files.  It provides class methods for quick and easy files handling
84while also allowing for the creation of tar file objects for custom
85manipulation.  If you have the IO::Zlib module installed,
86Archive::Tar will also support compressed or gzipped tar files.
87
88An object of class Archive::Tar represents a .tar(.gz) archive full
89of files and things.
90
91=head1 Object Methods
92
93=head2 Archive::Tar->new( [$file, $compressed] )
94
95Returns a new Tar object. If given any arguments, C<new()> calls the
96C<read()> method automatically, passing on the arguments provided to
97the C<read()> method.
98
99If C<new()> is invoked with arguments and the C<read()> method fails
100for any reason, C<new()> returns undef.
101
102=cut
103
104my $tmpl = {
105    _data   => [ ],
106    _file   => 'Unknown',
107};
108
109### install get/set accessors for this object.
110for my $key ( keys %$tmpl ) {
111    no strict 'refs';
112    *{__PACKAGE__."::$key"} = sub {
113        my $self = shift;
114        $self->{$key} = $_[0] if @_;
115        return $self->{$key};
116    }
117}
118
119sub new {
120    my $class = shift;
121    $class = ref $class if ref $class;
122
123    ### copying $tmpl here since a shallow copy makes it use the
124    ### same aref, causing for files to remain in memory always.
125    my $obj = bless { _data => [ ], _file => 'Unknown', _error => '' }, $class;
126
127    if (@_) {
128        unless ( $obj->read( @_ ) ) {
129            $obj->_error(qq[No data could be read from file]);
130            return;
131        }
132    }
133
134    return $obj;
135}
136
137=head2 $tar->read ( $filename|$handle, [$compressed, {opt => 'val'}] )
138
139Read the given tar file into memory.
140The first argument can either be the name of a file or a reference to
141an already open filehandle (or an IO::Zlib object if it's compressed)
142
143The C<read> will I<replace> any previous content in C<$tar>!
144
145The second argument may be considered optional, but remains for
146backwards compatibility. Archive::Tar now looks at the file
147magic to determine what class should be used to open the file
148and will transparently Do The Right Thing.
149
150Archive::Tar will warn if you try to pass a bzip2 compressed file and the
151IO::Zlib / IO::Uncompress::Bunzip2 modules are not available and simply return.
152
153Note that you can currently B<not> pass a C<gzip> compressed
154filehandle, which is not opened with C<IO::Zlib>, a C<bzip2> compressed
155filehandle, which is not opened with C<IO::Uncompress::Bunzip2>, nor a string
156containing the full archive information (either compressed or
157uncompressed). These are worth while features, but not currently
158implemented. See the C<TODO> section.
159
160The third argument can be a hash reference with options. Note that
161all options are case-sensitive.
162
163=over 4
164
165=item limit
166
167Do not read more than C<limit> files. This is useful if you have
168very big archives, and are only interested in the first few files.
169
170=item filter
171
172Can be set to a regular expression.  Only files with names that match
173the expression will be read.
174
175=item md5
176
177Set to 1 and the md5sum of files will be returned (instead of file data)
178    my $iter = Archive::Tar->iter( $file,  1, {md5 => 1} );
179    while( my $f = $iter->() ) {
180        print $f->data . "\t" . $f->full_path . $/;
181    }
182
183=item extract
184
185If set to true, immediately extract entries when reading them. This
186gives you the same memory break as the C<extract_archive> function.
187Note however that entries will not be read into memory, but written
188straight to disk. This means no C<Archive::Tar::File> objects are
189created for you to inspect.
190
191=back
192
193All files are stored internally as C<Archive::Tar::File> objects.
194Please consult the L<Archive::Tar::File> documentation for details.
195
196Returns the number of files read in scalar context, and a list of
197C<Archive::Tar::File> objects in list context.
198
199=cut
200
201sub read {
202    my $self = shift;
203    my $file = shift;
204    my $gzip = shift || 0;
205    my $opts = shift || {};
206
207    unless( defined $file ) {
208        $self->_error( qq[No file to read from!] );
209        return;
210    } else {
211        $self->_file( $file );
212    }
213
214    my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) )
215                    or return;
216
217    my $data = $self->_read_tar( $handle, $opts ) or return;
218
219    $self->_data( $data );
220
221    return wantarray ? @$data : scalar @$data;
222}
223
224sub _get_handle {
225    my $self     = shift;
226    my $file     = shift;   return unless defined $file;
227    my $compress = shift || 0;
228    my $mode     = shift || READ_ONLY->( ZLIB ); # default to read only
229
230    ### Check if file is a file handle or IO glob
231    if ( ref $file ) {
232	return $file if eval{ *$file{IO} };
233	return $file if eval{ $file->isa(q{IO::Handle}) };
234	$file = q{}.$file;
235    }
236
237    ### get a FH opened to the right class, so we can use it transparently
238    ### throughout the program
239    my $fh;
240    {   ### reading magic only makes sense if we're opening a file for
241        ### reading. otherwise, just use what the user requested.
242        my $magic = '';
243        if( MODE_READ->($mode) ) {
244            open my $tmp, $file or do {
245                $self->_error( qq[Could not open '$file' for reading: $!] );
246                return;
247            };
248
249            ### read the first 4 bites of the file to figure out which class to
250            ### use to open the file.
251            sysread( $tmp, $magic, 4 );
252            close $tmp;
253        }
254
255        ### is it bzip?
256        ### if you asked specifically for bzip compression, or if we're in
257        ### read mode and the magic numbers add up, use bzip
258        if( BZIP and (
259                ($compress eq COMPRESS_BZIP) or
260                ( MODE_READ->($mode) and $magic =~ BZIP_MAGIC_NUM )
261            )
262        ) {
263
264            ### different reader/writer modules, different error vars... sigh
265            if( MODE_READ->($mode) ) {
266                $fh = IO::Uncompress::Bunzip2->new( $file ) or do {
267                    $self->_error( qq[Could not read '$file': ] .
268                        $IO::Uncompress::Bunzip2::Bunzip2Error
269                    );
270                    return;
271                };
272
273            } else {
274                $fh = IO::Compress::Bzip2->new( $file ) or do {
275                    $self->_error( qq[Could not write to '$file': ] .
276                        $IO::Compress::Bzip2::Bzip2Error
277                    );
278                    return;
279                };
280            }
281
282        ### is it gzip?
283        ### if you asked for compression, if you wanted to read or the gzip
284        ### magic number is present (redundant with read)
285        } elsif( ZLIB and (
286                    $compress or MODE_READ->($mode) or $magic =~ GZIP_MAGIC_NUM
287                 )
288        ) {
289            $fh = IO::Zlib->new;
290
291            unless( $fh->open( $file, $mode ) ) {
292                $self->_error(qq[Could not create filehandle for '$file': $!]);
293                return;
294            }
295
296        ### is it plain tar?
297        } else {
298            $fh = IO::File->new;
299
300            unless( $fh->open( $file, $mode ) ) {
301                $self->_error(qq[Could not create filehandle for '$file': $!]);
302                return;
303            }
304
305            ### enable bin mode on tar archives
306            binmode $fh;
307        }
308    }
309
310    return $fh;
311}
312
313
314sub _read_tar {
315    my $self    = shift;
316    my $handle  = shift or return;
317    my $opts    = shift || {};
318
319    my $count   = $opts->{limit}    || 0;
320    my $filter  = $opts->{filter};
321    my $md5  = $opts->{md5} || 0;	# cdrake
322    my $filter_cb = $opts->{filter_cb};
323    my $extract = $opts->{extract}  || 0;
324
325    ### set a cap on the amount of files to extract ###
326    my $limit   = 0;
327    $limit = 1 if $count > 0;
328
329    my $tarfile = [ ];
330    my $chunk;
331    my $read = 0;
332    my $real_name;  # to set the name of a file when
333                    # we're encountering @longlink
334    my $data;
335
336    LOOP:
337    while( $handle->read( $chunk, HEAD ) ) {
338        ### IO::Zlib doesn't support this yet
339        my $offset;
340        if ( ref($handle) ne 'IO::Zlib' ) {
341            local $@;
342            $offset = eval { tell $handle } || 'unknown';
343            $@ = '';
344        }
345        else {
346            $offset = 'unknown';
347        }
348
349        unless( $read++ ) {
350            my $gzip = GZIP_MAGIC_NUM;
351            if( $chunk =~ /$gzip/ ) {
352                $self->_error( qq[Cannot read compressed format in tar-mode] );
353                return;
354            }
355
356            ### size is < HEAD, which means a corrupted file, as the minimum
357            ### length is _at least_ HEAD
358            if (length $chunk != HEAD) {
359                $self->_error( qq[Cannot read enough bytes from the tarfile] );
360                return;
361            }
362        }
363
364        ### if we can't read in all bytes... ###
365        last if length $chunk != HEAD;
366
367        ### Apparently this should really be two blocks of 512 zeroes,
368        ### but GNU tar sometimes gets it wrong. See comment in the
369        ### source code (tar.c) to GNU cpio.
370        next if $chunk eq TAR_END;
371
372        ### according to the posix spec, the last 12 bytes of the header are
373        ### null bytes, to pad it to a 512 byte block. That means if these
374        ### bytes are NOT null bytes, it's a corrupt header. See:
375        ### www.koders.com/c/fidCE473AD3D9F835D690259D60AD5654591D91D5BA.aspx
376        ### line 111
377        {   my $nulls = join '', "\0" x 12;
378            unless( $nulls eq substr( $chunk, 500, 12 ) ) {
379                $self->_error( qq[Invalid header block at offset $offset] );
380                next LOOP;
381            }
382        }
383
384        ### pass the realname, so we can set it 'proper' right away
385        ### some of the heuristics are done on the name, so important
386        ### to set it ASAP
387        my $entry;
388        {   my %extra_args = ();
389            $extra_args{'name'} = $$real_name if defined $real_name;
390
391            unless( $entry = Archive::Tar::File->new(   chunk => $chunk,
392                                                        %extra_args )
393            ) {
394                $self->_error( qq[Couldn't read chunk at offset $offset] );
395                next LOOP;
396            }
397        }
398
399        ### ignore labels:
400        ### http://www.gnu.org/software/tar/manual/html_chapter/Media.html#SEC159
401        next if $entry->is_label;
402
403        if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) {
404
405            if ( $entry->is_file && !$entry->validate ) {
406                ### sometimes the chunk is rather fux0r3d and a whole 512
407                ### bytes ends up in the ->name area.
408                ### clean it up, if need be
409                my $name = $entry->name;
410                $name = substr($name, 0, 100) if length $name > 100;
411                $name =~ s/\n/ /g;
412
413                $self->_error( $name . qq[: checksum error] );
414                next LOOP;
415            }
416
417            my $block = BLOCK_SIZE->( $entry->size );
418
419            $data = $entry->get_content_by_ref;
420
421	    my $skip = 0;
422	    my $ctx;			# cdrake
423	    ### skip this entry if we're filtering
424
425	    if($md5) {			# cdrake
426	      $ctx = Digest::MD5->new;	# cdrake
427	        $skip=5;		# cdrake
428
429	    } elsif ($filter && $entry->name !~ $filter) {
430		$skip = 1;
431
432	    ### skip this entry if it's a pax header. This is a special file added
433	    ### by, among others, git-generated tarballs. It holds comments and is
434	    ### not meant for extracting. See #38932: pax_global_header extracted
435	    } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) {
436		$skip = 2;
437	    } elsif ($filter_cb && ! $filter_cb->($entry)) {
438		$skip = 3;
439	    }
440
441	    if ($skip) {
442		#
443		# Since we're skipping, do not allocate memory for the
444		# whole file.  Read it 64 BLOCKS at a time.  Do not
445		# complete the skip yet because maybe what we read is a
446		# longlink and it won't get skipped after all
447		#
448		my $amt = $block;
449		my $fsz=$entry->size;	# cdrake
450		while ($amt > 0) {
451		    $$data = '';
452		    my $this = 64 * BLOCK;
453		    $this = $amt if $this > $amt;
454		    if( $handle->read( $$data, $this ) < $this ) {
455			$self->_error( qq[Read error on tarfile (missing data) '].
456					    $entry->full_path ."' at offset $offset" );
457			next LOOP;
458		    }
459		    $amt -= $this;
460		    $fsz -= $this;	# cdrake
461		substr ($$data, $fsz) = "" if ($fsz<0);	# remove external junk prior to md5	# cdrake
462		$ctx->add($$data) if($skip==5);	# cdrake
463		}
464		$$data = $ctx->hexdigest if($skip==5 && !$entry->is_longlink && !$entry->is_unknown && !$entry->is_label ) ;	# cdrake
465            } else {
466
467		### just read everything into memory
468		### can't do lazy loading since IO::Zlib doesn't support 'seek'
469		### this is because Compress::Zlib doesn't support it =/
470		### this reads in the whole data in one read() call.
471		if ( $handle->read( $$data, $block ) < $block ) {
472		    $self->_error( qq[Read error on tarfile (missing data) '].
473                                    $entry->full_path ."' at offset $offset" );
474		    next LOOP;
475		}
476		### throw away trailing garbage ###
477		substr ($$data, $entry->size) = "" if defined $$data;
478            }
479
480            ### part II of the @LongLink munging -- need to do /after/
481            ### the checksum check.
482            if( $entry->is_longlink ) {
483                ### weird thing in tarfiles -- if the file is actually a
484                ### @LongLink, the data part seems to have a trailing ^@
485                ### (unprintable) char. to display, pipe output through less.
486                ### but that doesn't *always* happen.. so check if the last
487                ### character is a control character, and if so remove it
488                ### at any rate, we better remove that character here, or tests
489                ### like 'eq' and hash lookups based on names will SO not work
490                ### remove it by calculating the proper size, and then
491                ### tossing out everything that's longer than that size.
492
493                ### count number of nulls
494                my $nulls = $$data =~ tr/\0/\0/;
495
496                ### cut data + size by that many bytes
497                $entry->size( $entry->size - $nulls );
498                substr ($$data, $entry->size) = "";
499            }
500        }
501
502        ### clean up of the entries.. posix tar /apparently/ has some
503        ### weird 'feature' that allows for filenames > 255 characters
504        ### they'll put a header in with as name '././@LongLink' and the
505        ### contents will be the name of the /next/ file in the archive
506        ### pretty crappy and kludgy if you ask me
507
508        ### set the name for the next entry if this is a @LongLink;
509        ### this is one ugly hack =/ but needed for direct extraction
510        if( $entry->is_longlink ) {
511            $real_name = $data;
512            next LOOP;
513        } elsif ( defined $real_name ) {
514            $entry->name( $$real_name );
515            $entry->prefix('');
516            undef $real_name;
517        }
518
519	if ($filter && $entry->name !~ $filter) {
520	    next LOOP;
521
522	### skip this entry if it's a pax header. This is a special file added
523	### by, among others, git-generated tarballs. It holds comments and is
524	### not meant for extracting. See #38932: pax_global_header extracted
525	} elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) {
526	    next LOOP;
527	} elsif ($filter_cb && ! $filter_cb->($entry)) {
528	    next LOOP;
529	}
530
531        if ( $extract && !$entry->is_longlink
532                      && !$entry->is_unknown
533                      && !$entry->is_label ) {
534            $self->_extract_file( $entry ) or return;
535        }
536
537        ### Guard against tarfiles with garbage at the end
538	    last LOOP if $entry->name eq '';
539
540        ### push only the name on the rv if we're extracting
541        ### -- for extract_archive
542        push @$tarfile, ($extract ? $entry->name : $entry);
543
544        if( $limit ) {
545            $count-- unless $entry->is_longlink || $entry->is_dir;
546            last LOOP unless $count;
547        }
548    } continue {
549        undef $data;
550    }
551
552    return $tarfile;
553}
554
555=head2 $tar->contains_file( $filename )
556
557Check if the archive contains a certain file.
558It will return true if the file is in the archive, false otherwise.
559
560Note however, that this function does an exact match using C<eq>
561on the full path. So it cannot compensate for case-insensitive file-
562systems or compare 2 paths to see if they would point to the same
563underlying file.
564
565=cut
566
567sub contains_file {
568    my $self = shift;
569    my $full = shift;
570
571    return unless defined $full;
572
573    ### don't warn if the entry isn't there.. that's what this function
574    ### is for after all.
575    local $WARN = 0;
576    return 1 if $self->_find_entry($full);
577    return;
578}
579
580=head2 $tar->extract( [@filenames] )
581
582Write files whose names are equivalent to any of the names in
583C<@filenames> to disk, creating subdirectories as necessary. This
584might not work too well under VMS.
585Under MacPerl, the file's modification time will be converted to the
586MacOS zero of time, and appropriate conversions will be done to the
587path.  However, the length of each element of the path is not
588inspected to see whether it's longer than MacOS currently allows (32
589characters).
590
591If C<extract> is called without a list of file names, the entire
592contents of the archive are extracted.
593
594Returns a list of filenames extracted.
595
596=cut
597
598sub extract {
599    my $self    = shift;
600    my @args    = @_;
601    my @files;
602
603    # use the speed optimization for all extracted files
604    local($self->{cwd}) = cwd() unless $self->{cwd};
605
606    ### you requested the extraction of only certain files
607    if( @args ) {
608        for my $file ( @args ) {
609
610            ### it's already an object?
611            if( UNIVERSAL::isa( $file, 'Archive::Tar::File' ) ) {
612                push @files, $file;
613                next;
614
615            ### go find it then
616            } else {
617
618                my $found;
619                for my $entry ( @{$self->_data} ) {
620                    next unless $file eq $entry->full_path;
621
622                    ### we found the file you're looking for
623                    push @files, $entry;
624                    $found++;
625                }
626
627                unless( $found ) {
628                    return $self->_error(
629                        qq[Could not find '$file' in archive] );
630                }
631            }
632        }
633
634    ### just grab all the file items
635    } else {
636        @files = $self->get_files;
637    }
638
639    ### nothing found? that's an error
640    unless( scalar @files ) {
641        $self->_error( qq[No files found for ] . $self->_file );
642        return;
643    }
644
645    ### now extract them
646    for my $entry ( @files ) {
647        unless( $self->_extract_file( $entry ) ) {
648            $self->_error(q[Could not extract ']. $entry->full_path .q['] );
649            return;
650        }
651    }
652
653    return @files;
654}
655
656=head2 $tar->extract_file( $file, [$extract_path] )
657
658Write an entry, whose name is equivalent to the file name provided to
659disk. Optionally takes a second parameter, which is the full native
660path (including filename) the entry will be written to.
661
662For example:
663
664    $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' );
665
666    $tar->extract_file( $at_file_object,   'name/i/want/to/give/it' );
667
668Returns true on success, false on failure.
669
670=cut
671
672sub extract_file {
673    my $self = shift;
674    my $file = shift;   return unless defined $file;
675    my $alt  = shift;
676
677    my $entry = $self->_find_entry( $file )
678        or $self->_error( qq[Could not find an entry for '$file'] ), return;
679
680    return $self->_extract_file( $entry, $alt );
681}
682
683sub _extract_file {
684    my $self    = shift;
685    my $entry   = shift or return;
686    my $alt     = shift;
687
688    ### you wanted an alternate extraction location ###
689    my $name = defined $alt ? $alt : $entry->full_path;
690
691                            ### splitpath takes a bool at the end to indicate
692                            ### that it's splitting a dir
693    my ($vol,$dirs,$file);
694    if ( defined $alt ) { # It's a local-OS path
695        ($vol,$dirs,$file) = File::Spec->splitpath(       $alt,
696                                                          $entry->is_dir );
697    } else {
698        ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name,
699                                                          $entry->is_dir );
700    }
701
702    my $dir;
703    ### is $name an absolute path? ###
704    if( $vol || File::Spec->file_name_is_absolute( $dirs ) ) {
705
706        ### absolute names are not allowed to be in tarballs under
707        ### strict mode, so only allow it if a user tells us to do it
708        if( not defined $alt and not $INSECURE_EXTRACT_MODE ) {
709            $self->_error(
710                q[Entry ']. $entry->full_path .q[' is an absolute path. ].
711                q[Not extracting absolute paths under SECURE EXTRACT MODE]
712            );
713            return;
714        }
715
716        ### user asked us to, it's fine.
717        $dir = File::Spec->catpath( $vol, $dirs, "" );
718
719    ### it's a relative path ###
720    } else {
721        my $cwd     = (ref $self and defined $self->{cwd})
722                        ? $self->{cwd}
723                        : cwd();
724
725        my @dirs = defined $alt
726            ? File::Spec->splitdir( $dirs )         # It's a local-OS path
727            : File::Spec::Unix->splitdir( $dirs );  # it's UNIX-style, likely
728                                                    # straight from the tarball
729
730        if( not defined $alt            and
731            not $INSECURE_EXTRACT_MODE
732        ) {
733
734            ### paths that leave the current directory are not allowed under
735            ### strict mode, so only allow it if a user tells us to do this.
736            if( grep { $_ eq '..' } @dirs ) {
737
738                $self->_error(
739                    q[Entry ']. $entry->full_path .q[' is attempting to leave ].
740                    q[the current working directory. Not extracting under ].
741                    q[SECURE EXTRACT MODE]
742                );
743                return;
744            }
745
746            ### the archive may be asking us to extract into a symlink. This
747            ### is not sane and a possible security issue, as outlined here:
748            ### https://rt.cpan.org/Ticket/Display.html?id=30380
749            ### https://bugzilla.redhat.com/show_bug.cgi?id=295021
750            ### https://issues.rpath.com/browse/RPL-1716
751            my $full_path = $cwd;
752            for my $d ( @dirs ) {
753                $full_path = File::Spec->catdir( $full_path, $d );
754
755                ### we've already checked this one, and it's safe. Move on.
756                next if ref $self and $self->{_link_cache}->{$full_path};
757
758                if( -l $full_path ) {
759                    my $to   = readlink $full_path;
760                    my $diag = "symlinked directory ($full_path => $to)";
761
762                    $self->_error(
763                        q[Entry ']. $entry->full_path .q[' is attempting to ].
764                        qq[extract to a $diag. This is considered a security ].
765                        q[vulnerability and not allowed under SECURE EXTRACT ].
766                        q[MODE]
767                    );
768                    return;
769                }
770
771                ### XXX keep a cache if possible, so the stats become cheaper:
772                $self->{_link_cache}->{$full_path} = 1 if ref $self;
773            }
774        }
775
776        ### '.' is the directory delimiter on VMS, which has to be escaped
777        ### or changed to '_' on vms.  vmsify is used, because older versions
778        ### of vmspath do not handle this properly.
779        ### Must not add a '/' to an empty directory though.
780        map { length() ? VMS::Filespec::vmsify($_.'/') : $_ } @dirs if ON_VMS;
781
782        my ($cwd_vol,$cwd_dir,$cwd_file)
783                    = File::Spec->splitpath( $cwd );
784        my @cwd     = File::Spec->splitdir( $cwd_dir );
785        push @cwd, $cwd_file if length $cwd_file;
786
787        ### We need to pass '' as the last element to catpath. Craig Berry
788        ### explains why (msgid <p0624083dc311ae541393@[172.16.52.1]>):
789        ### The root problem is that splitpath on UNIX always returns the
790        ### final path element as a file even if it is a directory, and of
791        ### course there is no way it can know the difference without checking
792        ### against the filesystem, which it is documented as not doing.  When
793        ### you turn around and call catpath, on VMS you have to know which bits
794        ### are directory bits and which bits are file bits.  In this case we
795        ### know the result should be a directory.  I had thought you could omit
796        ### the file argument to catpath in such a case, but apparently on UNIX
797        ### you can't.
798        $dir        = File::Spec->catpath(
799                            $cwd_vol, File::Spec->catdir( @cwd, @dirs ), ''
800                        );
801
802        ### catdir() returns undef if the path is longer than 255 chars on
803        ### older VMS systems.
804        unless ( defined $dir ) {
805            $^W && $self->_error( qq[Could not compose a path for '$dirs'\n] );
806            return;
807        }
808
809    }
810
811    if( -e $dir && !-d _ ) {
812        $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] );
813        return;
814    }
815
816    unless ( -d _ ) {
817        eval { File::Path::mkpath( $dir, 0, 0777 ) };
818        if( $@ ) {
819            my $fp = $entry->full_path;
820            $self->_error(qq[Could not create directory '$dir' for '$fp': $@]);
821            return;
822        }
823
824        ### XXX chown here? that might not be the same as in the archive
825        ### as we're only chown'ing to the owner of the file we're extracting
826        ### not to the owner of the directory itself, which may or may not
827        ### be another entry in the archive
828        ### Answer: no, gnu tar doesn't do it either, it'd be the wrong
829        ### way to go.
830        #if( $CHOWN && CAN_CHOWN ) {
831        #    chown $entry->uid, $entry->gid, $dir or
832        #        $self->_error( qq[Could not set uid/gid on '$dir'] );
833        #}
834    }
835
836    ### we're done if we just needed to create a dir ###
837    return 1 if $entry->is_dir;
838
839    my $full = File::Spec->catfile( $dir, $file );
840
841    if( $entry->is_unknown ) {
842        $self->_error( qq[Unknown file type for file '$full'] );
843        return;
844    }
845
846    if( length $entry->type && $entry->is_file ) {
847        my $fh = IO::File->new;
848        $fh->open( '>' . $full ) or (
849            $self->_error( qq[Could not open file '$full': $!] ),
850            return
851        );
852
853        if( $entry->size ) {
854            binmode $fh;
855            syswrite $fh, $entry->data or (
856                $self->_error( qq[Could not write data to '$full'] ),
857                return
858            );
859        }
860
861        close $fh or (
862            $self->_error( qq[Could not close file '$full'] ),
863            return
864        );
865
866    } else {
867        $self->_make_special_file( $entry, $full ) or return;
868    }
869
870    ### only update the timestamp if it's not a symlink; that will change the
871    ### timestamp of the original. This addresses bug #33669: Could not update
872    ### timestamp warning on symlinks
873    if( not -l $full ) {
874        utime time, $entry->mtime - TIME_OFFSET, $full or
875            $self->_error( qq[Could not update timestamp] );
876    }
877
878    if( $CHOWN && CAN_CHOWN->() and not -l $full ) {
879        chown $entry->uid, $entry->gid, $full or
880            $self->_error( qq[Could not set uid/gid on '$full'] );
881    }
882
883    ### only chmod if we're allowed to, but never chmod symlinks, since they'll
884    ### change the perms on the file they're linking too...
885    if( $CHMOD and not -l $full ) {
886        my $mode = $entry->mode;
887        unless ($SAME_PERMISSIONS) {
888            $mode &= ~(oct(7000) | umask);
889        }
890        chmod $mode, $full or
891            $self->_error( qq[Could not chown '$full' to ] . $entry->mode );
892    }
893
894    return 1;
895}
896
897sub _make_special_file {
898    my $self    = shift;
899    my $entry   = shift     or return;
900    my $file    = shift;    return unless defined $file;
901
902    my $err;
903
904    if( $entry->is_symlink ) {
905        my $fail;
906        if( ON_UNIX ) {
907            symlink( $entry->linkname, $file ) or $fail++;
908
909        } else {
910            $self->_extract_special_file_as_plain_file( $entry, $file )
911                or $fail++;
912        }
913
914        $err =  qq[Making symbolic link '$file' to '] .
915                $entry->linkname .q[' failed] if $fail;
916
917    } elsif ( $entry->is_hardlink ) {
918        my $fail;
919        if( ON_UNIX ) {
920            link( $entry->linkname, $file ) or $fail++;
921
922        } else {
923            $self->_extract_special_file_as_plain_file( $entry, $file )
924                or $fail++;
925        }
926
927        $err =  qq[Making hard link from '] . $entry->linkname .
928                qq[' to '$file' failed] if $fail;
929
930    } elsif ( $entry->is_fifo ) {
931        ON_UNIX && !system('mknod', $file, 'p') or
932            $err = qq[Making fifo ']. $entry->name .qq[' failed];
933
934    } elsif ( $entry->is_blockdev or $entry->is_chardev ) {
935        my $mode = $entry->is_blockdev ? 'b' : 'c';
936
937        ON_UNIX && !system('mknod', $file, $mode,
938                            $entry->devmajor, $entry->devminor) or
939            $err =  qq[Making block device ']. $entry->name .qq[' (maj=] .
940                    $entry->devmajor . qq[ min=] . $entry->devminor .
941                    qq[) failed.];
942
943    } elsif ( $entry->is_socket ) {
944        ### the original doesn't do anything special for sockets.... ###
945        1;
946    }
947
948    return $err ? $self->_error( $err ) : 1;
949}
950
951### don't know how to make symlinks, let's just extract the file as
952### a plain file
953sub _extract_special_file_as_plain_file {
954    my $self    = shift;
955    my $entry   = shift     or return;
956    my $file    = shift;    return unless defined $file;
957
958    my $err;
959    TRY: {
960        my $orig = $self->_find_entry( $entry->linkname, $entry );
961
962        unless( $orig ) {
963            $err =  qq[Could not find file '] . $entry->linkname .
964                    qq[' in memory.];
965            last TRY;
966        }
967
968        ### clone the entry, make it appear as a normal file ###
969        my $clone = $orig->clone;
970        $clone->_downgrade_to_plainfile;
971        $self->_extract_file( $clone, $file ) or last TRY;
972
973        return 1;
974    }
975
976    return $self->_error($err);
977}
978
979=head2 $tar->list_files( [\@properties] )
980
981Returns a list of the names of all the files in the archive.
982
983If C<list_files()> is passed an array reference as its first argument
984it returns a list of hash references containing the requested
985properties of each file.  The following list of properties is
986supported: name, size, mtime (last modified date), mode, uid, gid,
987linkname, uname, gname, devmajor, devminor, prefix.
988
989Passing an array reference containing only one element, 'name', is
990special cased to return a list of names rather than a list of hash
991references, making it equivalent to calling C<list_files> without
992arguments.
993
994=cut
995
996sub list_files {
997    my $self = shift;
998    my $aref = shift || [ ];
999
1000    unless( $self->_data ) {
1001        $self->read() or return;
1002    }
1003
1004    if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) {
1005        return map { $_->full_path } @{$self->_data};
1006    } else {
1007
1008        #my @rv;
1009        #for my $obj ( @{$self->_data} ) {
1010        #    push @rv, { map { $_ => $obj->$_() } @$aref };
1011        #}
1012        #return @rv;
1013
1014        ### this does the same as the above.. just needs a +{ }
1015        ### to make sure perl doesn't confuse it for a block
1016        return map {    my $o=$_;
1017                        +{ map { $_ => $o->$_() } @$aref }
1018                    } @{$self->_data};
1019    }
1020}
1021
1022sub _find_entry {
1023    my $self = shift;
1024    my $file = shift;
1025
1026    unless( defined $file ) {
1027        $self->_error( qq[No file specified] );
1028        return;
1029    }
1030
1031    ### it's an object already
1032    return $file if UNIVERSAL::isa( $file, 'Archive::Tar::File' );
1033
1034seach_entry:
1035		if($self->_data){
1036			for my $entry ( @{$self->_data} ) {
1037					my $path = $entry->full_path;
1038					return $entry if $path eq $file;
1039			}
1040		}
1041
1042		if($Archive::Tar::RESOLVE_SYMLINK!~/none/){
1043			if(my $link_entry = shift()){#fallback mode when symlinks are using relative notations ( ../a/./b/text.bin )
1044				$file = _symlinks_resolver( $link_entry->name, $file );
1045				goto seach_entry if $self->_data;
1046
1047				#this will be slower than never, but won't failed!
1048
1049				my $iterargs = $link_entry->{'_archive'};
1050				if($Archive::Tar::RESOLVE_SYMLINK=~/speed/ && @$iterargs==3){
1051				#faster	but whole archive will be read in memory
1052					#read whole archive and share data
1053					my $archive = Archive::Tar->new;
1054					$archive->read( @$iterargs );
1055					push @$iterargs, $archive; #take a trace for destruction
1056					if($archive->_data){
1057						$self->_data( $archive->_data );
1058						goto seach_entry;
1059					}
1060				}#faster
1061
1062				{#slower but lower memory usage
1063					# $iterargs = [$filename, $compressed, $opts];
1064					my $next = Archive::Tar->iter( @$iterargs );
1065					while(my $e = $next->()){
1066						if($e->full_path eq $file){
1067							undef $next;
1068							return $e;
1069						}
1070					}
1071				}#slower
1072			}
1073		}
1074
1075    $self->_error( qq[No such file in archive: '$file'] );
1076    return;
1077}
1078
1079=head2 $tar->get_files( [@filenames] )
1080
1081Returns the C<Archive::Tar::File> objects matching the filenames
1082provided. If no filename list was passed, all C<Archive::Tar::File>
1083objects in the current Tar object are returned.
1084
1085Please refer to the C<Archive::Tar::File> documentation on how to
1086handle these objects.
1087
1088=cut
1089
1090sub get_files {
1091    my $self = shift;
1092
1093    return @{ $self->_data } unless @_;
1094
1095    my @list;
1096    for my $file ( @_ ) {
1097        push @list, grep { defined } $self->_find_entry( $file );
1098    }
1099
1100    return @list;
1101}
1102
1103=head2 $tar->get_content( $file )
1104
1105Return the content of the named file.
1106
1107=cut
1108
1109sub get_content {
1110    my $self = shift;
1111    my $entry = $self->_find_entry( shift ) or return;
1112
1113    return $entry->data;
1114}
1115
1116=head2 $tar->replace_content( $file, $content )
1117
1118Make the string $content be the content for the file named $file.
1119
1120=cut
1121
1122sub replace_content {
1123    my $self = shift;
1124    my $entry = $self->_find_entry( shift ) or return;
1125
1126    return $entry->replace_content( shift );
1127}
1128
1129=head2 $tar->rename( $file, $new_name )
1130
1131Rename the file of the in-memory archive to $new_name.
1132
1133Note that you must specify a Unix path for $new_name, since per tar
1134standard, all files in the archive must be Unix paths.
1135
1136Returns true on success and false on failure.
1137
1138=cut
1139
1140sub rename {
1141    my $self = shift;
1142    my $file = shift; return unless defined $file;
1143    my $new  = shift; return unless defined $new;
1144
1145    my $entry = $self->_find_entry( $file ) or return;
1146
1147    return $entry->rename( $new );
1148}
1149
1150=head2 $tar->chmod( $file, $mode )
1151
1152Change mode of $file to $mode.
1153
1154Returns true on success and false on failure.
1155
1156=cut
1157
1158sub chmod {
1159    my $self = shift;
1160    my $file = shift; return unless defined $file;
1161    my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/;
1162    my @args = ("$mode");
1163
1164    my $entry = $self->_find_entry( $file ) or return;
1165    my $x = $entry->chmod( @args );
1166    return $x;
1167}
1168
1169=head2 $tar->chown( $file, $uname [, $gname] )
1170
1171Change owner $file to $uname and $gname.
1172
1173Returns true on success and false on failure.
1174
1175=cut
1176
1177sub chown {
1178    my $self = shift;
1179    my $file = shift; return unless defined $file;
1180    my $uname  = shift; return unless defined $uname;
1181    my @args   = ($uname);
1182    push(@args, shift);
1183
1184    my $entry = $self->_find_entry( $file ) or return;
1185    my $x = $entry->chown( @args );
1186    return $x;
1187}
1188
1189=head2 $tar->remove (@filenamelist)
1190
1191Removes any entries with names matching any of the given filenames
1192from the in-memory archive. Returns a list of C<Archive::Tar::File>
1193objects that remain.
1194
1195=cut
1196
1197sub remove {
1198    my $self = shift;
1199    my @list = @_;
1200
1201    my %seen = map { $_->full_path => $_ } @{$self->_data};
1202    delete $seen{ $_ } for @list;
1203
1204    $self->_data( [values %seen] );
1205
1206    return values %seen;
1207}
1208
1209=head2 $tar->clear
1210
1211C<clear> clears the current in-memory archive. This effectively gives
1212you a 'blank' object, ready to be filled again. Note that C<clear>
1213only has effect on the object, not the underlying tarfile.
1214
1215=cut
1216
1217sub clear {
1218    my $self = shift or return;
1219
1220    $self->_data( [] );
1221    $self->_file( '' );
1222
1223    return 1;
1224}
1225
1226
1227=head2 $tar->write ( [$file, $compressed, $prefix] )
1228
1229Write the in-memory archive to disk.  The first argument can either
1230be the name of a file or a reference to an already open filehandle (a
1231GLOB reference).
1232
1233The second argument is used to indicate compression. You can either
1234compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed
1235to be the C<gzip> compression level (between 1 and 9), but the use of
1236constants is preferred:
1237
1238  # write a gzip compressed file
1239  $tar->write( 'out.tgz', COMPRESS_GZIP );
1240
1241  # write a bzip compressed file
1242  $tar->write( 'out.tbz', COMPRESS_BZIP );
1243
1244Note that when you pass in a filehandle, the compression argument
1245is ignored, as all files are printed verbatim to your filehandle.
1246If you wish to enable compression with filehandles, use an
1247C<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead.
1248
1249The third argument is an optional prefix. All files will be tucked
1250away in the directory you specify as prefix. So if you have files
1251'a' and 'b' in your archive, and you specify 'foo' as prefix, they
1252will be written to the archive as 'foo/a' and 'foo/b'.
1253
1254If no arguments are given, C<write> returns the entire formatted
1255archive as a string, which could be useful if you'd like to stuff the
1256archive into a socket or a pipe to gzip or something.
1257
1258
1259=cut
1260
1261sub write {
1262    my $self        = shift;
1263    my $file        = shift; $file = '' unless defined $file;
1264    my $gzip        = shift || 0;
1265    my $ext_prefix  = shift; $ext_prefix = '' unless defined $ext_prefix;
1266    my $dummy       = '';
1267
1268    ### only need a handle if we have a file to print to ###
1269    my $handle = length($file)
1270                    ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) )
1271                        or return )
1272                    : $HAS_PERLIO    ? do { open my $h, '>', \$dummy; $h }
1273                    : $HAS_IO_STRING ? IO::String->new
1274                    : __PACKAGE__->no_string_support();
1275
1276    ### Addresses: #41798: Nonempty $\ when writing a TAR file produces a
1277    ### corrupt TAR file. Must clear out $\ to make sure no garbage is
1278    ### printed to the archive
1279    local $\;
1280
1281    for my $entry ( @{$self->_data} ) {
1282        ### entries to be written to the tarfile ###
1283        my @write_me;
1284
1285        ### only now will we change the object to reflect the current state
1286        ### of the name and prefix fields -- this needs to be limited to
1287        ### write() only!
1288        my $clone = $entry->clone;
1289
1290
1291        ### so, if you don't want use to use the prefix, we'll stuff
1292        ### everything in the name field instead
1293        if( $DO_NOT_USE_PREFIX ) {
1294
1295            ### you might have an extended prefix, if so, set it in the clone
1296            ### XXX is ::Unix right?
1297            $clone->name( length $ext_prefix
1298                            ? File::Spec::Unix->catdir( $ext_prefix,
1299                                                        $clone->full_path)
1300                            : $clone->full_path );
1301            $clone->prefix( '' );
1302
1303        ### otherwise, we'll have to set it properly -- prefix part in the
1304        ### prefix and name part in the name field.
1305        } else {
1306
1307            ### split them here, not before!
1308            my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path );
1309
1310            ### you might have an extended prefix, if so, set it in the clone
1311            ### XXX is ::Unix right?
1312            $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix )
1313                if length $ext_prefix;
1314
1315            $clone->prefix( $prefix );
1316            $clone->name( $name );
1317        }
1318
1319        ### names are too long, and will get truncated if we don't add a
1320        ### '@LongLink' file...
1321        my $make_longlink = (   length($clone->name)    > NAME_LENGTH or
1322                                length($clone->prefix)  > PREFIX_LENGTH
1323                            ) || 0;
1324
1325        ### perhaps we need to make a longlink file?
1326        if( $make_longlink ) {
1327            my $longlink = Archive::Tar::File->new(
1328                            data => LONGLINK_NAME,
1329                            $clone->full_path,
1330                            { type => LONGLINK }
1331                        );
1332
1333            unless( $longlink ) {
1334                $self->_error(  qq[Could not create 'LongLink' entry for ] .
1335                                qq[oversize file '] . $clone->full_path ."'" );
1336                return;
1337            };
1338
1339            push @write_me, $longlink;
1340        }
1341
1342        push @write_me, $clone;
1343
1344        ### write the one, optionally 2 a::t::file objects to the handle
1345        for my $clone (@write_me) {
1346
1347            ### if the file is a symlink, there are 2 options:
1348            ### either we leave the symlink intact, but then we don't write any
1349            ### data OR we follow the symlink, which means we actually make a
1350            ### copy. if we do the latter, we have to change the TYPE of the
1351            ### clone to 'FILE'
1352            my $link_ok =  $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK;
1353            my $data_ok = !$clone->is_symlink && $clone->has_content;
1354
1355            ### downgrade to a 'normal' file if it's a symlink we're going to
1356            ### treat as a regular file
1357            $clone->_downgrade_to_plainfile if $link_ok;
1358
1359            ### get the header for this block
1360            my $header = $self->_format_tar_entry( $clone );
1361            unless( $header ) {
1362                $self->_error(q[Could not format header for: ] .
1363                                    $clone->full_path );
1364                return;
1365            }
1366
1367            unless( print $handle $header ) {
1368                $self->_error(q[Could not write header for: ] .
1369                                    $clone->full_path);
1370                return;
1371            }
1372
1373            if( $link_ok or $data_ok ) {
1374                unless( print $handle $clone->data ) {
1375                    $self->_error(q[Could not write data for: ] .
1376                                    $clone->full_path);
1377                    return;
1378                }
1379
1380                ### pad the end of the clone if required ###
1381                print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK
1382            }
1383
1384        } ### done writing these entries
1385    }
1386
1387    ### write the end markers ###
1388    print $handle TAR_END x 2 or
1389            return $self->_error( qq[Could not write tar end markers] );
1390
1391    ### did you want it written to a file, or returned as a string? ###
1392    my $rv =  length($file) ? 1
1393                        : $HAS_PERLIO ? $dummy
1394                        : do { seek $handle, 0, 0; local $/; <$handle> };
1395
1396    ### make sure to close the handle if we created it
1397    if ( $file ne $handle ) {
1398	unless( close $handle ) {
1399	    $self->_error( qq[Could not write tar] );
1400	    return;
1401	}
1402    }
1403
1404    return $rv;
1405}
1406
1407sub _format_tar_entry {
1408    my $self        = shift;
1409    my $entry       = shift or return;
1410    my $ext_prefix  = shift; $ext_prefix = '' unless defined $ext_prefix;
1411    my $no_prefix   = shift || 0;
1412
1413    my $file    = $entry->name;
1414    my $prefix  = $entry->prefix; $prefix = '' unless defined $prefix;
1415
1416    ### remove the prefix from the file name
1417    ### not sure if this is still needed --kane
1418    ### no it's not -- Archive::Tar::File->_new_from_file will take care of
1419    ### this for us. Even worse, this would break if we tried to add a file
1420    ### like x/x.
1421    #if( length $prefix ) {
1422    #    $file =~ s/^$match//;
1423    #}
1424
1425    $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix)
1426                if length $ext_prefix;
1427
1428    ### not sure why this is... ###
1429    my $l = PREFIX_LENGTH; # is ambiguous otherwise...
1430    substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH;
1431
1432    my $f1 = "%06o"; my $f2  = $ZERO_PAD_NUMBERS ? "%011o" : "%11o";
1433
1434    ### this might be optimizable with a 'changed' flag in the file objects ###
1435    my $tar = pack (
1436                PACK,
1437                $file,
1438
1439                (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]),
1440                (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]),
1441
1442                "",  # checksum field - space padded a bit down
1443
1444                (map { $entry->$_() }                 qw[type linkname magic]),
1445
1446                $entry->version || TAR_VERSION,
1447
1448                (map { $entry->$_() }                 qw[uname gname]),
1449                (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]),
1450
1451                ($no_prefix ? '' : $prefix)
1452    );
1453
1454    ### add the checksum ###
1455    my $checksum_fmt = $ZERO_PAD_NUMBERS ? "%06o\0" : "%06o\0";
1456    substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar));
1457
1458    return $tar;
1459}
1460
1461=head2 $tar->add_files( @filenamelist )
1462
1463Takes a list of filenames and adds them to the in-memory archive.
1464
1465The path to the file is automatically converted to a Unix like
1466equivalent for use in the archive, and, if on MacOS, the file's
1467modification time is converted from the MacOS epoch to the Unix epoch.
1468So tar archives created on MacOS with B<Archive::Tar> can be read
1469both with I<tar> on Unix and applications like I<suntar> or
1470I<Stuffit Expander> on MacOS.
1471
1472Be aware that the file's type/creator and resource fork will be lost,
1473which is usually what you want in cross-platform archives.
1474
1475Instead of a filename, you can also pass it an existing C<Archive::Tar::File>
1476object from, for example, another archive. The object will be clone, and
1477effectively be a copy of the original, not an alias.
1478
1479Returns a list of C<Archive::Tar::File> objects that were just added.
1480
1481=cut
1482
1483sub add_files {
1484    my $self    = shift;
1485    my @files   = @_ or return;
1486
1487    my @rv;
1488    for my $file ( @files ) {
1489
1490        ### you passed an Archive::Tar::File object
1491        ### clone it so we don't accidentally have a reference to
1492        ### an object from another archive
1493        if( UNIVERSAL::isa( $file,'Archive::Tar::File' ) ) {
1494            push @rv, $file->clone;
1495            next;
1496        }
1497
1498        eval {
1499            if( utf8::is_utf8( $file )) {
1500              utf8::encode( $file );
1501            }
1502        };
1503
1504        unless( -e $file || -l $file ) {
1505            $self->_error( qq[No such file: '$file'] );
1506            next;
1507        }
1508
1509        my $obj = Archive::Tar::File->new( file => $file );
1510        unless( $obj ) {
1511            $self->_error( qq[Unable to add file: '$file'] );
1512            next;
1513        }
1514
1515        push @rv, $obj;
1516    }
1517
1518    push @{$self->{_data}}, @rv;
1519
1520    return @rv;
1521}
1522
1523=head2 $tar->add_data ( $filename, $data, [$opthashref] )
1524
1525Takes a filename, a scalar full of data and optionally a reference to
1526a hash with specific options.
1527
1528Will add a file to the in-memory archive, with name C<$filename> and
1529content C<$data>. Specific properties can be set using C<$opthashref>.
1530The following list of properties is supported: name, size, mtime
1531(last modified date), mode, uid, gid, linkname, uname, gname,
1532devmajor, devminor, prefix, type.  (On MacOS, the file's path and
1533modification times are converted to Unix equivalents.)
1534
1535Valid values for the file type are the following constants defined by
1536Archive::Tar::Constant:
1537
1538=over 4
1539
1540=item FILE
1541
1542Regular file.
1543
1544=item HARDLINK
1545
1546=item SYMLINK
1547
1548Hard and symbolic ("soft") links; linkname should specify target.
1549
1550=item CHARDEV
1551
1552=item BLOCKDEV
1553
1554Character and block devices. devmajor and devminor should specify the major
1555and minor device numbers.
1556
1557=item DIR
1558
1559Directory.
1560
1561=item FIFO
1562
1563FIFO (named pipe).
1564
1565=item SOCKET
1566
1567Socket.
1568
1569=back
1570
1571Returns the C<Archive::Tar::File> object that was just added, or
1572C<undef> on failure.
1573
1574=cut
1575
1576sub add_data {
1577    my $self    = shift;
1578    my ($file, $data, $opt) = @_;
1579
1580    my $obj = Archive::Tar::File->new( data => $file, $data, $opt );
1581    unless( $obj ) {
1582        $self->_error( qq[Unable to add file: '$file'] );
1583        return;
1584    }
1585
1586    push @{$self->{_data}}, $obj;
1587
1588    return $obj;
1589}
1590
1591=head2 $tar->error( [$BOOL] )
1592
1593Returns the current error string (usually, the last error reported).
1594If a true value was specified, it will give the C<Carp::longmess>
1595equivalent of the error, in effect giving you a stacktrace.
1596
1597For backwards compatibility, this error is also available as
1598C<$Archive::Tar::error> although it is much recommended you use the
1599method call instead.
1600
1601=cut
1602
1603{
1604    $error = '';
1605    my $longmess;
1606
1607    sub _error {
1608        my $self    = shift;
1609        my $msg     = $error = shift;
1610        $longmess   = Carp::longmess($error);
1611        if (ref $self) {
1612            $self->{_error} = $error;
1613            $self->{_longmess} = $longmess;
1614        }
1615
1616        ### set Archive::Tar::WARN to 0 to disable printing
1617        ### of errors
1618        if( $WARN ) {
1619            carp $DEBUG ? $longmess : $msg;
1620        }
1621
1622        return;
1623    }
1624
1625    sub error {
1626        my $self = shift;
1627        if (ref $self) {
1628            return shift() ? $self->{_longmess} : $self->{_error};
1629        } else {
1630            return shift() ? $longmess : $error;
1631        }
1632    }
1633}
1634
1635=head2 $tar->setcwd( $cwd );
1636
1637C<Archive::Tar> needs to know the current directory, and it will run
1638C<Cwd::cwd()> I<every> time it extracts a I<relative> entry from the
1639tarfile and saves it in the file system. (As of version 1.30, however,
1640C<Archive::Tar> will use the speed optimization described below
1641automatically, so it's only relevant if you're using C<extract_file()>).
1642
1643Since C<Archive::Tar> doesn't change the current directory internally
1644while it is extracting the items in a tarball, all calls to C<Cwd::cwd()>
1645can be avoided if we can guarantee that the current directory doesn't
1646get changed externally.
1647
1648To use this performance boost, set the current directory via
1649
1650    use Cwd;
1651    $tar->setcwd( cwd() );
1652
1653once before calling a function like C<extract_file> and
1654C<Archive::Tar> will use the current directory setting from then on
1655and won't call C<Cwd::cwd()> internally.
1656
1657To switch back to the default behaviour, use
1658
1659    $tar->setcwd( undef );
1660
1661and C<Archive::Tar> will call C<Cwd::cwd()> internally again.
1662
1663If you're using C<Archive::Tar>'s C<extract()> method, C<setcwd()> will
1664be called for you.
1665
1666=cut
1667
1668sub setcwd {
1669    my $self     = shift;
1670    my $cwd      = shift;
1671
1672    $self->{cwd} = $cwd;
1673}
1674
1675=head1 Class Methods
1676
1677=head2 Archive::Tar->create_archive($file, $compressed, @filelist)
1678
1679Creates a tar file from the list of files provided.  The first
1680argument can either be the name of the tar file to create or a
1681reference to an open file handle (e.g. a GLOB reference).
1682
1683The second argument is used to indicate compression. You can either
1684compress using C<gzip> or C<bzip2>. If you pass a digit, it's assumed
1685to be the C<gzip> compression level (between 1 and 9), but the use of
1686constants is preferred:
1687
1688  # write a gzip compressed file
1689  Archive::Tar->create_archive( 'out.tgz', COMPRESS_GZIP, @filelist );
1690
1691  # write a bzip compressed file
1692  Archive::Tar->create_archive( 'out.tbz', COMPRESS_BZIP, @filelist );
1693
1694Note that when you pass in a filehandle, the compression argument
1695is ignored, as all files are printed verbatim to your filehandle.
1696If you wish to enable compression with filehandles, use an
1697C<IO::Zlib> or C<IO::Compress::Bzip2> filehandle instead.
1698
1699The remaining arguments list the files to be included in the tar file.
1700These files must all exist. Any files which don't exist or can't be
1701read are silently ignored.
1702
1703If the archive creation fails for any reason, C<create_archive> will
1704return false. Please use the C<error> method to find the cause of the
1705failure.
1706
1707Note that this method does not write C<on the fly> as it were; it
1708still reads all the files into memory before writing out the archive.
1709Consult the FAQ below if this is a problem.
1710
1711=cut
1712
1713sub create_archive {
1714    my $class = shift;
1715
1716    my $file    = shift; return unless defined $file;
1717    my $gzip    = shift || 0;
1718    my @files   = @_;
1719
1720    unless( @files ) {
1721        return $class->_error( qq[Cowardly refusing to create empty archive!] );
1722    }
1723
1724    my $tar = $class->new;
1725    $tar->add_files( @files );
1726    return $tar->write( $file, $gzip );
1727}
1728
1729=head2 Archive::Tar->iter( $filename, [ $compressed, {opt => $val} ] )
1730
1731Returns an iterator function that reads the tar file without loading
1732it all in memory.  Each time the function is called it will return the
1733next file in the tarball. The files are returned as
1734C<Archive::Tar::File> objects. The iterator function returns the
1735empty list once it has exhausted the files contained.
1736
1737The second argument can be a hash reference with options, which are
1738identical to the arguments passed to C<read()>.
1739
1740Example usage:
1741
1742    my $next = Archive::Tar->iter( "example.tar.gz", 1, {filter => qr/\.pm$/} );
1743
1744    while( my $f = $next->() ) {
1745        print $f->name, "\n";
1746
1747        $f->extract or warn "Extraction failed";
1748
1749        # ....
1750    }
1751
1752=cut
1753
1754
1755sub iter {
1756    my $class       = shift;
1757    my $filename    = shift or return;
1758    my $compressed  = shift || 0;
1759    my $opts        = shift || {};
1760
1761    ### get a handle to read from.
1762    my $handle = $class->_get_handle(
1763        $filename,
1764        $compressed,
1765        READ_ONLY->( ZLIB )
1766    ) or return;
1767
1768    my @data;
1769		my $CONSTRUCT_ARGS = [ $filename, $compressed, $opts ];
1770    return sub {
1771        return shift(@data)     if @data;       # more than one file returned?
1772        return                  unless $handle; # handle exhausted?
1773
1774        ### read data, should only return file
1775        my $tarfile = $class->_read_tar($handle, { %$opts, limit => 1 });
1776        @data = @$tarfile if ref $tarfile && ref $tarfile eq 'ARRAY';
1777				if($Archive::Tar::RESOLVE_SYMLINK!~/none/){
1778					foreach(@data){
1779						#may refine this heuristic for ON_UNIX?
1780						if($_->linkname){
1781							#is there a better slot to store/share it ?
1782							$_->{'_archive'} = $CONSTRUCT_ARGS;
1783						}
1784					}
1785				}
1786
1787        ### return one piece of data
1788        return shift(@data)     if @data;
1789
1790        ### data is exhausted, free the filehandle
1791        undef $handle;
1792				if(@$CONSTRUCT_ARGS == 4){
1793					#free archive in memory
1794					undef $CONSTRUCT_ARGS->[-1];
1795				}
1796        return;
1797    };
1798}
1799
1800=head2 Archive::Tar->list_archive($file, $compressed, [\@properties])
1801
1802Returns a list of the names of all the files in the archive.  The
1803first argument can either be the name of the tar file to list or a
1804reference to an open file handle (e.g. a GLOB reference).
1805
1806If C<list_archive()> is passed an array reference as its third
1807argument it returns a list of hash references containing the requested
1808properties of each file.  The following list of properties is
1809supported: full_path, name, size, mtime (last modified date), mode,
1810uid, gid, linkname, uname, gname, devmajor, devminor, prefix, type.
1811
1812See C<Archive::Tar::File> for details about supported properties.
1813
1814Passing an array reference containing only one element, 'name', is
1815special cased to return a list of names rather than a list of hash
1816references.
1817
1818=cut
1819
1820sub list_archive {
1821    my $class   = shift;
1822    my $file    = shift; return unless defined $file;
1823    my $gzip    = shift || 0;
1824
1825    my $tar = $class->new($file, $gzip);
1826    return unless $tar;
1827
1828    return $tar->list_files( @_ );
1829}
1830
1831=head2 Archive::Tar->extract_archive($file, $compressed)
1832
1833Extracts the contents of the tar file.  The first argument can either
1834be the name of the tar file to create or a reference to an open file
1835handle (e.g. a GLOB reference).  All relative paths in the tar file will
1836be created underneath the current working directory.
1837
1838C<extract_archive> will return a list of files it extracted.
1839If the archive extraction fails for any reason, C<extract_archive>
1840will return false.  Please use the C<error> method to find the cause
1841of the failure.
1842
1843=cut
1844
1845sub extract_archive {
1846    my $class   = shift;
1847    my $file    = shift; return unless defined $file;
1848    my $gzip    = shift || 0;
1849
1850    my $tar = $class->new( ) or return;
1851
1852    return $tar->read( $file, $gzip, { extract => 1 } );
1853}
1854
1855=head2 $bool = Archive::Tar->has_io_string
1856
1857Returns true if we currently have C<IO::String> support loaded.
1858
1859Either C<IO::String> or C<perlio> support is needed to support writing
1860stringified archives. Currently, C<perlio> is the preferred method, if
1861available.
1862
1863See the C<GLOBAL VARIABLES> section to see how to change this preference.
1864
1865=cut
1866
1867sub has_io_string { return $HAS_IO_STRING; }
1868
1869=head2 $bool = Archive::Tar->has_perlio
1870
1871Returns true if we currently have C<perlio> support loaded.
1872
1873This requires C<perl-5.8> or higher, compiled with C<perlio>
1874
1875Either C<IO::String> or C<perlio> support is needed to support writing
1876stringified archives. Currently, C<perlio> is the preferred method, if
1877available.
1878
1879See the C<GLOBAL VARIABLES> section to see how to change this preference.
1880
1881=cut
1882
1883sub has_perlio { return $HAS_PERLIO; }
1884
1885=head2 $bool = Archive::Tar->has_zlib_support
1886
1887Returns true if C<Archive::Tar> can extract C<zlib> compressed archives
1888
1889=cut
1890
1891sub has_zlib_support { return ZLIB }
1892
1893=head2 $bool = Archive::Tar->has_bzip2_support
1894
1895Returns true if C<Archive::Tar> can extract C<bzip2> compressed archives
1896
1897=cut
1898
1899sub has_bzip2_support { return BZIP }
1900
1901=head2 Archive::Tar->can_handle_compressed_files
1902
1903A simple checking routine, which will return true if C<Archive::Tar>
1904is able to uncompress compressed archives on the fly with C<IO::Zlib>
1905and C<IO::Compress::Bzip2> or false if not both are installed.
1906
1907You can use this as a shortcut to determine whether C<Archive::Tar>
1908will do what you think before passing compressed archives to its
1909C<read> method.
1910
1911=cut
1912
1913sub can_handle_compressed_files { return ZLIB && BZIP ? 1 : 0 }
1914
1915sub no_string_support {
1916    croak("You have to install IO::String to support writing archives to strings");
1917}
1918
1919sub _symlinks_resolver{
1920  my ($src, $trg) = @_;
1921  my @src = split /[\/\\]/, $src;
1922  my @trg = split /[\/\\]/, $trg;
1923  pop @src; #strip out current object name
1924  if(@trg and $trg[0] eq ''){
1925    shift @trg;
1926    #restart path from scratch
1927    @src = ( );
1928  }
1929  foreach my $part ( @trg ){
1930    next if $part eq '.'; #ignore current
1931    if($part eq '..'){
1932      #got to parent
1933      pop @src;
1934    }
1935    else{
1936      #append it
1937      push @src, $part;
1938    }
1939  }
1940  my $path = join('/', @src);
1941  warn "_symlinks_resolver('$src','$trg') = $path" if $DEBUG;
1942  return $path;
1943}
1944
19451;
1946
1947__END__
1948
1949=head1 GLOBAL VARIABLES
1950
1951=head2 $Archive::Tar::FOLLOW_SYMLINK
1952
1953Set this variable to C<1> to make C<Archive::Tar> effectively make a
1954copy of the file when extracting. Default is C<0>, which
1955means the symlink stays intact. Of course, you will have to pack the
1956file linked to as well.
1957
1958This option is checked when you write out the tarfile using C<write>
1959or C<create_archive>.
1960
1961This works just like C</bin/tar>'s C<-h> option.
1962
1963=head2 $Archive::Tar::CHOWN
1964
1965By default, C<Archive::Tar> will try to C<chown> your files if it is
1966able to. In some cases, this may not be desired. In that case, set
1967this variable to C<0> to disable C<chown>-ing, even if it were
1968possible.
1969
1970The default is C<1>.
1971
1972=head2 $Archive::Tar::CHMOD
1973
1974By default, C<Archive::Tar> will try to C<chmod> your files to
1975whatever mode was specified for the particular file in the archive.
1976In some cases, this may not be desired. In that case, set this
1977variable to C<0> to disable C<chmod>-ing.
1978
1979The default is C<1>.
1980
1981=head2 $Archive::Tar::SAME_PERMISSIONS
1982
1983When, C<$Archive::Tar::CHMOD> is enabled, this setting controls whether
1984the permissions on files from the archive are used without modification
1985of if they are filtered by removing any setid bits and applying the
1986current umask.
1987
1988The default is C<1> for the root user and C<0> for normal users.
1989
1990=head2 $Archive::Tar::DO_NOT_USE_PREFIX
1991
1992By default, C<Archive::Tar> will try to put paths that are over
1993100 characters in the C<prefix> field of your tar header, as
1994defined per POSIX-standard. However, some (older) tar programs
1995do not implement this spec. To retain compatibility with these older
1996or non-POSIX compliant versions, you can set the C<$DO_NOT_USE_PREFIX>
1997variable to a true value, and C<Archive::Tar> will use an alternate
1998way of dealing with paths over 100 characters by using the
1999C<GNU Extended Header> feature.
2000
2001Note that clients who do not support the C<GNU Extended Header>
2002feature will not be able to read these archives. Such clients include
2003tars on C<Solaris>, C<Irix> and C<AIX>.
2004
2005The default is C<0>.
2006
2007=head2 $Archive::Tar::DEBUG
2008
2009Set this variable to C<1> to always get the C<Carp::longmess> output
2010of the warnings, instead of the regular C<carp>. This is the same
2011message you would get by doing:
2012
2013    $tar->error(1);
2014
2015Defaults to C<0>.
2016
2017=head2 $Archive::Tar::WARN
2018
2019Set this variable to C<0> if you do not want any warnings printed.
2020Personally I recommend against doing this, but people asked for the
2021option. Also, be advised that this is of course not threadsafe.
2022
2023Defaults to C<1>.
2024
2025=head2 $Archive::Tar::error
2026
2027Holds the last reported error. Kept for historical reasons, but its
2028use is very much discouraged. Use the C<error()> method instead:
2029
2030    warn $tar->error unless $tar->extract;
2031
2032Note that in older versions of this module, the C<error()> method
2033would return an effectively global value even when called an instance
2034method as above. This has since been fixed, and multiple instances of
2035C<Archive::Tar> now have separate error strings.
2036
2037=head2 $Archive::Tar::INSECURE_EXTRACT_MODE
2038
2039This variable indicates whether C<Archive::Tar> should allow
2040files to be extracted outside their current working directory.
2041
2042Allowing this could have security implications, as a malicious
2043tar archive could alter or replace any file the extracting user
2044has permissions to. Therefor, the default is to not allow
2045insecure extractions.
2046
2047If you trust the archive, or have other reasons to allow the
2048archive to write files outside your current working directory,
2049set this variable to C<true>.
2050
2051Note that this is a backwards incompatible change from version
2052C<1.36> and before.
2053
2054=head2 $Archive::Tar::HAS_PERLIO
2055
2056This variable holds a boolean indicating if we currently have
2057C<perlio> support loaded. This will be enabled for any perl
2058greater than C<5.8> compiled with C<perlio>.
2059
2060If you feel strongly about disabling it, set this variable to
2061C<false>. Note that you will then need C<IO::String> installed
2062to support writing stringified archives.
2063
2064Don't change this variable unless you B<really> know what you're
2065doing.
2066
2067=head2 $Archive::Tar::HAS_IO_STRING
2068
2069This variable holds a boolean indicating if we currently have
2070C<IO::String> support loaded. This will be enabled for any perl
2071that has a loadable C<IO::String> module.
2072
2073If you feel strongly about disabling it, set this variable to
2074C<false>. Note that you will then need C<perlio> support from
2075your perl to be able to  write stringified archives.
2076
2077Don't change this variable unless you B<really> know what you're
2078doing.
2079
2080=head2 $Archive::Tar::ZERO_PAD_NUMBERS
2081
2082This variable holds a boolean indicating if we will create
2083zero padded numbers for C<size>, C<mtime> and C<checksum>.
2084The default is C<0>, indicating that we will create space padded
2085numbers. Added for compatibility with C<busybox> implementations.
2086
2087=head2 Tuning the way RESOLVE_SYMLINK will works
2088
2089	You can tune the behaviour by setting the $Archive::Tar::RESOLVE_SYMLINK variable,
2090	or $ENV{PERL5_AT_RESOLVE_SYMLINK} before loading the module Archive::Tar.
2091
2092  Values can be one of the following:
2093
2094		none
2095           Disable this mechanism and failed as it was in previous version (<1.88)
2096
2097		speed (default)
2098           If you prefer speed
2099           this will read again the whole archive using read() so all entries
2100           will be available
2101
2102    memory
2103           If you prefer memory
2104
2105	Limitation
2106
2107		It won't work for terminal, pipe or sockets or every non seekable source.
2108
2109=cut
2110
2111=head1 FAQ
2112
2113=over 4
2114
2115=item What's the minimum perl version required to run Archive::Tar?
2116
2117You will need perl version 5.005_03 or newer.
2118
2119=item Isn't Archive::Tar slow?
2120
2121Yes it is. It's pure perl, so it's a lot slower then your C</bin/tar>
2122However, it's very portable. If speed is an issue, consider using
2123C</bin/tar> instead.
2124
2125=item Isn't Archive::Tar heavier on memory than /bin/tar?
2126
2127Yes it is, see previous answer. Since C<Compress::Zlib> and therefore
2128C<IO::Zlib> doesn't support C<seek> on their filehandles, there is little
2129choice but to read the archive into memory.
2130This is ok if you want to do in-memory manipulation of the archive.
2131
2132If you just want to extract, use the C<extract_archive> class method
2133instead. It will optimize and write to disk immediately.
2134
2135Another option is to use the C<iter> class method to iterate over
2136the files in the tarball without reading them all in memory at once.
2137
2138=item Can you lazy-load data instead?
2139
2140In some cases, yes. You can use the C<iter> class method to iterate
2141over the files in the tarball without reading them all in memory at once.
2142
2143=item How much memory will an X kb tar file need?
2144
2145Probably more than X kb, since it will all be read into memory. If
2146this is a problem, and you don't need to do in memory manipulation
2147of the archive, consider using the C<iter> class method, or C</bin/tar>
2148instead.
2149
2150=item What do you do with unsupported filetypes in an archive?
2151
2152C<Unix> has a few filetypes that aren't supported on other platforms,
2153like C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just
2154try to make a copy of the original file, rather than throwing an error.
2155
2156This does require you to read the entire archive in to memory first,
2157since otherwise we wouldn't know what data to fill the copy with.
2158(This means that you cannot use the class methods, including C<iter>
2159on archives that have incompatible filetypes and still expect things
2160to work).
2161
2162For other filetypes, like C<chardevs> and C<blockdevs> we'll warn that
2163the extraction of this particular item didn't work.
2164
2165=item I'm using WinZip, or some other non-POSIX client, and files are not being extracted properly!
2166
2167By default, C<Archive::Tar> is in a completely POSIX-compatible
2168mode, which uses the POSIX-specification of C<tar> to store files.
2169For paths greater than 100 characters, this is done using the
2170C<POSIX header prefix>. Non-POSIX-compatible clients may not support
2171this part of the specification, and may only support the C<GNU Extended
2172Header> functionality. To facilitate those clients, you can set the
2173C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. See the
2174C<GLOBAL VARIABLES> section for details on this variable.
2175
2176Note that GNU tar earlier than version 1.14 does not cope well with
2177the C<POSIX header prefix>. If you use such a version, consider setting
2178the C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>.
2179
2180=item How do I extract only files that have property X from an archive?
2181
2182Sometimes, you might not wish to extract a complete archive, just
2183the files that are relevant to you, based on some criteria.
2184
2185You can do this by filtering a list of C<Archive::Tar::File> objects
2186based on your criteria. For example, to extract only files that have
2187the string C<foo> in their title, you would use:
2188
2189    $tar->extract(
2190        grep { $_->full_path =~ /foo/ } $tar->get_files
2191    );
2192
2193This way, you can filter on any attribute of the files in the archive.
2194Consult the C<Archive::Tar::File> documentation on how to use these
2195objects.
2196
2197=item How do I access .tar.Z files?
2198
2199The C<Archive::Tar> module can optionally use C<Compress::Zlib> (via
2200the C<IO::Zlib> module) to access tar files that have been compressed
2201with C<gzip>. Unfortunately tar files compressed with the Unix C<compress>
2202utility cannot be read by C<Compress::Zlib> and so cannot be directly
2203accesses by C<Archive::Tar>.
2204
2205If the C<uncompress> or C<gunzip> programs are available, you can use
2206one of these workarounds to read C<.tar.Z> files from C<Archive::Tar>
2207
2208Firstly with C<uncompress>
2209
2210    use Archive::Tar;
2211
2212    open F, "uncompress -c $filename |";
2213    my $tar = Archive::Tar->new(*F);
2214    ...
2215
2216and this with C<gunzip>
2217
2218    use Archive::Tar;
2219
2220    open F, "gunzip -c $filename |";
2221    my $tar = Archive::Tar->new(*F);
2222    ...
2223
2224Similarly, if the C<compress> program is available, you can use this to
2225write a C<.tar.Z> file
2226
2227    use Archive::Tar;
2228    use IO::File;
2229
2230    my $fh = new IO::File "| compress -c >$filename";
2231    my $tar = Archive::Tar->new();
2232    ...
2233    $tar->write($fh);
2234    $fh->close ;
2235
2236=item How do I handle Unicode strings?
2237
2238C<Archive::Tar> uses byte semantics for any files it reads from or writes
2239to disk. This is not a problem if you only deal with files and never
2240look at their content or work solely with byte strings. But if you use
2241Unicode strings with character semantics, some additional steps need
2242to be taken.
2243
2244For example, if you add a Unicode string like
2245
2246    # Problem
2247    $tar->add_data('file.txt', "Euro: \x{20AC}");
2248
2249then there will be a problem later when the tarfile gets written out
2250to disk via C<$tar->write()>:
2251
2252    Wide character in print at .../Archive/Tar.pm line 1014.
2253
2254The data was added as a Unicode string and when writing it out to disk,
2255the C<:utf8> line discipline wasn't set by C<Archive::Tar>, so Perl
2256tried to convert the string to ISO-8859 and failed. The written file
2257now contains garbage.
2258
2259For this reason, Unicode strings need to be converted to UTF-8-encoded
2260bytestrings before they are handed off to C<add_data()>:
2261
2262    use Encode;
2263    my $data = "Accented character: \x{20AC}";
2264    $data = encode('utf8', $data);
2265
2266    $tar->add_data('file.txt', $data);
2267
2268A opposite problem occurs if you extract a UTF8-encoded file from a
2269tarball. Using C<get_content()> on the C<Archive::Tar::File> object
2270will return its content as a bytestring, not as a Unicode string.
2271
2272If you want it to be a Unicode string (because you want character
2273semantics with operations like regular expression matching), you need
2274to decode the UTF8-encoded content and have Perl convert it into
2275a Unicode string:
2276
2277    use Encode;
2278    my $data = $tar->get_content();
2279
2280    # Make it a Unicode string
2281    $data = decode('utf8', $data);
2282
2283There is no easy way to provide this functionality in C<Archive::Tar>,
2284because a tarball can contain many files, and each of which could be
2285encoded in a different way.
2286
2287=back
2288
2289=head1 CAVEATS
2290
2291The AIX tar does not fill all unused space in the tar archive with 0x00.
2292This sometimes leads to warning messages from C<Archive::Tar>.
2293
2294  Invalid header block at offset nnn
2295
2296A fix for that problem is scheduled to be released in the following levels
2297of AIX, all of which should be coming out in the 4th quarter of 2009:
2298
2299 AIX 5.3 TL7 SP10
2300 AIX 5.3 TL8 SP8
2301 AIX 5.3 TL9 SP5
2302 AIX 5.3 TL10 SP2
2303
2304 AIX 6.1 TL0 SP11
2305 AIX 6.1 TL1 SP7
2306 AIX 6.1 TL2 SP6
2307 AIX 6.1 TL3 SP3
2308
2309The IBM APAR number for this problem is IZ50240 (Reported component ID:
23105765G0300 / AIX 5.3). It is possible to get an ifix for that problem.
2311If you need an ifix please contact your local IBM AIX support.
2312
2313=head1 TODO
2314
2315=over 4
2316
2317=item Check if passed in handles are open for read/write
2318
2319Currently I don't know of any portable pure perl way to do this.
2320Suggestions welcome.
2321
2322=item Allow archives to be passed in as string
2323
2324Currently, we only allow opened filehandles or filenames, but
2325not strings. The internals would need some reworking to facilitate
2326stringified archives.
2327
2328=item Facilitate processing an opened filehandle of a compressed archive
2329
2330Currently, we only support this if the filehandle is an IO::Zlib object.
2331Environments, like apache, will present you with an opened filehandle
2332to an uploaded file, which might be a compressed archive.
2333
2334=back
2335
2336=head1 SEE ALSO
2337
2338=over 4
2339
2340=item The GNU tar specification
2341
2342C<http://www.gnu.org/software/tar/manual/tar.html>
2343
2344=item The PAX format specification
2345
2346The specification which tar derives from; C< http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html>
2347
2348=item A comparison of GNU and POSIX tar standards; C<http://www.delorie.com/gnu/docs/tar/tar_114.html>
2349
2350=item GNU tar intends to switch to POSIX compatibility
2351
2352GNU Tar authors have expressed their intention to become completely
2353POSIX-compatible; C<http://www.gnu.org/software/tar/manual/html_node/Formats.html>
2354
2355=item A Comparison between various tar implementations
2356
2357Lists known issues and incompatibilities; C<http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs>
2358
2359=back
2360
2361=head1 AUTHOR
2362
2363This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
2364
2365Please reports bugs to E<lt>bug-archive-tar@rt.cpan.orgE<gt>.
2366
2367=head1 ACKNOWLEDGEMENTS
2368
2369Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney, Gisle Aas,
2370Rainer Tammer and especially Andrew Savige for their help and suggestions.
2371
2372=head1 COPYRIGHT
2373
2374This module is copyright (c) 2002 - 2009 Jos Boumans
2375E<lt>kane@cpan.orgE<gt>. All rights reserved.
2376
2377This library is free software; you may redistribute and/or modify
2378it under the same terms as Perl itself.
2379
2380=cut
2381