1#!/usr/bin/perl 2############################################################################## 3# Tool for using regular expressions against the contents of files in a tar 4# archive. See 'ptargrep --help' for more documentation. 5# 6 7BEGIN { pop @INC if $INC[-1] eq '.' } 8use strict; 9use warnings; 10 11use Pod::Usage qw(pod2usage); 12use Getopt::Long qw(GetOptions); 13use Archive::Tar qw(); 14use File::Path qw(mkpath); 15 16my(%opt, $pattern); 17 18if(!GetOptions(\%opt, 19 'basename|b', 20 'ignore-case|i', 21 'list-only|l', 22 'verbose|v', 23 'help|?', 24)) { 25 pod2usage(-exitval => 1, -verbose => 0); 26} 27 28 29pod2usage(-exitstatus => 0, -verbose => 2) if $opt{help}; 30 31pod2usage(-exitval => 1, -verbose => 0, 32 -message => "No pattern specified", 33) unless @ARGV; 34make_pattern( shift(@ARGV) ); 35 36pod2usage(-exitval => 1, -verbose => 0, 37 -message => "No tar files specified", 38) unless @ARGV; 39 40process_archive($_) foreach @ARGV; 41 42exit 0; 43 44 45sub make_pattern { 46 my($pat) = @_; 47 48 if($opt{'ignore-case'}) { 49 $pattern = qr{(?im)$pat}; 50 } 51 else { 52 $pattern = qr{(?m)$pat}; 53 } 54} 55 56 57sub process_archive { 58 my($filename) = @_; 59 60 _log("Processing archive: $filename"); 61 my $next = Archive::Tar->iter($filename); 62 while( my $f = $next->() ) { 63 next unless $f->is_file; 64 match_file($f) if $f->size > 0; 65 } 66} 67 68 69sub match_file { 70 my($f) = @_; 71 my $path = $f->name; 72 my $prefix = $f->prefix; 73 if (defined $prefix) { 74 $path = File::Spec->catfile($prefix, $path); 75 } 76 77 _log("filename: %s (%d bytes)", $path, $f->size); 78 79 my $body = $f->get_content(); 80 if($body !~ $pattern) { 81 _log(" no match"); 82 return; 83 } 84 85 if($opt{'list-only'}) { 86 print $path, "\n"; 87 return; 88 } 89 90 save_file($path, $body); 91} 92 93 94sub save_file { 95 my($path, $body) = @_; 96 97 _log(" found match - extracting"); 98 my($fh); 99 my($dir, $file) = $path =~ m{\A(?:(.*)/)?([^/]+)\z}; 100 if($dir and not $opt{basename}) { 101 _log(" writing to $dir/$file"); 102 $dir =~ s{\A/}{./}; 103 mkpath($dir) unless -d $dir; 104 open $fh, '>', "$dir/$file" or die "open($dir/$file): $!"; 105 } 106 else { 107 _log(" writing to ./$file"); 108 open $fh, '>', $file or die "open($file): $!"; 109 } 110 print $fh $body; 111 close($fh); 112} 113 114 115sub _log { 116 return unless $opt{verbose}; 117 my($format, @args) = @_; 118 warn sprintf($format, @args) . "\n"; 119} 120 121 122__END__ 123 124=head1 NAME 125 126ptargrep - Apply pattern matching to the contents of files in a tar archive 127 128=head1 SYNOPSIS 129 130 ptargrep [options] <pattern> <tar file> ... 131 132 Options: 133 134 --basename|-b ignore directory paths from archive 135 --ignore-case|-i do case-insensitive pattern matching 136 --list-only|-l list matching filenames rather than extracting matches 137 --verbose|-v write debugging message to STDERR 138 --help|-? detailed help message 139 140=head1 DESCRIPTION 141 142This utility allows you to apply pattern matching to B<the contents> of files 143contained in a tar archive. You might use this to identify all files in an 144archive which contain lines matching the specified pattern and either print out 145the pathnames or extract the files. 146 147The pattern will be used as a Perl regular expression (as opposed to a simple 148grep regex). 149 150Multiple tar archive filenames can be specified - they will each be processed 151in turn. 152 153=head1 OPTIONS 154 155=over 4 156 157=item B<--basename> (alias -b) 158 159When matching files are extracted, ignore the directory path from the archive 160and write to the current directory using the basename of the file from the 161archive. Beware: if two matching files in the archive have the same basename, 162the second file extracted will overwrite the first. 163 164=item B<--ignore-case> (alias -i) 165 166Make pattern matching case-insensitive. 167 168=item B<--list-only> (alias -l) 169 170Print the pathname of each matching file from the archive to STDOUT. Without 171this option, the default behaviour is to extract each matching file. 172 173=item B<--verbose> (alias -v) 174 175Log debugging info to STDERR. 176 177=item B<--help> (alias -?) 178 179Display this documentation. 180 181=back 182 183=head1 COPYRIGHT 184 185Copyright 2010 Grant McLean E<lt>grantm@cpan.orgE<gt> 186 187This program is free software; you can redistribute it and/or modify it 188under the same terms as Perl itself. 189 190=cut 191 192 193 194