1#!/usr/bin/perl 2############################################################################## 3# Tool for using regular expressions against the contents of files in a tar 4# archive. See 'ptargrep --help' for more documentation. 5# 6 7BEGIN { pop @INC if $INC[-1] eq '.' } 8use strict; 9use warnings; 10 11use Pod::Usage qw(pod2usage); 12use Getopt::Long qw(GetOptions); 13use Archive::Tar qw(); 14use File::Path qw(mkpath); 15 16my(%opt, $pattern); 17 18if(!GetOptions(\%opt, 19 'basename|b', 20 'ignore-case|i', 21 'list-only|l', 22 'verbose|v', 23 'help|?', 24)) { 25 pod2usage(-exitval => 1, -verbose => 0); 26} 27 28 29pod2usage(-exitstatus => 0, -verbose => 2) if $opt{help}; 30 31pod2usage(-exitval => 1, -verbose => 0, 32 -message => "No pattern specified", 33) unless @ARGV; 34make_pattern( shift(@ARGV) ); 35 36pod2usage(-exitval => 1, -verbose => 0, 37 -message => "No tar files specified", 38) unless @ARGV; 39 40process_archive($_) foreach @ARGV; 41 42exit 0; 43 44 45sub make_pattern { 46 my($pat) = @_; 47 48 if($opt{'ignore-case'}) { 49 $pattern = qr{(?im)$pat}; 50 } 51 else { 52 $pattern = qr{(?m)$pat}; 53 } 54} 55 56 57sub process_archive { 58 my($filename) = @_; 59 60 _log("Processing archive: $filename"); 61 my $next = Archive::Tar->iter($filename); 62 while( my $f = $next->() ) { 63 next unless $f->is_file; 64 match_file($f) if $f->size > 0; 65 } 66} 67 68 69sub match_file { 70 my($f) = @_; 71 my $path = $f->name; 72 73 _log("filename: %s (%d bytes)", $path, $f->size); 74 75 my $body = $f->get_content(); 76 if($body !~ $pattern) { 77 _log(" no match"); 78 return; 79 } 80 81 if($opt{'list-only'}) { 82 print $path, "\n"; 83 return; 84 } 85 86 save_file($path, $body); 87} 88 89 90sub save_file { 91 my($path, $body) = @_; 92 93 _log(" found match - extracting"); 94 my($fh); 95 my($dir, $file) = $path =~ m{\A(?:(.*)/)?([^/]+)\z}; 96 if($dir and not $opt{basename}) { 97 _log(" writing to $dir/$file"); 98 $dir =~ s{\A/}{./}; 99 mkpath($dir) unless -d $dir; 100 open $fh, '>', "$dir/$file" or die "open($dir/$file): $!"; 101 } 102 else { 103 _log(" writing to ./$file"); 104 open $fh, '>', $file or die "open($file): $!"; 105 } 106 print $fh $body; 107 close($fh); 108} 109 110 111sub _log { 112 return unless $opt{verbose}; 113 my($format, @args) = @_; 114 warn sprintf($format, @args) . "\n"; 115} 116 117 118__END__ 119 120=head1 NAME 121 122ptargrep - Apply pattern matching to the contents of files in a tar archive 123 124=head1 SYNOPSIS 125 126 ptargrep [options] <pattern> <tar file> ... 127 128 Options: 129 130 --basename|-b ignore directory paths from archive 131 --ignore-case|-i do case-insensitive pattern matching 132 --list-only|-l list matching filenames rather than extracting matches 133 --verbose|-v write debugging message to STDERR 134 --help|-? detailed help message 135 136=head1 DESCRIPTION 137 138This utility allows you to apply pattern matching to B<the contents> of files 139contained in a tar archive. You might use this to identify all files in an 140archive which contain lines matching the specified pattern and either print out 141the pathnames or extract the files. 142 143The pattern will be used as a Perl regular expression (as opposed to a simple 144grep regex). 145 146Multiple tar archive filenames can be specified - they will each be processed 147in turn. 148 149=head1 OPTIONS 150 151=over 4 152 153=item B<--basename> (alias -b) 154 155When matching files are extracted, ignore the directory path from the archive 156and write to the current directory using the basename of the file from the 157archive. Beware: if two matching files in the archive have the same basename, 158the second file extracted will overwrite the first. 159 160=item B<--ignore-case> (alias -i) 161 162Make pattern matching case-insensitive. 163 164=item B<--list-only> (alias -l) 165 166Print the pathname of each matching file from the archive to STDOUT. Without 167this option, the default behaviour is to extract each matching file. 168 169=item B<--verbose> (alias -v) 170 171Log debugging info to STDERR. 172 173=item B<--help> (alias -?) 174 175Display this documentation. 176 177=back 178 179=head1 COPYRIGHT 180 181Copyright 2010 Grant McLean E<lt>grantm@cpan.orgE<gt> 182 183This program is free software; you can redistribute it and/or modify it 184under the same terms as Perl itself. 185 186=cut 187 188 189 190