1*898184e3Ssthen#!/usr/bin/perl 2*898184e3Ssthen############################################################################## 3*898184e3Ssthen# Tool for using regular expressions against the contents of files in a tar 4*898184e3Ssthen# archive. See 'ptargrep --help' for more documentation. 5*898184e3Ssthen# 6*898184e3Ssthen 7*898184e3Ssthenuse strict; 8*898184e3Ssthenuse warnings; 9*898184e3Ssthen 10*898184e3Ssthenuse Pod::Usage qw(pod2usage); 11*898184e3Ssthenuse Getopt::Long qw(GetOptions); 12*898184e3Ssthenuse Archive::Tar qw(); 13*898184e3Ssthenuse File::Path qw(mkpath); 14*898184e3Ssthen 15*898184e3Ssthenmy(%opt, $pattern); 16*898184e3Ssthen 17*898184e3Ssthenif(!GetOptions(\%opt, 18*898184e3Ssthen 'basename|b', 19*898184e3Ssthen 'ignore-case|i', 20*898184e3Ssthen 'list-only|l', 21*898184e3Ssthen 'verbose|v', 22*898184e3Ssthen 'help|?', 23*898184e3Ssthen)) { 24*898184e3Ssthen pod2usage(-exitval => 1, -verbose => 0); 25*898184e3Ssthen} 26*898184e3Ssthen 27*898184e3Ssthen 28*898184e3Ssthenpod2usage(-exitstatus => 0, -verbose => 2) if $opt{help}; 29*898184e3Ssthen 30*898184e3Ssthenpod2usage(-exitval => 1, -verbose => 0, 31*898184e3Ssthen -message => "No pattern specified", 32*898184e3Ssthen) unless @ARGV; 33*898184e3Ssthenmake_pattern( shift(@ARGV) ); 34*898184e3Ssthen 35*898184e3Ssthenpod2usage(-exitval => 1, -verbose => 0, 36*898184e3Ssthen -message => "No tar files specified", 37*898184e3Ssthen) unless @ARGV; 38*898184e3Ssthen 39*898184e3Ssthenprocess_archive($_) foreach @ARGV; 40*898184e3Ssthen 41*898184e3Ssthenexit 0; 42*898184e3Ssthen 43*898184e3Ssthen 44*898184e3Ssthensub make_pattern { 45*898184e3Ssthen my($pat) = @_; 46*898184e3Ssthen 47*898184e3Ssthen if($opt{'ignore-case'}) { 48*898184e3Ssthen $pattern = qr{(?im)$pat}; 49*898184e3Ssthen } 50*898184e3Ssthen else { 51*898184e3Ssthen $pattern = qr{(?m)$pat}; 52*898184e3Ssthen } 53*898184e3Ssthen} 54*898184e3Ssthen 55*898184e3Ssthen 56*898184e3Ssthensub process_archive { 57*898184e3Ssthen my($filename) = @_; 58*898184e3Ssthen 59*898184e3Ssthen _log("Processing archive: $filename"); 60*898184e3Ssthen my $next = Archive::Tar->iter($filename); 61*898184e3Ssthen while( my $f = $next->() ) { 62*898184e3Ssthen next unless $f->is_file; 63*898184e3Ssthen match_file($f) if $f->size > 0; 64*898184e3Ssthen } 65*898184e3Ssthen} 66*898184e3Ssthen 67*898184e3Ssthen 68*898184e3Ssthensub match_file { 69*898184e3Ssthen my($f) = @_; 70*898184e3Ssthen my $path = $f->name; 71*898184e3Ssthen 72*898184e3Ssthen _log("filename: %s (%d bytes)", $path, $f->size); 73*898184e3Ssthen 74*898184e3Ssthen my $body = $f->get_content(); 75*898184e3Ssthen if($body !~ $pattern) { 76*898184e3Ssthen _log(" no match"); 77*898184e3Ssthen return; 78*898184e3Ssthen } 79*898184e3Ssthen 80*898184e3Ssthen if($opt{'list-only'}) { 81*898184e3Ssthen print $path, "\n"; 82*898184e3Ssthen return; 83*898184e3Ssthen } 84*898184e3Ssthen 85*898184e3Ssthen save_file($path, $body); 86*898184e3Ssthen} 87*898184e3Ssthen 88*898184e3Ssthen 89*898184e3Ssthensub save_file { 90*898184e3Ssthen my($path, $body) = @_; 91*898184e3Ssthen 92*898184e3Ssthen _log(" found match - extracting"); 93*898184e3Ssthen my($fh); 94*898184e3Ssthen my($dir, $file) = $path =~ m{\A(?:(.*)/)?([^/]+)\z}; 95*898184e3Ssthen if($dir and not $opt{basename}) { 96*898184e3Ssthen _log(" writing to $dir/$file"); 97*898184e3Ssthen $dir =~ s{\A/}{./}; 98*898184e3Ssthen mkpath($dir) unless -d $dir; 99*898184e3Ssthen open $fh, '>', "$dir/$file" or die "open($dir/$file): $!"; 100*898184e3Ssthen } 101*898184e3Ssthen else { 102*898184e3Ssthen _log(" writing to ./$file"); 103*898184e3Ssthen open $fh, '>', $file or die "open($file): $!"; 104*898184e3Ssthen } 105*898184e3Ssthen print $fh $body; 106*898184e3Ssthen close($fh); 107*898184e3Ssthen} 108*898184e3Ssthen 109*898184e3Ssthen 110*898184e3Ssthensub _log { 111*898184e3Ssthen return unless $opt{verbose}; 112*898184e3Ssthen my($format, @args) = @_; 113*898184e3Ssthen warn sprintf($format, @args) . "\n"; 114*898184e3Ssthen} 115*898184e3Ssthen 116*898184e3Ssthen 117*898184e3Ssthen__END__ 118*898184e3Ssthen 119*898184e3Ssthen=head1 NAME 120*898184e3Ssthen 121*898184e3Ssthenptargrep - Apply pattern matching to the contents of files in a tar archive 122*898184e3Ssthen 123*898184e3Ssthen=head1 SYNOPSIS 124*898184e3Ssthen 125*898184e3Ssthen ptargrep [options] <pattern> <tar file> ... 126*898184e3Ssthen 127*898184e3Ssthen Options: 128*898184e3Ssthen 129*898184e3Ssthen --basename|-b ignore directory paths from archive 130*898184e3Ssthen --ignore-case|-i do case-insensitive pattern matching 131*898184e3Ssthen --list-only|-l list matching filenames rather than extracting matches 132*898184e3Ssthen --verbose|-v write debugging message to STDERR 133*898184e3Ssthen --help|-? detailed help message 134*898184e3Ssthen 135*898184e3Ssthen=head1 DESCRIPTION 136*898184e3Ssthen 137*898184e3SsthenThis utility allows you to apply pattern matching to B<the contents> of files 138*898184e3Ssthencontained in a tar archive. You might use this to identify all files in an 139*898184e3Ssthenarchive which contain lines matching the specified pattern and either print out 140*898184e3Ssthenthe pathnames or extract the files. 141*898184e3Ssthen 142*898184e3SsthenThe pattern will be used as a Perl regular expression (as opposed to a simple 143*898184e3Ssthengrep regex). 144*898184e3Ssthen 145*898184e3SsthenMultiple tar archive filenames can be specified - they will each be processed 146*898184e3Ssthenin turn. 147*898184e3Ssthen 148*898184e3Ssthen=head1 OPTIONS 149*898184e3Ssthen 150*898184e3Ssthen=over 4 151*898184e3Ssthen 152*898184e3Ssthen=item B<--basename> (alias -b) 153*898184e3Ssthen 154*898184e3SsthenWhen matching files are extracted, ignore the directory path from the archive 155*898184e3Ssthenand write to the current directory using the basename of the file from the 156*898184e3Ssthenarchive. Beware: if two matching files in the archive have the same basename, 157*898184e3Ssthenthe second file extracted will overwrite the first. 158*898184e3Ssthen 159*898184e3Ssthen=item B<--ignore-case> (alias -i) 160*898184e3Ssthen 161*898184e3SsthenMake pattern matching case-insensitive. 162*898184e3Ssthen 163*898184e3Ssthen=item B<--list-only> (alias -l) 164*898184e3Ssthen 165*898184e3SsthenPrint the pathname of each matching file from the archive to STDOUT. Without 166*898184e3Ssthenthis option, the default behaviour is to extract each matching file. 167*898184e3Ssthen 168*898184e3Ssthen=item B<--verbose> (alias -v) 169*898184e3Ssthen 170*898184e3SsthenLog debugging info to STDERR. 171*898184e3Ssthen 172*898184e3Ssthen=item B<--help> (alias -?) 173*898184e3Ssthen 174*898184e3SsthenDisplay this documentation. 175*898184e3Ssthen 176*898184e3Ssthen=back 177*898184e3Ssthen 178*898184e3Ssthen=head1 COPYRIGHT 179*898184e3Ssthen 180*898184e3SsthenCopyright 2010 Grant McLean E<lt>grantm@cpan.orgE<gt> 181*898184e3Ssthen 182*898184e3SsthenThis program is free software; you can redistribute it and/or modify it 183*898184e3Ssthenunder the same terms as Perl itself. 184*898184e3Ssthen 185*898184e3Ssthen=cut 186*898184e3Ssthen 187*898184e3Ssthen 188*898184e3Ssthen 189