1898184e3Ssthen#!/usr/bin/perl 2898184e3Ssthen############################################################################## 3898184e3Ssthen# Tool for using regular expressions against the contents of files in a tar 4898184e3Ssthen# archive. See 'ptargrep --help' for more documentation. 5898184e3Ssthen# 6898184e3Ssthen 70b7734b3Safresh1BEGIN { pop @INC if $INC[-1] eq '.' } 8898184e3Ssthenuse strict; 9898184e3Ssthenuse warnings; 10898184e3Ssthen 11898184e3Ssthenuse Pod::Usage qw(pod2usage); 12898184e3Ssthenuse Getopt::Long qw(GetOptions); 13898184e3Ssthenuse Archive::Tar qw(); 14898184e3Ssthenuse File::Path qw(mkpath); 15898184e3Ssthen 16898184e3Ssthenmy(%opt, $pattern); 17898184e3Ssthen 18898184e3Ssthenif(!GetOptions(\%opt, 19898184e3Ssthen 'basename|b', 20898184e3Ssthen 'ignore-case|i', 21898184e3Ssthen 'list-only|l', 22898184e3Ssthen 'verbose|v', 23898184e3Ssthen 'help|?', 24898184e3Ssthen)) { 25898184e3Ssthen pod2usage(-exitval => 1, -verbose => 0); 26898184e3Ssthen} 27898184e3Ssthen 28898184e3Ssthen 29898184e3Ssthenpod2usage(-exitstatus => 0, -verbose => 2) if $opt{help}; 30898184e3Ssthen 31898184e3Ssthenpod2usage(-exitval => 1, -verbose => 0, 32898184e3Ssthen -message => "No pattern specified", 33898184e3Ssthen) unless @ARGV; 34898184e3Ssthenmake_pattern( shift(@ARGV) ); 35898184e3Ssthen 36898184e3Ssthenpod2usage(-exitval => 1, -verbose => 0, 37898184e3Ssthen -message => "No tar files specified", 38898184e3Ssthen) unless @ARGV; 39898184e3Ssthen 40898184e3Ssthenprocess_archive($_) foreach @ARGV; 41898184e3Ssthen 42898184e3Ssthenexit 0; 43898184e3Ssthen 44898184e3Ssthen 45898184e3Ssthensub make_pattern { 46898184e3Ssthen my($pat) = @_; 47898184e3Ssthen 48898184e3Ssthen if($opt{'ignore-case'}) { 49898184e3Ssthen $pattern = qr{(?im)$pat}; 50898184e3Ssthen } 51898184e3Ssthen else { 52898184e3Ssthen $pattern = qr{(?m)$pat}; 53898184e3Ssthen } 54898184e3Ssthen} 55898184e3Ssthen 56898184e3Ssthen 57898184e3Ssthensub process_archive { 58898184e3Ssthen my($filename) = @_; 59898184e3Ssthen 60898184e3Ssthen _log("Processing archive: $filename"); 61898184e3Ssthen my $next = Archive::Tar->iter($filename); 62898184e3Ssthen while( my $f = $next->() ) { 63898184e3Ssthen next unless $f->is_file; 64898184e3Ssthen match_file($f) if $f->size > 0; 65898184e3Ssthen } 66898184e3Ssthen} 67898184e3Ssthen 68898184e3Ssthen 69898184e3Ssthensub match_file { 70898184e3Ssthen my($f) = @_; 71898184e3Ssthen my $path = $f->name; 72*b8851fccSafresh1 my $prefix = $f->prefix; 73*b8851fccSafresh1 if (defined $prefix) { 74*b8851fccSafresh1 $path = File::Spec->catfile($prefix, $path); 75*b8851fccSafresh1 } 76898184e3Ssthen 77898184e3Ssthen _log("filename: %s (%d bytes)", $path, $f->size); 78898184e3Ssthen 79898184e3Ssthen my $body = $f->get_content(); 80898184e3Ssthen if($body !~ $pattern) { 81898184e3Ssthen _log(" no match"); 82898184e3Ssthen return; 83898184e3Ssthen } 84898184e3Ssthen 85898184e3Ssthen if($opt{'list-only'}) { 86898184e3Ssthen print $path, "\n"; 87898184e3Ssthen return; 88898184e3Ssthen } 89898184e3Ssthen 90898184e3Ssthen save_file($path, $body); 91898184e3Ssthen} 92898184e3Ssthen 93898184e3Ssthen 94898184e3Ssthensub save_file { 95898184e3Ssthen my($path, $body) = @_; 96898184e3Ssthen 97898184e3Ssthen _log(" found match - extracting"); 98898184e3Ssthen my($fh); 99898184e3Ssthen my($dir, $file) = $path =~ m{\A(?:(.*)/)?([^/]+)\z}; 100898184e3Ssthen if($dir and not $opt{basename}) { 101898184e3Ssthen _log(" writing to $dir/$file"); 102898184e3Ssthen $dir =~ s{\A/}{./}; 103898184e3Ssthen mkpath($dir) unless -d $dir; 104898184e3Ssthen open $fh, '>', "$dir/$file" or die "open($dir/$file): $!"; 105898184e3Ssthen } 106898184e3Ssthen else { 107898184e3Ssthen _log(" writing to ./$file"); 108898184e3Ssthen open $fh, '>', $file or die "open($file): $!"; 109898184e3Ssthen } 110898184e3Ssthen print $fh $body; 111898184e3Ssthen close($fh); 112898184e3Ssthen} 113898184e3Ssthen 114898184e3Ssthen 115898184e3Ssthensub _log { 116898184e3Ssthen return unless $opt{verbose}; 117898184e3Ssthen my($format, @args) = @_; 118898184e3Ssthen warn sprintf($format, @args) . "\n"; 119898184e3Ssthen} 120898184e3Ssthen 121898184e3Ssthen 122898184e3Ssthen__END__ 123898184e3Ssthen 124898184e3Ssthen=head1 NAME 125898184e3Ssthen 126898184e3Ssthenptargrep - Apply pattern matching to the contents of files in a tar archive 127898184e3Ssthen 128898184e3Ssthen=head1 SYNOPSIS 129898184e3Ssthen 130898184e3Ssthen ptargrep [options] <pattern> <tar file> ... 131898184e3Ssthen 132898184e3Ssthen Options: 133898184e3Ssthen 134898184e3Ssthen --basename|-b ignore directory paths from archive 135898184e3Ssthen --ignore-case|-i do case-insensitive pattern matching 136898184e3Ssthen --list-only|-l list matching filenames rather than extracting matches 137898184e3Ssthen --verbose|-v write debugging message to STDERR 138898184e3Ssthen --help|-? detailed help message 139898184e3Ssthen 140898184e3Ssthen=head1 DESCRIPTION 141898184e3Ssthen 142898184e3SsthenThis utility allows you to apply pattern matching to B<the contents> of files 143898184e3Ssthencontained in a tar archive. You might use this to identify all files in an 144898184e3Ssthenarchive which contain lines matching the specified pattern and either print out 145898184e3Ssthenthe pathnames or extract the files. 146898184e3Ssthen 147898184e3SsthenThe pattern will be used as a Perl regular expression (as opposed to a simple 148898184e3Ssthengrep regex). 149898184e3Ssthen 150898184e3SsthenMultiple tar archive filenames can be specified - they will each be processed 151898184e3Ssthenin turn. 152898184e3Ssthen 153898184e3Ssthen=head1 OPTIONS 154898184e3Ssthen 155898184e3Ssthen=over 4 156898184e3Ssthen 157898184e3Ssthen=item B<--basename> (alias -b) 158898184e3Ssthen 159898184e3SsthenWhen matching files are extracted, ignore the directory path from the archive 160898184e3Ssthenand write to the current directory using the basename of the file from the 161898184e3Ssthenarchive. Beware: if two matching files in the archive have the same basename, 162898184e3Ssthenthe second file extracted will overwrite the first. 163898184e3Ssthen 164898184e3Ssthen=item B<--ignore-case> (alias -i) 165898184e3Ssthen 166898184e3SsthenMake pattern matching case-insensitive. 167898184e3Ssthen 168898184e3Ssthen=item B<--list-only> (alias -l) 169898184e3Ssthen 170898184e3SsthenPrint the pathname of each matching file from the archive to STDOUT. Without 171898184e3Ssthenthis option, the default behaviour is to extract each matching file. 172898184e3Ssthen 173898184e3Ssthen=item B<--verbose> (alias -v) 174898184e3Ssthen 175898184e3SsthenLog debugging info to STDERR. 176898184e3Ssthen 177898184e3Ssthen=item B<--help> (alias -?) 178898184e3Ssthen 179898184e3SsthenDisplay this documentation. 180898184e3Ssthen 181898184e3Ssthen=back 182898184e3Ssthen 183898184e3Ssthen=head1 COPYRIGHT 184898184e3Ssthen 185898184e3SsthenCopyright 2010 Grant McLean E<lt>grantm@cpan.orgE<gt> 186898184e3Ssthen 187898184e3SsthenThis program is free software; you can redistribute it and/or modify it 188898184e3Ssthenunder the same terms as Perl itself. 189898184e3Ssthen 190898184e3Ssthen=cut 191898184e3Ssthen 192898184e3Ssthen 193898184e3Ssthen 194