1#!/usr/bin/perl 2############################################################################## 3# Tool for using regular expressions against the contents of files in a tar 4# archive. See 'ptargrep --help' for more documentation. 5# 6 7use strict; 8use warnings; 9 10use Pod::Usage qw(pod2usage); 11use Getopt::Long qw(GetOptions); 12use Archive::Tar qw(); 13use File::Path qw(mkpath); 14 15my(%opt, $pattern); 16 17if(!GetOptions(\%opt, 18 'basename|b', 19 'ignore-case|i', 20 'list-only|l', 21 'verbose|v', 22 'help|?', 23)) { 24 pod2usage(-exitval => 1, -verbose => 0); 25} 26 27 28pod2usage(-exitstatus => 0, -verbose => 2) if $opt{help}; 29 30pod2usage(-exitval => 1, -verbose => 0, 31 -message => "No pattern specified", 32) unless @ARGV; 33make_pattern( shift(@ARGV) ); 34 35pod2usage(-exitval => 1, -verbose => 0, 36 -message => "No tar files specified", 37) unless @ARGV; 38 39process_archive($_) foreach @ARGV; 40 41exit 0; 42 43 44sub make_pattern { 45 my($pat) = @_; 46 47 if($opt{'ignore-case'}) { 48 $pattern = qr{(?im)$pat}; 49 } 50 else { 51 $pattern = qr{(?m)$pat}; 52 } 53} 54 55 56sub process_archive { 57 my($filename) = @_; 58 59 _log("Processing archive: $filename"); 60 my $next = Archive::Tar->iter($filename); 61 while( my $f = $next->() ) { 62 next unless $f->is_file; 63 match_file($f) if $f->size > 0; 64 } 65} 66 67 68sub match_file { 69 my($f) = @_; 70 my $path = $f->name; 71 72 _log("filename: %s (%d bytes)", $path, $f->size); 73 74 my $body = $f->get_content(); 75 if($body !~ $pattern) { 76 _log(" no match"); 77 return; 78 } 79 80 if($opt{'list-only'}) { 81 print $path, "\n"; 82 return; 83 } 84 85 save_file($path, $body); 86} 87 88 89sub save_file { 90 my($path, $body) = @_; 91 92 _log(" found match - extracting"); 93 my($fh); 94 my($dir, $file) = $path =~ m{\A(?:(.*)/)?([^/]+)\z}; 95 if($dir and not $opt{basename}) { 96 _log(" writing to $dir/$file"); 97 $dir =~ s{\A/}{./}; 98 mkpath($dir) unless -d $dir; 99 open $fh, '>', "$dir/$file" or die "open($dir/$file): $!"; 100 } 101 else { 102 _log(" writing to ./$file"); 103 open $fh, '>', $file or die "open($file): $!"; 104 } 105 print $fh $body; 106 close($fh); 107} 108 109 110sub _log { 111 return unless $opt{verbose}; 112 my($format, @args) = @_; 113 warn sprintf($format, @args) . "\n"; 114} 115 116 117__END__ 118 119=head1 NAME 120 121ptargrep - Apply pattern matching to the contents of files in a tar archive 122 123=head1 SYNOPSIS 124 125 ptargrep [options] <pattern> <tar file> ... 126 127 Options: 128 129 --basename|-b ignore directory paths from archive 130 --ignore-case|-i do case-insensitive pattern matching 131 --list-only|-l list matching filenames rather than extracting matches 132 --verbose|-v write debugging message to STDERR 133 --help|-? detailed help message 134 135=head1 DESCRIPTION 136 137This utility allows you to apply pattern matching to B<the contents> of files 138contained in a tar archive. You might use this to identify all files in an 139archive which contain lines matching the specified pattern and either print out 140the pathnames or extract the files. 141 142The pattern will be used as a Perl regular expression (as opposed to a simple 143grep regex). 144 145Multiple tar archive filenames can be specified - they will each be processed 146in turn. 147 148=head1 OPTIONS 149 150=over 4 151 152=item B<--basename> (alias -b) 153 154When matching files are extracted, ignore the directory path from the archive 155and write to the current directory using the basename of the file from the 156archive. Beware: if two matching files in the archive have the same basename, 157the second file extracted will overwrite the first. 158 159=item B<--ignore-case> (alias -i) 160 161Make pattern matching case-insensitive. 162 163=item B<--list-only> (alias -l) 164 165Print the pathname of each matching file from the archive to STDOUT. Without 166this option, the default behaviour is to extract each matching file. 167 168=item B<--verbose> (alias -v) 169 170Log debugging info to STDERR. 171 172=item B<--help> (alias -?) 173 174Display this documentation. 175 176=back 177 178=head1 COPYRIGHT 179 180Copyright 2010 Grant McLean E<lt>grantm@cpan.orgE<gt> 181 182This program is free software; you can redistribute it and/or modify it 183under the same terms as Perl itself. 184 185=cut 186 187 188 189