1*898184e3Ssthen#!/usr/bin/perl
2*898184e3Ssthen##############################################################################
3*898184e3Ssthen# Tool for using regular expressions against the contents of files in a tar
4*898184e3Ssthen# archive.  See 'ptargrep --help' for more documentation.
5*898184e3Ssthen#
6*898184e3Ssthen
7*898184e3Ssthenuse strict;
8*898184e3Ssthenuse warnings;
9*898184e3Ssthen
10*898184e3Ssthenuse Pod::Usage   qw(pod2usage);
11*898184e3Ssthenuse Getopt::Long qw(GetOptions);
12*898184e3Ssthenuse Archive::Tar qw();
13*898184e3Ssthenuse File::Path   qw(mkpath);
14*898184e3Ssthen
15*898184e3Ssthenmy(%opt, $pattern);
16*898184e3Ssthen
17*898184e3Ssthenif(!GetOptions(\%opt,
18*898184e3Ssthen    'basename|b',
19*898184e3Ssthen    'ignore-case|i',
20*898184e3Ssthen    'list-only|l',
21*898184e3Ssthen    'verbose|v',
22*898184e3Ssthen    'help|?',
23*898184e3Ssthen)) {
24*898184e3Ssthen    pod2usage(-exitval => 1,  -verbose => 0);
25*898184e3Ssthen}
26*898184e3Ssthen
27*898184e3Ssthen
28*898184e3Ssthenpod2usage(-exitstatus => 0, -verbose => 2) if $opt{help};
29*898184e3Ssthen
30*898184e3Ssthenpod2usage(-exitval => 1,  -verbose => 0,
31*898184e3Ssthen    -message => "No pattern specified",
32*898184e3Ssthen) unless @ARGV;
33*898184e3Ssthenmake_pattern( shift(@ARGV) );
34*898184e3Ssthen
35*898184e3Ssthenpod2usage(-exitval => 1,  -verbose => 0,
36*898184e3Ssthen    -message => "No tar files specified",
37*898184e3Ssthen) unless @ARGV;
38*898184e3Ssthen
39*898184e3Ssthenprocess_archive($_) foreach @ARGV;
40*898184e3Ssthen
41*898184e3Ssthenexit 0;
42*898184e3Ssthen
43*898184e3Ssthen
44*898184e3Ssthensub make_pattern {
45*898184e3Ssthen    my($pat) = @_;
46*898184e3Ssthen
47*898184e3Ssthen    if($opt{'ignore-case'}) {
48*898184e3Ssthen        $pattern = qr{(?im)$pat};
49*898184e3Ssthen    }
50*898184e3Ssthen    else {
51*898184e3Ssthen        $pattern = qr{(?m)$pat};
52*898184e3Ssthen    }
53*898184e3Ssthen}
54*898184e3Ssthen
55*898184e3Ssthen
56*898184e3Ssthensub process_archive {
57*898184e3Ssthen    my($filename) = @_;
58*898184e3Ssthen
59*898184e3Ssthen    _log("Processing archive: $filename");
60*898184e3Ssthen    my $next = Archive::Tar->iter($filename);
61*898184e3Ssthen    while( my $f = $next->() ) {
62*898184e3Ssthen        next unless $f->is_file;
63*898184e3Ssthen        match_file($f) if $f->size > 0;
64*898184e3Ssthen    }
65*898184e3Ssthen}
66*898184e3Ssthen
67*898184e3Ssthen
68*898184e3Ssthensub match_file {
69*898184e3Ssthen    my($f)   = @_;
70*898184e3Ssthen    my $path = $f->name;
71*898184e3Ssthen
72*898184e3Ssthen    _log("filename: %s  (%d bytes)", $path, $f->size);
73*898184e3Ssthen
74*898184e3Ssthen    my $body = $f->get_content();
75*898184e3Ssthen    if($body !~ $pattern) {
76*898184e3Ssthen        _log("  no match");
77*898184e3Ssthen        return;
78*898184e3Ssthen    }
79*898184e3Ssthen
80*898184e3Ssthen    if($opt{'list-only'}) {
81*898184e3Ssthen        print $path, "\n";
82*898184e3Ssthen        return;
83*898184e3Ssthen    }
84*898184e3Ssthen
85*898184e3Ssthen    save_file($path, $body);
86*898184e3Ssthen}
87*898184e3Ssthen
88*898184e3Ssthen
89*898184e3Ssthensub save_file {
90*898184e3Ssthen    my($path, $body) = @_;
91*898184e3Ssthen
92*898184e3Ssthen    _log("  found match - extracting");
93*898184e3Ssthen    my($fh);
94*898184e3Ssthen    my($dir, $file) = $path =~ m{\A(?:(.*)/)?([^/]+)\z};
95*898184e3Ssthen    if($dir and not $opt{basename}) {
96*898184e3Ssthen        _log("  writing to $dir/$file");
97*898184e3Ssthen        $dir =~ s{\A/}{./};
98*898184e3Ssthen        mkpath($dir) unless -d $dir;
99*898184e3Ssthen        open $fh, '>', "$dir/$file" or die "open($dir/$file): $!";
100*898184e3Ssthen    }
101*898184e3Ssthen    else {
102*898184e3Ssthen        _log("  writing to ./$file");
103*898184e3Ssthen        open $fh, '>', $file or die "open($file): $!";
104*898184e3Ssthen    }
105*898184e3Ssthen    print $fh $body;
106*898184e3Ssthen    close($fh);
107*898184e3Ssthen}
108*898184e3Ssthen
109*898184e3Ssthen
110*898184e3Ssthensub _log {
111*898184e3Ssthen    return unless $opt{verbose};
112*898184e3Ssthen    my($format, @args) = @_;
113*898184e3Ssthen    warn sprintf($format, @args) . "\n";
114*898184e3Ssthen}
115*898184e3Ssthen
116*898184e3Ssthen
117*898184e3Ssthen__END__
118*898184e3Ssthen
119*898184e3Ssthen=head1 NAME
120*898184e3Ssthen
121*898184e3Ssthenptargrep - Apply pattern matching to the contents of files in a tar archive
122*898184e3Ssthen
123*898184e3Ssthen=head1 SYNOPSIS
124*898184e3Ssthen
125*898184e3Ssthen  ptargrep [options] <pattern> <tar file> ...
126*898184e3Ssthen
127*898184e3Ssthen  Options:
128*898184e3Ssthen
129*898184e3Ssthen   --basename|-b     ignore directory paths from archive
130*898184e3Ssthen   --ignore-case|-i  do case-insensitive pattern matching
131*898184e3Ssthen   --list-only|-l    list matching filenames rather than extracting matches
132*898184e3Ssthen   --verbose|-v      write debugging message to STDERR
133*898184e3Ssthen   --help|-?         detailed help message
134*898184e3Ssthen
135*898184e3Ssthen=head1 DESCRIPTION
136*898184e3Ssthen
137*898184e3SsthenThis utility allows you to apply pattern matching to B<the contents> of files
138*898184e3Ssthencontained in a tar archive.  You might use this to identify all files in an
139*898184e3Ssthenarchive which contain lines matching the specified pattern and either print out
140*898184e3Ssthenthe pathnames or extract the files.
141*898184e3Ssthen
142*898184e3SsthenThe pattern will be used as a Perl regular expression (as opposed to a simple
143*898184e3Ssthengrep regex).
144*898184e3Ssthen
145*898184e3SsthenMultiple tar archive filenames can be specified - they will each be processed
146*898184e3Ssthenin turn.
147*898184e3Ssthen
148*898184e3Ssthen=head1 OPTIONS
149*898184e3Ssthen
150*898184e3Ssthen=over 4
151*898184e3Ssthen
152*898184e3Ssthen=item B<--basename> (alias -b)
153*898184e3Ssthen
154*898184e3SsthenWhen matching files are extracted, ignore the directory path from the archive
155*898184e3Ssthenand write to the current directory using the basename of the file from the
156*898184e3Ssthenarchive.  Beware: if two matching files in the archive have the same basename,
157*898184e3Ssthenthe second file extracted will overwrite the first.
158*898184e3Ssthen
159*898184e3Ssthen=item B<--ignore-case> (alias -i)
160*898184e3Ssthen
161*898184e3SsthenMake pattern matching case-insensitive.
162*898184e3Ssthen
163*898184e3Ssthen=item B<--list-only> (alias -l)
164*898184e3Ssthen
165*898184e3SsthenPrint the pathname of each matching file from the archive to STDOUT.  Without
166*898184e3Ssthenthis option, the default behaviour is to extract each matching file.
167*898184e3Ssthen
168*898184e3Ssthen=item B<--verbose> (alias -v)
169*898184e3Ssthen
170*898184e3SsthenLog debugging info to STDERR.
171*898184e3Ssthen
172*898184e3Ssthen=item B<--help> (alias -?)
173*898184e3Ssthen
174*898184e3SsthenDisplay this documentation.
175*898184e3Ssthen
176*898184e3Ssthen=back
177*898184e3Ssthen
178*898184e3Ssthen=head1 COPYRIGHT
179*898184e3Ssthen
180*898184e3SsthenCopyright 2010 Grant McLean E<lt>grantm@cpan.orgE<gt>
181*898184e3Ssthen
182*898184e3SsthenThis program is free software; you can redistribute it and/or modify it
183*898184e3Ssthenunder the same terms as Perl itself.
184*898184e3Ssthen
185*898184e3Ssthen=cut
186*898184e3Ssthen
187*898184e3Ssthen
188*898184e3Ssthen
189