1898184e3Ssthen#!/usr/bin/perl
2898184e3Ssthen##############################################################################
3898184e3Ssthen# Tool for using regular expressions against the contents of files in a tar
4898184e3Ssthen# archive.  See 'ptargrep --help' for more documentation.
5898184e3Ssthen#
6898184e3Ssthen
70b7734b3Safresh1BEGIN { pop @INC if $INC[-1] eq '.' }
8898184e3Ssthenuse strict;
9898184e3Ssthenuse warnings;
10898184e3Ssthen
11898184e3Ssthenuse Pod::Usage   qw(pod2usage);
12898184e3Ssthenuse Getopt::Long qw(GetOptions);
13898184e3Ssthenuse Archive::Tar qw();
14898184e3Ssthenuse File::Path   qw(mkpath);
15898184e3Ssthen
16898184e3Ssthenmy(%opt, $pattern);
17898184e3Ssthen
18898184e3Ssthenif(!GetOptions(\%opt,
19898184e3Ssthen    'basename|b',
20898184e3Ssthen    'ignore-case|i',
21898184e3Ssthen    'list-only|l',
22898184e3Ssthen    'verbose|v',
23898184e3Ssthen    'help|?',
24898184e3Ssthen)) {
25898184e3Ssthen    pod2usage(-exitval => 1,  -verbose => 0);
26898184e3Ssthen}
27898184e3Ssthen
28898184e3Ssthen
29898184e3Ssthenpod2usage(-exitstatus => 0, -verbose => 2) if $opt{help};
30898184e3Ssthen
31898184e3Ssthenpod2usage(-exitval => 1,  -verbose => 0,
32898184e3Ssthen    -message => "No pattern specified",
33898184e3Ssthen) unless @ARGV;
34898184e3Ssthenmake_pattern( shift(@ARGV) );
35898184e3Ssthen
36898184e3Ssthenpod2usage(-exitval => 1,  -verbose => 0,
37898184e3Ssthen    -message => "No tar files specified",
38898184e3Ssthen) unless @ARGV;
39898184e3Ssthen
40898184e3Ssthenprocess_archive($_) foreach @ARGV;
41898184e3Ssthen
42898184e3Ssthenexit 0;
43898184e3Ssthen
44898184e3Ssthen
45898184e3Ssthensub make_pattern {
46898184e3Ssthen    my($pat) = @_;
47898184e3Ssthen
48898184e3Ssthen    if($opt{'ignore-case'}) {
49898184e3Ssthen        $pattern = qr{(?im)$pat};
50898184e3Ssthen    }
51898184e3Ssthen    else {
52898184e3Ssthen        $pattern = qr{(?m)$pat};
53898184e3Ssthen    }
54898184e3Ssthen}
55898184e3Ssthen
56898184e3Ssthen
57898184e3Ssthensub process_archive {
58898184e3Ssthen    my($filename) = @_;
59898184e3Ssthen
60898184e3Ssthen    _log("Processing archive: $filename");
61898184e3Ssthen    my $next = Archive::Tar->iter($filename);
62898184e3Ssthen    while( my $f = $next->() ) {
63898184e3Ssthen        next unless $f->is_file;
64898184e3Ssthen        match_file($f) if $f->size > 0;
65898184e3Ssthen    }
66898184e3Ssthen}
67898184e3Ssthen
68898184e3Ssthen
69898184e3Ssthensub match_file {
70898184e3Ssthen    my($f)   = @_;
71898184e3Ssthen    my $path = $f->name;
72*b8851fccSafresh1    my $prefix = $f->prefix;
73*b8851fccSafresh1    if (defined $prefix) {
74*b8851fccSafresh1        $path = File::Spec->catfile($prefix, $path);
75*b8851fccSafresh1    }
76898184e3Ssthen
77898184e3Ssthen    _log("filename: %s  (%d bytes)", $path, $f->size);
78898184e3Ssthen
79898184e3Ssthen    my $body = $f->get_content();
80898184e3Ssthen    if($body !~ $pattern) {
81898184e3Ssthen        _log("  no match");
82898184e3Ssthen        return;
83898184e3Ssthen    }
84898184e3Ssthen
85898184e3Ssthen    if($opt{'list-only'}) {
86898184e3Ssthen        print $path, "\n";
87898184e3Ssthen        return;
88898184e3Ssthen    }
89898184e3Ssthen
90898184e3Ssthen    save_file($path, $body);
91898184e3Ssthen}
92898184e3Ssthen
93898184e3Ssthen
94898184e3Ssthensub save_file {
95898184e3Ssthen    my($path, $body) = @_;
96898184e3Ssthen
97898184e3Ssthen    _log("  found match - extracting");
98898184e3Ssthen    my($fh);
99898184e3Ssthen    my($dir, $file) = $path =~ m{\A(?:(.*)/)?([^/]+)\z};
100898184e3Ssthen    if($dir and not $opt{basename}) {
101898184e3Ssthen        _log("  writing to $dir/$file");
102898184e3Ssthen        $dir =~ s{\A/}{./};
103898184e3Ssthen        mkpath($dir) unless -d $dir;
104898184e3Ssthen        open $fh, '>', "$dir/$file" or die "open($dir/$file): $!";
105898184e3Ssthen    }
106898184e3Ssthen    else {
107898184e3Ssthen        _log("  writing to ./$file");
108898184e3Ssthen        open $fh, '>', $file or die "open($file): $!";
109898184e3Ssthen    }
110898184e3Ssthen    print $fh $body;
111898184e3Ssthen    close($fh);
112898184e3Ssthen}
113898184e3Ssthen
114898184e3Ssthen
115898184e3Ssthensub _log {
116898184e3Ssthen    return unless $opt{verbose};
117898184e3Ssthen    my($format, @args) = @_;
118898184e3Ssthen    warn sprintf($format, @args) . "\n";
119898184e3Ssthen}
120898184e3Ssthen
121898184e3Ssthen
122898184e3Ssthen__END__
123898184e3Ssthen
124898184e3Ssthen=head1 NAME
125898184e3Ssthen
126898184e3Ssthenptargrep - Apply pattern matching to the contents of files in a tar archive
127898184e3Ssthen
128898184e3Ssthen=head1 SYNOPSIS
129898184e3Ssthen
130898184e3Ssthen  ptargrep [options] <pattern> <tar file> ...
131898184e3Ssthen
132898184e3Ssthen  Options:
133898184e3Ssthen
134898184e3Ssthen   --basename|-b     ignore directory paths from archive
135898184e3Ssthen   --ignore-case|-i  do case-insensitive pattern matching
136898184e3Ssthen   --list-only|-l    list matching filenames rather than extracting matches
137898184e3Ssthen   --verbose|-v      write debugging message to STDERR
138898184e3Ssthen   --help|-?         detailed help message
139898184e3Ssthen
140898184e3Ssthen=head1 DESCRIPTION
141898184e3Ssthen
142898184e3SsthenThis utility allows you to apply pattern matching to B<the contents> of files
143898184e3Ssthencontained in a tar archive.  You might use this to identify all files in an
144898184e3Ssthenarchive which contain lines matching the specified pattern and either print out
145898184e3Ssthenthe pathnames or extract the files.
146898184e3Ssthen
147898184e3SsthenThe pattern will be used as a Perl regular expression (as opposed to a simple
148898184e3Ssthengrep regex).
149898184e3Ssthen
150898184e3SsthenMultiple tar archive filenames can be specified - they will each be processed
151898184e3Ssthenin turn.
152898184e3Ssthen
153898184e3Ssthen=head1 OPTIONS
154898184e3Ssthen
155898184e3Ssthen=over 4
156898184e3Ssthen
157898184e3Ssthen=item B<--basename> (alias -b)
158898184e3Ssthen
159898184e3SsthenWhen matching files are extracted, ignore the directory path from the archive
160898184e3Ssthenand write to the current directory using the basename of the file from the
161898184e3Ssthenarchive.  Beware: if two matching files in the archive have the same basename,
162898184e3Ssthenthe second file extracted will overwrite the first.
163898184e3Ssthen
164898184e3Ssthen=item B<--ignore-case> (alias -i)
165898184e3Ssthen
166898184e3SsthenMake pattern matching case-insensitive.
167898184e3Ssthen
168898184e3Ssthen=item B<--list-only> (alias -l)
169898184e3Ssthen
170898184e3SsthenPrint the pathname of each matching file from the archive to STDOUT.  Without
171898184e3Ssthenthis option, the default behaviour is to extract each matching file.
172898184e3Ssthen
173898184e3Ssthen=item B<--verbose> (alias -v)
174898184e3Ssthen
175898184e3SsthenLog debugging info to STDERR.
176898184e3Ssthen
177898184e3Ssthen=item B<--help> (alias -?)
178898184e3Ssthen
179898184e3SsthenDisplay this documentation.
180898184e3Ssthen
181898184e3Ssthen=back
182898184e3Ssthen
183898184e3Ssthen=head1 COPYRIGHT
184898184e3Ssthen
185898184e3SsthenCopyright 2010 Grant McLean E<lt>grantm@cpan.orgE<gt>
186898184e3Ssthen
187898184e3SsthenThis program is free software; you can redistribute it and/or modify it
188898184e3Ssthenunder the same terms as Perl itself.
189898184e3Ssthen
190898184e3Ssthen=cut
191898184e3Ssthen
192898184e3Ssthen
193898184e3Ssthen
194