1#!/usr/bin/perl
2##############################################################################
3# Tool for using regular expressions against the contents of files in a tar
4# archive.  See 'ptargrep --help' for more documentation.
5#
6
7use strict;
8use warnings;
9
10use Pod::Usage   qw(pod2usage);
11use Getopt::Long qw(GetOptions);
12use Archive::Tar qw();
13use File::Path   qw(mkpath);
14
15my(%opt, $pattern);
16
17if(!GetOptions(\%opt,
18    'basename|b',
19    'ignore-case|i',
20    'list-only|l',
21    'verbose|v',
22    'help|?',
23)) {
24    pod2usage(-exitval => 1,  -verbose => 0);
25}
26
27
28pod2usage(-exitstatus => 0, -verbose => 2) if $opt{help};
29
30pod2usage(-exitval => 1,  -verbose => 0,
31    -message => "No pattern specified",
32) unless @ARGV;
33make_pattern( shift(@ARGV) );
34
35pod2usage(-exitval => 1,  -verbose => 0,
36    -message => "No tar files specified",
37) unless @ARGV;
38
39process_archive($_) foreach @ARGV;
40
41exit 0;
42
43
44sub make_pattern {
45    my($pat) = @_;
46
47    if($opt{'ignore-case'}) {
48        $pattern = qr{(?im)$pat};
49    }
50    else {
51        $pattern = qr{(?m)$pat};
52    }
53}
54
55
56sub process_archive {
57    my($filename) = @_;
58
59    _log("Processing archive: $filename");
60    my $next = Archive::Tar->iter($filename);
61    while( my $f = $next->() ) {
62        next unless $f->is_file;
63        match_file($f) if $f->size > 0;
64    }
65}
66
67
68sub match_file {
69    my($f)   = @_;
70    my $path = $f->name;
71
72    _log("filename: %s  (%d bytes)", $path, $f->size);
73
74    my $body = $f->get_content();
75    if($body !~ $pattern) {
76        _log("  no match");
77        return;
78    }
79
80    if($opt{'list-only'}) {
81        print $path, "\n";
82        return;
83    }
84
85    save_file($path, $body);
86}
87
88
89sub save_file {
90    my($path, $body) = @_;
91
92    _log("  found match - extracting");
93    my($fh);
94    my($dir, $file) = $path =~ m{\A(?:(.*)/)?([^/]+)\z};
95    if($dir and not $opt{basename}) {
96        _log("  writing to $dir/$file");
97        $dir =~ s{\A/}{./};
98        mkpath($dir) unless -d $dir;
99        open $fh, '>', "$dir/$file" or die "open($dir/$file): $!";
100    }
101    else {
102        _log("  writing to ./$file");
103        open $fh, '>', $file or die "open($file): $!";
104    }
105    print $fh $body;
106    close($fh);
107}
108
109
110sub _log {
111    return unless $opt{verbose};
112    my($format, @args) = @_;
113    warn sprintf($format, @args) . "\n";
114}
115
116
117__END__
118
119=head1 NAME
120
121ptargrep - Apply pattern matching to the contents of files in a tar archive
122
123=head1 SYNOPSIS
124
125  ptargrep [options] <pattern> <tar file> ...
126
127  Options:
128
129   --basename|-b     ignore directory paths from archive
130   --ignore-case|-i  do case-insensitive pattern matching
131   --list-only|-l    list matching filenames rather than extracting matches
132   --verbose|-v      write debugging message to STDERR
133   --help|-?         detailed help message
134
135=head1 DESCRIPTION
136
137This utility allows you to apply pattern matching to B<the contents> of files
138contained in a tar archive.  You might use this to identify all files in an
139archive which contain lines matching the specified pattern and either print out
140the pathnames or extract the files.
141
142The pattern will be used as a Perl regular expression (as opposed to a simple
143grep regex).
144
145Multiple tar archive filenames can be specified - they will each be processed
146in turn.
147
148=head1 OPTIONS
149
150=over 4
151
152=item B<--basename> (alias -b)
153
154When matching files are extracted, ignore the directory path from the archive
155and write to the current directory using the basename of the file from the
156archive.  Beware: if two matching files in the archive have the same basename,
157the second file extracted will overwrite the first.
158
159=item B<--ignore-case> (alias -i)
160
161Make pattern matching case-insensitive.
162
163=item B<--list-only> (alias -l)
164
165Print the pathname of each matching file from the archive to STDOUT.  Without
166this option, the default behaviour is to extract each matching file.
167
168=item B<--verbose> (alias -v)
169
170Log debugging info to STDERR.
171
172=item B<--help> (alias -?)
173
174Display this documentation.
175
176=back
177
178=head1 COPYRIGHT
179
180Copyright 2010 Grant McLean E<lt>grantm@cpan.orgE<gt>
181
182This program is free software; you can redistribute it and/or modify it
183under the same terms as Perl itself.
184
185=cut
186
187
188
189