1#!/usr/bin/perl
2##############################################################################
3# Tool for using regular expressions against the contents of files in a tar
4# archive.  See 'ptargrep --help' for more documentation.
5#
6
7BEGIN { pop @INC if $INC[-1] eq '.' }
8use strict;
9use warnings;
10
11use Pod::Usage   qw(pod2usage);
12use Getopt::Long qw(GetOptions);
13use Archive::Tar qw();
14use File::Path   qw(mkpath);
15
16my(%opt, $pattern);
17
18if(!GetOptions(\%opt,
19    'basename|b',
20    'ignore-case|i',
21    'list-only|l',
22    'verbose|v',
23    'help|?',
24)) {
25    pod2usage(-exitval => 1,  -verbose => 0);
26}
27
28
29pod2usage(-exitstatus => 0, -verbose => 2) if $opt{help};
30
31pod2usage(-exitval => 1,  -verbose => 0,
32    -message => "No pattern specified",
33) unless @ARGV;
34make_pattern( shift(@ARGV) );
35
36pod2usage(-exitval => 1,  -verbose => 0,
37    -message => "No tar files specified",
38) unless @ARGV;
39
40process_archive($_) foreach @ARGV;
41
42exit 0;
43
44
45sub make_pattern {
46    my($pat) = @_;
47
48    if($opt{'ignore-case'}) {
49        $pattern = qr{(?im)$pat};
50    }
51    else {
52        $pattern = qr{(?m)$pat};
53    }
54}
55
56
57sub process_archive {
58    my($filename) = @_;
59
60    _log("Processing archive: $filename");
61    my $next = Archive::Tar->iter($filename);
62    while( my $f = $next->() ) {
63        next unless $f->is_file;
64        match_file($f) if $f->size > 0;
65    }
66}
67
68
69sub match_file {
70    my($f)   = @_;
71    my $path = $f->name;
72    my $prefix = $f->prefix;
73    if (defined $prefix) {
74        $path = File::Spec->catfile($prefix, $path);
75    }
76
77    _log("filename: %s  (%d bytes)", $path, $f->size);
78
79    my $body = $f->get_content();
80    if($body !~ $pattern) {
81        _log("  no match");
82        return;
83    }
84
85    if($opt{'list-only'}) {
86        print $path, "\n";
87        return;
88    }
89
90    save_file($path, $body);
91}
92
93
94sub save_file {
95    my($path, $body) = @_;
96
97    _log("  found match - extracting");
98    my($fh);
99    my($dir, $file) = $path =~ m{\A(?:(.*)/)?([^/]+)\z};
100    if($dir and not $opt{basename}) {
101        _log("  writing to $dir/$file");
102        $dir =~ s{\A/}{./};
103        mkpath($dir) unless -d $dir;
104        open $fh, '>', "$dir/$file" or die "open($dir/$file): $!";
105    }
106    else {
107        _log("  writing to ./$file");
108        open $fh, '>', $file or die "open($file): $!";
109    }
110    print $fh $body;
111    close($fh);
112}
113
114
115sub _log {
116    return unless $opt{verbose};
117    my($format, @args) = @_;
118    warn sprintf($format, @args) . "\n";
119}
120
121
122__END__
123
124=head1 NAME
125
126ptargrep - Apply pattern matching to the contents of files in a tar archive
127
128=head1 SYNOPSIS
129
130  ptargrep [options] <pattern> <tar file> ...
131
132  Options:
133
134   --basename|-b     ignore directory paths from archive
135   --ignore-case|-i  do case-insensitive pattern matching
136   --list-only|-l    list matching filenames rather than extracting matches
137   --verbose|-v      write debugging message to STDERR
138   --help|-?         detailed help message
139
140=head1 DESCRIPTION
141
142This utility allows you to apply pattern matching to B<the contents> of files
143contained in a tar archive.  You might use this to identify all files in an
144archive which contain lines matching the specified pattern and either print out
145the pathnames or extract the files.
146
147The pattern will be used as a Perl regular expression (as opposed to a simple
148grep regex).
149
150Multiple tar archive filenames can be specified - they will each be processed
151in turn.
152
153=head1 OPTIONS
154
155=over 4
156
157=item B<--basename> (alias -b)
158
159When matching files are extracted, ignore the directory path from the archive
160and write to the current directory using the basename of the file from the
161archive.  Beware: if two matching files in the archive have the same basename,
162the second file extracted will overwrite the first.
163
164=item B<--ignore-case> (alias -i)
165
166Make pattern matching case-insensitive.
167
168=item B<--list-only> (alias -l)
169
170Print the pathname of each matching file from the archive to STDOUT.  Without
171this option, the default behaviour is to extract each matching file.
172
173=item B<--verbose> (alias -v)
174
175Log debugging info to STDERR.
176
177=item B<--help> (alias -?)
178
179Display this documentation.
180
181=back
182
183=head1 COPYRIGHT
184
185Copyright 2010 Grant McLean E<lt>grantm@cpan.orgE<gt>
186
187This program is free software; you can redistribute it and/or modify it
188under the same terms as Perl itself.
189
190=cut
191
192
193
194