1#! /usr/bin/perl -w
2
3use strict;
4
5use File::Find;
6use File::Temp qw/tempfile/;
7use File::Basename;
8
9use FileHandle;
10
11# It's there on FC1, but not on RHEL3
12my $have_perl_cmpstat = 0;
13
14
15#  Might want to add .iso or some .mov type exts ... however non-trivial savings
16# are often on those files.
17my $filter_re = qr/(?:
18		   ^[.]nfs. |
19		   [.]gz$  |
20		   [.]bz2$ |
21		   [.]rpm$ |
22		   [.]zip$ |
23		   [.]tmp$ |
24		   ~$      |
25		   \#$
26		  )/x;
27
28use Getopt::Long;
29use Pod::Usage;
30
31my $man = 0;
32my $help = 0;
33
34my $tidy_compress = 0;
35my $force_compress = 0;
36my $re_compress = 0;
37my $chown_compress = 0;
38my $once_compress = undef;
39my $verbose_compress = 0;
40my $zero_compress = 0;
41my $type_compress = "gzip";
42
43pod2usage(0) if !
44GetOptions ("force!"   => \$force_compress,
45	    "all!"     => \$re_compress,
46	    "chown!"   => \$chown_compress,
47	    "output|o=s" => \$once_compress,
48	    "tidy!"    => \$tidy_compress,
49	    "zero!"    => \$zero_compress,
50	    "type|t=s" => \$type_compress,
51	    "verbose+" => \$verbose_compress,
52	    "help|?"   => \$help,
53	    "man"      => \$man);
54pod2usage(-exitstatus => 0, -verbose => 1) if $help;
55pod2usage(-exitstatus => 0, -verbose => 2) if $man;
56
57if (($type_compress ne "gzip") && ($type_compress ne "bzip2") &&
58    ($type_compress ne "all"))
59  { pod2usage(-exitstatus => 1); }
60
61sub grep_files
62  { # Don't compress compressed files, or nfs...
63    grep(!/$filter_re/,  @_)
64  }
65
66our $out;
67our $fname;
68
69
70use Math::BigInt;
71
72sub p95
73  { # 95% of value
74    my $val = Math::BigInt->new(shift);
75
76    $val->bmul(95);
77    $val->bdiv(100);
78
79    return $val->bfloor();
80  }
81
82
83sub cleanup
84  {
85    my $in = shift;
86
87    close($in);
88    if (defined ($out))   { close($out);    $out   = undef; }
89    if (defined ($fname)) { unlink($fname); $fname = undef; }
90
91    return shift;
92  }
93
94sub zip__file
95  {
96    my $name              = shift;
97    my $type_compress     = shift;
98    my $ext_compress      = shift;
99    my $cmd_compress_args = shift;
100    my $other_sz          = shift;
101
102    my $namegz = shift || ($name . $ext_compress);
103
104    if (-l $name && -f $name)
105      { # deal with symlinks...
106	my $dst = readlink $name;
107
108	defined($dst) || die "Can't readlink $name: $!";
109
110	my $dst_gz = $dst . $ext_compress;
111	if (($dst !~ /$filter_re/) && -f $dst_gz)
112	  {
113	    unlink($namegz);
114	    print STDOUT "Symlink: $name => $dst\n" if ($verbose_compress > 1);
115	    symlink($dst_gz, $namegz) || die "Can't symlink($namegz): $!";
116	  }
117	return 0;
118      }
119
120    if (! -f _ || ! -r _)
121      {
122	return 0;
123      }
124
125    my @st_name   = stat _;
126    if (!$other_sz)
127      { $other_sz = $st_name[7]; }
128    if (!$re_compress)
129      {
130	if (-f $namegz)
131	  { # If .gz file is already newer, skip it...
132	    my @st_namegz = stat _;
133
134	    if ($st_name[9] < $st_namegz[9])
135	      {
136		if ($tidy_compress && # remove old
137		    (($st_namegz[7] >= p95($other_sz))))
138		  {
139		    unlink($namegz);
140		    return $other_sz;
141		  }
142		return $st_namegz[7];
143	      }
144	  }
145      }
146
147    if (!$force_compress)
148      { # This will error out...
149	($out, $fname) = tempfile("gzip-r.XXXXXXXX", SUFFIX => ".tmp",
150				  DIR => File::Basename::dirname($namegz));
151      }
152    else
153      {
154	eval {
155	  ($out, $fname) = tempfile("gzip-r.XXXXXXXX", SUFFIX => ".tmp",
156				    DIR => File::Basename::dirname($namegz));
157	};
158	return $other_sz if ($@);
159      }
160    binmode $out;
161
162    print STDOUT "Compress: $name\n" if ($verbose_compress > 0);
163
164    my $in = undef;
165    if (!$force_compress)
166      {
167	open($in, "-|", @$cmd_compress_args, "--", $name) ||
168	  die("Can't $$cmd_compress_args[0]: $!");
169      }
170    else
171      {
172	open($in, "-|", @$cmd_compress_args, "--", $name) ||
173	  return cleanup(undef, $other_sz);
174      }
175    binmode $in;
176
177    my $bs = 1024 * 8; # Do IO in 8k blocks
178    $/ = \$bs;
179
180    while (<$in>) { $out->print($_); }
181
182    # If the the gzip file is 95% of the original, delete it
183    # Or we are doing bzip2 and we already have a gzip file that is smaller
184    $out->autoflush(1);
185    my @st_namegz = stat $out;
186    if ($st_namegz[7] >= p95($other_sz))
187      {
188	if ($zero_compress)
189	  {
190	    $st_namegz[7] = p95($other_sz);
191	    truncate($out, 0);
192	  }
193	else
194	  {
195	    return cleanup($in, $other_sz);
196	  }
197      }
198    close($in) || die "Failed closing input: $!";
199
200    rename($fname, $namegz)                 || die "Can't rename($namegz): $!";
201    if ($have_perl_cmpstat)
202      {
203	File::Temp::cmpstat($out, $namegz)  || die "File moved $namegz: $!";
204      }
205    close($out)                             || die "Failed closing output: $!";
206    $out = undef;
207
208    # No stupid fchmod/fchown in perl, Grr....
209    chmod($st_name[2] & 0777, $namegz);
210    if ($chown_compress)
211      { chown($st_name[4], $st_name[5], $namegz); }
212    return $st_namegz[7];
213  }
214
215sub zip_file
216  {
217    my $name = $_;
218    my $other_sz = 0;
219
220    my ($dev,$ino,$mode,$nlink,$uid,$gid);
221
222    if ((($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($name)) &&
223	($dev != $File::Find::topdev))
224      {
225	$File::Find::prune = 1;
226	return;
227      }
228
229    $other_sz = zip__file($name, "gzip", ".gz",
230			  ["gzip", "--to-stdout", "--no-name", "--best"], 0)
231      if (($type_compress eq "gzip")  || ($type_compress eq "all"));
232
233    $other_sz = zip__file($name, "bzip2", ".bz2",
234			  ["bzip2", "--stdout", "--best"], $other_sz)
235      if (($type_compress eq "bzip2") || ($type_compress eq "all"));
236  }
237
238if (defined ($once_compress))
239  {
240    die " Can't use type=all with --output" if ($type_compress eq "all");
241
242    my $name = shift;
243
244    die " Too many arguments for --output"  if (@ARGV);
245
246    zip__file($name, "gzip",  ".gz",
247	      ["gzip", "--to-stdout", "--no-name", "--best"], 0, $once_compress)
248      if ($type_compress eq "gzip");
249
250    zip__file($name, "bzip2", ".bz2",
251	      ["bzip2", "--stdout", "--best"], 0, $once_compress)
252      if ($type_compress eq "bzip2");
253
254    exit;
255  }
256
257find({ preprocess => \&grep_files, wanted => \&zip_file }, @ARGV);
258
259END {
260  if (defined($out) && defined($fname))
261    {
262      File::Temp::unlink0($out, $fname) || die "Can't unlink($fname): $!"; $?;
263    }
264}
265
266__END__
267
268=head1 NAME
269
270gzip-r - Recursive "intelligent" gzip/bzip2
271
272=head1 SYNOPSIS
273
274gzip-r [options] [dirs|files ...]
275
276 Options:
277  --help -?         brief help message
278  --man             full documentation
279  --force           force mode
280  --all             compress files that already have a compressed version
281  --tidy            tidy unused files
282  --verbose         print filenames
283  --chown           chown compressed files
284  --output -o       compress a single file, passing the compressed filename
285  --type -t         type of compression files
286
287=head1 OPTIONS
288
289=over 8
290
291=item B<--help>
292
293Print a brief help message and exits.
294
295=item B<--man>
296
297Prints the manual page and exits.
298
299=item B<--force>
300
301Carry on compressing even if errors are encountered during tempfile creation.
302
303=item B<--all>
304
305Recompresses files even when the compressed versions are newer than their
306source.
307
308=item B<--chown>
309
310Make the compressed output files have the same owner as the input files.
311
312=item B<--output>
313
314Only compress a single file, the argument for the option is the compressed
315output filename.
316 Note: You can't use this option when using --type=all.
317 Note: You can't specify multiple sources.
318
319=item B<--tidy>
320
321Cleanup any old compressed files that wouldn't be created (due to not being
322significantly smaller).
323
324=item B<--type>
325
326Make the compression type either gzip, bzip2 or all.
327
328=item B<--verbose>
329
330Prints the name of each file being compressed followed by a newline, if
331specified once. If specified more than once also prints the name of each symlink
332created.
333
334=back
335
336
337=head1 DESCRIPTION
338
339B<gzip-r> will take all files from the directories and filenames passed
340as fname. If the extensions of the files are not likely to be compressible
341(Ie. .gz, .bz2, .rpm, .zip) or are tmp files (Ie. .tmp, ~, #) then they are
342skipped.
343 If the fname is a regular file a fname.gz output file will be generated
344(without removing the input fname file).
345 If the fname is a symlink a fname.gz symlink pointing to the target of fname
346with a .gz extension added will be created.
347
348 gzip is called with the options: --no-name --best
349
350=cut
351