1#! /usr/bin/perl -w 2 3use strict; 4 5use File::Find; 6use File::Temp qw/tempfile/; 7use File::Basename; 8 9use FileHandle; 10 11# It's there on FC1, but not on RHEL3 12my $have_perl_cmpstat = 0; 13 14 15# Might want to add .iso or some .mov type exts ... however non-trivial savings 16# are often on those files. 17my $filter_re = qr/(?: 18 ^[.]nfs. | 19 [.]gz$ | 20 [.]bz2$ | 21 [.]rpm$ | 22 [.]zip$ | 23 [.]tmp$ | 24 ~$ | 25 \#$ 26 )/x; 27 28use Getopt::Long; 29use Pod::Usage; 30 31my $man = 0; 32my $help = 0; 33 34my $tidy_compress = 0; 35my $force_compress = 0; 36my $re_compress = 0; 37my $chown_compress = 0; 38my $once_compress = undef; 39my $verbose_compress = 0; 40my $zero_compress = 0; 41my $type_compress = "gzip"; 42 43pod2usage(0) if ! 44GetOptions ("force!" => \$force_compress, 45 "all!" => \$re_compress, 46 "chown!" => \$chown_compress, 47 "output|o=s" => \$once_compress, 48 "tidy!" => \$tidy_compress, 49 "zero!" => \$zero_compress, 50 "type|t=s" => \$type_compress, 51 "verbose+" => \$verbose_compress, 52 "help|?" => \$help, 53 "man" => \$man); 54pod2usage(-exitstatus => 0, -verbose => 1) if $help; 55pod2usage(-exitstatus => 0, -verbose => 2) if $man; 56 57if (($type_compress ne "gzip") && ($type_compress ne "bzip2") && 58 ($type_compress ne "all")) 59 { pod2usage(-exitstatus => 1); } 60 61sub grep_files 62 { # Don't compress compressed files, or nfs... 63 grep(!/$filter_re/, @_) 64 } 65 66our $out; 67our $fname; 68 69 70use Math::BigInt; 71 72sub p95 73 { # 95% of value 74 my $val = Math::BigInt->new(shift); 75 76 $val->bmul(95); 77 $val->bdiv(100); 78 79 return $val->bfloor(); 80 } 81 82 83sub cleanup 84 { 85 my $in = shift; 86 87 close($in); 88 if (defined ($out)) { close($out); $out = undef; } 89 if (defined ($fname)) { unlink($fname); $fname = undef; } 90 91 return shift; 92 } 93 94sub zip__file 95 { 96 my $name = shift; 97 my $type_compress = shift; 98 my $ext_compress = shift; 99 my $cmd_compress_args = shift; 100 my $other_sz = shift; 101 102 my $namegz = shift || ($name . $ext_compress); 103 104 if (-l $name && -f $name) 105 { # deal with symlinks... 106 my $dst = readlink $name; 107 108 defined($dst) || die "Can't readlink $name: $!"; 109 110 my $dst_gz = $dst . $ext_compress; 111 if (($dst !~ /$filter_re/) && -f $dst_gz) 112 { 113 unlink($namegz); 114 print STDOUT "Symlink: $name => $dst\n" if ($verbose_compress > 1); 115 symlink($dst_gz, $namegz) || die "Can't symlink($namegz): $!"; 116 } 117 return 0; 118 } 119 120 if (! -f _ || ! -r _) 121 { 122 return 0; 123 } 124 125 my @st_name = stat _; 126 if (!$other_sz) 127 { $other_sz = $st_name[7]; } 128 if (!$re_compress) 129 { 130 if (-f $namegz) 131 { # If .gz file is already newer, skip it... 132 my @st_namegz = stat _; 133 134 if ($st_name[9] < $st_namegz[9]) 135 { 136 if ($tidy_compress && # remove old 137 (($st_namegz[7] >= p95($other_sz)))) 138 { 139 unlink($namegz); 140 return $other_sz; 141 } 142 return $st_namegz[7]; 143 } 144 } 145 } 146 147 if (!$force_compress) 148 { # This will error out... 149 ($out, $fname) = tempfile("gzip-r.XXXXXXXX", SUFFIX => ".tmp", 150 DIR => File::Basename::dirname($namegz)); 151 } 152 else 153 { 154 eval { 155 ($out, $fname) = tempfile("gzip-r.XXXXXXXX", SUFFIX => ".tmp", 156 DIR => File::Basename::dirname($namegz)); 157 }; 158 return $other_sz if ($@); 159 } 160 binmode $out; 161 162 print STDOUT "Compress: $name\n" if ($verbose_compress > 0); 163 164 my $in = undef; 165 if (!$force_compress) 166 { 167 open($in, "-|", @$cmd_compress_args, "--", $name) || 168 die("Can't $$cmd_compress_args[0]: $!"); 169 } 170 else 171 { 172 open($in, "-|", @$cmd_compress_args, "--", $name) || 173 return cleanup(undef, $other_sz); 174 } 175 binmode $in; 176 177 my $bs = 1024 * 8; # Do IO in 8k blocks 178 $/ = \$bs; 179 180 while (<$in>) { $out->print($_); } 181 182 # If the the gzip file is 95% of the original, delete it 183 # Or we are doing bzip2 and we already have a gzip file that is smaller 184 $out->autoflush(1); 185 my @st_namegz = stat $out; 186 if ($st_namegz[7] >= p95($other_sz)) 187 { 188 if ($zero_compress) 189 { 190 $st_namegz[7] = p95($other_sz); 191 truncate($out, 0); 192 } 193 else 194 { 195 return cleanup($in, $other_sz); 196 } 197 } 198 close($in) || die "Failed closing input: $!"; 199 200 rename($fname, $namegz) || die "Can't rename($namegz): $!"; 201 if ($have_perl_cmpstat) 202 { 203 File::Temp::cmpstat($out, $namegz) || die "File moved $namegz: $!"; 204 } 205 close($out) || die "Failed closing output: $!"; 206 $out = undef; 207 208 # No stupid fchmod/fchown in perl, Grr.... 209 chmod($st_name[2] & 0777, $namegz); 210 if ($chown_compress) 211 { chown($st_name[4], $st_name[5], $namegz); } 212 return $st_namegz[7]; 213 } 214 215sub zip_file 216 { 217 my $name = $_; 218 my $other_sz = 0; 219 220 my ($dev,$ino,$mode,$nlink,$uid,$gid); 221 222 if ((($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($name)) && 223 ($dev != $File::Find::topdev)) 224 { 225 $File::Find::prune = 1; 226 return; 227 } 228 229 $other_sz = zip__file($name, "gzip", ".gz", 230 ["gzip", "--to-stdout", "--no-name", "--best"], 0) 231 if (($type_compress eq "gzip") || ($type_compress eq "all")); 232 233 $other_sz = zip__file($name, "bzip2", ".bz2", 234 ["bzip2", "--stdout", "--best"], $other_sz) 235 if (($type_compress eq "bzip2") || ($type_compress eq "all")); 236 } 237 238if (defined ($once_compress)) 239 { 240 die " Can't use type=all with --output" if ($type_compress eq "all"); 241 242 my $name = shift; 243 244 die " Too many arguments for --output" if (@ARGV); 245 246 zip__file($name, "gzip", ".gz", 247 ["gzip", "--to-stdout", "--no-name", "--best"], 0, $once_compress) 248 if ($type_compress eq "gzip"); 249 250 zip__file($name, "bzip2", ".bz2", 251 ["bzip2", "--stdout", "--best"], 0, $once_compress) 252 if ($type_compress eq "bzip2"); 253 254 exit; 255 } 256 257find({ preprocess => \&grep_files, wanted => \&zip_file }, @ARGV); 258 259END { 260 if (defined($out) && defined($fname)) 261 { 262 File::Temp::unlink0($out, $fname) || die "Can't unlink($fname): $!"; $?; 263 } 264} 265 266__END__ 267 268=head1 NAME 269 270gzip-r - Recursive "intelligent" gzip/bzip2 271 272=head1 SYNOPSIS 273 274gzip-r [options] [dirs|files ...] 275 276 Options: 277 --help -? brief help message 278 --man full documentation 279 --force force mode 280 --all compress files that already have a compressed version 281 --tidy tidy unused files 282 --verbose print filenames 283 --chown chown compressed files 284 --output -o compress a single file, passing the compressed filename 285 --type -t type of compression files 286 287=head1 OPTIONS 288 289=over 8 290 291=item B<--help> 292 293Print a brief help message and exits. 294 295=item B<--man> 296 297Prints the manual page and exits. 298 299=item B<--force> 300 301Carry on compressing even if errors are encountered during tempfile creation. 302 303=item B<--all> 304 305Recompresses files even when the compressed versions are newer than their 306source. 307 308=item B<--chown> 309 310Make the compressed output files have the same owner as the input files. 311 312=item B<--output> 313 314Only compress a single file, the argument for the option is the compressed 315output filename. 316 Note: You can't use this option when using --type=all. 317 Note: You can't specify multiple sources. 318 319=item B<--tidy> 320 321Cleanup any old compressed files that wouldn't be created (due to not being 322significantly smaller). 323 324=item B<--type> 325 326Make the compression type either gzip, bzip2 or all. 327 328=item B<--verbose> 329 330Prints the name of each file being compressed followed by a newline, if 331specified once. If specified more than once also prints the name of each symlink 332created. 333 334=back 335 336 337=head1 DESCRIPTION 338 339B<gzip-r> will take all files from the directories and filenames passed 340as fname. If the extensions of the files are not likely to be compressible 341(Ie. .gz, .bz2, .rpm, .zip) or are tmp files (Ie. .tmp, ~, #) then they are 342skipped. 343 If the fname is a regular file a fname.gz output file will be generated 344(without removing the input fname file). 345 If the fname is a symlink a fname.gz symlink pointing to the target of fname 346with a .gz extension added will be created. 347 348 gzip is called with the options: --no-name --best 349 350=cut 351