1# Dir::Purge.pm -- Purge directories
2# RCS Info        : $Id: Purge.pm,v 1.6 2006/09/19 12:24:01 jv Exp $
3# Author          : Johan Vromans
4# Created On      : Wed May 17 12:58:02 2000
5# Last Modified By: Johan Vromans
6# Last Modified On: Tue Sep 19 14:23:56 2006
7# Update Count    : 161
8# Status          : Unknown, Use with caution!
9
10# Purge directories by strategy.
11#
12# This is also an exercise in weird programming techniques.
13
14package Dir::Purge;
15
16use strict;
17use Carp;
18
19use vars qw(@ISA @EXPORT @EXPORT_OK $VERSION);
20$VERSION    = "1.02";
21@ISA        = qw(Exporter);
22@EXPORT     = qw(&purgedir);
23@EXPORT_OK  = qw(&purgedir_by_age);
24
25my $purge_by_age;		# strategy
26
27sub purgedir_by_age {
28    my @dirs = @_;
29    my $opts;
30    if ( UNIVERSAL::isa ($dirs[0], 'HASH') ) {
31	$opts = shift (@dirs);
32	my $strat = delete $opts->{strategy};
33	if ( defined $strat && $strat ne "by_age" ) {
34	    croak ("Invalid option: 'strategy'");
35	}
36	$opts->{strategy} = "by_age";
37    }
38    else {
39	$opts = { keep => shift(@dirs), strategy => "by_age" };
40    }
41    purgedir ($opts, @dirs);
42}
43
44
45# Common processing code. It verifies the arguments, directories and
46# calls $code->(...) to do the actual purging.
47# Nothing is done if any of the verifications fail.
48
49sub purgedir {
50
51    my (@dirs) = @_;
52    my $error = 0;
53    my $code = $purge_by_age;	# default: by age
54    my $ctl = { tag => "purgedir" };
55    my @opts = qw(keep strategy reverse include verbose test debug);
56
57    # Get the parameters. Only the 'keep' value is mandatory.
58    if ( UNIVERSAL::isa ($dirs[0], 'HASH') ) {
59	my $opts  = shift (@dirs);
60	@{$ctl}{@opts} = delete @{$opts}{@opts};
61	if ( $ctl->{strategy} ) {
62	    if ( $ctl->{strategy} eq "by_age" ) {
63		$code = $purge_by_age;
64	    }
65	    else {
66		carp ("Unsupported purge strategy: '$ctl->{strategy}'");
67		$error++;
68	    }
69	}
70	foreach (sort keys %$opts) {
71	    carp ("Unhandled option \"$_\"");
72	    $error++;
73	}
74    }
75    elsif ( $dirs[0] =~ /^-?\d+$/ ) {
76	$ctl->{keep} = shift (@dirs);
77    }
78
79    unless ( $ctl->{keep} ) {
80	croak ("Missing 'keep' value");
81    }
82    elsif ( $ctl->{keep} < 0 ) {
83	# Hmm. I would like to deprecate this, but on the other hand,
84	# a negative 'subscript' fits well in Perl.
85	#carp ("Negative 'keep' value is deprecated, ".
86	#      "use 'reverse => 1' instead");
87	$ctl->{keep} = -$ctl->{keep};
88	$ctl->{reverse} = !$ctl->{reverse};
89    }
90
91    $ctl->{verbose} = 1 unless defined ($ctl->{verbose});
92    $ctl->{verbose} = 9 if $ctl->{debug};
93
94    if ( $ctl->{include} ) {
95	if ( !ref($ctl->{include}) ) {
96	    croak("Invalid value for 'include': " . $ctl->{include});
97	}
98	elsif ( UNIVERSAL::isa($ctl->{include}, 'CODE') ) {
99	    # OK
100	}
101	elsif ( UNIVERSAL::isa($ctl->{include}, 'Regexp') ) {
102	    my $pat = $ctl->{include};
103	    $ctl->{include} = sub { $_[0] =~ $pat };
104	}
105	else {
106	    croak("Invalid value for 'include': " . $ctl->{include});
107	}
108    }
109
110    # Thouroughly check the directories, and refuse to do anything
111    # in case of problems.
112    warn ("$ctl->{tag}: checking directories\n") if $ctl->{verbose} > 1;
113    foreach my $dir ( @dirs ) {
114	# Must be a directory.
115	unless ( -d $dir ) {
116	    carp (-e _ ? "$dir: not a directory" : "$dir: not existing");
117	    $error++;
118	    next;
119	}
120	# We need write access since we are going to delete files.
121	unless ( -w _ ) {
122	    carp ("$dir: no write access");
123	    $error++;
124	}
125	# We need read access since we are going to get the file list.
126	unless ( -r _ ) {
127	    carp ("$dir: no read access");
128	    $error++;
129	}
130	# Probably need this as well, don't know.
131	unless ( -x _ ) {
132	    carp ("$dir: no access");
133	    $error++;
134	}
135    }
136
137    # If errors, bail out unless testing.
138    if ( $error ) {
139	if ( $ctl->{test} ) {
140	    carp ("$ctl->{tag}: errors detected, continuing");
141	}
142	else {
143	    croak ("$ctl->{tag}: errors detected, nothing done");
144	}
145    }
146
147    # Process the directories.
148    foreach my $dir ( @dirs ) {
149	$code->($ctl, $dir);
150    }
151};
152
153# Everything else is assumed to be small building-block routines to
154# implement a plethora of purge strategies.
155# Actually, I cannot think of any right now.
156
157# Gather file names and additional info.
158my $gather = sub {
159    my ($ctl, $dir, $what) = @_;
160
161    local (*DIR);
162    opendir (DIR, $dir)
163      or croak ("dir: $!");	# shouldn't happen -- we've checked!
164    my @files;
165    foreach ( readdir (DIR) ) {
166	next if $ctl->{include} && !$ctl->{include}->($_, $dir);
167	next if /^\./;
168	next unless -f "$dir/$_";
169	push (@files, [ "$dir/$_", $what->("$dir/$_") ]);
170    }
171    closedir (DIR);
172
173    warn ("$ctl->{tag}: $dir: ", scalar(@files), " files\n")
174      if $ctl->{verbose} > 1;
175    warn ("$ctl->{tag}: $dir: @{[map { $_->[0] } @files]}\n")
176      if $ctl->{debug};
177
178    \@files;
179};
180
181# Sort the list on the supplied info.
182my $sort = sub {
183    my ($ctl, $files) = @_;
184
185    my @sorted = map { $_->[0] } sort { $a->[1] <=> $b->[1] } @$files;
186    warn ("$ctl->{tag}: sorted: @sorted\n") if $ctl->{debug};
187    \@sorted;
188};
189
190# Remove the files to keep from the list.
191my $reduce = sub {
192    my ($ctl, $files) = @_;
193
194    if ( $ctl->{reverse} ) {
195	# Keep the newest files (tail of the list).
196	splice (@$files, @$files-$ctl->{keep}, $ctl->{keep});
197    }
198    else {
199	# Keep the oldest files (head of the list).
200	splice (@$files, 0, $ctl->{keep});
201    }
202    $files;
203};
204
205# Remove the files in the list.
206my $purge = sub {
207    my ($ctl, $files) = @_;
208
209    # Remove the selected files.
210    foreach ( @$files ) {
211	if ( $ctl->{test} ) {
212	    warn ("$ctl->{tag}: candidate: $_\n");
213	}
214	else {
215	    warn ("$ctl->{tag}: removing $_\n") if $ctl->{verbose};
216	    unlink ($_) or carp ("$_: $!");
217	}
218    }
219};
220
221# Processing routine: purge by file age.
222$purge_by_age = sub {
223    my ($ctl, $dir) = @_;
224
225    warn ("$ctl->{tag}: purging directory $dir (by age, keep $ctl->{keep})\n")
226      if $ctl->{verbose} > 1;
227
228    # Gather, with age info.
229    my $files = $gather->($ctl, $dir, sub { -M _ });
230
231    # Is there anything to do?
232    if ( @$files <= $ctl->{keep} ) {
233	warn ("$ctl->{tag}: $dir: below limit\n") if $ctl->{verbose} > 1;
234	return;
235    }
236
237    # Sort, reduce and purge.
238    $purge->($ctl, $reduce->($ctl, $sort->($ctl, $files)));
239};
240
2411;
242
243__END__
244
245=head1 NAME
246
247Dir::Purge - Purge directories to a given number of files.
248
249=head1 SYNOPSIS
250
251  perl -MDir::Purge -e 'purgedir (5, @ARGV)' /spare/backups
252
253  use Dir::Purge;
254  purgedir ({keep => 5, strategy => "by_age", verbose => 1}, "/spare/backups");
255
256  use Dir::Purge qw(purgedir_by_age);
257  purgedir_by_age (5, "/spare/backups");
258
259=head1 DESCRIPTION
260
261Dir::Purge implements functions to reduce the number of files in a
262directory according to a strategy. It currently provides one strategy:
263removal of files by age.
264
265By default, the module exports one user subroutine: C<purgedir>.
266
267The first argument of C<purgedir> should either be an integer,
268indicating the number of files to keep in each of the directories, or
269a reference to a hash with options. In either case, a value for the
270number of files to keep is mandatory.
271
272The other arguments are the names of the directories that must be
273purged. Note that this process is not recursive. Also, hidden files
274(name starts with a C<.>) and non-plain files (e.g., directories,
275symbolic links) are not taken into account.
276
277All directory arguments and options are checked before anything else
278is done. In particular, all arguments should point to existing
279directories and the program must have read, write, and search
280(execute) access to the directories.
281
282One additional function, C<purgedir_by_age>, can be exported on
283demand, or called by its fully qualified name. C<purgedir_by_age>
284calls C<purgedir> with the "by age" purge strategy preselected. Since
285this happens to be the default strategy for C<purgedir>, calling
286C<purgedir_by_age> is roughly equivalent to calling C<purgedir>.
287
288=head1 WARNING
289
290Removing files is a quite destructive operation. Supply the C<test>
291option, described below, to dry-run before production.
292
293=head1 OPTIONS
294
295Options are suppled by providing a hash reference as the first
296argument. The following calls are equivalent:
297
298  purgedir ({keep => 3, test => 1}, "/spare/backups");
299  purgedir_by_age ({keep => 3, test => 1}, "/spare/backups");
300  purgedir ({strategy => "by_age", keep => 3, test => 1}, "/spare/backups");
301
302All subroutines take the same arguments.
303
304=over 4
305
306=item keep
307
308The number of files to keep.
309A negative number will reverse the strategy. See option C<reverse> below.
310
311=item strategy
312
313Specifies the purge strategy.
314Default (and only allowed) value is "by_age".
315
316This option is for C<purgedir> only. The other subroutines should not
317be provided with a C<strategy> option.
318
319=item include
320
321If this is a reference to a subroutine, this subroutine is called with
322arguments ($file,$dir) and must return true for the file to be
323included in the list of candidates,
324
325If this is a regular expression, the file file will be included only
326if the expression matches the file name.
327
328=item reverse
329
330If true, the strategy will be reversed. For example, if the strategy
331is "by_age", the oldest files will be kept instead of the newest
332files.
333
334Another way to reverse the strategy is using a negative C<keep> value.
335This is not unlike Perl's array subscripts, which count from the end if
336negative.
337
338A negative C<keep> value can be combined with C<reverse> to reverse
339the reversed strategy again.
340
341=item verbose
342
343Verbosity of messages. Default value is 1, which will report the names
344of the files being removed. A value greater than 1 will produce more
345messages about what's going on. A value of 0 (zero) will suppress
346messages.
347
348=item debug
349
350For internal debugging only.
351
352=item test
353
354If true, no files will be removed. For testing.
355
356=back
357
358=head1 EXPORT
359
360Subroutine C<purgedir> is exported by default.
361
362Subroutine C<purgedir_by_age> may be exported on demand.
363
364Calling purgedir_by_age() is roughly equivalent to calling purgedir()
365with an options hash that includes C<strategy => "by_age">.
366
367The variable $Dir::Purge::VERSION may be used to inspect the version
368of the module.
369
370=head1 AUTHOR
371
372Johan Vromans (jvromans@squirrel.nl) wrote this module.
373
374=head1 COPYRIGHT AND DISCLAIMER
375
376This program is Copyright 2000 by Squirrel Consultancy. All rights
377reserved.
378
379This program is free software; you can redistribute it and/or modify
380it under the terms of either: a) the GNU General Public License as
381published by the Free Software Foundation; either version 1, or (at
382your option) any later version, or b) the "Artistic License" which
383comes with Perl.
384
385This program is distributed in the hope that it will be useful, but
386WITHOUT ANY WARRANTY; without even the implied warranty of
387MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See either the
388GNU General Public License or the Artistic License for more details.
389
390=cut
391