1#!/usr/local/bin/perl -w
2use strict;
3
4# $Id: process-spam.pl,v 1.4 2004/10/07 03:49:44 perlstalker Exp $
5
6use Pod::Usage;
7use File::Find;
8use DBI;
9
10###############################################################################
11# Configuration
12###############################################################################
13# Database driver.
14my $db_driver = 'mysql';
15
16# Database name
17my $db_dbname = 'Accounts';
18
19# Database host name
20my $db_host = 'localhost';
21
22# Database username and password
23my $db_user = 'courier';
24my $db_pass = 'pass';
25
26# Number of times to retry DB connections.
27my $db_retries = 3;
28
29# SpamAssassin user preferences table.
30my $userprefs_table = 'SA_userprefs';
31
32# Log spam statistics
33my $spam_log = '/usr/local/www/cgi-data/spam.log';
34
35# Make script quieter.
36my $quiet = 0;
37
38# The default top level to the maildirs.
39my $default_maildir = '/var/mail/virtual';
40
41# Spam folder name
42my $spam_folder = 'Spam';
43
44# LEARNING OPTIONS
45# Path to sa-learn
46my $sa_learn = '/usr/local/bin/sa-learn';
47
48# Path to dspam
49my $dspam = '/usr/local/dspam/bin/dspam';
50
51# These folder named should match $learn_spam_folder and $learn_fp_folder
52# in sasql_conf.php
53# Learn spam folder
54my $learn_spam = 'Learn Spam';
55
56# Learn false positive
57my $learn_fp = 'Learn FP';
58
59# Hmm. It seems that sa-learn doesn't support SQL user prefs or
60# virtual users yet. :-( I've learned that SA is putting bayes stuff
61# in SQL in HEAD so perhaps soon.
62# Commands to run when learning spam
63my @learn_spam_cmds = (
64		       "$sa_learn --spam --no-rebuild --configpath $default_maildir/{user}/.spamassassin < '{file}'",
65		       "$dspam --user {user} --addspam < '{file}'"
66		       );
67
68# Commands to run when learning false positives
69my @learn_fp_cmds = (
70		     "$sa_learn --ham --no-rebuild --configpath $default_maildir/{user}/.spamassassin < '{file}'",
71		     "$dspam --user {user} --falsepositive < '{file}'"
72		     );
73
74###############################################################################
75# Code "Abandon hope all ye enter here."
76###############################################################################
77
78sub DEBUG { 0 };
79
80my $dbh = db_connect();
81my $sth = prep_sth($dbh);
82
83$| = 1;
84
85my @args = @ARGV;
86my $dry_run = 0;
87my $default_stale_days = 14;
88for (my $i = 0; $i < @args; $i++)
89{
90    if    ($args[$i] eq '-n') { $dry_run = 1; }
91    elsif ($args[$i] eq '-d') { $default_stale_days = $args[$i+1]; }
92}
93$default_stale_days = 14 if not $default_stale_days or $default_stale_days =~ /\D/;
94
95my $maildir = pop(@args) || $default_maildir;
96
97print "Dry run: Not doing anything\n" if $dry_run;
98
99my $spam_killed = 0;
100my $spam_killed_size = 0;
101my $spam_total = 0;
102my $spam_total_size = 0;
103
104process_spam();
105
106if (not $quiet) {
107    print "Total spam: $spam_total\n";
108    print "Total size: $spam_total_size bytes (", bytes_to_human($spam_total_size, 'm'), " M)\n";
109    print "Deleted: $spam_killed\n";
110    print "Deleted size: $spam_killed_size bytes (", bytes_to_human($spam_killed_size, 'm'), " M)\n";
111}
112
113if ($spam_log and not $dry_run)
114{
115    open (LOG, ">>$spam_log") or die "Can't append to $spam_log: $!\n";
116    print LOG time(), "|";
117    print LOG "$spam_total|$spam_total_size|$spam_killed|$spam_killed_size\n";
118    close LOG;
119}
120
121$dbh->disconnect;
122
123sub process_spam { find (\&check_spam, $maildir); }
124
125sub check_spam
126{
127    my $username = '';
128    if ($File::Find::dir =~ m!/\Qvirtual\E/	# dir
129	          (.+)		# domain
130	          /../		# 1st 2 chars of user
131                  (.+)		# username
132                  /Maildir/	# The Maildir
133	!x) {
134	$username = "$2\@$1";
135    }
136
137    if ($File::Find::dir =~ m!/\.\Q$spam_folder\E/(?:new|cur)$!o) {
138	rm_old_spam($File::Find::dir, $File::Find::name, $username);
139    } elsif ($File::Find::dir =~ m!\.\Q$learn_spam\E/(?:new|cur)$!o) {
140	learn_spam($File::Find::dir, $File::Find::name, $username);
141    } elsif ($File::Find::dir =~ m!\.\Q$learn_fp\E/(?:new|cur)$!o) {
142	learn_fp($File::Find::dir, $File::Find::name, $username);
143    }
144}
145
146sub rm_old_spam
147{
148    my $dir = shift;
149    my $name = shift;
150    my $user = shift;
151
152    my $stale_days = $default_stale_days;
153    my $age = int(-M $_);
154    my $size = -s _;
155    ++$spam_total;
156    $spam_total_size += $size;
157
158    print "$dir\n" if DEBUG;
159
160    if ($dir =~ m!/virtual/	# dir
161	          (.+)		# domain
162	          /../		# 1st 2 chars of user
163                  (.+)		# username
164                  /Maildir/	# The Maildir
165	!x)
166    {
167	my ($domain, $user) = ($1, $2);
168	my $acct = "$user\@$domain";
169	print "$acct\n" if DEBUG;
170	my $done = 0;
171	my $attempt = 0;
172	while ($attempt < $retries
173	       and not $done) {
174	    if ($sth->execute($acct))
175	    {
176		if (my $res = $sth->fetchrow_hashref())
177		{
178		    $stale_days = $res->{value};
179		}
180		$done = 1;
181	    } elsif ($sth->errstr =~ /MySQL server has gone away/) {
182		$dbh = db_connect();
183		$sth = prep_sth($dbh);
184	    }
185	    else
186	    {
187		warn "Can't read settings for $acct: ".$sth->errstr."\n";
188	    }
189	    $sth->finish;
190	}
191    }
192
193    print "User's stale_days = $stale_days\n" if DEBUG;
194
195    if ($age >= $stale_days)
196    {
197	if ($dry_run)
198	{
199	    print("unlink $name\t",
200		  "($age > ", "$stale_days",
201		  ")\n");
202	}
203	else
204	{
205	    print("unlink $name age = $age\n") if DEBUG;
206	    unlink $File::Find::name
207		or warn "Can't unlink $name: $!\n";
208	}
209	++$spam_killed;
210	$spam_killed_size += $size;
211    }
212}
213
214sub learn_spam { run_learners(@_, 1); }
215
216sub learn_fp { run_learners(@_, 0); }
217
218sub run_learners
219{
220    my $dir = shift;
221    my $file = shift;
222    my $user = shift;
223    my $spam = shift;
224
225    my $cmds;
226
227    if ($spam) {
228	$cmds = [@learn_spam_cmds];
229    } else {
230	$cmds = [@learn_fp_cmds];
231    }
232
233    foreach my $cmd (@$cmds) {
234	$cmd =~ s/\{user\}/$user/e;
235	$cmd =~ s/\{file\}/$file/e;
236	$cmd =~ s/\{dir\}/$dir/e;
237	if (DEBUG) {
238	    print "$cmd\n";
239	} else {
240	    system($cmd);
241	}
242    }
243    unlink $file unless DEBUG;
244}
245
246sub help() { pod2usage(); }
247
248sub bytes_to_human
249{
250    my $bytes = shift;
251    my $format = shift || 'k';
252    my $kb = $bytes / 1024;
253    my $mb = $kb / 1024;
254    my $gb = $mb / 1024;
255
256    if    (lc $format eq 'b') { return $bytes; }
257    elsif (lc $format eq 'k') { return $kb; }
258    elsif (lc $format eq 'm') { return $mb; }
259    elsif (lc $format eq 'g') { return $gb; }
260}
261
262sub db_connect
263{
264    my $dbh = DBI->connect("dbi:$db_driver:database=$db_dbname;host=$db_host",
265			   $db_user,
266			   $db_pass)
267	or die "Can't connect to DB: ".$DBI::errstr."\n";
268    return $dbh;
269}
270
271sub prep_sth
272{
273    my $dbh = shift;
274    my $sql = "SELECT value from $userprefs_table where username=? and preference='x-spam-days';";
275    my $sth = $dbh->prepare($sql) or die "Can't prepare SQL: ".$dbh->errstr."\n";
276    return $sth;
277}
278
279__END__
280
281=head1 NAME
282
283process-spam.pl - Clean out old spam
284
285=head1 SYNOPSIS
286
287 process-spam.pl [-n] [-d days] maildir
288
289=head1 DESCRIPTION
290
291Clean out old messages from users' .Spam folders.
292
293=over 4
294
295=item -n
296
297Dry run. Don't actually delete the files. Instead print what would have
298been done.
299
300=item -d days
301
302Number of days for a file to be considered old.
303
304=item maildir
305
306The location of the mail dirs.
307
308=back
309
310=head1 LICENSE
311
312 Copyright (c) 2003 Randy Smith
313 All rights reserved.
314
315 Redistribution and use in source and binary forms, with or without
316 modification, are permitted provided that the following conditions
317 are met:
318 1. Redistributions of source code must retain the above copyright
319    notice, this list of conditions and the following disclaimer.
320 2. Redistributions in binary form must reproduce the above copyright
321    notice, this list of conditions and the following disclaimer in the
322    documentation and/or other materials provided with the distribution.
323
324 THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
325 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
326 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
327 ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
328 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
329 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
330 OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
331 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
332 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
333 OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
334 SUCH DAMAGE.
335
336=head1 AUTHOR
337
338Randy Smith <randys@amigo.net>
339
340=cut
341
342