#!/usr/local/bin/perl
use strict;
use warnings;

# $Id: process-spam.pl,v 1.4 2004/10/07 03:49:44 perlstalker Exp $

use constant DEBUG => 0;

use Getopt::Long;
use Pod::Usage;
use File::Find;
use File::Spec;
use DBI;

###############################################################################
# Configuration
###############################################################################

# Database driver.
my $db_driver = 'mysql';

# Database name
my $db_dbname = 'Accounts';

# Database host name
my $db_host = 'localhost';

# Database username and password
my $db_user = 'courier';
my $db_pass = 'pass';

# Maximum number of attempts made to read a user's preferences before the
# default is used.
my $db_retries = 3;

# SpamAssassin user preferences table.
my $userprefs_table = 'SA_userprefs';

# Log spam statistics
my $spam_log = '/usr/local/www/cgi-data/spam.log';

# Make script quieter.
my $quiet = 0;

# The default top level to the maildirs.
my $default_maildir = '/var/mail/virtual';

# Spam folder name
my $spam_folder = 'Spam';

# LEARNING OPTIONS

# Path to sa-learn
my $sa_learn = '/usr/local/bin/sa-learn';

# Path to dspam
my $dspam = '/usr/local/dspam/bin/dspam';

# These folder names should match $learn_spam_folder and $learn_fp_folder
# in sasql_conf.php

# Learn spam folder
my $learn_spam = 'Learn Spam';

# Learn false positive
my $learn_fp = 'Learn FP';

# Hmm. It seems that sa-learn doesn't support SQL user prefs or
# virtual users yet. :-( I've learned that SA is putting bayes stuff
# in SQL in HEAD so perhaps soon.

# Command templates.  {user}, {file} and {dir} are replaced before the
# command is handed to the shell.  {file} and {dir} MUST appear inside single
# quotes (as below): embedded single quotes are escaped on that assumption.
# {user} may appear unquoted: commands using it are skipped when the account
# name is unknown or contains characters that are not shell-safe.

# Commands to run when learning spam
my @learn_spam_cmds = (
    "$sa_learn --spam --no-rebuild --configpath $default_maildir/{user}/.spamassassin < '{file}'",
    "$dspam --user {user} --addspam < '{file}'"
);

# Commands to run when learning false positives
my @learn_fp_cmds = (
    "$sa_learn --ham --no-rebuild --configpath $default_maildir/{user}/.spamassassin < '{file}'",
    "$dspam --user {user} --falsepositive < '{file}'"
);

###############################################################################
# Code "Abandon hope all ye enter here."
###############################################################################

# Extracts the account from a maildir path; $1 is the domain, $2 the user.
# NOTE: the "virtual" path component is hard-coded here, independent of
# $default_maildir.
my $account_re = qr{
    /virtual/
    (.+)        # domain
    /../        # 1st 2 chars of user
    (.+)        # username
    /Maildir/   # The Maildir
}x;

# Directories whose messages are processed.  Only the spam folder pattern is
# anchored with a leading slash; the learn patterns are kept as they were.
my $spam_dir_re       = qr{/\.\Q$spam_folder\E/(?:new|cur)$};
my $learn_spam_dir_re = qr{\.\Q$learn_spam\E/(?:new|cur)$};
my $learn_fp_dir_re   = qr{\.\Q$learn_fp\E/(?:new|cur)$};

# Per-account cache of the stale-days setting, so the database is queried
# once per account instead of once per message.
my %stale_days_for;

$| = 1;

# Parse the command line before touching the database so that -h and usage
# errors work even when the database is unreachable.
my $dry_run            = 0;
my $default_stale_days = 14;
my $opt_days;
my $opt_help = 0;

GetOptions(
    'n'      => \$dry_run,
    'd=i'    => \$opt_days,
    'help|h' => \$opt_help,
) or pod2usage(2);
help() if $opt_help;

if (defined $opt_days) {
    if ($opt_days > 0) {
        $default_stale_days = $opt_days;
    }
    else {
        warn "Ignoring -d $opt_days: using $default_stale_days days\n";
    }
}

# File::Find changes directory while walking, so later unlink() calls and
# shell redirections need an absolute path.
my $maildir = File::Spec->rel2abs(pop(@ARGV) || $default_maildir);
die "$maildir is not a directory\n" unless -d $maildir;

my $dbh = db_connect();
my $sth = prep_sth($dbh);

print "Dry run: Not doing anything\n" if $dry_run;

my $spam_killed      = 0;
my $spam_killed_size = 0;
my $spam_total       = 0;
my $spam_total_size  = 0;

process_spam();

# Release the database before reporting so that a logging failure cannot
# leave the connection open.
$dbh->disconnect;

if (not $quiet) {
    print "Total spam: $spam_total\n";
    print "Total size: $spam_total_size bytes (",
        bytes_to_human($spam_total_size, 'm'), " M)\n";
    print "Deleted: $spam_killed\n";
    print "Deleted size: $spam_killed_size bytes (",
        bytes_to_human($spam_killed_size, 'm'), " M)\n";
}

if ($spam_log and not $dry_run) {
    open(my $log, '>>', $spam_log)
        or die "Can't append to $spam_log: $!\n";
    print {$log} join('|', time(), $spam_total, $spam_total_size,
                      $spam_killed, $spam_killed_size), "\n";
    close($log) or warn "Can't close $spam_log: $!\n";
}

# Walk the maildir tree, handing every entry to check_spam().
sub process_spam {
    find(\&check_spam, $maildir);
    return;
}

# File::Find callback.  Dispatches entries that live in the new/ or cur/
# directory of the spam folder or of one of the learn folders; everything
# else is ignored.
sub check_spam {
    my $dir = $File::Find::dir;

    my $handler = $dir =~ $spam_dir_re       ? \&rm_old_spam
                : $dir =~ $learn_spam_dir_re ? \&learn_spam
                : $dir =~ $learn_fp_dir_re   ? \&learn_fp
                :                              undef;
    return unless $handler;

    # The account stays empty when the path does not have the expected
    # .../virtual/<domain>/<xx>/<user>/Maildir/ shape.
    my $username = '';
    if ($dir =~ $account_re) {
        $username = "$2\@$1";
    }

    $handler->($dir, $File::Find::name, $username);
    return;
}

# Count one message of a spam folder and delete it when it is at least as
# old (in whole days) as the owning account's stale-days setting.
#
#   $dir  - directory containing the message
#   $name - full path of the message
#   $user - account ("user@domain"), or '' when unknown
#
# Must be called from the File::Find callback: the message is examined
# through $_ in the current directory.
sub rm_old_spam {
    my ($dir, $name, $user) = @_;

    # The message may have been moved or removed since the directory was read.
    return unless -f $_;
    my $age  = int(-M _);
    my $size = -s _;

    ++$spam_total;
    $spam_total_size += $size;

    print "$dir\n" if DEBUG;

    my $stale_days = $default_stale_days;
    if (defined $user and $user ne '') {
        print "$user\n" if DEBUG;
        $stale_days = _stale_days_for($user);
    }

    print "User's stale_days = $stale_days\n" if DEBUG;

    return if $age < $stale_days;

    if ($dry_run) {
        print "unlink $name\t($age >= $stale_days)\n";
    }
    else {
        print "unlink $name age = $age\n" if DEBUG;
        unless (unlink $name) {
            warn "Can't unlink $name: $!\n";
            return;    # not deleted, so do not count it
        }
    }

    ++$spam_killed;
    $spam_killed_size += $size;
    return;
}

# Return the stale-days setting of $acct: the account's x-spam-days
# preference when one is stored and numeric, otherwise the global default.
# The answer, including a fallback to the default, is cached for the rest
# of the run.
sub _stale_days_for {
    my ($acct) = @_;

    return $stale_days_for{$acct} if exists $stale_days_for{$acct};

    my $stale_days = $default_stale_days;
    my $attempt    = 0;
    my $done       = 0;

    while ($attempt < $db_retries and not $done) {
        ++$attempt;

        if ($sth->execute($acct)) {
            if (my $res = $sth->fetchrow_hashref()) {
                my $value = $res->{value};
                if (defined $value
                    and $value =~ /\A\s*\d+(?:\.\d+)?\s*\z/)
                {
                    $stale_days = $value;
                }
                else {
                    warn "Ignoring invalid x-spam-days for $acct\n";
                }
            }
            $sth->finish;
            $done = 1;
        }
        elsif (($sth->errstr || '') =~ /MySQL server has gone away/) {
            # The connection timed out: reconnect and try again.
            $dbh = db_connect();
            $sth = prep_sth($dbh);
        }
        else {
            warn "Can't read settings for $acct: "
                . ($sth->errstr || 'unknown error') . "\n";
        }
    }

    return $stale_days_for{$acct} = $stale_days;
}

# Feed a message to the spam learners.  Arguments: ($dir, $file, $user).
sub learn_spam {
    run_learners(@_, 1);
    return;
}

# Feed a message to the false-positive learners.
# Arguments: ($dir, $file, $user).
sub learn_fp {
    run_learners(@_, 0);
    return;
}

# Run every configured learner command for one message, then delete the
# message.
#
#   $dir  - directory containing the message
#   $file - full path of the message
#   $user - account ("user@domain"), or '' when unknown
#   $spam - true to use @learn_spam_cmds, false to use @learn_fp_cmds
#
# In a dry run (or with DEBUG on) the commands are only printed and the
# message is kept.
sub run_learners {
    my ($dir, $file, $user, $spam) = @_;

    $user = '' unless defined $user;
    return unless -f $file;

    my $report_only = (DEBUG or $dry_run);
    my @templates   = $spam ? @learn_spam_cmds : @learn_fp_cmds;

    my %value_for = (
        user => $user,
        file => _shell_sq_escape($file),
        dir  => _shell_sq_escape($dir),
    );

    foreach my $template (@templates) {
        # {user} may be used unquoted, so only pass on harmless names.
        if ($template =~ /\{user\}/
            and $user !~ m{\A[\w.\@+=%:,/-]+\z})
        {
            warn "No usable account name for $file; skipping: $template\n";
            next;
        }

        # One pass, so substituted text is never substituted again.
        (my $cmd = $template) =~ s/\{(user|file|dir)\}/$value_for{$1}/g;

        if ($report_only) {
            print "$cmd\n";
            next;
        }

        my $status = system($cmd);
        if ($status == -1) {
            warn "Can't run '$cmd': $!\n";
        }
        elsif ($status != 0) {
            warn "'$cmd' failed with wait status $status\n";
        }
    }

    return if $report_only;

    unlink $file or warn "Can't unlink $file: $!\n";
    return;
}

# Escape $text for use inside a single-quoted shell word.
sub _shell_sq_escape {
    my ($text) = @_;
    $text =~ s/'/'\\''/g;
    return $text;
}

# Print the usage message and exit successfully.
sub help {
    pod2usage(-exitval => 0, -verbose => 1);
}

# Convert a byte count to another unit.
#
#   $bytes  - number of bytes
#   $format - 'b', 'k' (default), 'm' or 'g', case-insensitive
#
# Returns the unrounded value, or nothing for an unknown format.
sub bytes_to_human {
    my $bytes  = shift;
    my $format = lc(shift || 'k');

    return $bytes if $format eq 'b';

    my %divisor_for = (
        k => 1024,
        m => 1024 * 1024,
        g => 1024 * 1024 * 1024,
    );
    my $divisor = $divisor_for{$format} or return;

    return $bytes / $divisor;
}

# Connect to the configured database.  Dies when the connection fails.
sub db_connect {
    my $handle = DBI->connect(
        "dbi:$db_driver:database=$db_dbname;host=$db_host",
        $db_user, $db_pass)
        or die "Can't connect to DB: " . $DBI::errstr . "\n";
    return $handle;
}

# Prepare the statement that reads one account's x-spam-days preference.
# The single placeholder is the account name.  Dies when prepare fails.
sub prep_sth {
    my $handle = shift;
    my $sql = "SELECT value from $userprefs_table where username=? and preference='x-spam-days';";
    my $statement = $handle->prepare($sql)
        or die "Can't prepare SQL: " . $handle->errstr . "\n";
    return $statement;
}

__END__

=head1 NAME

process-spam.pl - Clean out old spam

=head1 SYNOPSIS

process-spam.pl [-h] [-n] [-d days] [maildir]

=head1 DESCRIPTION

Clean out old messages from users' .Spam folders. Messages found in the
"Learn Spam" and "Learn FP" folders are passed to the configured learner
commands and then deleted.

=over 4

=item -h

Print a usage message and exit.

=item -n

Dry run. Don't actually delete the files or run the learners. Instead print
what would have been done.

=item -d days

Number of days for a file to be considered old. Defaults to 14. A user's
x-spam-days preference, when set, takes precedence.

=item maildir

The location of the mail dirs. Defaults to the path configured in the script.

=back

=head1 LICENSE

Copyright (c) 2003 Randy Smith
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.

=head1 AUTHOR

Randy Smith

=cut