1#!/usr/local/bin/perl -w 2use strict; 3 4# $Id: process-spam.pl,v 1.4 2004/10/07 03:49:44 perlstalker Exp $ 5 6use Pod::Usage; 7use File::Find; 8use DBI; 9 10############################################################################### 11# Configuration 12############################################################################### 13# Database driver. 14my $db_driver = 'mysql'; 15 16# Database name 17my $db_dbname = 'Accounts'; 18 19# Database host name 20my $db_host = 'localhost'; 21 22# Database username and password 23my $db_user = 'courier'; 24my $db_pass = 'pass'; 25 26# Number of times to retry DB connections. 27my $db_retries = 3; 28 29# SpamAssassin user preferences table. 30my $userprefs_table = 'SA_userprefs'; 31 32# Log spam statistics 33my $spam_log = '/usr/local/www/cgi-data/spam.log'; 34 35# Make script quieter. 36my $quiet = 0; 37 38# The default top level to the maildirs. 39my $default_maildir = '/var/mail/virtual'; 40 41# Spam folder name 42my $spam_folder = 'Spam'; 43 44# LEARNING OPTIONS 45# Path to sa-learn 46my $sa_learn = '/usr/local/bin/sa-learn'; 47 48# Path to dspam 49my $dspam = '/usr/local/dspam/bin/dspam'; 50 51# These folder named should match $learn_spam_folder and $learn_fp_folder 52# in sasql_conf.php 53# Learn spam folder 54my $learn_spam = 'Learn Spam'; 55 56# Learn false positive 57my $learn_fp = 'Learn FP'; 58 59# Hmm. It seems that sa-learn doesn't support SQL user prefs or 60# virtual users yet. :-( I've learned that SA is putting bayes stuff 61# in SQL in HEAD so perhaps soon. 62# Commands to run when learning spam 63my @learn_spam_cmds = ( 64 "$sa_learn --spam --no-rebuild --configpath $default_maildir/{user}/.spamassassin < '{file}'", 65 "$dspam --user {user} --addspam < '{file}'" 66 ); 67 68# Commands to run when learning false positives 69my @learn_fp_cmds = ( 70 "$sa_learn --ham --no-rebuild --configpath $default_maildir/{user}/.spamassassin < '{file}'", 71 "$dspam --user {user} --falsepositive < '{file}'" 72 ); 73 74############################################################################### 75# Code "Abandon hope all ye enter here." 76############################################################################### 77 78sub DEBUG { 0 }; 79 80my $dbh = db_connect(); 81my $sth = prep_sth($dbh); 82 83$| = 1; 84 85my @args = @ARGV; 86my $dry_run = 0; 87my $default_stale_days = 14; 88for (my $i = 0; $i < @args; $i++) 89{ 90 if ($args[$i] eq '-n') { $dry_run = 1; } 91 elsif ($args[$i] eq '-d') { $default_stale_days = $args[$i+1]; } 92} 93$default_stale_days = 14 if not $default_stale_days or $default_stale_days =~ /\D/; 94 95my $maildir = pop(@args) || $default_maildir; 96 97print "Dry run: Not doing anything\n" if $dry_run; 98 99my $spam_killed = 0; 100my $spam_killed_size = 0; 101my $spam_total = 0; 102my $spam_total_size = 0; 103 104process_spam(); 105 106if (not $quiet) { 107 print "Total spam: $spam_total\n"; 108 print "Total size: $spam_total_size bytes (", bytes_to_human($spam_total_size, 'm'), " M)\n"; 109 print "Deleted: $spam_killed\n"; 110 print "Deleted size: $spam_killed_size bytes (", bytes_to_human($spam_killed_size, 'm'), " M)\n"; 111} 112 113if ($spam_log and not $dry_run) 114{ 115 open (LOG, ">>$spam_log") or die "Can't append to $spam_log: $!\n"; 116 print LOG time(), "|"; 117 print LOG "$spam_total|$spam_total_size|$spam_killed|$spam_killed_size\n"; 118 close LOG; 119} 120 121$dbh->disconnect; 122 123sub process_spam { find (\&check_spam, $maildir); } 124 125sub check_spam 126{ 127 my $username = ''; 128 if ($File::Find::dir =~ m!/\Qvirtual\E/ # dir 129 (.+) # domain 130 /../ # 1st 2 chars of user 131 (.+) # username 132 /Maildir/ # The Maildir 133 !x) { 134 $username = "$2\@$1"; 135 } 136 137 if ($File::Find::dir =~ m!/\.\Q$spam_folder\E/(?:new|cur)$!o) { 138 rm_old_spam($File::Find::dir, $File::Find::name, $username); 139 } elsif ($File::Find::dir =~ m!\.\Q$learn_spam\E/(?:new|cur)$!o) { 140 learn_spam($File::Find::dir, $File::Find::name, $username); 141 } elsif ($File::Find::dir =~ m!\.\Q$learn_fp\E/(?:new|cur)$!o) { 142 learn_fp($File::Find::dir, $File::Find::name, $username); 143 } 144} 145 146sub rm_old_spam 147{ 148 my $dir = shift; 149 my $name = shift; 150 my $user = shift; 151 152 my $stale_days = $default_stale_days; 153 my $age = int(-M $_); 154 my $size = -s _; 155 ++$spam_total; 156 $spam_total_size += $size; 157 158 print "$dir\n" if DEBUG; 159 160 if ($dir =~ m!/virtual/ # dir 161 (.+) # domain 162 /../ # 1st 2 chars of user 163 (.+) # username 164 /Maildir/ # The Maildir 165 !x) 166 { 167 my ($domain, $user) = ($1, $2); 168 my $acct = "$user\@$domain"; 169 print "$acct\n" if DEBUG; 170 my $done = 0; 171 my $attempt = 0; 172 while ($attempt < $retries 173 and not $done) { 174 if ($sth->execute($acct)) 175 { 176 if (my $res = $sth->fetchrow_hashref()) 177 { 178 $stale_days = $res->{value}; 179 } 180 $done = 1; 181 } elsif ($sth->errstr =~ /MySQL server has gone away/) { 182 $dbh = db_connect(); 183 $sth = prep_sth($dbh); 184 } 185 else 186 { 187 warn "Can't read settings for $acct: ".$sth->errstr."\n"; 188 } 189 $sth->finish; 190 } 191 } 192 193 print "User's stale_days = $stale_days\n" if DEBUG; 194 195 if ($age >= $stale_days) 196 { 197 if ($dry_run) 198 { 199 print("unlink $name\t", 200 "($age > ", "$stale_days", 201 ")\n"); 202 } 203 else 204 { 205 print("unlink $name age = $age\n") if DEBUG; 206 unlink $File::Find::name 207 or warn "Can't unlink $name: $!\n"; 208 } 209 ++$spam_killed; 210 $spam_killed_size += $size; 211 } 212} 213 214sub learn_spam { run_learners(@_, 1); } 215 216sub learn_fp { run_learners(@_, 0); } 217 218sub run_learners 219{ 220 my $dir = shift; 221 my $file = shift; 222 my $user = shift; 223 my $spam = shift; 224 225 my $cmds; 226 227 if ($spam) { 228 $cmds = [@learn_spam_cmds]; 229 } else { 230 $cmds = [@learn_fp_cmds]; 231 } 232 233 foreach my $cmd (@$cmds) { 234 $cmd =~ s/\{user\}/$user/e; 235 $cmd =~ s/\{file\}/$file/e; 236 $cmd =~ s/\{dir\}/$dir/e; 237 if (DEBUG) { 238 print "$cmd\n"; 239 } else { 240 system($cmd); 241 } 242 } 243 unlink $file unless DEBUG; 244} 245 246sub help() { pod2usage(); } 247 248sub bytes_to_human 249{ 250 my $bytes = shift; 251 my $format = shift || 'k'; 252 my $kb = $bytes / 1024; 253 my $mb = $kb / 1024; 254 my $gb = $mb / 1024; 255 256 if (lc $format eq 'b') { return $bytes; } 257 elsif (lc $format eq 'k') { return $kb; } 258 elsif (lc $format eq 'm') { return $mb; } 259 elsif (lc $format eq 'g') { return $gb; } 260} 261 262sub db_connect 263{ 264 my $dbh = DBI->connect("dbi:$db_driver:database=$db_dbname;host=$db_host", 265 $db_user, 266 $db_pass) 267 or die "Can't connect to DB: ".$DBI::errstr."\n"; 268 return $dbh; 269} 270 271sub prep_sth 272{ 273 my $dbh = shift; 274 my $sql = "SELECT value from $userprefs_table where username=? and preference='x-spam-days';"; 275 my $sth = $dbh->prepare($sql) or die "Can't prepare SQL: ".$dbh->errstr."\n"; 276 return $sth; 277} 278 279__END__ 280 281=head1 NAME 282 283process-spam.pl - Clean out old spam 284 285=head1 SYNOPSIS 286 287 process-spam.pl [-n] [-d days] maildir 288 289=head1 DESCRIPTION 290 291Clean out old messages from users' .Spam folders. 292 293=over 4 294 295=item -n 296 297Dry run. Don't actually delete the files. Instead print what would have 298been done. 299 300=item -d days 301 302Number of days for a file to be considered old. 303 304=item maildir 305 306The location of the mail dirs. 307 308=back 309 310=head1 LICENSE 311 312 Copyright (c) 2003 Randy Smith 313 All rights reserved. 314 315 Redistribution and use in source and binary forms, with or without 316 modification, are permitted provided that the following conditions 317 are met: 318 1. Redistributions of source code must retain the above copyright 319 notice, this list of conditions and the following disclaimer. 320 2. Redistributions in binary form must reproduce the above copyright 321 notice, this list of conditions and the following disclaimer in the 322 documentation and/or other materials provided with the distribution. 323 324 THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 325 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 326 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 327 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 328 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 329 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 330 OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 331 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 332 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 333 OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 334 SUCH DAMAGE. 335 336=head1 AUTHOR 337 338Randy Smith <randys@amigo.net> 339 340=cut 341 342