1#!/usr/bin/env perl 2 3# $Id: load-sa-rules.pl 1528 2011-05-31 10:09:15Z rjl $ 4 5######################################################################## 6# MAIA MAILGUARD LICENSE v.1.0 7# 8# Copyright 2004 by Robert LeBlanc <rjl@renaissoft.com> 9# David Morton <mortonda@dgrmm.net> 10# All rights reserved. 11# 12# PREAMBLE 13# 14# This License is designed for users of Maia Mailguard 15# ("the Software") who wish to support the Maia Mailguard project by 16# leaving "Maia Mailguard" branding information in the HTML output 17# of the pages generated by the Software, and providing links back 18# to the Maia Mailguard home page. Users who wish to remove this 19# branding information should contact the copyright owner to obtain 20# a Rebranding License. 21# 22# DEFINITION OF TERMS 23# 24# The "Software" refers to Maia Mailguard, including all of the 25# associated PHP, Perl, and SQL scripts, documentation files, graphic 26# icons and logo images. 27# 28# GRANT OF LICENSE 29# 30# Redistribution and use in source and binary forms, with or without 31# modification, are permitted provided that the following conditions 32# are met: 33# 34# 1. Redistributions of source code must retain the above copyright 35# notice, this list of conditions and the following disclaimer. 36# 37# 2. Redistributions in binary form must reproduce the above copyright 38# notice, this list of conditions and the following disclaimer in the 39# documentation and/or other materials provided with the distribution. 40# 41# 3. The end-user documentation included with the redistribution, if 42# any, must include the following acknowledgment: 43# 44# "This product includes software developed by Robert LeBlanc 45# <rjl@renaissoft.com>." 46# 47# Alternately, this acknowledgment may appear in the software itself, 48# if and wherever such third-party acknowledgments normally appear. 49# 50# 4. At least one of the following branding conventions must be used: 51# 52# a. 
The Maia Mailguard logo appears in the page-top banner of 53# all HTML output pages in an unmodified form, and links 54# directly to the Maia Mailguard home page; or 55# 56# b. The "Powered by Maia Mailguard" graphic appears in the HTML 57# output of all gateway pages that lead to this software, 58# linking directly to the Maia Mailguard home page; or 59# 60# c. A separate Rebranding License is obtained from the copyright 61# owner, exempting the Licensee from 4(a) and 4(b), subject to 62# the additional conditions laid out in that license document. 63# 64# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS 65# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 66# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 67# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 68# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 69# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 70# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 71# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 72# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR 73# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 74# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
########################################################################

use DBI;
use Mail::SpamAssassin;
use Getopt::Long;

# SpamAssassin core rules directory ($system_rules_dir): candidate
# locations, tried in order.  The %%VERSION%% and %%PREFIX%% macros
# are filled in by expand_macros().
my @default_rules_path = (
    "/var/lib/spamassassin/%%VERSION%%",
    "/usr/share/spamassassin",
    "%%PREFIX%%/share/spamassassin",
    "/usr/local/share/spamassassin",
);

# SpamAssassin local.cf directory ($local_cf_dir)
my @site_rules_path = (
    "/etc/mail/spamassassin",
    "%%PREFIX%%/etc/mail/spamassassin",
    "%%PREFIX%%/etc/spamassassin",
    "/usr/local/etc/spamassassin",
    "/usr/pkg/etc/spamassassin",
    "/usr/etc/spamassassin",
    "/etc/spamassassin",
);

# SpamAssassin user_prefs directory ($user_rules_dir)
my @user_rules_path = (
    "/var/lib/maia/.spamassassin",
    "/var/amavisd/.spamassassin",
    "/var/amavis/.spamassassin",
    "/home/amavis/.spamassassin",
    "~/.spamassassin",
);

# prototypes
sub fatal($);
sub output($);
sub expand_macros($$$);
sub first_existing_path($$@);
sub scan_rule_file($$$$);
sub scan_score_file($$$);
sub scan_rules_directory($$$$);

# name of this script
my $script_name = "load-sa-rules";

# Settings that maia.conf may assign.  The configuration file is run
# with do(), which cannot see this file's lexical variables, so these
# have to be package variables.
our ($dsn, $username, $password);
our ($local_cf_dir, $system_rules_dir, $user_rules_dir);

# read configuration file (/usr/local/etc/maia/maia.conf)
my $config_file = "/usr/local/etc/maia/maia.conf";
unless (my $rv = do $config_file) {
    # do() reports compile errors in $@ and read errors in $!; a file
    # that loads but ends with a false value is a third, distinct case.
    fatal(sprintf("Couldn't parse %s: %s", $config_file, $@)) if $@;
    fatal(sprintf("Couldn't open %s: %s", $config_file, $!))
        if (!defined($rv));
    fatal(sprintf("Couldn't run %s (it must return a true value)", $config_file));
}

my $help = 0;
my $debug = 0;
my $quiet = 0;
my $reload_descriptions = 0;

# Command-line options override the values read from maia.conf.
# GetOptions() prints its own diagnostic for each bad option and then
# returns false; stop here rather than run with settings the caller
# did not intend.
GetOptions("local-cf-dir=s"      => \$local_cf_dir,        # --local-cf-dir=<directory>
           "system-rules-dir=s"  => \$system_rules_dir,    # --system-rules-dir=<directory>
           "user-rules-dir=s"    => \$user_rules_dir,      # --user-rules-dir=<directory>
           "reload_descriptions" => \$reload_descriptions, # --reload_descriptions
           "help"                => \$help,                # --help
           "debug"               => \$debug,               # --debug
           "quiet"               => \$quiet)               # --quiet
    or fatal("Invalid command-line options (use --help for usage)");

# Resolve any debug/quiet conflicts
if ($debug && $quiet) {
    $debug = 0;
    $quiet = 0;
    output("Warning: --debug and --quiet negate each other.");
}

# Display usage information
if ($help) {
    output("load-sa-rules.pl\n" .
           " --local-cf-dir=<directory> : SpamAssassin local.cf directory\n" .
           " --system-rules-dir=<directory> : SpamAssassin core rules directory\n" .
           " --user-rules-dir=<directory> : SpamAssassin user_prefs directory\n" .
           " --reload_descriptions : force reload of all descriptions\n" .
           " --help : display this help text\n" .
           " --debug : display detailed debugging information\n" .
           " --quiet : display only error messages\n");
    exit;
}

# The SpamAssassin version and install prefix feed the %%VERSION%% and
# %%PREFIX%% macros in the search paths above.
my $sa = Mail::SpamAssassin->new();
my $sa_version = $sa->VERSION;
my $sa_prefix = $sa->{PREFIX};

# defaults (overridden by values in /usr/local/etc/maia/maia.conf)

# local.cf directory: mandatory
if (defined($local_cf_dir)) {
    $local_cf_dir = expand_macros($sa_version, $sa_prefix, $local_cf_dir);
    fatal(sprintf("Directory %s does not exist!", $local_cf_dir))
        if (!-e $local_cf_dir);
} else {
    $local_cf_dir = first_existing_path($sa_version, $sa_prefix, @site_rules_path);
    fatal("Couldn't find local.cf directory (set \$local_cf_dir in maia.conf)")
        if (!defined($local_cf_dir));
}

# core rules directory: mandatory
if (defined($system_rules_dir)) {
    $system_rules_dir = expand_macros($sa_version, $sa_prefix, $system_rules_dir);
    fatal(sprintf("Directory %s does not exist!", $system_rules_dir))
        if (!-e $system_rules_dir);
} else {
    $system_rules_dir = first_existing_path($sa_version, $sa_prefix, @default_rules_path);
    fatal("Couldn't find SpamAssassin rules directory (set \$system_rules_dir in maia.conf)")
        if (!defined($system_rules_dir));
}

# user_prefs directory: optional, so a missing one only earns a
# warning (suppressed by --quiet, like every other warning here)
if (defined($user_rules_dir)) {
    $user_rules_dir = expand_macros($sa_version, $sa_prefix, $user_rules_dir);
    if (!-e $user_rules_dir) {
        output(sprintf("Warning: Directory %s does not exist! Skipping...", $user_rules_dir))
            if (!$quiet);
        $user_rules_dir = undef;
    }
} else {
    $user_rules_dir = first_existing_path($sa_version, $sa_prefix, @user_rules_path);
    output("Warning: Couldn't find amavis user's user_prefs directory (optional: set \$user_rules_dir in maia.conf)")
        if (!defined($user_rules_dir) && !$quiet);
}

if ($debug) {
    output(sprintf("SpamAssassin core rules directory = %s", $system_rules_dir));
    output(sprintf("SpamAssassin local.cf directory = %s", $local_cf_dir));
    output(sprintf("SpamAssassin user_prefs directory = %s",
                   (defined($user_rules_dir) ? $user_rules_dir : "(not found)")));
}

my $dbh;

# database configuration
if (defined($dsn) && defined($username) && defined($password)) {
    $dbh = DBI->connect($dsn, $username, $password)
        or fatal("Can't connect to the Maia database (verify \$dsn, \$username, and \$password in maia.conf)");
} else {
    fatal("Can't connect to the Maia database (missing \$dsn, \$username, or \$password in maia.conf)");
}

# Scan the rules directories in this specific order:
#
# 1. Default rules (e.g. /usr/share/spamassassin)
# 2. Site rules (e.g. /etc/mail/spamassassin)
# 3. User rules (e.g. /var/lib/maia/.spamassassin)
#
# The order is critical, since later rules override
# the scores of earlier ones (e.g. a user rule could
# assign a score of 0 to a rule to disable it, etc.).
my $rules_added = 0;
my $rules_skipped = 0;

# Each entry is [ directory, recurse-into-subdirectories? ].  The list
# order is the scan order; only the core rules directory is scanned
# recursively, and the user_prefs directory is scanned only if found.
my @scan_plan = (
    [ $system_rules_dir, 1 ],
    [ $local_cf_dir,     0 ],
);
push(@scan_plan, [ $user_rules_dir, 0 ]) if (defined($user_rules_dir));

foreach my $step (@scan_plan) {
    my ($dir, $recurse) = @{$step};
    my ($added, $skipped) = scan_rules_directory($dbh, $dir, $debug, $recurse);
    $rules_added += $added;
    $rules_skipped += $skipped;
}
my $total = $rules_added + $rules_skipped;

output(sprintf("%d new rules added (%d rules total), all scores updated.",
               $rules_added, $total))
    if (!$quiet);

# Disconnect from the database
$dbh->disconnect;

# We're done.
exit;


# Report a fatal error through output() (time-stamped, on stdout) and
# terminate the script with exit status 1.  Does not return.
sub fatal($) {
    my ($msg) = @_;

    output("FATAL ERROR: " . $msg);
    exit 1;
}


# Write a time-stamped string to stdout for logging purposes.
# The line has the form
#   YYYY-MM-DD HH:MM:SS Maia: [<script name>] <message>
# using the local time zone.
sub output($) {
    my ($msg) = @_;

    # localtime() counts months from 0 and years from 1900; both are
    # adjusted when the line is formatted.
    my ($second, $minute, $hour, $day, $month, $year) = (localtime)[0 .. 5];

    printf("%04d-%02d-%02d %02d:%02d:%02d Maia: [%s] %s\n",
           $year + 1900, $month + 1, $day, $hour, $minute, $second,
           $script_name, $msg);
}


# Perform macro replacements for %%PREFIX%% and %%VERSION%%,
# and the '~' for home directories.
#
#   $sa_version - replacement text for every %%VERSION%%
#   $sa_prefix  - replacement text for every %%PREFIX%%
#   $path       - the path to expand
#
# Returns the expanded path.  Only a '~' at the very start of the path
# is replaced; the home directory comes from $HOME, then $LOGDIR, then
# the password-file entry of the effective user.
sub expand_macros($$$) {
    my ($sa_version, $sa_prefix, $path) = @_;

    $path =~ s/%%PREFIX%%/$sa_prefix/g;
    $path =~ s/%%VERSION%%/$sa_version/g;
    $path =~ s/^~/($ENV{HOME} || $ENV{LOGDIR} || (getpwuid($>))[7])/gex;

    return $path;
}


# Find the first existing directory in a list.
sub first_existing_path($$@) {
    # Arguments: the SpamAssassin version and prefix (passed through to
    # expand_macros()), followed by the candidate paths in priority
    # order.  Returns the first candidate that exists after macro
    # expansion, or undef if none does.
    my ($sa_version, $sa_prefix, @pathlist) = @_;

    foreach my $candidate (@pathlist) {
        my $path = expand_macros($sa_version, $sa_prefix, $candidate);
        return $path if (defined($path) && -e $path);
    }

    return undef;
}


# Scan a file for "describe" lines, which map rule names to text
# explanations.  Each of these represents a SpamAssassin rule.  If the
# rule doesn't already exist in the database, insert it with a default
# score (0.01 for T_* test rules, 0 for __* meta-rules, 1.0 otherwise).
# If it exists but has no description, or $reload_descriptions is
# true, refresh its description instead.
#
#   $dbh                 - connected DBI database handle
#   $file                - path of the rules file to read
#   $reload_descriptions - true to overwrite existing descriptions
#   $debug               - true to log each decision via output()
#
# Returns ($rules_added, $rules_skipped).  A rule whose description is
# merely refreshed is counted in neither total.  Any I/O or database
# failure ends the script through fatal().
sub scan_rule_file($$$$) {
    my ($dbh, $file, $reload_descriptions, $debug) = @_;
    my $rules_added = 0;
    my $rules_skipped = 0;

    output(sprintf("Checking %s for new rules...", $file))
        if ($debug);

    open(my $rule_fh, "<", $file)
        or fatal(sprintf("Couldn't read %s", $file));

    # Rule names are matched with "=" rather than LIKE: they are full
    # of underscores, and "_" is a single-character wildcard in LIKE.
    my $select_sth = $dbh->prepare(
        "SELECT id, rule_description FROM maia_sa_rules WHERE rule_name = ?")
        or fatal(sprintf("Couldn't prepare query: %s", $dbh->errstr));

    # Prepared once per file instead of once per rule line.
    my $insert_sth = $dbh->prepare(
        "INSERT INTO maia_sa_rules (rule_name, rule_description, rule_score_0, " .
        "rule_score_1, rule_score_2, rule_score_3) " .
        "VALUES (?, ?, ?, ?, ?, ?)")
        or fatal(sprintf("Couldn't prepare query: %s", $dbh->errstr));
    my $update_sth = $dbh->prepare(
        "UPDATE maia_sa_rules SET rule_description = ? WHERE id = ?")
        or fatal(sprintf("Couldn't prepare query: %s", $dbh->errstr));

    while (my $line = <$rule_fh>) {

        # "describe RULE_NAME free text".  Trailing whitespace (such as
        # the CR of a CRLF line ending) is kept out of the description,
        # and the last line need not end with a newline.
        next unless ($line =~ /^\s*describe\s+([A-Za-z0-9_]+)\s*(.*?)\s*\z/si);
        my ($rule_name, $rule_description) = ($1, $2);

        $select_sth->execute($rule_name)
            or fatal(sprintf("Couldn't execute query: %s", $dbh->errstr));
        my @row = $select_sth->fetchrow_array();
        $select_sth->finish;

        my $exists = (@row ? 1 : 0);
        my $described = ($exists && defined($row[1]) && $row[1] ne '');

        # Already known and described: nothing to do unless a reload
        # of all descriptions was requested.
        if ($described && !$reload_descriptions) {
            output(sprintf("Skipping existing rule: %s", $rule_name))
                if ($debug);
            $rules_skipped++;
            next;
        }

        if ($exists) {
            output(sprintf("updating rule description: %s (%s)",
                           $rule_name, $rule_description))
                if ($debug);
            $update_sth->execute($rule_description, $row[0])
                or fatal(sprintf("Couldn't execute query: %s", $dbh->errstr));
            next;
        }

        output(sprintf("Adding new rule: %s (%s)",
                       $rule_name, $rule_description))
            if ($debug);

        my $default_score;
        if ($rule_name =~ /^T_.+$/) {           # test rule
            $default_score = 0.01;
        } elsif ($rule_name =~ /^__.+$/) {      # meta-rule
            $default_score = 0.00;
        } else {
            $default_score = 1.00;
        }

        $insert_sth->execute($rule_name, $rule_description, $default_score,
                             $default_score, $default_score, $default_score)
            or fatal(sprintf("Couldn't execute query: %s", $dbh->errstr));
        $rules_added++;
    }
    close($rule_fh);

    output(sprintf("%d new rules added, %d existing rules skipped.",
                   $rules_added, $rules_skipped))
        if ($debug);

    return ($rules_added, $rules_skipped);
}


# Scan a file for "score" lines, which map rule names to numeric
# scores, and store those scores for rules already in the database.
# Any rule without an explicit score keeps whatever score it already
# has there.
#
# A score line gives one to four values, one per scoreset:
#
#   score RULE_NAME 0 1 2 3    all four scoresets given
#   score RULE_NAME 1 2 3      fourth scoreset = third value
#   score RULE_NAME 1 2        third and fourth = second value
#   score RULE_NAME 1          all four scoresets = that value
#
#   $dbh   - connected DBI database handle
#   $file  - path of the rules file to read
#   $debug - true to log each update via output()
#
# Returns nothing useful.  Any I/O or database failure ends the script
# through fatal().
sub scan_score_file($$$) {
    my ($dbh, $file, $debug) = @_;

    output(sprintf("Checking %s for updated scores...", $file))
        if ($debug);

    open(my $score_fh, "<", $file)
        or fatal(sprintf("Unable to open %s", $file));

    # "=" rather than LIKE: "_" in a rule name would otherwise act as
    # a wildcard and could select a different rule.
    my $select_sth = $dbh->prepare(
        "SELECT id FROM maia_sa_rules WHERE rule_name = ?")
        or fatal(sprintf("Couldn't prepare query: %s", $dbh->errstr));
    my $update_sth = $dbh->prepare(
        "UPDATE maia_sa_rules SET rule_score_0 = ?, " .
        "rule_score_1 = ?, " .
        "rule_score_2 = ?, " .
        "rule_score_3 = ? " .
        "WHERE id = ?")
        or fatal(sprintf("Couldn't prepare query: %s", $dbh->errstr));

    while (my $line = <$score_fh>) {

        # Drop a trailing "# comment" so that annotated score lines
        # are still recognised.
        $line =~ s/(?<!\\)#.*//s;

        # Rule name followed by one to four scores and nothing else.
        next unless ($line =~ /^\s*score\s+([A-Za-z0-9_]+)((?:\s+[0-9\-\.]+){1,4})\s*\z/i);
        my $rule_name = $1;
        my @score = split(' ', $2);

        # Missing scoresets repeat the last value that was given.
        push(@score, $score[-1]) while (@score < 4);

        $select_sth->execute($rule_name)
            or fatal(sprintf("Couldn't execute query: %s", $dbh->errstr));
        my @row = $select_sth->fetchrow_array();
        $select_sth->finish;

        # Scores for rules that are not in the database are ignored.
        next unless (@row);

        # Untaint the id.  Anything that is not a positive integer is
        # skipped instead of falling back on the previous rule's id.
        next unless (defined($row[0]) && $row[0] =~ /^([1-9][0-9]*)$/);
        my $rule_id = $1;

        output(sprintf("Updating %-30s [%8.3f] [%8.3f] [%8.3f] [%8.3f]",
                       $rule_name, $score[0], $score[1], $score[2], $score[3]))
            if ($debug);
        $update_sth->execute($score[0], $score[1], $score[2], $score[3], $rule_id)
            or fatal(sprintf("Couldn't execute query: %s", $dbh->errstr));
    }
    close($score_fh);
}


# Scan all the *.cf and user_prefs files in a directory, looking for
# SpamAssassin rules.  If $recurse is true, then also check any
# subdirectories beneath this one (they are processed first).
#
#   $dbh     - connected DBI database handle
#   $dir     - directory to scan
#   $debug   - true to log progress via output()
#   $recurse - true to descend into subdirectories
#
# Returns ($rules_added, $rules_skipped), including the totals from
# any subdirectories that were scanned.
sub scan_rules_directory($$$$) {
    my ($dbh, $dir, $debug, $recurse) = @_;

    # NOTE(review): glob() splits its pattern on whitespace, so a $dir
    # containing spaces would not be listed correctly.
    my @file_list = glob($dir . "/*");
    my $rules_added = 0;
    my $rules_skipped = 0;

    output(sprintf("Scanning %s for SpamAssassin rules", $dir))
        if ($debug);

    # depth-first traversal of any subdirectories
    if ($recurse) {
        foreach my $entry (@file_list) {
            next unless (-d $entry);
            my ($added, $skipped) = scan_rules_directory($dbh, $entry, $debug, 1);
            $rules_added += $added;
            $rules_skipped += $skipped;
        }
    }

    # Rule files are named *.cf or *user_prefs.  A directory that
    # happens to carry such a name is not a rule file.
    my @rule_files = grep { /^(.+\.cf|.*user_prefs)$/si && !-d $_ } @file_list;

    # look for any new rules in the directory
    # ($reload_descriptions is the file-level --reload_descriptions flag)
    foreach my $file (@rule_files) {
        my ($added, $skipped) = scan_rule_file($dbh, $file, $reload_descriptions, $debug);
        $rules_added += $added;
        $rules_skipped += $skipped;
    }

    # Update the scores only after every file's rules are loaded, so a
    # score in one file can apply to a rule described in another.
    foreach my $file (@rule_files) {
        scan_score_file($dbh, $file, $debug);
    }

    return ($rules_added, $rules_skipped);
}