1#!/usr/bin/env perl
2
3# $Id: load-sa-rules.pl 1528 2011-05-31 10:09:15Z rjl $
4
5########################################################################
6# MAIA MAILGUARD LICENSE v.1.0
7#
8# Copyright 2004 by Robert LeBlanc <rjl@renaissoft.com>
9#                   David Morton   <mortonda@dgrmm.net>
10# All rights reserved.
11#
12# PREAMBLE
13#
14# This License is designed for users of Maia Mailguard
15# ("the Software") who wish to support the Maia Mailguard project by
16# leaving "Maia Mailguard" branding information in the HTML output
17# of the pages generated by the Software, and providing links back
18# to the Maia Mailguard home page.  Users who wish to remove this
19# branding information should contact the copyright owner to obtain
20# a Rebranding License.
21#
22# DEFINITION OF TERMS
23#
24# The "Software" refers to Maia Mailguard, including all of the
25# associated PHP, Perl, and SQL scripts, documentation files, graphic
26# icons and logo images.
27#
28# GRANT OF LICENSE
29#
30# Redistribution and use in source and binary forms, with or without
31# modification, are permitted provided that the following conditions
32# are met:
33#
34# 1. Redistributions of source code must retain the above copyright
35#    notice, this list of conditions and the following disclaimer.
36#
37# 2. Redistributions in binary form must reproduce the above copyright
38#    notice, this list of conditions and the following disclaimer in the
39#    documentation and/or other materials provided with the distribution.
40#
41# 3. The end-user documentation included with the redistribution, if
42#    any, must include the following acknowledgment:
43#
44#    "This product includes software developed by Robert LeBlanc
45#    <rjl@renaissoft.com>."
46#
47#    Alternately, this acknowledgment may appear in the software itself,
48#    if and wherever such third-party acknowledgments normally appear.
49#
50# 4. At least one of the following branding conventions must be used:
51#
52#    a. The Maia Mailguard logo appears in the page-top banner of
53#       all HTML output pages in an unmodified form, and links
54#       directly to the Maia Mailguard home page; or
55#
56#    b. The "Powered by Maia Mailguard" graphic appears in the HTML
57#       output of all gateway pages that lead to this software,
58#       linking directly to the Maia Mailguard home page; or
59#
60#    c. A separate Rebranding License is obtained from the copyright
61#       owner, exempting the Licensee from 4(a) and 4(b), subject to
62#       the additional conditions laid out in that license document.
63#
64# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
65# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
66# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
67# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
68# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
69# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
70# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
71# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
72# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
73# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
74# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
75########################################################################
76
77    use DBI;
78    use Mail::SpamAssassin;
79    use Getopt::Long;
80
# Candidate locations for SpamAssassin's core rules directory
# ($system_rules_dir), listed in search order.  The %%VERSION%% and
# %%PREFIX%% placeholders are filled in by expand_macros() before a
# candidate is tested.
my @default_rules_path = qw(
    /var/lib/spamassassin/%%VERSION%%
    /usr/share/spamassassin
    %%PREFIX%%/share/spamassassin
    /usr/local/share/spamassassin
);

# Candidate locations for the SpamAssassin local.cf directory
# ($local_cf_dir), listed in search order.
my @site_rules_path = qw(
    /etc/mail/spamassassin
    %%PREFIX%%/etc/mail/spamassassin
    %%PREFIX%%/etc/spamassassin
    /usr/local/etc/spamassassin
    /usr/pkg/etc/spamassassin
    /usr/etc/spamassassin
    /etc/spamassassin
);

# Candidate locations for the SpamAssassin user_prefs directory
# ($user_rules_dir), listed in search order.  A leading '~' is replaced
# with a home directory by expand_macros().
my @user_rules_path = qw(
    /var/lib/maia/.spamassassin
    /var/amavisd/.spamassassin
    /var/amavis/.spamassassin
    /home/amavis/.spamassassin
    ~/.spamassassin
);
108
# Forward declarations: the subs are defined at the bottom of the file
# but are called (with prototypes) from the main program above them.
sub fatal($);
sub output($);
sub expand_macros($$$);
sub first_existing_path($$@);
sub scan_rule_file($$$$);
sub scan_score_file($$$);
sub scan_rules_directory($$$$);

# Tag that output() prints in square brackets on every log line.
my $script_name = "load-sa-rules";

# Load the Maia configuration by evaluating it as Perl code.  The script
# later expects it to provide $dsn, $username and $password, and it may
# also preset $local_cf_dir, $system_rules_dir and $user_rules_dir.
my $config_file = "/usr/local/etc/maia/maia.conf";
if (!(do $config_file)) {
    # A false result means the file failed to compile ($@ is set), could
    # not be read, or did not end with a true value.
    if ($@) {
        fatal(sprintf("Couldn't parse %s: %s", $config_file, $@));
    }
    fatal(sprintf("Couldn't open %s", $config_file));
}
127
# Command-line switches.  The three *-dir options write straight into
# $local_cf_dir, $system_rules_dir and $user_rules_dir, so a value given
# on the command line replaces whatever maia.conf assigned.
my $help = 0;
my $debug = 0;
my $quiet = 0;
my $reload_descriptions = 0;

my $options_ok =
    GetOptions("local-cf-dir=s" => \$local_cf_dir,         # --local-cf-dir=<directory>
               "system-rules-dir=s" => \$system_rules_dir, # --system-rules-dir=<directory>
               "user-rules-dir=s" => \$user_rules_dir,     # --user-rules-dir=<directory>
               "reload_descriptions" => \$reload_descriptions, # --reload_descriptions
               "help" => \$help,                           # --help
               "debug" => \$debug,                         # --debug
               "quiet" => \$quiet);                        # --quiet

# GetOptions() has already reported the offending option on stderr.
# Stop here rather than touching the database with options that were
# misspelled or missing their argument.
fatal("Invalid command-line options (use --help for usage)")
    if (!$options_ok);

# Resolve any debug/quiet conflicts
if ($debug && $quiet) {
    $debug = 0;
    $quiet = 0;
    output("Warning: --debug and --quiet negate each other.");
}

# Display usage information
if ($help) {
    output("load-sa-rules.pl\n" .
           "   --local-cf-dir=<directory>     : SpamAssassin local.cf directory\n" .
           "   --system-rules-dir=<directory> : SpamAssassin core rules directory\n" .
           "   --user-rules-dir=<directory>   : SpamAssassin user_prefs directory\n" .
           "   --reload_descriptions          : force reload of all descriptions\n" .
           "   --help                         : display this help text\n" .
           "   --debug                        : display detailed debugging information\n" .
           "   --quiet                        : display only error messages\n");
    exit;
}
160
# A SpamAssassin object is created only to learn the installed version
# and prefix, which feed the %%VERSION%% and %%PREFIX%% path macros.
my $sa = Mail::SpamAssassin->new();
my $sa_version = $sa->VERSION;
my $sa_prefix = $sa->{PREFIX};

# Resolve the three rule directories.  A value that is already set (from
# maia.conf or the command line) is macro-expanded and must name a real
# directory; otherwise the built-in candidate list is searched.  The
# local.cf and core rules directories are mandatory, the user_prefs
# directory is optional.  The tests use -d (not -e) because each path is
# later scanned as a directory.
if (defined($local_cf_dir)) {
    $local_cf_dir = expand_macros($sa_version, $sa_prefix, $local_cf_dir);
    fatal(sprintf("Directory %s does not exist!", $local_cf_dir))
        if (!-d $local_cf_dir);
} else {
    $local_cf_dir = first_existing_path($sa_version, $sa_prefix, @site_rules_path);
    fatal("Couldn't find local.cf directory (set \$local_cf_dir in maia.conf)")
        if (!defined($local_cf_dir));
}
if (defined($system_rules_dir)) {
    $system_rules_dir = expand_macros($sa_version, $sa_prefix, $system_rules_dir);
    fatal(sprintf("Directory %s does not exist!", $system_rules_dir))
        if (!-d $system_rules_dir);
} else {
    $system_rules_dir = first_existing_path($sa_version, $sa_prefix, @default_rules_path);
    fatal("Couldn't find SpamAssassin rules directory (set \$system_rules_dir in maia.conf)")
        if (!defined($system_rules_dir));
}
if (defined($user_rules_dir)) {
    $user_rules_dir = expand_macros($sa_version, $sa_prefix, $user_rules_dir);
    if (!-d $user_rules_dir) {
        # Only a warning, so it is suppressed by --quiet like the
        # "couldn't find" warning below.
        output(sprintf("Warning: Directory %s does not exist!  Skipping...", $user_rules_dir))
            if (!$quiet);
        $user_rules_dir = undef;
    }
} else {
    $user_rules_dir = first_existing_path($sa_version, $sa_prefix, @user_rules_path);
    output("Warning: Couldn't find amavis user's user_prefs directory (optional: set \$user_rules_dir in maia.conf)")
        if (!defined($user_rules_dir) && !$quiet);
}

if ($debug) {
    output(sprintf("SpamAssassin core rules directory = %s", $system_rules_dir));
    output(sprintf("SpamAssassin local.cf directory = %s", $local_cf_dir));
    output(sprintf("SpamAssassin user_prefs directory = %s",
        (defined($user_rules_dir) ? $user_rules_dir : "(not found)")));
}
202
# Connect to the Maia database with the credentials from maia.conf.
my $dbh;
if (!defined($dsn) || !defined($username) || !defined($password)) {
    fatal("Can't connect to the Maia database (missing \$dsn, \$username, or \$password in maia.conf)");
}
$dbh = DBI->connect($dsn, $username, $password)
    or fatal("Can't connect to the Maia database (verify \$dsn, \$username, and \$password in maia.conf)");

# Scan the rules directories in this specific order:
#
#    1. Default rules (e.g. /usr/share/spamassassin), including
#       subdirectories
#    2. Site rules (e.g. /etc/mail/spamassassin)
#    3. User rules (e.g. /var/lib/maia/.spamassassin), when found
#
# The order is critical, since later rules override the scores of
# earlier ones (e.g. a user rule could assign a score of 0 to a rule
# to disable it, etc.).
my @scan_plan = (
    [ $system_rules_dir, 1 ],
    [ $local_cf_dir,     0 ],
);
push(@scan_plan, [ $user_rules_dir, 0 ])
    if (defined($user_rules_dir));

my $rules_added = 0;
my $rules_skipped = 0;
foreach my $step (@scan_plan) {
    my ($dir, $recurse) = @$step;
    my ($added, $skipped) = scan_rules_directory($dbh, $dir, $debug, $recurse);
    $rules_added += $added;
    $rules_skipped += $skipped;
}
my $total = $rules_added + $rules_skipped;

if (!$quiet) {
    output(sprintf("%d new rules added (%d rules total), all scores updated.",
        $rules_added, $total));
}

# Release the database connection; nothing is left to do.
$dbh->disconnect;
exit;
252
253
# Report an unrecoverable error and stop.  The message is prefixed with
# "FATAL ERROR: " and written through output(), so it is time-stamped
# like every other log line; the script then exits with status 1.
sub fatal($) {
    my $msg = shift;

    output("FATAL ERROR: $msg");
    exit 1;
}
261
262
# Write a time-stamped line to stdout for logging purposes, in the form
#
#    YYYY-MM-DD HH:MM:SS Maia: [<script name>] <message>
#
# $msg is the message text; a newline is appended to it.
sub output($) {
    my ($msg) = @_;

    # localtime() in list context yields (sec, min, hour, mday, mon,
    # year, ...); only the first six fields are needed.  The month is
    # zero-based and the year counts from 1900, hence the adjustments
    # in the printf below.
    my ($second, $minute, $hour, $day, $month, $year) = localtime();

    printf("%04d-%02d-%02d %02d:%02d:%02d Maia: [%s] %s\n",
           $year+1900, $month+1, $day, $hour, $minute, $second, $script_name, $msg);
}
273
274
# Expand the placeholders allowed in a configured path and return the
# result:
#
#    %%PREFIX%%   becomes $sa_prefix (every occurrence)
#    %%VERSION%%  becomes $sa_version (every occurrence)
#    leading '~'  becomes $HOME, else $LOGDIR, else the home directory
#                 of the effective user from the password database
sub expand_macros($$$) {
    my ($sa_version, $sa_prefix, $path) = @_;

    for ($path) {
        s/%%PREFIX%%/$sa_prefix/g;
        s/%%VERSION%%/$sa_version/g;

        # The home directory is only looked up when the path really
        # starts with a tilde.
        s{^~}{ $ENV{HOME} || $ENV{LOGDIR} || (getpwuid($>))[7] }ex;
    }

    return $path;
}
286
287
# Find the first existing directory in a list.
#
# Each candidate in @pathlist is passed through expand_macros() with the
# given SpamAssassin version and prefix, and the first one that names a
# directory is returned in its expanded form.  A candidate that exists
# but is not a directory (e.g. a plain file) is skipped, so it cannot
# shadow a real directory further down the list.  Returns undef when no
# candidate qualifies.
sub first_existing_path($$@) {
    my ($sa_version, $sa_prefix, @pathlist) = @_;

    foreach my $candidate (@pathlist) {
        my $path = expand_macros($sa_version, $sa_prefix, $candidate);
        return $path if (defined $path && -d $path);
    }

    # Explicit undef (rather than a bare return) keeps the result a
    # single value for callers, exactly as before.
    return undef;
}
299
300
# Scan a file for "describe" lines, which map rule names to text
# explanations.  Each of these represents a SpamAssassin rule.
#
#   - A rule that is not yet in maia_sa_rules is inserted with its
#     description and a default score in all four score columns: 0.01
#     for test rules (T_*), 0 for meta sub-rules (__*), and 1.0 for
#     everything else.
#   - A rule that exists but has an empty description, or any existing
#     rule when $reload_descriptions is true, gets its description
#     updated.
#   - Any other existing rule is skipped.
#
# Parameters: $dbh is the database handle, $file the file to read,
# $reload_descriptions forces description updates, and $debug enables
# progress messages.
#
# Returns the list ($rules_added, $rules_skipped).  Description updates
# of existing rules are counted in neither figure.  Any file or database
# failure ends the script through fatal().
sub scan_rule_file($$$$) {
    my ($dbh, $file, $reload_descriptions, $debug) = @_;
    my $rules_added = 0;
    my $rules_skipped = 0;
    my ($insert_sth, $update_sth);

    output(sprintf("Checking %s for new rules...", $file))
        if ($debug);

    open(my $rule_fh, "<", $file)
        or fatal(sprintf("Couldn't read %s", $file));

    # Exact match on the rule name.  LIKE must not be used here: the
    # underscores that appear in nearly every rule name are
    # single-character wildcards in a LIKE pattern.
    my $select_sth = $dbh->prepare("SELECT id, rule_description FROM maia_sa_rules WHERE rule_name = ?")
        or fatal(sprintf("Couldn't prepare query: %s", $dbh->errstr));

    while (my $line = <$rule_fh>) {
        # The description is captured lazily so that trailing blanks
        # and line terminators (including "\r\n") are left out, and a
        # final line without a newline still matches.
        next unless ($line =~ /^\s*describe\s*([A-Za-z0-9_]+)[\s\t]*(.*?)\s*\z/si);
        my ($rule_name, $rule_description) = ($1, $2);

        $select_sth->execute($rule_name)
            or fatal(sprintf("Couldn't execute query: %s", $dbh->errstr));
        my @row = $select_sth->fetchrow_array();
        my $is_new = !@row;

        if (!$is_new && defined($row[1]) && $row[1] ne '' && !$reload_descriptions) {
            output(sprintf("Skipping existing rule: %s", $rule_name))
                if ($debug);
            $rules_skipped++;
            next;
        }

        if ($is_new) {
            output(sprintf("Adding new rule: %s (%s)", $rule_name, $rule_description))
                if ($debug);

            my $default_score;
            if ($rule_name =~ /^T_.+$/) { # test rule
                $default_score = 0.01;
            } elsif ($rule_name =~ /^__.+$/) { # meta-rule
                $default_score = 0.00;
            } else {
                $default_score = 1.00;
            }

            # Prepared on first use and then reused for later lines.
            if (!$insert_sth) {
                $insert_sth = $dbh->prepare("INSERT INTO maia_sa_rules (rule_name, rule_description, rule_score_0, " .
                                            "rule_score_1, rule_score_2, rule_score_3) " .
                                            "VALUES (?, ?, ?, ?, ?, ?)")
                    or fatal(sprintf("Couldn't prepare query: %s", $dbh->errstr));
            }
            $insert_sth->execute($rule_name, $rule_description, $default_score,
                                 $default_score, $default_score, $default_score)
                or fatal(sprintf("Couldn't execute query: %s", $dbh->errstr));
            $rules_added++;
        } else {
            output(sprintf("updating rule description: %s (%s)", $rule_name, $rule_description))
                if ($debug);

            # Prepared on first use and then reused for later lines.
            if (!$update_sth) {
                $update_sth = $dbh->prepare("UPDATE maia_sa_rules SET rule_description = ? WHERE id = ?")
                    or fatal(sprintf("Couldn't prepare query: %s", $dbh->errstr));
            }
            $update_sth->execute($rule_description, $row[0])
                or fatal(sprintf("Couldn't execute query: %s", $dbh->errstr));
        }
    }
    $select_sth->finish;
    close($rule_fh);

    output(sprintf("%d new rules added, %d existing rules skipped.",
        $rules_added, $rules_skipped))
        if ($debug);

    return ($rules_added, $rules_skipped);
}
377
378
# Scan a file for "score" lines, which map rule names to numeric scores,
# and store those scores for every rule already present in
# maia_sa_rules.  Rules without a score line are not touched.
#
# A score line carries one to four values.  Missing values are filled
# with the last value given, per the SpamAssassin documentation:
#
#    score RULE_NAME 1          ->  1 1 1 1
#    score RULE_NAME 1 2        ->  1 2 2 2
#    score RULE_NAME 1 2 3      ->  1 2 3 3
#    score RULE_NAME 0 1 2 3    ->  0 1 2 3
#
# Lines with more than four values, and score lines for rules that are
# not in the table, are ignored.
#
# Parameters: $dbh is the database handle, $file the file to read, and
# $debug enables progress messages.  Any file or database failure ends
# the script through fatal().
sub scan_score_file($$$) {
    my ($dbh, $file, $debug) = @_;

    output(sprintf("Checking %s for updated scores...", $file))
        if ($debug);

    open(my $score_fh, "<", $file)
        or fatal(sprintf("Unable to open %s", $file));

    # Exact match on the rule name.  LIKE must not be used here: the
    # underscores that appear in nearly every rule name are
    # single-character wildcards in a LIKE pattern.
    my $select_sth = $dbh->prepare("SELECT id FROM maia_sa_rules WHERE rule_name = ?")
        or fatal(sprintf("Couldn't prepare query: %s", $dbh->errstr));
    my $update_sth = $dbh->prepare("UPDATE maia_sa_rules SET rule_score_0 = ?, " .
                                                            "rule_score_1 = ?, " .
                                                            "rule_score_2 = ?, " .
                                                            "rule_score_3 = ? " .
                                   "WHERE id = ?")
        or fatal(sprintf("Couldn't prepare query: %s", $dbh->errstr));

    while (my $line = <$score_fh>) {
        # One pattern covers all four forms: the second group holds one
        # to four whitespace-separated values.  Ending on \s*\z also
        # accepts a final line that has no newline.
        next unless ($line =~ /^\s*score\s+([A-Za-z0-9_]+)((?:[\s\t]+[0-9\-\.]+){1,4})\s*\z/si);
        my $rule_name = $1;
        my @score = split(' ', $2);
        push(@score, $score[-1]) while (@score < 4);

        $select_sth->execute($rule_name)
            or fatal(sprintf("Couldn't execute query: %s", $dbh->errstr));
        my @row = $select_sth->fetchrow_array();
        next unless (@row);

        # Untaint the id.  If it does not look like a positive integer
        # the line is skipped, so the id of a previously processed rule
        # can never be reused by accident.
        my $rule_id;
        $rule_id = $1 if (defined($row[0]) && $row[0] =~ /^([1-9][0-9]*)$/);
        if (!defined($rule_id)) {
            output(sprintf("Skipping %s: unusable rule id", $rule_name))
                if ($debug);
            next;
        }

        output(sprintf("Updating %-30s [%8.3f] [%8.3f] [%8.3f] [%8.3f]",
               $rule_name, $score[0], $score[1], $score[2], $score[3]))
            if ($debug);
        $update_sth->execute($score[0], $score[1], $score[2], $score[3], $rule_id)
            or fatal(sprintf("Couldn't execute query: %s", $dbh->errstr));
    }
    $select_sth->finish;
    close($score_fh);

    return;
}
488
489
# Scan all the *.cf and user_prefs files in a directory, looking for
# SpamAssassin rules.  If $recurse is true, any subdirectories beneath
# this one are processed first (depth-first).
#
# Within one directory every matching file is first passed to
# scan_rule_file() to register new rules, and only then is every
# matching file passed to scan_score_file(), so that a score line can
# refer to a rule described in a different file of the same directory.
#
# Parameters: $dbh is the database handle, $dir the directory to scan,
# $debug enables progress messages, and $recurse enables descent into
# subdirectories.
#
# Returns the list ($rules_added, $rules_skipped), summed over this
# directory and any subdirectories that were scanned.
sub scan_rules_directory($$$$) {
    my ($dbh, $dir, $debug, $recurse) = @_;
    my $rules_added = 0;
    my $rules_skipped = 0;

    # bsd_glob() is used instead of the core glob() because glob()
    # splits its argument on whitespace, which breaks directory names
    # that contain spaces.  Apart from that it applies the same default
    # flags, so the resulting list is unchanged for ordinary paths.
    require File::Glob;
    my @file_list = File::Glob::bsd_glob($dir . "/*");

    output(sprintf("Scanning %s for SpamAssassin rules", $dir))
        if ($debug);

    # depth-first traversal of any subdirectories
    if ($recurse) {
        foreach my $entry (@file_list) {
            next unless (-d $entry);
            my ($added, $skipped) = scan_rules_directory($dbh, $entry, $debug, 1);
            $rules_added += $added;
            $rules_skipped += $skipped;
        }
    }

    # rule files are recognised by name: anything ending in ".cf" or in
    # "user_prefs" (matched case-insensitively against the full path)
    my @rule_files = grep { /^(.+\.cf|.*user_prefs)$/si } @file_list;

    # look for any new rules in the directory
    foreach my $file (@rule_files) {
        my ($added, $skipped) = scan_rule_file($dbh, $file, $reload_descriptions, $debug);
        $rules_added += $added;
        $rules_skipped += $skipped;
    }

    # update the scores of the rules in the directory
    foreach my $file (@rule_files) {
        scan_score_file($dbh, $file, $debug);
    }

    return ($rules_added, $rules_skipped);
}
534