1# <@LICENSE> 2# Licensed to the Apache Software Foundation (ASF) under one or more 3# contributor license agreements. See the NOTICE file distributed with 4# this work for additional information regarding copyright ownership. 5# The ASF licenses this file to you under the Apache License, Version 2.0 6# (the "License"); you may not use this file except in compliance with 7# the License. You may obtain a copy of the License at: 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16# </@LICENSE> 17 18=head1 NAME 19 20Mail::SpamAssassin::Conf - SpamAssassin configuration file 21 22=head1 SYNOPSIS 23 24 # a comment 25 26 rewrite_header Subject *****SPAM***** 27 28 full PARA_A_2_C_OF_1618 /Paragraph .a.{0,10}2.{0,10}C. of S. 1618/i 29 describe PARA_A_2_C_OF_1618 Claims compliance with senate bill 1618 30 31 header FROM_HAS_MIXED_NUMS From =~ /\d+[a-z]+\d+\S*@/i 32 describe FROM_HAS_MIXED_NUMS From: contains numbers mixed in with letters 33 34 score A_HREF_TO_REMOVE 2.0 35 36 lang es describe FROM_FORGED_HOTMAIL Forzado From: simula ser de hotmail.com 37 38 lang pt_BR report O programa detetor de Spam ZOE [...] 39 40=head1 DESCRIPTION 41 42SpamAssassin is configured using traditional UNIX-style configuration files, 43loaded from the C</usr/share/spamassassin> and C</etc/mail/spamassassin> 44directories. 45 46The following web page lists the most important configuration settings 47used to configure SpamAssassin; novices are encouraged to read it first: 48 49 http://wiki.apache.org/spamassassin/ImportantInitialConfigItems 50 51=head1 FILE FORMAT 52 53The C<#> character starts a comment, which continues until end of line. 
54B<NOTE:> if the C<#> character is to be used as part of a rule or 55configuration option, it must be escaped with a backslash. i.e.: C<\#> 56 57Whitespace in the files is not significant, but please note that starting a 58line with whitespace is deprecated, as we reserve its use for multi-line rule 59definitions, at some point in the future. 60 61Currently, each rule or configuration setting must fit on one-line; multi-line 62settings are not supported yet. 63 64File and directory paths can use C<~> to refer to the user's home 65directory, but no other shell-style path extensions such as globing or 66C<~user/> are supported. 67 68Where appropriate below, default values are listed in parentheses. 69 70Test names ("SYMBOLIC_TEST_NAME") can only contain alphanumerics/underscores, 71can not start with digit, and must be less than 128 characters. 72 73=head1 USER PREFERENCES 74 75The following options can be used in both site-wide (C<local.cf>) and 76user-specific (C<user_prefs>) configuration files to customize how 77SpamAssassin handles incoming email messages. 78 79=cut 80 81package Mail::SpamAssassin::Conf; 82 83use strict; 84use warnings; 85# use bytes; 86use re 'taint'; 87 88use Mail::SpamAssassin::NetSet; 89use Mail::SpamAssassin::Constants qw(:sa :ip); 90use Mail::SpamAssassin::Conf::Parser; 91use Mail::SpamAssassin::Logger; 92use Mail::SpamAssassin::Util qw(untaint_var compile_regexp); 93use File::Spec; 94 95our @ISA = qw(); 96 97our $COLLECT_REGRESSION_TESTS; # Used only for unit tests. 98 99# odd => eval test. Not constants so they can be shared with Parser 100# TODO: move to Constants.pm? 
101our $TYPE_HEAD_TESTS = 0x0008; 102our $TYPE_HEAD_EVALS = 0x0009; 103our $TYPE_BODY_TESTS = 0x000a; 104our $TYPE_BODY_EVALS = 0x000b; 105our $TYPE_FULL_TESTS = 0x000c; 106our $TYPE_FULL_EVALS = 0x000d; 107our $TYPE_RAWBODY_TESTS = 0x000e; 108our $TYPE_RAWBODY_EVALS = 0x000f; 109our $TYPE_URI_TESTS = 0x0010; 110our $TYPE_URI_EVALS = 0x0011; 111our $TYPE_META_TESTS = 0x0012; 112our $TYPE_RBL_EVALS = 0x0013; 113our $TYPE_EMPTY_TESTS = 0x0014; 114 115my @rule_types = ("body_tests", "uri_tests", "uri_evals", 116 "head_tests", "head_evals", "body_evals", "full_tests", 117 "full_evals", "rawbody_tests", "rawbody_evals", 118 "rbl_evals", "meta_tests"); 119 120#Removed $VERSION per BUG 6422 121#$VERSION = 'bogus'; # avoid CPAN.pm picking up version strings later 122 123# these are variables instead of constants so that other classes can 124# access them; if they're constants, they'd have to go in Constants.pm 125# TODO: move to Constants.pm? 126our $CONF_TYPE_STRING = 1; 127our $CONF_TYPE_BOOL = 2; 128our $CONF_TYPE_NUMERIC = 3; 129our $CONF_TYPE_HASH_KEY_VALUE = 4; 130our $CONF_TYPE_ADDRLIST = 5; 131our $CONF_TYPE_TEMPLATE = 6; 132our $CONF_TYPE_NOARGS = 7; 133our $CONF_TYPE_STRINGLIST = 8; 134our $CONF_TYPE_IPADDRLIST = 9; 135our $CONF_TYPE_DURATION = 10; 136our $MISSING_REQUIRED_VALUE = '-99999999999999'; # string expected by parser 137our $INVALID_VALUE = '-99999999999998'; 138our $INVALID_HEADER_FIELD_NAME = '-99999999999997'; 139 140# set to "1" by the test suite code, to record regression tests 141# $Mail::SpamAssassin::Conf::COLLECT_REGRESSION_TESTS = 1; 142 143# search for "sub new {" to find the start of the code 144########################################################################### 145 146sub set_default_commands { 147 my($self) = @_; 148 149 # see "perldoc Mail::SpamAssassin::Conf::Parser" for details on this fmt. 150 # push each config item like this, to avoid a POD bug; it can't just accept 151 # ( { ... }, { ... 
}, { ...} ) otherwise POD parsing dies. 152 my @cmds; 153 154=head2 SCORING OPTIONS 155 156=over 4 157 158=item required_score n.nn (default: 5) 159 160Set the score required before a mail is considered spam. C<n.nn> can 161be an integer or a real number. 5.0 is the default setting, and is 162quite aggressive; it would be suitable for a single-user setup, but if 163you're an ISP installing SpamAssassin, you should probably set the 164default to be more conservative, like 8.0 or 10.0. It is not 165recommended to automatically delete or discard messages marked as 166spam, as your users B<will> complain, but if you choose to do so, only 167delete messages with an exceptionally high score such as 15.0 or 168higher. This option was previously known as C<required_hits> and that 169name is still accepted, but is deprecated. 170 171=cut 172 173 push (@cmds, { 174 setting => 'required_score', 175 aliases => ['required_hits'], # backward compatible 176 default => 5, 177 type => $CONF_TYPE_NUMERIC, 178 }); 179 180=item score SYMBOLIC_TEST_NAME n.nn [ n.nn n.nn n.nn ] 181 182Assign scores (the number of points for a hit) to a given test. 183Scores can be positive or negative real numbers or integers. 184C<SYMBOLIC_TEST_NAME> is the symbolic name used by SpamAssassin for 185that test; for example, 'FROM_ENDS_IN_NUMS'. 186 187If only one valid score is listed, then that score is always used 188for a test. 189 190If four valid scores are listed, then the score that is used depends 191on how SpamAssassin is being used. The first score is used when 192both Bayes and network tests are disabled (score set 0). The second 193score is used when Bayes is disabled, but network tests are enabled 194(score set 1). The third score is used when Bayes is enabled and 195network tests are disabled (score set 2). The fourth score is used 196when Bayes is enabled and network tests are enabled (score set 3). 197 198Setting a rule's score to 0 will disable that rule from running. 

If any of the score values are surrounded by parentheses '()', then
all of the scores in the line are considered to be relative to the
already set score. ie: '(3)' means increase the score for this
rule by 3 points in all score sets. '(3) (0) (3) (0)' means increase
the score for this rule by 3 in score sets 0 and 2 only.

If no score is given for a test by the end of the configuration,
a default score is assigned: a score of 1.0 is used for all tests,
except those whose names begin with 'T_' (this is used to indicate a
rule in testing) which receive 0.01.

Note that test names which begin with '__' are indirect rules used
to compose meta-match rules and can also act as prerequisites to
other rules. They are not scored or listed in the 'tests hit'
reports, but assigning a score of 0 to an indirect rule will disable
it from running.

=cut

  push (@cmds, {
    setting => 'score',
    is_frequent => 1,
    # Handler for "score SYMBOLIC_TEST_NAME n.nn [ n.nn n.nn n.nn ]".
    # Stores the score of the rule in all four score sets of
    # $self->{scoreset}.  Returns $MISSING_REQUIRED_VALUE when the rule
    # name or the scores are missing (or their count is not 1 or 4),
    # $INVALID_VALUE for a non-numeric score or a relative score without
    # a previous setting, and nothing on success.
    code => sub {
      my ($self, $key, $value, $line) = @_;

      # split only once we know there is something to split, so that an
      # undefined value cannot raise an 'uninitialized' warning
      my ($rule, @scores);
      ($rule, @scores) = split(/\s+/, $value)
        if defined $value && $value !~ /^$/;
      unless (@scores == 1 || @scores == 4) {
        info("config: score: requires a symbolic rule name and 1 or 4 scores");
        return $MISSING_REQUIRED_VALUE;
      }

      # Figure out if we're doing relative scores, remove the parens if we are
      my $relative = 0;
      foreach my $score (@scores) {  # aliased: parens are stripped in place
        local ($1);
        if ($score =~ s/^\((-?\d+(?:\.\d+)?)\)$/$1/) {
          $relative = 1;
        }
        unless ($score =~ /^-?\d+(?:\.\d+)?$/) {
          info("config: score: the non-numeric score ($score) is not valid, " .
               "a numeric score is required");
          return $INVALID_VALUE;
        }
      }

      if ($relative && !exists $self->{scoreset}->[0]->{$rule}) {
        info("config: score: relative score without previous setting in " .
             "configuration");
        return $INVALID_VALUE;
      }

      # If we're only passed 1 score, copy it to the other scoresets
      @scores = ($scores[0]) x 4  if @scores == 1;

      # Set the actual scoreset values appropriately
      for my $index (0..3) {
        my $score = $relative ?
          $self->{scoreset}->[$index]->{$rule} + $scores[$index] :
          $scores[$index];

        $self->{scoreset}->[$index]->{$rule} = $score + 0.0;
      }

      # the value of a trailing loop is unspecified, so return nothing
      # explicitly on success
      return;
    }
  });

=back

=head2 WHITELIST AND BLACKLIST OPTIONS

=over 4

=item whitelist_from user@example.com

Used to whitelist sender addresses which send mail that is often tagged
(incorrectly) as spam.

Use of this setting is not recommended, since it blindly trusts the message,
which is routinely and easily forged by spammers and phish senders. The
recommended solution is to instead use C<whitelist_auth> or other authenticated
whitelisting methods, or C<whitelist_from_rcvd>.

Whitelist and blacklist addresses are now file-glob-style patterns, so
C<friend@somewhere.com>, C<*@isp.com>, or C<*.domain.net> will all work.
Specifically, C<*> and C<?> are allowed, but all other metacharacters
are not. Regular expressions are not used for security reasons.
Matching is case-insensitive.

Multiple addresses per line, separated by spaces, is OK. Multiple
C<whitelist_from> lines are also OK.

The headers checked for whitelist addresses are as follows: if C<Resent-From>
is set, use that; otherwise check all addresses taken from the following
set of headers:

  Envelope-Sender
  Resent-Sender
  X-Envelope-From
  From

In addition, the "envelope sender" data, taken from the SMTP envelope data
where this is available, is looked up. See C<envelope_sender_header>.

e.g.
307 308 whitelist_from joe@example.com fred@example.com 309 whitelist_from *@example.com 310 311=cut 312 313 push (@cmds, { 314 setting => 'whitelist_from', 315 type => $CONF_TYPE_ADDRLIST, 316 }); 317 318=item unwhitelist_from user@example.com 319 320Used to remove a default whitelist_from entry, so for example a distribution 321whitelist_from can be overridden in a local.cf file, or an individual user can 322override a whitelist_from entry in their own C<user_prefs> file. 323The specified email address has to match exactly (although case-insensitively) 324the address previously used in a whitelist_from line, which implies that a 325wildcard only matches literally the same wildcard (not 'any' address). 326 327e.g. 328 329 unwhitelist_from joe@example.com fred@example.com 330 unwhitelist_from *@example.com 331 332=cut 333 334 push (@cmds, { 335 command => 'unwhitelist_from', 336 setting => 'whitelist_from', 337 type => $CONF_TYPE_ADDRLIST, 338 code => \&Mail::SpamAssassin::Conf::Parser::remove_addrlist_value 339 }); 340 341=item whitelist_from_rcvd addr@lists.sourceforge.net sourceforge.net 342 343Works similarly to whitelist_from, except that in addition to matching 344a sender address, a relay's rDNS name or its IP address must match too 345for the whitelisting rule to fire. The first parameter is a sender's e-mail 346address to whitelist, and the second is a string to match the relay's rDNS, 347or its IP address. Matching is case-insensitive. 348 349This second parameter is matched against a TCP-info information field as 350provided in a FROM clause of a trace information (i.e. in a Received header 351field, see RFC 5321). Only the Received header fields inserted by trusted 352hosts are considered. This parameter can either be a full hostname, or a 353domain component of that hostname, or an IP address (optionally followed 354by a slash and a prefix length) in square brackets. 
The address prefix 355(mask) length with a slash may stand within brackets along with an address, 356or may follow the bracketed address. Reverse DNS lookup is done by an MTA, 357not by SpamAssassin. 358 359For backward compatibility as an alternative to a CIDR notation, an IPv4 360address in brackets may be truncated on classful boundaries to cover whole 361subnets, e.g. C<[10.1.2.3]>, C<[10.1.2]>, C<[10.1]>, C<[10]>. 362 363In other words, if the host that connected to your MX had an IP address 364192.0.2.123 that mapped to 'sendinghost.example.org', you should specify 365C<sendinghost.example.org>, or C<example.org>, or C<[192.0.2.123]>, or 366C<[192.0.2.0/24]>, or C<[192.0.2]> here. 367 368Note that this requires that C<internal_networks> be correct. For simple 369cases, it will be, but for a complex network you may get better results 370by setting that parameter. 371 372It also requires that your mail exchangers be configured to perform DNS 373reverse lookups on the connecting host's IP address, and to record the 374result in the generated Received header field according to RFC 5321. 375 376e.g. 377 378 whitelist_from_rcvd joe@example.com example.com 379 whitelist_from_rcvd *@* mail.example.org 380 whitelist_from_rcvd *@axkit.org [192.0.2.123] 381 whitelist_from_rcvd *@axkit.org [192.0.2.0/24] 382 whitelist_from_rcvd *@axkit.org [192.0.2.0]/24 383 whitelist_from_rcvd *@axkit.org [2001:db8:1234::/48] 384 whitelist_from_rcvd *@axkit.org [2001:db8:1234::]/48 385 386=item def_whitelist_from_rcvd addr@lists.sourceforge.net sourceforge.net 387 388Same as C<whitelist_from_rcvd>, but used for the default whitelist entries 389in the SpamAssassin distribution. The whitelist score is lower, because 390these are often targets for spammer spoofing. 

=cut

  # whitelist_from_rcvd and def_whitelist_from_rcvd accept exactly the same
  # syntax ("address relay") and differ only in the list they feed, so both
  # handlers are built by this factory.  The generated handler returns
  # $MISSING_REQUIRED_VALUE for an empty value, $INVALID_VALUE unless the
  # value consists of exactly two whitespace-separated fields, and
  # otherwise hands both fields to the parser's add_to_addrlist_rcvd().
  my $rcvd_list_handler = sub {
    my ($listname) = @_;
    return sub {
      my ($self, $key, $value, $line) = @_;
      unless (defined $value && $value !~ /^$/) {
        return $MISSING_REQUIRED_VALUE;
      }
      unless ($value =~ /^\S+\s+\S+$/) {
        return $INVALID_VALUE;
      }
      $self->{parser}->add_to_addrlist_rcvd ($listname,
                                             split(/\s+/, $value));
    };
  };

  push (@cmds, {
    setting => 'whitelist_from_rcvd',
    type => $CONF_TYPE_ADDRLIST,
    code => $rcvd_list_handler->('whitelist_from_rcvd'),
  });

  push (@cmds, {
    setting => 'def_whitelist_from_rcvd',
    type => $CONF_TYPE_ADDRLIST,
    code => $rcvd_list_handler->('def_whitelist_from_rcvd'),
  });

=item whitelist_allows_relays user@example.com

Specify addresses which are in C<whitelist_from_rcvd> that sometimes
send through a mail relay other than the listed ones. By default mail
with a From address that is in C<whitelist_from_rcvd> that does not match
the relay will trigger a forgery rule. Including the address in
C<whitelist_allows_relays> prevents that.

Whitelist and blacklist addresses are now file-glob-style patterns, so
C<friend@somewhere.com>, C<*@isp.com>, or C<*.domain.net> will all work.
Specifically, C<*> and C<?> are allowed, but all other metacharacters
are not. Regular expressions are not used for security reasons.
Matching is case-insensitive.

Multiple addresses per line, separated by spaces, is OK. Multiple
C<whitelist_allows_relays> lines are also OK.

The specified email address does not have to match exactly the address
previously used in a whitelist_from_rcvd line as it is compared to the
address in the header.

e.g.
448 449 whitelist_allows_relays joe@example.com fred@example.com 450 whitelist_allows_relays *@example.com 451 452=cut 453 454 push (@cmds, { 455 setting => 'whitelist_allows_relays', 456 type => $CONF_TYPE_ADDRLIST, 457 }); 458 459=item unwhitelist_from_rcvd user@example.com 460 461Used to remove a default whitelist_from_rcvd or def_whitelist_from_rcvd 462entry, so for example a distribution whitelist_from_rcvd can be overridden 463in a local.cf file, or an individual user can override a whitelist_from_rcvd 464entry in their own C<user_prefs> file. 465 466The specified email address has to match exactly the address previously 467used in a whitelist_from_rcvd line. 468 469e.g. 470 471 unwhitelist_from_rcvd joe@example.com fred@example.com 472 unwhitelist_from_rcvd *@axkit.org 473 474=cut 475 476 push (@cmds, { 477 setting => 'unwhitelist_from_rcvd', 478 type => $CONF_TYPE_ADDRLIST, 479 code => sub { 480 my ($self, $key, $value, $line) = @_; 481 unless (defined $value && $value !~ /^$/) { 482 return $MISSING_REQUIRED_VALUE; 483 } 484 unless ($value =~ /^(?:\S+(?:\s+\S+)*)$/) { 485 return $INVALID_VALUE; 486 } 487 $self->{parser}->remove_from_addrlist_rcvd('whitelist_from_rcvd', 488 split (/\s+/, $value)); 489 $self->{parser}->remove_from_addrlist_rcvd('def_whitelist_from_rcvd', 490 split (/\s+/, $value)); 491 } 492 }); 493 494=item blacklist_from user@example.com 495 496Used to specify addresses which send mail that is often tagged (incorrectly) as 497non-spam, but which the user doesn't want. Same format as C<whitelist_from>. 498 499=cut 500 501 push (@cmds, { 502 setting => 'blacklist_from', 503 type => $CONF_TYPE_ADDRLIST, 504 }); 505 506=item unblacklist_from user@example.com 507 508Used to remove a default blacklist_from entry, so for example a 509distribution blacklist_from can be overridden in a local.cf file, or 510an individual user can override a blacklist_from entry in their own 511C<user_prefs> file. 
The specified email address has to match exactly 512the address previously used in a blacklist_from line. 513 514 515e.g. 516 517 unblacklist_from joe@example.com fred@example.com 518 unblacklist_from *@spammer.com 519 520=cut 521 522 523 push (@cmds, { 524 command => 'unblacklist_from', 525 setting => 'blacklist_from', 526 type => $CONF_TYPE_ADDRLIST, 527 code => \&Mail::SpamAssassin::Conf::Parser::remove_addrlist_value 528 }); 529 530 531=item whitelist_to user@example.com 532 533If the given address appears as a recipient in the message headers 534(Resent-To, To, Cc, obvious envelope recipient, etc.) the mail will 535be whitelisted. Useful if you're deploying SpamAssassin system-wide, 536and don't want some users to have their mail filtered. Same format 537as C<whitelist_from>. 538 539There are three levels of To-whitelisting, C<whitelist_to>, C<more_spam_to> 540and C<all_spam_to>. Users in the first level may still get some spammish 541mails blocked, but users in C<all_spam_to> should never get mail blocked. 542 543The headers checked for whitelist addresses are as follows: if C<Resent-To> or 544C<Resent-Cc> are set, use those; otherwise check all addresses taken from the 545following set of headers: 546 547 To 548 Cc 549 Apparently-To 550 Delivered-To 551 Envelope-Recipients 552 Apparently-Resent-To 553 X-Envelope-To 554 Envelope-To 555 X-Delivered-To 556 X-Original-To 557 X-Rcpt-To 558 X-Real-To 559 560=item more_spam_to user@example.com 561 562See above. 563 564=item all_spam_to user@example.com 565 566See above. 
567 568=cut 569 570 push (@cmds, { 571 setting => 'whitelist_to', 572 type => $CONF_TYPE_ADDRLIST, 573 }); 574 push (@cmds, { 575 setting => 'more_spam_to', 576 type => $CONF_TYPE_ADDRLIST, 577 }); 578 push (@cmds, { 579 setting => 'all_spam_to', 580 type => $CONF_TYPE_ADDRLIST, 581 }); 582 583=item blacklist_to user@example.com 584 585If the given address appears as a recipient in the message headers 586(Resent-To, To, Cc, obvious envelope recipient, etc.) the mail will 587be blacklisted. Same format as C<blacklist_from>. 588 589=cut 590 591 push (@cmds, { 592 setting => 'blacklist_to', 593 type => $CONF_TYPE_ADDRLIST, 594 }); 595 596=item whitelist_auth user@example.com 597 598Used to specify addresses which send mail that is often tagged (incorrectly) as 599spam. This is different from C<whitelist_from> and C<whitelist_from_rcvd> in 600that it first verifies that the message was sent by an authorized sender for 601the address, before whitelisting. 602 603Authorization is performed using one of the installed sender-authorization 604schemes: SPF (using C<Mail::SpamAssassin::Plugin::SPF>), or DKIM (using 605C<Mail::SpamAssassin::Plugin::DKIM>). Note that those plugins must be active, 606and working, for this to operate. 607 608Using C<whitelist_auth> is roughly equivalent to specifying duplicate 609C<whitelist_from_spf>, C<whitelist_from_dk>, and C<whitelist_from_dkim> lines 610for each of the addresses specified. 611 612e.g. 613 614 whitelist_auth joe@example.com fred@example.com 615 whitelist_auth *@example.com 616 617=item def_whitelist_auth user@example.com 618 619Same as C<whitelist_auth>, but used for the default whitelist entries 620in the SpamAssassin distribution. The whitelist score is lower, because 621these are often targets for spammer spoofing. 

=cut

  push (@cmds, {
    setting => 'whitelist_auth',
    type => $CONF_TYPE_ADDRLIST,
  });

  push (@cmds, {
    setting => 'def_whitelist_auth',
    type => $CONF_TYPE_ADDRLIST,
  });

=item unwhitelist_auth user@example.com

Used to remove a C<whitelist_auth> or C<def_whitelist_auth> entry. The
specified email address has to match exactly the address previously used.

e.g.

  unwhitelist_auth joe@example.com fred@example.com
  unwhitelist_auth *@example.com

=cut

  push (@cmds, {
    setting => 'unwhitelist_auth',
    type => $CONF_TYPE_ADDRLIST,
    # Handler: takes one or more whitespace-separated addresses and asks
    # the parser to drop each of them from both the whitelist_auth and
    # the def_whitelist_auth list.
    code => sub {
      my ($self, $key, $value, $line) = @_;

      # nothing given at all
      return $MISSING_REQUIRED_VALUE
        if !defined $value || $value =~ /^$/;
      # must be a plain list of whitespace-separated tokens
      return $INVALID_VALUE
        if $value !~ /^(?:\S+(?:\s+\S+)*)$/;

      my $parser = $self->{parser};
      $parser->remove_from_addrlist('whitelist_auth',
                                    split (/\s+/, $value));
      $parser->remove_from_addrlist('def_whitelist_auth',
                                    split (/\s+/, $value));
    }
  });


=item enlist_uri_host (listname) host ...

Adds one or more host names or domain names to a named list of URI domains.
The named list can then be consulted through a check_uri_host_listed()
eval rule implemented by the WLBLEval plugin, which takes the list name as
an argument. Parentheses around a list name are literal - a required syntax.

Host names may optionally be prefixed by an exclamation mark '!', which
produces false as a result if this entry matches. This makes it easier
to exclude some subdomains when their superdomain is listed, for example:

  enlist_uri_host (MYLIST) !sub1.example.com !sub2.example.com example.com

No wildcards are supported, but subdomains do match implicitly. Lists
are independent. Search for each named list starts by looking up the
full hostname first, then leading fields are progressively stripped off
(e.g.: sub.example.com, example.com, com) until a match is found or we run
out of fields. The first matching entry (the most specific) determines if a
lookup yielded a true (no '!' prefix) or a false (with a '!' prefix) result.

If a URL found in a message contains an IP address in place of a host name,
the given list must specify the exact same IP address (instead of a host name)
in order to match.

Use the delist_uri_host directive to neutralize previous enlist_uri_host
settings.

Enlisting to lists named 'BLACK' and 'WHITE' have their shorthand directives
blacklist_uri_host and whitelist_uri_host and corresponding default rules,
but the names 'BLACK' and 'WHITE' are otherwise not special or reserved.

=cut

  push (@cmds, {
    command => 'enlist_uri_host',
    setting => 'uri_host_lists',
    type => $CONF_TYPE_ADDRLIST,
    # Handler: records every lower-cased host of "(listname) host ..." in
    # $conf->{uri_host_lists}{listname}, with value 1 for a plain entry
    # and 0 for an entry that was prefixed by '!'.
    code => sub {
      my($conf, $key, $value, $line) = @_;
      local($1,$2);
      return $MISSING_REQUIRED_VALUE
        unless $value =~ /^ \( (.+?) \) \s+ (.+) \z/sx;

      # the list name is what an eval rule consulting the list passes in
      my($listname, $hosts) = ($1, lc $2);

      # note: keep the full dereference chain in the assignment below;
      # if it were factored out, subhashes would spring up in a copy
      # and be lost
      foreach my $entry ( split(/\s+/, $hosts) ) {
        my $listed = 1;
        if ($entry =~ s/^!//) {
          $listed = 0;
        }
        $conf->{uri_host_lists}{$listname}{$entry} = $listed;
      }
    }
  });

=item delist_uri_host [ (listname) ] host ...

Removes one or more specified host names from a named list of URI domains.
Removing an unlisted name is ignored (is not an error). Listname is optional,
if specified then just the named list is affected, otherwise hosts are
removed from all URI host lists created so far. Parentheses around a list
name are a required syntax.

Note that directives in configuration files are processed in sequence,
the delist_uri_host only applies to previously listed entries and has
no effect on enlisted entries in yet-to-be-processed directives.

For convenience (similarity to the enlist_uri_host directive) hostnames
may be prefixed by an exclamation mark, which is stripped off from each
name and has no meaning here.

=cut

  push (@cmds, {
    command => 'delist_uri_host',
    setting => 'uri_host_lists',
    type => $CONF_TYPE_ADDRLIST,
    # Handler for "delist_uri_host [ (listname) ] host ...": removes the
    # given (lower-cased) hosts from the named list, or from every list
    # in $conf->{uri_host_lists} when no list name is given.  Returns
    # $MISSING_REQUIRED_VALUE when no host is given, nothing otherwise.
    code => sub {
      my($conf, $key, $value, $line) = @_;
      local($1,$2);
      if ($value !~ /^ (?: \( (.+?) \) \s+ )? (.+) \z/sx) {
        return $MISSING_REQUIRED_VALUE;
      }
      my @listnames = defined $1 ? $1 : keys %{$conf->{uri_host_lists}};

      # a leading '!' is accepted for symmetry with enlist_uri_host but
      # carries no meaning here; strip it once, up front
      my @hosts = split(/\s+/, lc $2);
      s/^!// foreach @hosts;

      foreach my $listname (@listnames) {
        my $list = $conf->{uri_host_lists}{$listname};
        # removing from a list that was never created is a no-op; skipping
        # it avoids autovivifying an empty list of that name
        next  if !$list;
        delete @{$list}{@hosts};
      }

      # the value of a trailing loop is unspecified, so return nothing
      # explicitly on success
      return;
    }
  });

=item enlist_addrlist (listname) user@example.com

Adds one or more addresses to a named list of addresses.
The named list can then be consulted through a check_from_in_list() or a
check_to_in_list() eval rule implemented by the WLBLEval plugin, which takes
the list name as an argument. Parentheses around a list name are literal - a
required syntax.

Listed addresses are file-glob-style patterns, so C<friend@somewhere.com>,
C<*@isp.com>, or C<*.domain.net> will all work.
Specifically, C<*> and C<?> are allowed, but all other metacharacters
are not. Regular expressions are not used for security reasons.
Matching is case-insensitive.

Multiple addresses per line, separated by spaces, is OK. Multiple
C<enlist_addrlist> lines are also OK.

Enlisting an address to the list named blacklist_to is synonymous to using the
directive blacklist_to

Enlisting an address to the list named blacklist_from is synonymous to using the
directive blacklist_from

Enlisting an address to the list named whitelist_to is synonymous to using the
directive whitelist_to

Enlisting an address to the list named whitelist_from is synonymous to using the
directive whitelist_from

e.g.

  enlist_addrlist (PAYPAL_ADDRESS) service@paypal.com
  enlist_addrlist (PAYPAL_ADDRESS) *@paypal.co.uk

=cut

  push (@cmds, {
    setting => 'enlist_addrlist',
    type => $CONF_TYPE_ADDRLIST,
    # Handler for "enlist_addrlist (listname) addr ...": hands every
    # address pattern that follows the parenthesised list name to the
    # parser's add_to_addrlist() for that list.  Returns
    # $MISSING_REQUIRED_VALUE when the list name or the addresses are
    # missing.
    code => sub {
      my($conf, $key, $value, $line) = @_;
      local($1,$2);
      if ($value !~ /^ \( (.+?) \) \s+ (.+) \z/sx) {
        return $MISSING_REQUIRED_VALUE;
      }
      # copy the captures before any other regexp gets a chance to run;
      # the list name is what check_from_in_list() / check_to_in_list()
      # eval rules take as their argument
      my($listname, $addresses) = ($1, $2);
      # pass on the addresses only: splitting the whole $value would also
      # enlist the literal "(listname)" token as a bogus address pattern
      $conf->{parser}->add_to_addrlist ($listname, split(/\s+/, $addresses));
    }
  });

=item blacklist_uri_host host-or-domain ...

Is a shorthand for a directive: enlist_uri_host (BLACK) host ...

Please see directives enlist_uri_host and delist_uri_host for details.

=cut

  push (@cmds, {
    command => 'blacklist_uri_host',
    setting => 'uri_host_lists',
    type => $CONF_TYPE_ADDRLIST,
    # Handler: same bookkeeping as enlist_uri_host, with the list name
    # fixed to 'BLACK' (1 = listed, 0 = entry was prefixed by '!').
    code => sub {
      my($conf, $key, $value, $line) = @_;
      foreach my $host ( split(/\s+/, lc $value) ) {
        my $v = $host =~ s/^!// ? 0 : 1;
        $conf->{uri_host_lists}{'BLACK'}{$host} = $v;
      }
    }
  });

=item whitelist_uri_host host-or-domain ...

Is a shorthand for a directive: enlist_uri_host (WHITE) host ...

Please see directives enlist_uri_host and delist_uri_host for details.

=cut

  push (@cmds, {
    command => 'whitelist_uri_host',
    setting => 'uri_host_lists',
    type => $CONF_TYPE_ADDRLIST,
    # Handler: same bookkeeping as enlist_uri_host, with the list name
    # fixed to 'WHITE' (1 = listed, 0 = entry was prefixed by '!').
    code => sub {
      my($conf, $key, $value, $line) = @_;
      foreach my $host ( split(/\s+/, lc $value) ) {
        my $v = $host =~ s/^!// ? 0 : 1;
        $conf->{uri_host_lists}{'WHITE'}{$host} = $v;
      }
    }
  });

=back

=head2 BASIC MESSAGE TAGGING OPTIONS

=over 4

=item rewrite_header { subject | from | to } STRING

By default, suspected spam messages will not have the C<Subject>,
C<From> or C<To> lines tagged to indicate spam. By setting this option,
the header will be tagged with C<STRING> to indicate that a message is
spam. For the From or To headers, this will take the form of an RFC 2822
comment following the address in parentheses. For the Subject header,
this will be prepended to the original subject. Note that you should
only use the _REQD_ and _SCORE_ tags when rewriting the Subject header
if C<report_safe> is 0. Otherwise, you may not be able to remove
the SpamAssassin markup via the normal methods. More information
about tags is explained below in the B<TEMPLATE TAGS> section.

Parentheses are not permitted in STRING if rewriting the From or To headers.
(They will be converted to square brackets.)

If C<rewrite_header subject> is used, but the message being rewritten
does not already contain a C<Subject> header, one will be created.

A null value for C<STRING> will remove any existing rewrite for the specified
header.

=cut

  push (@cmds, {
    setting => 'rewrite_header',
    type => $CONF_TYPE_HASH_KEY_VALUE,
    # Handler for "rewrite_header { subject | from | to } STRING".
    # Stores STRING in $self->{rewrite_header}{Header}, or deletes that
    # entry when STRING is empty.  Returns $MISSING_REQUIRED_VALUE when no
    # header name is given, $INVALID_VALUE for any header other than
    # From, Subject or To, and nothing on success.
    code => sub {
      my ($self, $key, $value, $line) = @_;
      my ($hdr, $string);
      ($hdr, $string) = split(/\s+/, $value, 2)  if defined $value;

      # an empty directive yields no header name at all; test for that
      # before calling lc(), so no 'uninitialized' warning is raised
      if (!defined $hdr || $hdr =~ /^$/) {
        return $MISSING_REQUIRED_VALUE;
      }
      $hdr = ucfirst(lc($hdr));

      # We only deal with From, Subject, and To ...
      if ($hdr !~ /^(?:From|Subject|To)$/) {
        # note the issue, then fall through to the caller with an error
        info("config: rewrite_header: ignoring $hdr, not From, Subject, or To");
        return $INVALID_VALUE;
      }

      # a null STRING removes any existing rewrite for this header
      unless (defined $string && $string =~ /\S/) {
        delete $self->{rewrite_header}->{$hdr};
        return;
      }

      # From/To tags are emitted as a parenthesised comment, so any
      # parentheses inside STRING are converted to square brackets
      if ($hdr ne 'Subject') {
        $string =~ tr/()/[]/;
      }
      $self->{rewrite_header}->{$hdr} = $string;
      return;
    }
  });

=item subjprefix

Add a prefix to the email's Subject if a rule is matched.
To enable this option, the "rewrite_header Subject" config
option must be enabled as well.

The check C<if can(Mail::SpamAssassin::Conf::feature_subjprefix)>
should be used to silence warnings in previous
SpamAssassin versions.

To be able to use this feature a C<add_header all Subjprefix _SUBJPREFIX_>
configuration line could be needed when the glue between the MTA and SpamAssassin
rewrites the email content.

Here is an example on how to use this feature:

  rewrite_header Subject *****SPAM*****
  add_header all Subjprefix _SUBJPREFIX_
  body OLEMACRO_MALICE eval:check_olemacro_malice()
  describe OLEMACRO_MALICE Dangerous Office Macro
  score OLEMACRO_MALICE 5.0
  if can(Mail::SpamAssassin::Conf::feature_subjprefix)
    subjprefix OLEMACRO_MALICE [VIRUS]
  endif

=cut

  push (@cmds, {
    command => 'subjprefix',
    setting => 'subjprefix',
    is_frequent => 1,
    type => $CONF_TYPE_HASH_KEY_VALUE,
  });

=item add_header { spam | ham | all } header_name string

Customized headers can be added to the specified type of messages (spam,
ham, or "all" to add to either). All headers begin with C<X-Spam->
(so a C<header_name> Foo will generate a header called X-Spam-Foo).
header_name is restricted to the character set [A-Za-z0-9_-].
954 955The order of C<add_header> configuration options is preserved, inserted 956headers will follow this order of declarations. When combining C<add_header> 957with C<clear_headers> and C<remove_header>, keep in mind that C<add_header> 958appends a new header to the current list, after first removing any existing 959header fields of the same name. Note also that C<add_header>, C<clear_headers> 960and C<remove_header> may appear in multiple .cf files, which are interpreted 961in alphabetic order. 962 963C<string> can contain tags as explained below in the B<TEMPLATE TAGS> section. 964You can also use C<\n> and C<\t> in the header to add newlines and tabulators 965as desired. A backslash has to be written as \\, any other escaped chars will 966be silently removed. 967 968All headers will be folded if fold_headers is set to C<1>. Note: Manually 969adding newlines via C<\n> disables any further automatic wrapping (ie: 970long header lines are possible). The lines will still be properly folded 971(marked as continuing) though. 972 973You can customize existing headers with B<add_header> (only the specified 974subset of messages will be changed). 975 976See also C<clear_headers> and C<remove_header> for removing headers. 

Here are some examples (these are the defaults, note that Checker-Version can
not be changed or removed):

  add_header spam Flag _YESNOCAPS_
  add_header all Status _YESNO_, score=_SCORE_ required=_REQD_ tests=_TESTS_ autolearn=_AUTOLEARN_ version=_VERSION_
  add_header all Level _STARS(*)_
  add_header all Checker-Version SpamAssassin _VERSION_ (_SUBVERSION_) on _HOSTNAME_

=cut

  # add_header: parses "<ham|spam|all> <header_name> <string>", expands the
  # \t and \n escapes in <string>, and appends [name, string] to the
  # headers_ham and/or headers_spam lists after dropping any entry with the
  # same (case-insensitive) name.
  push(@cmds, {
    setting => 'add_header',
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      local ($1,$2,$3);
      unless ($value =~ /^(ham|spam|all)\s+([A-Za-z0-9_-]+)\s+(.*?)\s*$/) {
        return $INVALID_VALUE;
      }
      my ($type, $name, $hline) = ($1, $2, $3);

      # strip one pair of enclosing double quotes, if present
      $hline = $1  if $hline =~ /^"(.*)"$/;

      # Split at double backslashes so that they survive the escape
      # processing below; the dummy newline is needed to make trailing
      # backslashes work.
      my @pieces = split(/\\\\/, $hline."\n");
      for my $piece (@pieces) {
        $piece =~ s/\\t/\t/g;   # expand tabs
        $piece =~ s/\\n/\n/g;   # expand newlines
        $piece =~ s/\\.//g;     # purge all other escapes
      }
      $hline = join("\\", @pieces);
      chop($hline);             # remove dummy newline again

      my @lists;
      push(@lists, 'headers_ham')   if $type eq "ham"  || $type eq "all";
      push(@lists, 'headers_spam')  if $type eq "spam" || $type eq "all";
      for my $list (@lists) {
        # replace any existing header of that name, keeping declaration order
        $self->{$list} =
          [ grep { lc($_->[0]) ne lc($name) } @{$self->{$list}} ];
        push(@{$self->{$list}}, [$name, $hline]);
      }
    }
  });

=item remove_header { spam | ham | all } header_name

Headers can be removed from the specified type of messages (spam, ham,
or "all" to remove from either). All headers begin with C<X-Spam->
(so C<header_name> will be appended to C<X-Spam->).

See also C<clear_headers> for removing all the headers at once.

Note that B<X-Spam-Checker-Version> is not removable because the version
information is needed by mail administrators and developers to debug
problems. Without at least one header, it might not even be possible to
determine that SpamAssassin is running.

=cut

  # remove_header: parses "<ham|spam|all> <header_name>" and drops every
  # entry with that (case-insensitive) name from the headers_ham and/or
  # headers_spam lists. Checker-Version is never removed.
  push(@cmds, {
    setting => 'remove_header',
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      local ($1,$2);
      if ($value !~ /^(ham|spam|all)\s+([A-Za-z0-9_-]+)\s*$/) {
        return $INVALID_VALUE;
      }

      my ($type, $name) = ($1, lc($2));

      # Header names are matched case-insensitively below, so the
      # Checker-Version exemption has to be case-insensitive as well;
      # otherwise "remove_header all checker-version" would strip a header
      # that is documented as not removable.
      return if $name eq 'checker-version';

      if (($type eq "ham") || ($type eq "all")) {
        $self->{headers_ham} =
          [ grep { lc($_->[0]) ne $name } @{$self->{headers_ham}} ];
      }
      if (($type eq "spam") || ($type eq "all")) {
        $self->{headers_spam} =
          [ grep { lc($_->[0]) ne $name } @{$self->{headers_spam}} ];
      }
      return;
    }
  });

=item clear_headers

Clear the list of headers to be added to messages. You may use this
before any B<add_header> options to prevent the default headers from being
added to the message.

C<add_header>, C<clear_headers> and C<remove_header> may appear in multiple
.cf files, which are interpreted in alphabetic order, so C<clear_headers>
in a later file will remove all added headers from previously interpreted
configuration files, which may or may not be desired.

Note that B<X-Spam-Checker-Version> is not removable because the version
information is needed by mail administrators and developers to debug
problems. Without at least one header, it might not even be possible to
determine that SpamAssassin is running.

=cut

  # clear_headers: empties both header lists, except that the first
  # Checker-Version entry found in headers_ham (if any) is kept in both.
  push(@cmds, {
    setting => 'clear_headers',
    type    => $CONF_TYPE_NOARGS,
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      # this directive takes no arguments
      return $INVALID_VALUE  if defined $value && $value ne '';

      my @kept = grep { lc($_->[0]) eq "checker-version" }
                      @{$self->{headers_ham}};
      $self->{headers_ham}  = @kept ? [ $kept[0] ] : [];
      $self->{headers_spam} = @kept ? [ $kept[0] ] : [];
    }
  });

=item report_safe ( 0 | 1 | 2 ) (default: 1)

If this option is set to 1, if an incoming message is tagged as spam,
instead of modifying the original message, SpamAssassin will create a
new report message and attach the original message as a message/rfc822
MIME part (ensuring the original message is completely preserved, not
easily opened, and easier to recover).

If this option is set to 2, then original messages will be attached with
a content type of text/plain instead of message/rfc822. This setting
may be required for safety reasons on certain broken mail clients that
automatically load attachments without any action by the user. This
setting may also make it somewhat more difficult to extract or view the
original message.

If this option is set to 0, incoming spam is only modified by adding
some C<X-Spam-> headers and no changes will be made to the body. In
addition, a header named B<X-Spam-Report> will be added to spam. You
can use the B<remove_header> option to remove that header after setting
B<report_safe> to 0.

See B<report_safe_copy_headers> if you want to copy headers from
the original mail into tagged messages.

=cut

  # report_safe: accepts exactly 0, 1 or 2. When set to 0, a "Report"
  # header carrying _REPORT_ is appended to headers_spam unless one is
  # already declared.
  push(@cmds, {
    setting => 'report_safe',
    default => 1,
    type    => $CONF_TYPE_NUMERIC,
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      return $MISSING_REQUIRED_VALUE  if $value eq '';
      return $INVALID_VALUE  unless $value =~ /^[012]$/;

      $self->{report_safe} = $value+0;
      if (!$self->{report_safe}) {
        my $has_report =
          grep { lc($_->[0]) eq "report" } @{$self->{headers_spam}};
        push(@{$self->{headers_spam}}, ["Report", "_REPORT_"])
          unless $has_report;
      }
    }
  });

=item report_wrap_width (default: 70)

This option sets the wrap width for description lines in the X-Spam-Report
header, not accounting for tab width.

=cut

  push(@cmds, {
    setting => 'report_wrap_width',
    default => '70',
    type    => $CONF_TYPE_NUMERIC,
  });

=back

=head2 LANGUAGE OPTIONS

=over 4

=item ok_locales xx [ yy zz ... ] (default: all)

This option is used to specify which locales are considered OK for
incoming mail. Mail using the B<character sets> that are allowed by
this option will not be marked as possibly being spam in a foreign
language.

If you receive lots of spam in foreign languages, and never get any non-spam in
these languages, this may help. Note that all ISO-8859-* character sets, and
Windows code page character sets, are always permitted by default.

Set this to C<all> to allow all character sets. This is the default.

The rules C<CHARSET_FARAWAY>, C<CHARSET_FARAWAY_BODY>, and
C<CHARSET_FARAWAY_HEADERS> are triggered based on how this is set.

Examples:

  ok_locales all         (allow all locales)
  ok_locales en          (only allow English)
  ok_locales en ja zh    (allow English, Japanese, and Chinese)

Note: if there are multiple ok_locales lines, only the last one is used.

Select the locales to allow from the list below:

=over 4

=item en - Western character sets in general

=item ja - Japanese character sets

=item ko - Korean character sets

=item ru - Cyrillic character sets

=item th - Thai character sets

=item zh - Chinese (both simplified and traditional) character sets

=back

=cut

  push(@cmds, {
    setting => 'ok_locales',
    default => 'all',
    type    => $CONF_TYPE_STRING,
  });

=item normalize_charset ( 0 | 1) (default: 0)

Whether to decode non- UTF-8 and non-ASCII textual parts and recode them
to UTF-8 before the text is given over to rules processing. The character
set used for attempted decoding is primarily based on a declared character
set in a Content-Type header, but if the decoding attempt fails a module
Encode::Detect::Detector is consulted (if available) to provide a guess
based on the actual text, and decoding is re-attempted. Even if the option
is enabled no unnecessary decoding and re-encoding work is done when
possible (like with an all-ASCII text with a US-ASCII or extended ASCII
character set declaration, e.g. UTF-8 or ISO-8859-nn or Windows-nnnn).

Unicode support in old versions of perl or in a core module Encode is likely
to be buggy in places, so if the normalize_charset function is enabled
it is advised to stick to more recent versions of perl (preferably 5.12
or later). The module Encode::Detect::Detector is optional, when necessary
it will be used if it is available.

=cut

  # normalize_charset: accepts yes/no/1/0, stores the boolean, then checks
  # the prerequisites (perl version, HTML::Parser version, Encode). If one
  # is not met the option is reset to 0, a lint warning is issued and the
  # value is reported as invalid.
  push(@cmds, {
    setting => 'normalize_charset',
    default => 0,
    type    => $CONF_TYPE_BOOL,
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      if (!defined $value || $value =~ /^$/) {
        return $MISSING_REQUIRED_VALUE;
      }

      if    (lc $value eq 'yes' || $value eq '1') { $value = 1 }
      elsif (lc $value eq 'no'  || $value eq '0') { $value = 0 }
      else  { return $INVALID_VALUE }

      $self->{normalize_charset} = $value;

      # common handling of an unmet prerequisite
      my $refuse = sub {
        $self->{parser}->lint_warn($_[0]);
        $self->{normalize_charset} = 0;
        return $INVALID_VALUE;
      };

      return $refuse->("config: normalize_charset requires Perl 5.8.5 or later")
        unless $] > 5.008004;

      require HTML::Parser;
      # eval with VERSION is used so that this version is not incorrectly
      # parsed for CPAN
      return $refuse->("config: normalize_charset requires HTML::Parser 3.46 or later")
        unless eval { HTML::Parser->VERSION(3.46) };

      return $refuse->("config: normalize_charset requires Encode")
        unless eval 'require Encode';
    }
  });

=back

=head2 NETWORK TEST OPTIONS

=over 4

=item trusted_networks IPaddress[/masklen] ... (default: none)

What networks or hosts are 'trusted' in your setup. B<Trusted> in this case
means that relay hosts on these networks are considered to not be potentially
operated by spammers, open relays, or open proxies. A trusted host could
conceivably relay spam, but will not originate it, and will not forge header
data. DNS blacklist checks will never query for hosts on these networks.

See C<http://wiki.apache.org/spamassassin/TrustPath> for more information.

MXes for your domain(s) and internal relays should B<also> be specified using
the C<internal_networks> setting.
When there are 'trusted' hosts that
are not MXes or internal relays for your domain(s) they should B<only> be
specified in C<trusted_networks>.

The C<IPaddress> can be an IPv4 address (in a dot-quad form), or an IPv6
address optionally enclosed in square brackets. Scoped link-local IPv6
addresses are syntactically recognized but the interface scope is currently
ignored (e.g. [fe80::1234%eth0] ) and should be avoided.

If a C</masklen> is specified, it is considered a CIDR-style 'netmask' length,
specified in bits. If it is not specified, but less than 4 octets of an IPv4
address are specified with a trailing dot, an implied netmask length covers
all addresses in remaining octets (i.e. implied masklen is /8 or /16 or /24).
If masklen is not specified, and there is no trailing dot, then just a single
IP address specified is used, as if the masklen were C</32> with an IPv4
address, or C</128> in case of an IPv6 address.

If a network or host address is prefaced by a C<!> the matching network or
host will be excluded from the list even if a less specific (shorter netmask
length) subnet is later specified in the list. This allows a subset of
a wider network to be exempt. In case of specifying overlapping subnets,
specify more specific subnets first (tighter matching, i.e. with a longer
netmask length), followed by less specific (shorter netmask length) subnets
to get predictable results regardless of the search algorithm used - when
Net::Patricia module is installed the search finds the tightest matching
entry in the list, while a sequential search as used in absence of the
module Net::Patricia will find the first matching entry in the list.

Note: 127.0.0.0/8 and ::1 are always included in trusted_networks, regardless
of your config.

Examples:

   trusted_networks 192.168.0.0/16        # all in 192.168.*.*
   trusted_networks 192.168.              # all in 192.168.*.*
   trusted_networks 212.17.35.15          # just that host
   trusted_networks !10.0.1.5 10.0.1/24   # all in 10.0.1.* but not 10.0.1.5
   trusted_networks 2001:db8:1::1 !2001:db8:1::/64 2001:db8::/32
     # 2001:db8::/32 and 2001:db8:1::1/128, except the rest of 2001:db8:1::/64

This operates additively, so a C<trusted_networks> line after another one
will append new entries to the list of trusted networks. To clear out the
existing entries, use C<clear_trusted_networks>.

If C<trusted_networks> is not set and C<internal_networks> is, the value
of C<internal_networks> will be used for this parameter.

If neither C<trusted_networks> nor C<internal_networks> is set, a basic
inference algorithm is applied. This works as follows:

=over 4

=item *

If the 'from' host has an IP address in a private (RFC 1918) network range,
then it's trusted

=item *

If there are authentication tokens in the received header, and
the previous host was trusted, then this host is also trusted

=item *

Otherwise this host, and all further hosts, are considered untrusted.

=back

=cut

  push(@cmds, {
    setting => 'trusted_networks',
    type    => $CONF_TYPE_IPADDRLIST,
  });

=item clear_trusted_networks

Empty the list of trusted networks.

=cut

  # clear_trusted_networks: replaces the trusted set with a fresh netset
  # and marks trusted_networks as not configured.
  push(@cmds, {
    setting => 'clear_trusted_networks',
    type    => $CONF_TYPE_NOARGS,
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      # this directive takes no arguments
      return $INVALID_VALUE  if defined $value && $value ne '';

      $self->{trusted_networks} = $self->new_netset('trusted_networks',1);
      $self->{trusted_networks_configured} = 0;
    }
  });

=item internal_networks IPaddress[/masklen] ... (default: none)

What networks or hosts are 'internal' in your setup.
B<Internal> means
that relay hosts on these networks are considered to be MXes for your
domain(s), or internal relays. This uses the same syntax as
C<trusted_networks>, above - see there for details.

This value is used when checking 'dial-up' or dynamic IP address
blocklists, in order to detect direct-to-MX spamming.

Trusted relays that accept mail directly from dial-up connections
(i.e. are also performing a role of mail submission agents - MSA)
should not be listed in C<internal_networks>. List them only in
C<trusted_networks>.

If C<trusted_networks> is set and C<internal_networks> is not, the value
of C<trusted_networks> will be used for this parameter.

If neither C<trusted_networks> nor C<internal_networks> is set, no addresses
will be considered local; in other words, any relays past the machine where
SpamAssassin is running will be considered external.

Every entry in C<internal_networks> must appear in C<trusted_networks>; in
other words, C<internal_networks> is always a subset of the trusted set.

Note: 127/8 and ::1 are always included in internal_networks, regardless of
your config.

=cut

  push(@cmds, {
    setting => 'internal_networks',
    type    => $CONF_TYPE_IPADDRLIST,
  });

=item clear_internal_networks

Empty the list of internal networks.

=cut

  # clear_internal_networks: replaces the internal set with a fresh netset
  # and marks internal_networks as not configured.
  push(@cmds, {
    setting => 'clear_internal_networks',
    type    => $CONF_TYPE_NOARGS,
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      # this directive takes no arguments
      return $INVALID_VALUE  if defined $value && $value ne '';

      $self->{internal_networks} = $self->new_netset('internal_networks',1);
      $self->{internal_networks_configured} = 0;
    }
  });

=item msa_networks IPaddress[/masklen] ... (default: none)

The networks or hosts which are acting as MSAs in your setup (but not also
as MX relays).
This uses the same syntax as C<trusted_networks>, above - see
there for details.

B<MSA> means that the relay hosts on these networks accept mail from your
own users and authenticate them appropriately. These relays will never
accept mail from hosts that aren't authenticated in some way. Examples of
authentication include IP lists, SMTP AUTH, POP-before-SMTP, etc.

All relays found in the message headers after the MSA relay will take
on the same trusted and internal classifications as the MSA relay itself,
as defined by your I<trusted_networks> and I<internal_networks> configuration.

For example, if the MSA relay is trusted and internal so will all of the
relays that precede it.

When using msa_networks to identify an MSA it is recommended that you treat
that MSA as both trusted and internal. When an MSA is not included in
msa_networks you should treat the MSA as trusted but not internal, however
if the MSA is also acting as an MX or intermediate relay you must always
treat it as both trusted and internal and ensure that the MSA includes
visible auth tokens in its Received header to identify submission clients.

B<Warning:> Never include an MSA that also acts as an MX (or is also an
intermediate relay for an MX) or otherwise accepts mail from
non-authenticated users in msa_networks. Doing so will result in unknown
external relays being trusted.

=cut

  push(@cmds, {
    setting => 'msa_networks',
    type    => $CONF_TYPE_IPADDRLIST,
  });

=item clear_msa_networks

Empty the list of msa networks.

=cut

  # clear_msa_networks: replaces the MSA set with a fresh netset and marks
  # msa_networks as not configured.
  push(@cmds, {
    setting => 'clear_msa_networks',
    type    => $CONF_TYPE_NOARGS,
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      # this directive takes no arguments
      return $INVALID_VALUE  if defined $value && $value ne '';

      $self->{msa_networks} =
        $self->new_netset('msa_networks',0);  # no loopback IP
      $self->{msa_networks_configured} = 0;
    }
  });

=item originating_ip_headers header ... (default: X-Yahoo-Post-IP X-Originating-IP X-Apparently-From X-SenderIP)

A list of header field names from which an originating IP address can
be obtained. For example, webmail servers may record a client IP address
in X-Originating-IP.

These IP addresses are virtually appended into the Received: chain, so they
are used in RBL checks where appropriate.

Currently the IP addresses are not added into X-Spam-Relays-* header fields,
but they may be in the future.

=cut

  # originating_ip_headers: appends each whitespace-separated header field
  # name to the list, skipping names already present.
  push(@cmds, {
    setting => 'originating_ip_headers',
    default => [],
    type    => $CONF_TYPE_STRINGLIST,
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      if (!defined $value || $value =~ /^$/) {
        return $MISSING_REQUIRED_VALUE;
      }
      for my $hfname (split(/\s+/, $value)) {
        # avoid duplicates, consider header field names case-insensitive
        next  if grep(lc($_) eq lc($hfname), @{$self->{originating_ip_headers}});
        push(@{$self->{originating_ip_headers}}, $hfname);
      }
    }
  });

=item clear_originating_ip_headers

Empty the list of 'originating IP address' header field names.

=cut

  # clear_originating_ip_headers: resets the list to an empty array.
  push(@cmds, {
    setting => 'clear_originating_ip_headers',
    type    => $CONF_TYPE_NOARGS,
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      # this directive takes no arguments
      return $INVALID_VALUE  if defined $value && $value ne '';

      $self->{originating_ip_headers} = [];
    }
  });

=item always_trust_envelope_sender ( 0 | 1 ) (default: 0)

Trust the envelope sender even if the message has been passed through one or
more trusted relays. See also C<envelope_sender_header>.

=cut

  push(@cmds, {
    setting => 'always_trust_envelope_sender',
    default => 0,
    type    => $CONF_TYPE_BOOL,
  });

=item skip_rbl_checks ( 0 | 1 ) (default: 0)

Turning on the skip_rbl_checks setting will disable the DNSEval plugin,
which implements Real-time Block List (or: Blackhole List) (RBL) lookups.

By default, SpamAssassin will run RBL checks. Individual blocklists may
be disabled selectively by setting a score of a corresponding rule to 0.

See also a related configuration parameter skip_uribl_checks,
which controls the URIDNSBL plugin (documented in the URIDNSBL man page).

=cut

  push(@cmds, {
    setting => 'skip_rbl_checks',
    default => 0,
    type    => $CONF_TYPE_BOOL,
  });

=item dns_available { yes | no | test[: domain1 domain2...] } (default: yes)

Tells SpamAssassin whether DNS resolving is available or not. A value I<yes>
indicates DNS resolving is available, a value I<no> indicates DNS resolving
is not available - both of these values apply unconditionally and skip initial
DNS tests, which can be slow or unreliable.

When the option value is a I<test> (with or without arguments), SpamAssassin
will query some domain names on the internet during initialization, attempting
to determine if DNS resolving is working or not.
A space-separated list
of domain names may be specified explicitly, or left to a built-in default
of a dozen or so domain names. From an explicit or a default list a subset
of three domain names is picked randomly for checking. The test queries for
NS records of these domains: if at least one query returns a success then
SpamAssassin considers DNS resolving as available, otherwise not.

The problem is that the test can introduce some startup delay if a network
connection is down, and in some cases it can wrongly guess that DNS is
unavailable because a test connection failed, which disables several
DNS-dependent tests.

Please note, the DNS test queries for NS records, so specify domain names,
not host names.

Since version 3.4.0 of SpamAssassin a default setting for option
I<dns_available> is I<yes>. A default in older versions was I<test>.

=cut

  # dns_available: "test" (optionally followed by ": domains") is stored
  # verbatim; yes/1 and no/0 are stored as 'yes' and 'no'; anything else is
  # invalid.
  push(@cmds, {
    setting => 'dns_available',
    default => 'yes',
    type    => $CONF_TYPE_STRING,
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      my $setting =
          $value =~ /^test(?::\s*\S.*)?$/ ? $value
        : $value =~ /^(?:yes|1)$/         ? 'yes'
        : $value =~ /^(?:no|0)$/          ? 'no'
        :                                   undef;
      return $INVALID_VALUE  if !defined $setting;
      $self->{dns_available} = $setting;
    }
  });

=item dns_server ip-addr-port (default: entries provided by Net::DNS)

Specifies an IP address of a DNS server, and optionally its port number.
The I<dns_server> directive may be specified multiple times, each entry
adding to a list of available resolving name servers. The I<ip-addr-port>
argument can either be an IPv4 or IPv6 address, optionally enclosed in
brackets, and optionally followed by a colon and a port number. In absence
of a port number a standard port number 53 is assumed.
When an IPv6 address
is specified along with a port number, the address B<must> be enclosed in
brackets to avoid parsing ambiguity regarding a colon separator. A scoped
link-local IP address is allowed (assuming underlying modules allow it).

Examples :

  dns_server 127.0.0.1
  dns_server 127.0.0.1:53
  dns_server [127.0.0.1]:53
  dns_server [::1]:53
  dns_server fe80::1%lo0
  dns_server [fe80::1%lo0]:53

In absence of I<dns_server> directives, the list of name servers is provided
by Net::DNS module, which typically obtains the list from /etc/resolv.conf,
but this may be platform dependent. Please consult the Net::DNS::Resolver
documentation for details.

=cut

  # dns_server: parses "address", "[address]", "address:port" or
  # "[address]:port" and appends the server, normalized to the form
  # "[address%scope]:port", to $self->{dns_servers}.
  push(@cmds, {
    setting => 'dns_server',
    type    => $CONF_TYPE_STRING,
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      my ($address, $port);
      local ($1,$2,$3);
      if ($value =~ /^(?: \[ ([^\]]*) \] | ([^:]*) ) : (\d+) \z/sx) {
        # explicit port number
        ($address, $port) = (defined $1 ? $1 : $2, $3);
      }
      elsif ($value =~ /^(?: \[ ([^\]]*) \] |
                           ([0-9A-F.:]+ (?: %[A-Z0-9._~-]* )? ) ) \z/six) {
        # no port number given, assume the standard one
        ($address, $port) = (defined $1 ? $1 : $2, '53');
      }
      else {
        return $INVALID_VALUE;
      }

      # scoped IP address?  split the scope off before validating
      my $scope = '';
      if ($address =~ s/ ( % [A-Z0-9._~-]* ) \z//xsi) {
        $scope = $1;
      }

      my $IP_ADDRESS = IP_ADDRESS;  # IP_ADDRESS regexp does not handle scope
      # NOTE(review): the match below adds no anchors of its own; whether
      # IP_ADDRESS is anchored is not visible from here - confirm.
      unless ($address =~ /$IP_ADDRESS/ && $port >= 1 && $port <= 65535) {
        return $INVALID_VALUE;
      }

      $self->{dns_servers} = []  if !$self->{dns_servers};
      # checked, untainted, stored in a normalized form
      push(@{$self->{dns_servers}}, untaint_var("[$address$scope]:$port"));
    }
  });

=item clear_dns_servers

Empty the list of explicitly configured DNS servers through a I<dns_server>
directive, falling back to Net::DNS -supplied defaults.

=cut

  # clear_dns_servers: undefines the explicitly configured server list.
  push(@cmds, {
    setting => 'clear_dns_servers',
    type    => $CONF_TYPE_NOARGS,
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      # this directive takes no arguments
      return $INVALID_VALUE  if defined $value && $value ne '';

      undef $self->{dns_servers};
    }
  });

=item dns_local_ports_permit ranges...

Add the specified ports or ports ranges to the set of allowed port numbers
that can be used as local port numbers when sending DNS queries to a resolver.

The argument is a whitespace-separated or a comma-separated list of
single port numbers n, or port number pairs (i.e. m-n) delimited by a '-',
representing a range. Allowed port numbers are between 1 and 65535.

Directives I<dns_local_ports_permit> and I<dns_local_ports_avoid> are processed
in order in which they appear in configuration files. Each directive adds
(or subtracts) its subsets of ports to a current set of available ports.
Whatever is left in the set by the end of configuration processing
is made available to a DNS resolving client code.

If the resulting set of port numbers is empty (see also the directive
I<dns_local_ports_none>), then SpamAssassin does not apply its ports
randomization logic, but instead leaves the operating system to choose
a suitable free local port number.

The initial set consists of all port numbers in the range 1024-65535.
Note that system config files already modify the set and remove all the
IANA registered port numbers and some other ranges, so there is rarely
a need to adjust the ranges by site-specific directives.

See also directives I<dns_local_ports_avoid> and I<dns_local_ports_none>.

=cut

  # dns_local_ports_permit: validates the whole list of ports / port ranges
  # first, then marks each range as available in the ports bitset. Nothing
  # is changed if any element of the list is invalid.
  push(@cmds, {
    setting  => 'dns_local_ports_permit',
    type     => $CONF_TYPE_STRING,
    is_admin => 1,
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      my @port_ranges;
      local ($1,$2);
      for my $range (split(/[ \t,]+/, $value)) {
        my ($first, $last);
        if ($range =~ /^(\d{1,5})\z/) {
          ($first, $last) = ($1, $1);
        }
        elsif ($range =~ /^(\d{1,5})-(\d{1,5})\z/) {
          ($first, $last) = ($1, $2);
        }
        else {
          return $INVALID_VALUE;
        }
        # don't allow adding a port number 0
        # NOTE(review): a reversed range (m > n) is passed through as is;
        # how set_ports_range treats it is not visible from here.
        for my $port ($first, $last) {
          return $INVALID_VALUE  if $port < 1 || $port > 65535;
        }
        push(@port_ranges, [$first, $last]);
      }
      for my $p (@port_ranges) {
        undef $self->{dns_available_portscount};  # invalidate derived data
        set_ports_range(\$self->{dns_available_ports_bitset},
                        $p->[0], $p->[1], 1);
      }
    }
  });

=item dns_local_ports_avoid ranges...

Remove specified ports or ports ranges from the set of allowed port numbers
that can be used as local port numbers when sending DNS queries to a resolver.

Please see directive I<dns_local_ports_permit> for details.

=cut

  # dns_local_ports_avoid: validates the whole list of ports / port ranges
  # first, then marks each range as unavailable in the ports bitset. Nothing
  # is changed if any element of the list is invalid.
  push(@cmds, {
    setting  => 'dns_local_ports_avoid',
    type     => $CONF_TYPE_STRING,
    is_admin => 1,
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      my @port_ranges;
      local ($1,$2);
      for my $range (split(/[ \t,]+/, $value)) {
        my ($first, $last);
        if ($range =~ /^(\d{1,5})\z/) {
          ($first, $last) = ($1, $1);
        }
        elsif ($range =~ /^(\d{1,5})-(\d{1,5})\z/) {
          ($first, $last) = ($1, $2);
        }
        else {
          return $INVALID_VALUE;
        }
        # don't mind clearing also the port number 0
        return $INVALID_VALUE  if $first > 65535 || $last > 65535;
        push(@port_ranges, [$first, $last]);
      }
      for my $p (@port_ranges) {
        undef $self->{dns_available_portscount};  # invalidate derived data
        set_ports_range(\$self->{dns_available_ports_bitset},
                        $p->[0], $p->[1], 0);
      }
    }
  });

=item dns_local_ports_none

Is a fast shorthand for:

  dns_local_ports_avoid 1-65535

leaving the set of available DNS query local port numbers empty. In all
respects (apart from speed) it is equivalent to the shown directive, and can
be freely mixed with I<dns_local_ports_permit> and I<dns_local_ports_avoid>.

If the resulting set of port numbers is empty, then SpamAssassin does not
apply its ports randomization logic, but instead leaves the operating system
to choose a suitable free local port number.

See also directives I<dns_local_ports_permit> and I<dns_local_ports_avoid>.

=cut

  # dns_local_ports_none: wipes the whole ports bitset in one call.
  push(@cmds, {
    setting  => 'dns_local_ports_none',
    type     => $CONF_TYPE_NOARGS,
    is_admin => 1,
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      # this directive takes no arguments
      return $INVALID_VALUE  if defined $value && $value ne '';

      undef $self->{dns_available_portscount};  # invalidate derived data
      wipe_ports_range(\$self->{dns_available_ports_bitset}, 0);
    }
  });

=item dns_test_interval n (default: 600 seconds)

If dns_available is set to I<test>, the dns_test_interval time in number
of seconds will tell SpamAssassin how often to retest for working DNS.
A numeric value is optionally suffixed by a time unit (s, m, h, d, w,
indicating seconds (default), minutes, hours, days, weeks).

=cut

  push(@cmds, {
    setting => 'dns_test_interval',
    default => 600,
    type    => $CONF_TYPE_DURATION,
  });

=item dns_options opts (default: norotate, nodns0x20, edns=4096)

Provides a (whitespace or comma -separated) list of options applying
to DNS resolving. Available options are: I<rotate>, I<dns0x20> and
I<edns> (or I<edns0>). Option name may be negated by prepending a I<no>
(e.g. I<norotate>, I<NoEDNS>) to counteract a previously enabled option.
Option names are not case-sensitive. The I<dns_options> directive may
appear in configuration files multiple times, the last setting prevails.

Option I<edns> (or I<edns0>) may take a value which specifies a requestor's
acceptable UDP payload size according to EDNS0 specifications (RFC 6891,
ex RFC 2671) e.g. I<edns=4096>. When EDNS0 is off (I<noedns> or I<edns=512>)
a traditional implied UDP payload size is 512 bytes, which is also a minimum
allowed value for this option. When the option is specified but a value
is not provided, a conservative default of 1220 bytes is implied.
It is
recommended to keep I<edns> enabled when using a local recursive DNS server
which supports EDNS0 (like most modern DNS servers do), a suitable setting
in this case is I<edns=4096>, which is also a default. Allowing UDP payload
size larger than 512 bytes can avoid truncation of resource records in large
DNS responses (like in TXT records of some SPF and DKIM responses, or when
an unreasonable number of A records is published by some domain). The option
should be disabled when a recursive DNS server is only reachable through
non-RFC 6891 compliant middleboxes (such as some old-fashioned firewalls)
which ban DNS UDP payload sizes larger than 512 bytes. A suitable value
when a non-local recursive DNS server is used and a middlebox B<does> allow
EDNS0 but blocks fragmented IP packets is perhaps 1220 bytes, allowing a
DNS UDP packet to fit within a single IP packet in most cases (a slightly
less conservative range would be 1280-1410 bytes).

Option I<rotate> causes SpamAssassin to choose a DNS server at random
from all servers listed in C</etc/resolv.conf> every I<dns_test_interval>
seconds, effectively spreading the load over all currently available DNS
servers when there are many spamd workers.

Option I<dns0x20> enables randomization of letters in a DNS query label
according to draft-vixie-dnsext-dns0x20, decreasing a chance of collisions
of responses (by chance or by a malicious intent) by increasing spread
as provided by a 16-bit query ID and up to 16 bits of a port number,
with additional bits as encoded by flipping case (upper/lower) of letters
in a query. The number of additional random bits corresponds to the number
of letters in a query label.
Should work reliably with all mainstream
DNS servers - do not turn on if you see frequent info messages
"dns: no callback for id:" in the log, or if RBL or URIDNS lookups
do not work for no apparent reason.

=cut

  # dns_options: parses a whitespace/comma separated, case-insensitive list
  # of option names into $self->{dns_options}.  Options are handled left to
  # right; the handler returns $INVALID_VALUE at the first unrecognized or
  # out-of-range option (options preceding it have already been applied).
  push (@cmds, {
    setting => 'dns_options',
    type => $CONF_TYPE_HASH_KEY_VALUE,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      foreach my $option (split (/[\s,]+/, lc $value)) {
        local($1,$2);
        if ($option =~ /^no(rotate|dns0x20)\z/) {
          $self->{dns_options}->{$1} = 0;
        } elsif ($option =~ /^no(edns)0?\z/) {
          $self->{dns_options}->{$1} = 0;
        } elsif ($option =~ /^(rotate|dns0x20)\z/) {
          $self->{dns_options}->{$1} = 1;
        } elsif ($option =~ /^(edns)0? (?: = (\d+) )? \z/x) {
          # RFC 6891 (ex RFC 2671) - EDNS0, value is a requestor's UDP payload
          # size, defaults to some UDP packet size likely to fit into a single
          # IP packet which is more likely to pass firewalls which choke on IP
          # fragments.  RFC 2460: min MTU is 1280 for IPv6, minus 40 bytes for
          # basic header, yielding 1240.  RFC 3226 prescribes a min of 1220 for
          # RFC 2535 compliant servers.  RFC 6891: choosing between 1280 and
          # 1410 bytes for IP (v4 or v6) over Ethernet would be reasonable.
          #
          # The default applies only when no value was given at all: an
          # explicit "edns=0" is below the documented minimum of 512 and must
          # be rejected, not silently replaced by the default.
          my($name, $size) = ($1, defined $2 ? $2 : 1220);
          # validate before storing, so that a rejected value never ends up
          # in the configuration
          return $INVALID_VALUE  if $size < 512;
          $self->{dns_options}->{$name} = $size;
        } else {
          return $INVALID_VALUE;
        }
      }
    }
  });

=item dns_query_restriction (allow|deny) domain1 domain2 ...

Option allows disabling of rules which would result in a DNS query to one of
the listed domains. The first argument must be a literal C<allow> or C<deny>,
remaining arguments are domain names.

Most DNS queries (with some exceptions) are subject to dns_query_restriction.
A domain to be queried is successively stripped-off of its leading labels
(thus yielding a series of its parent domains), and on each iteration a
check is made against an associative array generated by dns_query_restriction
options. Search stops at the first match (i.e. the tightest match), and the
matching entry with its C<allow> or C<deny> value then controls whether a
DNS query is allowed to be launched.

If no match is found an implicit default is to allow a query. The purpose of
an explicit C<allow> entry is to be able to override a previously configured
C<deny> on the same domain or to override an entry (possibly yet to be
configured in subsequent config directives) on one of its parent domains.
Thus an 'allow zen.spamhaus.org' with a 'deny spamhaus.org' would permit
DNS queries on a specific DNS BL zone but deny queries to other zones under
the same parent domain.

Domains are matched case-insensitively, no wildcards are recognized,
there should be no leading or trailing dot.

Specifying a block on querying a domain name has a similar effect as setting
a score of corresponding DNSBL and URIBL rules to zero, and can be a handy
alternative to hunting for such rules when a site policy does not allow
certain DNS block lists to be queried.

Example:

  dns_query_restriction deny dnswl.org surbl.org
  dns_query_restriction allow zen.spamhaus.org
  dns_query_restriction deny spamhaus.org mailspike.net spamcop.net

=cut

  # dns_query_restriction: the first word must be 'allow' or 'deny' (any
  # letter case); every following domain is lowercased, stripped of one
  # leading and one trailing dot, and recorded in $self->{dns_query_blocked}
  # with value 1 (deny) or 0 (allow).
  push (@cmds, {
    setting => 'dns_query_restriction',
    type => $CONF_TYPE_STRING,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      local($1);  # keep the capture private, as sibling handlers do
      defined $value && $value =~ s/^(allow|deny)\s+//i
        or return $INVALID_VALUE;
      my $blocked = lc($1) eq 'deny' ? 1 : 0;
      foreach my $domain (split(/\s+/, $value)) {
        $domain =~ s/^\.//; $domain =~ s/\.\z//;  # strip dots
        # an argument consisting only of a dot leaves nothing to register;
        # do not create an entry under an empty key
        next  if $domain eq '';
        $self->{dns_query_blocked}{lc $domain} = $blocked;
      }
    }
  });

=item clear_dns_query_restriction

The option removes any entries entered by previous 'dns_query_restriction'
options, leaving the list empty, i.e. allowing DNS queries for any domain
(including any DNS BL zone).

=cut

  # takes no arguments; drops the whole dns_query_blocked table
  push (@cmds, {
    setting => 'clear_dns_query_restriction',
    aliases => ['clear_dns_query_restrictions'],
    type => $CONF_TYPE_NOARGS,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      return $INVALID_VALUE  if defined $value && $value ne '';
      delete $self->{dns_query_blocked};
    }
  });

=back

=head2 LEARNING OPTIONS

=over 4

=item use_learner ( 0 | 1 ) (default: 1)

Whether to use any machine-learning classifiers with SpamAssassin, such as the
default 'BAYES_*' rules. Setting this to 0 will disable use of any and all
human-trained classifiers.

=cut

  push (@cmds, {
    setting => 'use_learner',
    default => 1,
    type => $CONF_TYPE_BOOL,
  });

=item use_bayes ( 0 | 1 ) (default: 1)

Whether to use the naive-Bayesian-style classifier built into
SpamAssassin. This is a master on/off switch for all Bayes-related
operations.

=cut

  push (@cmds, {
    setting => 'use_bayes',
    default => 1,
    type => $CONF_TYPE_BOOL,
  });

=item use_bayes_rules ( 0 | 1 ) (default: 1)

Whether to use rules using the naive-Bayesian-style classifier built
into SpamAssassin. This allows you to disable the rules while leaving
auto and manual learning enabled.

=cut

  # boolean setting, enabled by default
  push @cmds, {
    setting => 'use_bayes_rules',
    default => 1,
    type    => $CONF_TYPE_BOOL,
  };

=item bayes_auto_learn ( 0 | 1 ) (default: 1)

Whether SpamAssassin should automatically feed high-scoring mails (or
low-scoring mails, for non-spam) into its learning systems. The only
learning system supported currently is a naive-Bayesian-style classifier.

See the documentation for the
C<Mail::SpamAssassin::Plugin::AutoLearnThreshold> plugin module
for details on how Bayes auto-learning is implemented by default.

=cut

  # boolean setting, enabled by default
  push @cmds, {
    setting => 'bayes_auto_learn',
    default => 1,
    type    => $CONF_TYPE_BOOL,
  };

=item bayes_token_sources (default: header visible invisible uri)

Controls which sources in a mail message can contribute tokens (e.g. words,
phrases, etc.) to a Bayes classifier. The argument is a space-separated list
of keywords (I<header>, I<visible>, I<invisible>, I<uri>, I<mimepart>), each
of which may be prefixed by a I<no> to indicate its exclusion. Additionally
two reserved keywords are allowed: I<all> and I<none> (or: I<noall>). The list
of keywords is processed sequentially: a keyword I<all> adds all available
keywords to a set being built, a I<none> or I<noall> clears the set, other
non-negated keywords are added to the set, and negated keywords are removed
from the set. Keywords are case-insensitive.

The default set is: I<header> I<visible> I<invisible> I<uri>, which is
equivalent for example to: I<All> I<NoMIMEpart>. The reason why I<mimepart>
is not currently in a default set is that it is a newer source (introduced
with SpamAssassin version 3.4.1) and not much experience has yet been gathered
regarding its usefulness.

See also option C<bayes_ignore_header> for a fine-grained control on individual
header fields under the umbrella of a more general keyword I<header> here.

Keywords imply the following data sources:

=over 4

=item I<header> - tokens collected from a message header section

=item I<visible> - words from visible text (plain or HTML) in a message body

=item I<invisible> - hidden/invisible text in HTML parts of a message body

=item I<uri> - URIs collected from a message body

=item I<mimepart> - digests (hashes) of all MIME parts (textual or non-textual) of a message, computed after Base64 and quoted-printable decoding, suffixed by their Content-Type

=item I<all> - adds all the above keywords to the set being assembled

=item I<none> or I<noall> - removes all keywords from the set

=back

The C<bayes_token_sources> directive may appear multiple times, its keywords
are interpreted sequentially, adding or removing items from the final set
as they appear in their order in C<bayes_token_sources> directive(s).

=cut

  # bayes_token_sources: keywords are lowercased and applied one by one to
  # the hash in $self->{bayes_token_sources}: 'none'/'noall' empties it,
  # 'all' sets every known keyword, 'KEYWORD' stores 1 and 'noKEYWORD'
  # stores 0.  Anything else yields $INVALID_VALUE.
  push @cmds, {
    setting => 'bayes_token_sources',
    default => { map(($_,1), qw(header visible invisible uri)) },  # mimepart
    type    => $CONF_TYPE_HASH_KEY_VALUE,
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      return $MISSING_REQUIRED_VALUE  if $value eq '';
      my $sources = ($self->{bayes_token_sources} ||= {});
      my %known = map(($_,1), qw(header visible invisible uri mimepart));
      for my $keyword (split(/\s+/, lc $value)) {
        if ($keyword eq 'none' || $keyword eq 'noall') {
          %$sources = ();
        } elsif ($keyword eq 'all') {
          %$sources = %known;
        } elsif ($keyword =~ /^(no)?(.+)\z/s && exists $known{$2}) {
          # $1 is defined only when the keyword carried the 'no' prefix
          $sources->{$2} = defined $1 ? 0 : 1;
        } else {
          return $INVALID_VALUE;
        }
      }
    }
  };

=item bayes_ignore_header header_name

If you receive mail filtered by upstream mail systems, like
a spam-filtering ISP or mailing list, and that service adds
new headers (as most of them do), these headers may provide
inappropriate cues to the Bayesian classifier, allowing it
to take a "short cut". To avoid this, list the headers using this
setting. Example:

  bayes_ignore_header X-Upstream-Spamfilter
  bayes_ignore_header X-Upstream-SomethingElse

=cut

  # header names are appended to the list kept under the (plural) key
  # 'bayes_ignore_headers'
  push @cmds, {
    setting => 'bayes_ignore_header',
    default => [],
    type    => $CONF_TYPE_STRINGLIST,
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      return $MISSING_REQUIRED_VALUE  if $value eq '';
      push @{ $self->{bayes_ignore_headers} }, split(/\s+/, $value);
    }
  };

=item bayes_ignore_from user@example.com

Bayesian classification and autolearning will not be performed on mail
from the listed addresses. Program C<sa-learn> will also ignore the
listed addresses if it is invoked using the C<--use-ignores> option.
One or more addresses can be listed, see C<whitelist_from>.

Spam messages from certain senders may contain many words that
frequently occur in ham. For example, one might read messages from a
preferred bookstore but also get unwanted spam messages from other
bookstores. If the unwanted messages are learned as spam then any
messages discussing books, including the preferred bookstore and
antiquarian messages would be in danger of being marked as spam. The
addresses of the annoying bookstores would be listed. (Assuming they
were halfway legitimate and didn't send you mail through myriad
affiliates.)

Those who have pieces of spam in legitimate messages or otherwise
receive ham messages containing potentially spammy words might fear
that some spam messages might be in danger of being marked as ham.
The addresses of the spam mailing lists, correspondents, etc. would
be listed.

=cut

  # address-list setting, no default
  push @cmds, {
    setting => 'bayes_ignore_from',
    type    => $CONF_TYPE_ADDRLIST,
  };

=item bayes_ignore_to user@example.com

Bayesian classification and autolearning will not be performed on mail
to the listed addresses. See C<bayes_ignore_from> for details.

=cut

  # address-list setting, no default
  push @cmds, {
    setting => 'bayes_ignore_to',
    type    => $CONF_TYPE_ADDRLIST,
  };

=item bayes_min_ham_num (Default: 200)

=item bayes_min_spam_num (Default: 200)

To be accurate, the Bayes system does not activate until a certain number of
ham (non-spam) and spam have been learned. The default is 200 of each ham and
spam, but you can tune these up or down with these two settings.

=cut

  # two numeric settings sharing the same default; registered in the
  # order ham, spam
  push @cmds,
    {
      setting => 'bayes_min_ham_num',
      default => 200,
      type    => $CONF_TYPE_NUMERIC,
    },
    {
      setting => 'bayes_min_spam_num',
      default => 200,
      type    => $CONF_TYPE_NUMERIC,
    };

=item bayes_learn_during_report (Default: 1)

The Bayes system will, by default, learn any reported messages
(C<spamassassin -r>) as spam. If you do not want this to happen, set
this option to 0.

=cut

  # boolean setting, enabled by default
  push @cmds, {
    setting => 'bayes_learn_during_report',
    default => 1,
    type    => $CONF_TYPE_BOOL,
  };

=item bayes_sql_override_username

Used by BayesStore::SQL storage implementation.

If this option is set the BayesStore::SQL module will override the set
username with the value given. This could be useful for implementing global or
group bayes databases.

=cut

  # string setting, empty by default
  push @cmds, {
    setting => 'bayes_sql_override_username',
    default => '',
    type    => $CONF_TYPE_STRING,
  };

=item bayes_use_hapaxes (default: 1)

Should the Bayesian classifier use hapaxes (words/tokens that occur only
once) when classifying? This produces significantly better hit-rates.

=cut

  # boolean setting, enabled by default
  push @cmds, {
    setting => 'bayes_use_hapaxes',
    default => 1,
    type    => $CONF_TYPE_BOOL,
  };

=item bayes_journal_max_size (default: 102400)

SpamAssassin will opportunistically sync the journal and the database.
It will do so once a day, but will sync more often if the journal file
size goes above this setting, in bytes. If set to 0, opportunistic
syncing will not occur.

=cut

  # numeric setting (bytes)
  push @cmds, {
    setting => 'bayes_journal_max_size',
    default => 102400,
    type    => $CONF_TYPE_NUMERIC,
  };

=item bayes_expiry_max_db_size (default: 150000)

What should be the maximum size of the Bayes tokens database? When expiry
occurs, the Bayes system will keep either 75% of the maximum value, or
100,000 tokens, whichever has a larger value. 150,000 tokens is roughly
equivalent to an 8Mb database file.

=cut

  # numeric setting (number of tokens)
  push @cmds, {
    setting => 'bayes_expiry_max_db_size',
    default => 150000,
    type    => $CONF_TYPE_NUMERIC,
  };

=item bayes_auto_expire (default: 1)

If enabled, the Bayes system will try to automatically expire old tokens
from the database. Auto-expiry occurs when the number of tokens in the
database surpasses the bayes_expiry_max_db_size value. If a bayes datastore
backend does not implement individual key/value expirations, the setting
is silently ignored.

=cut

  # boolean setting, enabled by default
  push @cmds, {
    setting => 'bayes_auto_expire',
    default => 1,
    type    => $CONF_TYPE_BOOL,
  };

=item bayes_token_ttl (default: 3w, i.e. 3 weeks)

Time-to-live / expiration time in seconds for tokens kept in a Bayes database.
A numeric value is optionally suffixed by a time unit (s, m, h, d, w,
indicating seconds (default), minutes, hours, days, weeks).

If bayes_auto_expire is true and a Bayes datastore backend supports it
(currently only Redis), this setting controls deletion of expired tokens
from a bayes database. The value is observed on a best-effort basis, exact
timing promises are not necessarily kept. If a bayes datastore backend
does not implement individual key/value expirations, the setting is silently
ignored.

=cut

  # duration setting
  push @cmds, {
    setting => 'bayes_token_ttl',
    default => 3*7*24*60*60,  # seconds (3 weeks)
    type    => $CONF_TYPE_DURATION,
  };

=item bayes_seen_ttl (default: 8d, i.e. 8 days)

Time-to-live / expiration time in seconds for 'seen' entries
(i.e. mail message digests with their status) kept in a Bayes database.
A numeric value is optionally suffixed by a time unit (s, m, h, d, w,
indicating seconds (default), minutes, hours, days, weeks).

If bayes_auto_expire is true and a Bayes datastore backend supports it
(currently only Redis), this setting controls deletion of expired 'seen'
entries from a bayes database. The value is observed on a best-effort basis,
exact timing promises are not necessarily kept. If a bayes datastore backend
does not implement individual key/value expirations, the setting is silently
ignored.

=cut

  # duration setting
  push @cmds, {
    setting => 'bayes_seen_ttl',
    default => 8*24*60*60,  # seconds (8 days)
    type    => $CONF_TYPE_DURATION,
  };

=item bayes_learn_to_journal (default: 0)

If this option is set, whenever SpamAssassin does Bayes learning, it
will put the information into the journal instead of directly into the
database.
This lowers contention for locking the database to execute
an update, but will also cause more access to the journal and cause a
delay before the updates are actually committed to the Bayes database.

=cut

  # boolean setting, disabled by default
  push @cmds, {
    setting => 'bayes_learn_to_journal',
    default => 0,
    type    => $CONF_TYPE_BOOL,
  };

=back

=head2 MISCELLANEOUS OPTIONS

=over 4

=item time_limit n (default: 300)

Specifies a limit on elapsed time in seconds that SpamAssassin is allowed
to spend before providing a result. The value may be fractional and must
not be negative, zero is interpreted as unlimited. The default is 300
seconds for consistency with the spamd default setting of --timeout-child .

This is a best-effort advisory setting, processing will not be abruptly
aborted at an arbitrary point in processing when the time limit is exceeded,
but only on reaching one of locations in the program flow equipped with a
time test. Currently equipped with the test are the main checking loop,
asynchronous DNS lookups, plugins which are calling external programs.
Rule evaluation is guarded by starting a timer (alarm) on each set of
compiled rules.

When a message is passed to Mail::SpamAssassin::parse, a deadline time
is established as a sum of current time and the C<time_limit> setting.

This deadline may also be specified by a caller through an option
'master_deadline' in $suppl_attrib on a call to parse(), possibly providing
a more accurate deadline taking into account past and expected future
processing of a message in a mail filtering setup. If both the config
option as well as a 'master_deadline' option in a call are provided,
the shorter time limit of the two is used (since version 3.3.2).
Note that spamd (and possibly third-party callers of SpamAssassin) will
supply the 'master_deadline' option in a call based on its --timeout-child
option (or equivalent), unlike the command line C<spamassassin>, which has
no such command line option.

When a time limit is exceeded, most of the remaining tests will be skipped,
as well as auto-learning. Whatever tests fired so far will determine the
final score. The behaviour is similar to short-circuiting with attribute 'on',
as implemented by a Shortcircuit plugin. A synthetic hit on a rule named
TIME_LIMIT_EXCEEDED with a near-zero default score is generated, so that
the report will reflect the event. A score for TIME_LIMIT_EXCEEDED may
be provided explicitly in a configuration file, for example to achieve
whitelisting or blacklisting effect for messages with long processing times.

The C<time_limit> option is a useful protection against excessive processing
time on certain degenerate or unusually long or complex mail messages, as well
as against some DoS attacks. It is also needed in time-critical pre-queue
filtering setups (e.g. milter, proxy, integration with MTA), where message
processing must finish before a SMTP client times out. RFC 5321 prescribes
in section 4.5.3.2.6 the 'DATA Termination' time limit of 10 minutes,
although it is not unusual to see some SMTP clients abort sooner on waiting
for a response. A sensible C<time_limit> for a pre-queue filtering setup is
maybe 50 seconds, assuming that clients are willing to wait at least a minute.

=cut

  # duration setting, defaults to 300 seconds
  push @cmds, {
    setting => 'time_limit',
    default => 300,
    type    => $CONF_TYPE_DURATION,
  };

=item lock_method type

Select the file-locking method used to protect database files on-disk.
By
default, SpamAssassin uses an NFS-safe locking method on UNIX; however, if you
are sure that the database files you'll be using for Bayes and AWL storage will
never be accessed over NFS, a non-NFS-safe locking system can be selected.

This will be quite a bit faster, but may risk file corruption if the files are
ever accessed by multiple clients at once, and one or more of them is accessing
them through an NFS filesystem.

Note that different platforms require different locking systems.

The supported locking systems for C<type> are as follows:

=over 4

=item I<nfssafe> - an NFS-safe locking system

=item I<flock> - simple UNIX C<flock()> locking

=item I<win32> - Win32 locking using C<sysopen (..., O_CREAT|O_EXCL)>.

=back

nfssafe and flock are only available on UNIX, and win32 is only available
on Windows. By default, SpamAssassin will choose either nfssafe or
win32 depending on the platform in use.

=cut

  # lock_method: accepts exactly one of 'nfssafe', 'flock' or 'win32',
  # stores it and asks the main object to re-create its locker.
  push (@cmds, {
    setting => 'lock_method',
    default => '',
    type => $CONF_TYPE_STRING,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      # \z rather than $: the latter would also accept a value followed by
      # a newline, which would then be stored verbatim
      if ($value !~ /^(?:nfssafe|flock|win32)\z/) {
        return $INVALID_VALUE;
      }

      $self->{lock_method} = $value;
      # recreate the locker
      $self->{main}->create_locker();
    }
  });

=item fold_headers ( 0 | 1 ) (default: 1)

By default, headers added by SpamAssassin will be whitespace folded.
In other words, they will be broken up into multiple lines instead of
one very long one and each continuation line will have a tabulator
prepended to mark it as a continuation of the preceding one.

The automatic wrapping can be disabled here. Note that this can generate very
long lines. RFC 2822 required that header lines do not exceed 998 characters
(not counting the final CRLF).

=cut

  # boolean setting, enabled by default
  push @cmds, {
    setting => 'fold_headers',
    default => 1,
    type    => $CONF_TYPE_BOOL,
  };

=item report_safe_copy_headers header_name ...

If using C<report_safe>, a few of the headers from the original message
are copied into the wrapper header (From, To, Cc, Subject, Date, etc.)
If you want to have other headers copied as well, you can add them
using this option. You can specify multiple headers on the same line,
separated by spaces, or you can just use multiple lines.

=cut

  # each use appends the whitespace-separated header names to the list
  push @cmds, {
    setting => 'report_safe_copy_headers',
    default => [],
    type    => $CONF_TYPE_STRINGLIST,
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      return $MISSING_REQUIRED_VALUE  if $value eq '';
      push @{ $self->{report_safe_copy_headers} }, split(/\s+/, $value);
    }
  };

=item envelope_sender_header Name-Of-Header

SpamAssassin will attempt to discover the address used in the 'MAIL FROM:'
phase of the SMTP transaction that delivered this message, if this data has
been made available by the SMTP server. This is used in the C<EnvelopeFrom>
pseudo-header, and for various rules such as SPF checking.

By default, various MTAs will use different headers, such as the following:

    X-Envelope-From
    Envelope-Sender
    X-Sender
    Return-Path

SpamAssassin will attempt to use these, if some heuristics (such as the header
placement in the message, or the absence of fetchmail signatures) appear to
indicate that they are safe to use. However, it may choose the wrong headers
in some mailserver configurations. (More discussion of this can be found
in bug 2142 and bug 4747 in the SpamAssassin BugZilla.)

To avoid this heuristic failure, the C<envelope_sender_header> setting may be
helpful.
Name the header that your MTA or MDA adds to messages containing the
address used at the MAIL FROM step of the SMTP transaction.

If the header in question contains C<E<lt>> or C<E<gt>> characters at the start
and end of the email address in the right-hand side, as in the SMTP
transaction, these will be stripped.

If the header is not found in a message, or if its value does not contain an
C<@> sign, SpamAssassin will issue a warning in the logs and fall back to its
default heuristics.

(Note for MTA developers: we would prefer if the use of a single header be
avoided in future, since that precludes 'downstream' spam scanning.
C<http://wiki.apache.org/spamassassin/EnvelopeSenderInReceived> details a
better proposal, storing the envelope sender at each hop in the C<Received>
header.)

example:

    envelope_sender_header X-SA-Exim-Mail-From

=cut

  # string setting, undefined by default
  push @cmds, {
    setting => 'envelope_sender_header',
    default => undef,
    type    => $CONF_TYPE_STRING,
  };

=item describe SYMBOLIC_TEST_NAME description ...

Used to describe a test. This text is shown to users in the detailed report.

Note that test names which begin with '__' are reserved for meta-match
sub-rules, and are not scored or listed in the 'tests hit' reports.

Also note that by convention, rule descriptions should be limited in
length to no more than 50 characters.

=cut

  # the 'describe' command stores into the 'descriptions' hash
  push @cmds, {
    command     => 'describe',
    setting     => 'descriptions',
    is_frequent => 1,
    type        => $CONF_TYPE_HASH_KEY_VALUE,
  };

=item report_charset CHARSET (default: unset)

Set the MIME Content-Type charset used for the text/plain report which
is attached to spam mail messages.

=cut

  # string setting, empty by default
  push @cmds, {
    setting => 'report_charset',
    default => '',
    type    => $CONF_TYPE_STRING,
  };

=item report ...some text for a report...

Set the report template which is attached to spam mail messages. See the
C<10_default_prefs.cf> configuration file in C</usr/share/spamassassin> for an
example.

If you change this, try to keep it under 78 columns. Each C<report>
line appends to the existing template, so use C<clear_report_template>
to restart.

Tags can be included as explained above.

=cut

  # the 'report' command feeds the 'report_template' setting
  push @cmds, {
    command => 'report',
    setting => 'report_template',
    default => '',
    type    => $CONF_TYPE_TEMPLATE,
  };

=item clear_report_template

Clear the report template.

=cut

  # no-argument command acting on 'report_template'
  push @cmds, {
    command => 'clear_report_template',
    setting => 'report_template',
    type    => $CONF_TYPE_NOARGS,
    code    => \&Mail::SpamAssassin::Conf::Parser::set_template_clear
  };

=item report_contact ...text of contact address...

Set what _CONTACTADDRESS_ is replaced with in the above report text.
By default, this is 'the administrator of that system', since the hostname
of the system the scanner is running on is also included.

=cut

  # string setting with a descriptive default
  push @cmds, {
    setting => 'report_contact',
    default => 'the administrator of that system',
    type    => $CONF_TYPE_STRING,
  };

=item report_hostname ...hostname to use...

Set what _HOSTNAME_ is replaced with in the above report text.
By default, this is determined dynamically as whatever the host running
SpamAssassin calls itself.

=cut

  # string setting, empty by default
  push @cmds, {
    setting => 'report_hostname',
    default => '',
    type    => $CONF_TYPE_STRING,
  };

=item unsafe_report ...some text for a report...

Set the report template which is attached to spam mail messages which contain a
non-text/plain part. See the C<10_default_prefs.cf> configuration file in
C</usr/share/spamassassin> for an example.

Each C<unsafe_report> line appends to the existing template, so use
C<clear_unsafe_report_template> to restart.

Tags can be used in this template (see above for details).

=cut

  # the 'unsafe_report' command feeds the 'unsafe_report_template' setting
  push @cmds, {
    command => 'unsafe_report',
    setting => 'unsafe_report_template',
    default => '',
    type    => $CONF_TYPE_TEMPLATE,
  };

=item clear_unsafe_report_template

Clear the unsafe_report template.

=cut

  # no-argument command acting on 'unsafe_report_template'
  push @cmds, {
    command => 'clear_unsafe_report_template',
    setting => 'unsafe_report_template',
    type    => $CONF_TYPE_NOARGS,
    code    => \&Mail::SpamAssassin::Conf::Parser::set_template_clear
  };

=item mbox_format_from_regex

Set a specific regular expression to be used for mbox file From separators.

For example, this setting will allow sa-learn to process emails stored in
a kmail 2 mbox:

mbox_format_from_regex /^From \S+ ?[[:upper:]][[:lower:]]{2}(?:, \d\d [[:upper:]][[:lower:]]{2} \d{4} [0-2]\d:\d\d:\d\d [+-]\d{4}| [[:upper:]][[:lower:]]{2} [ 1-3]\d [ 0-2]\d:\d\d:\d\d \d{4})/


=cut

  # string setting, no default
  push @cmds, {
    setting => 'mbox_format_from_regex',
    type    => $CONF_TYPE_STRING
  };


=item parse_dkim_uris ( 0 | 1 ) (default: 1)

If this option is set to 1 and the message contains DKIM headers, the headers will be parsed for URIs to process alongside URIs found in the body with some rules and modules (ex.
URIDNSBL)

=cut

  # boolean setting, enabled by default
  push @cmds, {
    setting => 'parse_dkim_uris',
    default => 1,
    type    => $CONF_TYPE_BOOL,
  };

=back

=head1 RULE DEFINITIONS AND PRIVILEGED SETTINGS

These settings differ from the ones above, in that they are considered
'privileged'. Only users running C<spamassassin> from their procmailrc's or
forward files, or sysadmins editing a file in C</etc/mail/spamassassin>, can
use them. C<spamd> users cannot use them in their C<user_prefs> files, for
security and efficiency reasons, unless C<allow_user_rules> is enabled (and
then, they may only add rules from below).

=over 4

=item allow_user_rules ( 0 | 1 ) (default: 0)

This setting allows users to create rules (and only rules) in their
C<user_prefs> files for use with C<spamd>. It defaults to off, because
this could be a severe security hole. It may be possible for users to
gain root level access if C<spamd> is run as root. It is NOT a good
idea, unless you have some other way of ensuring that users' tests are
safe. Don't use this unless you are certain you know what you are
doing. Furthermore, this option causes spamassassin to recompile all
the tests each time it processes a message for a user with a rule in
his/her C<user_prefs> file, which could have a significant effect on
server load. It is not recommended.

Note that it is not currently possible to use C<allow_user_rules> to modify an
existing system rule from a C<user_prefs> file with C<spamd>.

=cut

  push @cmds, {
    setting => 'allow_user_rules',
    type    => $CONF_TYPE_BOOL,
    default => 0,
    is_priv => 1,
    code    => sub {
      my ($self, $key, $value, $line) = @_;

      # Only a literal 0 or 1 is accepted; anything else is rejected.
      return $MISSING_REQUIRED_VALUE if $value eq '';
      return $INVALID_VALUE unless $value =~ /^[01]$/;

      my $enabled = $value + 0;     # store as a number, not a string
      $self->{allow_user_rules} = $enabled;

      my $state = $enabled ? "allowing" : "not allowing";
      dbg("config: " . $state . " user rules!");
    }
  };

=item redirector_pattern	/pattern/modifiers

A regex pattern that matches both the redirector site portion, and
the target site portion of a URI.

Note: The target URI portion must be surrounded in parentheses and
no other part of the pattern may create a backreference.

Example: http://chkpt.zdnet.com/chkpt/whatever/spammer.domain/yo/dude

  redirector_pattern	/^https?:\/\/(?:opt\.)?chkpt\.zdnet\.com\/chkpt\/\w+\/(.*)$/i

=cut

  push @cmds, {
    setting => 'redirector_pattern',
    type    => $CONF_TYPE_STRINGLIST,
    default => [],
    is_priv => 1,
    code    => sub {
      my ($self, $key, $value, $line) = @_;

      $value =~ s/^\s+//;
      return $MISSING_REQUIRED_VALUE if $value eq '';

      # compile_regexp() hands back (compiled regexp, error text); a false
      # first element means the pattern could not be compiled.
      my ($compiled, $error) = compile_regexp($value, 1);
      unless ($compiled) {
        dbg("config: invalid redirector_pattern '$value': $error");
        return $INVALID_VALUE;
      }

      # The compiled pattern is appended to the list held on the main
      # object's conf, not directly on $self.
      push @{ $self->{main}->{conf}->{redirector_patterns} }, $compiled;
    }
  };

=item header SYMBOLIC_TEST_NAME header op /pattern/modifiers	[if-unset: STRING]

Define a test.  C<SYMBOLIC_TEST_NAME> is a symbolic test name, such as
'FROM_ENDS_IN_NUMS'.  C<header> is the name of a mail header field,
such as 'Subject', 'To', 'From', etc.
Header field names are matched 2807case-insensitively (conforming to RFC 5322 section 1.2.2), except for 2808all-capitals metaheader fields such as ALL, MESSAGEID, ALL-TRUSTED. 2809 2810Appending a modifier C<:raw> to a header field name will inhibit decoding of 2811quoted-printable or base-64 encoded strings, and will preserve all whitespace 2812inside the header string. The C<:raw> may also be applied to pseudo-headers 2813e.g. C<ALL:raw> will return a pristine (unmodified) header section. 2814 2815Appending a modifier C<:addr> to a header field name will cause everything 2816except the first email address to be removed from the header field. It is 2817mainly applicable to header fields 'From', 'Sender', 'To', 'Cc' along with 2818their 'Resent-*' counterparts, and the 'Return-Path'. 2819 2820Appending a modifier C<:name> to a header field name will cause everything 2821except the first display name to be removed from the header field. It is 2822mainly applicable to header fields containing a single mail address: 'From', 2823'Sender', along with their 'Resent-From' and 'Resent-Sender' counterparts. 2824 2825It is syntactically permitted to append more than one modifier to a header 2826field name, although currently most combinations achieve no additional effect, 2827for example C<From:addr:raw> or C<From:raw:addr> is currently the same as 2828C<From:addr> . 

For example, appending C<:addr> to a header name will result in example@foo
in all of the following cases:

=over 4

=item example@foo

=item example@foo (Foo Blah)

=item example@foo, example@bar

=item display: example@foo (Foo Blah), example@bar ;

=item Foo Blah E<lt>example@fooE<gt>

=item "Foo Blah" E<lt>example@fooE<gt>

=item "'Foo Blah'" E<lt>example@fooE<gt>

=back

For example, appending C<:name> to a header name will result in "Foo Blah"
(without quotes) in all of the following cases:

=over 4

=item example@foo (Foo Blah)

=item example@foo (Foo Blah), example@bar

=item display: example@foo (Foo Blah), example@bar ;

=item Foo Blah E<lt>example@fooE<gt>

=item "Foo Blah" E<lt>example@fooE<gt>

=item "'Foo Blah'" E<lt>example@fooE<gt>

=back

There are several special pseudo-headers that can be specified:

=over 4

=item C<ALL> can be used to mean the text of all the message's headers.
Note that all whitespace inside the headers, at line folds, is currently
compressed into a single space (' ') character.  To obtain a pristine
(unmodified) header section, use C<ALL:raw> - the :raw modifier is documented
above.  There are also similar pseudo-headers that return the headers added
by specific relays: ALL-TRUSTED, ALL-INTERNAL, ALL-UNTRUSTED, ALL-EXTERNAL.

=item C<ToCc> can be used to mean the contents of both the 'To' and 'Cc'
headers.

=item C<EnvelopeFrom> is the address used in the 'MAIL FROM:' phase of the SMTP
transaction that delivered this message, if this data has been made available
by the SMTP server.  See C<envelope_sender_header> for more information
on how to set this.
2888 2889=item C<MESSAGEID> is a symbol meaning all Message-Id's found in the message; 2890some mailing list software moves the real 'Message-Id' to 'Resent-Message-Id' 2891or to 'X-Message-Id', then uses its own one in the 'Message-Id' header. 2892The value returned for this symbol is the text from all 3 headers, separated 2893by newlines. 2894 2895=item C<X-Spam-Relays-Untrusted>, C<X-Spam-Relays-Trusted>, 2896C<X-Spam-Relays-Internal> and C<X-Spam-Relays-External> represent a portable, 2897pre-parsed representation of the message's network path, as recorded in the 2898Received headers, divided into 'trusted' vs 'untrusted' and 'internal' vs 2899'external' sets. See C<http://wiki.apache.org/spamassassin/TrustedRelays> for 2900more details. 2901 2902=back 2903 2904C<op> is either C<=~> (contains regular expression) or C<!~> (does not contain 2905regular expression), and C<pattern> is a valid Perl regular expression, with 2906C<modifiers> as regexp modifiers in the usual style. Note that multi-line 2907rules are not supported, even if you use C<x> as a modifier. Also note that 2908the C<#> character must be escaped (C<\#>) or else it will be considered to be 2909the start of a comment and not part of the regexp. 2910 2911If the header specified matches multiple headers, their text will be 2912concatenated with embedded \n's. Therefore you may wish to use C</m> if you 2913use C<^> or C<$> in your regular expression. 2914 2915If the C<[if-unset: STRING]> tag is present, then C<STRING> will 2916be used if the header is not found in the mail message. 2917 2918Test names must not start with a number, and must contain only 2919alphanumerics and underscores. It is suggested that lower-case characters 2920not be used, and names have a length of no more than 22 characters, 2921as an informal convention. Dashes are not allowed. 
2922 2923Note that test names which begin with '__' are reserved for meta-match 2924sub-rules, and are not scored or listed in the 'tests hit' reports. 2925Test names which begin with 'T_' are reserved for tests which are 2926undergoing QA, and these are given a very low score. 2927 2928If you add or modify a test, please be sure to run a sanity check afterwards 2929by running C<spamassassin --lint>. This will avoid confusing error 2930messages, or other tests being skipped as a side-effect. 2931 2932=item header SYMBOLIC_TEST_NAME exists:header_field_name 2933 2934Define a header field existence test. C<header_field_name> is the name 2935of a header field to test for existence. Not to be confused with a 2936test for a nonempty header field body, which can be implemented by a 2937C<header SYMBOLIC_TEST_NAME header =~ /\S/> rule as described above. 2938 2939=item header SYMBOLIC_TEST_NAME eval:name_of_eval_method([arguments]) 2940 2941Define a header eval test. C<name_of_eval_method> is the name of 2942a method registered by a C<Mail::SpamAssassin::Plugin> object. 2943C<arguments> are optional arguments to the function call. 2944 2945=item header SYMBOLIC_TEST_NAME eval:check_rbl('set', 'zone' [, 'sub-test']) 2946 2947Check a DNSBL (a DNS blacklist or whitelist). This will retrieve Received: 2948headers from the message, extract the IP addresses, select which ones are 2949'untrusted' based on the C<trusted_networks> logic, and query that DNSBL 2950zone. There's a few things to note: 2951 2952=over 4 2953 2954=item duplicated or private IPs 2955 2956Duplicated IPs are only queried once and reserved IPs are not queried. 2957Private IPs are those listed in 2958C<https://www.iana.org/assignments/ipv4-address-space>, 2959C<http://duxcw.com/faq/network/privip.htm>, 2960C<http://duxcw.com/faq/network/autoip.htm>, or 2961C<https://tools.ietf.org/html/rfc5735> as private. 2962 2963=item the 'set' argument 2964 2965This is used as a 'zone ID'. 
If you want to look up a multiple-meaning zone 2966like SORBS, you can then query the results from that zone using it; 2967but all check_rbl_sub() calls must use that zone ID. 2968 2969Also, if more than one IP address gets a DNSBL hit for a particular rule, it 2970does not affect the score because rules only trigger once per message. 2971 2972=item the 'zone' argument 2973 2974This is the root zone of the DNSBL. 2975 2976The domain name is considered to be a fully qualified domain name 2977(i.e. not subject to DNS resolver's search or default domain options). 2978No trailing period is needed, and will be removed if specified. 2979 2980=item the 'sub-test' argument 2981 2982This optional argument behaves the same as the sub-test argument in 2983C<check_rbl_sub()> below. 2984 2985=item selecting all IPs except for the originating one 2986 2987This is accomplished by placing '-notfirsthop' at the end of the set name. 2988This is useful for querying against DNS lists which list dialup IP 2989addresses; the first hop may be a dialup, but as long as there is at least 2990one more hop, via their outgoing SMTP server, that's legitimate, and so 2991should not gain points. If there is only one hop, that will be queried 2992anyway, as it should be relaying via its outgoing SMTP server instead of 2993sending directly to your MX (mail exchange). 2994 2995=item selecting IPs by whether they are trusted 2996 2997When checking a 'nice' DNSBL (a DNS whitelist), you cannot trust the IP 2998addresses in Received headers that were not added by trusted relays. To 2999test the first IP address that can be trusted, place '-firsttrusted' at the 3000end of the set name. That should test the IP address of the relay that 3001connected to the most remote trusted relay. 3002 3003Note that this requires that SpamAssassin know which relays are trusted. For 3004simple cases, SpamAssassin can make a good estimate. 
For complex cases, you
may get better results by setting C<trusted_networks> manually.

In addition, you can test all untrusted IP addresses by placing '-untrusted'
at the end of the set name.   Important note -- this does NOT include the
IP address from the most recent 'untrusted line', as used in '-firsttrusted'
above.  That's because we're talking about the trustworthiness of the
IP address data, not the source header line, here; and in the case of
the most recent header (the 'firsttrusted'), that data can be trusted.
See the Wiki page at C<http://wiki.apache.org/spamassassin/TrustedRelays>
for more information on this.

=item Selecting just the last external IP

By using '-lastexternal' at the end of the set name, you can select only
the external host that connected to your internal network, or at least
the last external host with a public IP.

=back

=item header SYMBOLIC_TEST_NAME eval:check_rbl_txt('set', 'zone')

Same as check_rbl(), except querying using IN TXT instead of IN A records.
If the zone supports it, it will result in a line of text describing
why the IP is listed, typically a hyperlink to a database entry.

=item header SYMBOLIC_TEST_NAME eval:check_rbl_sub('set', 'sub-test')

Create a sub-test for 'set'.  If you want to look up a multi-meaning zone
like relays.osirusoft.com, you can then query the results from that zone
using the zone ID from the original query.  The sub-test may either be an
IPv4 dotted address for RBLs that return multiple A records, or a
non-negative decimal number to specify a bitmask for RBLs that return a
single A record containing a bitmask of results, or a regular expression.

Note: the set name must be exactly the same as for the main query rule,
including selections like '-notfirsthop' appearing at the end of the set
name.

=cut

  push @cmds, {
    setting     => 'header',
    is_priv     => 1,
    is_frequent => 1,
    code        => sub {
      my ($self, $key, $value, $line) = @_;
      local($1);

      # Peel the rule name off the front; what is left is the test itself.
      return $INVALID_VALUE unless $value =~ s/^(\S+)\s+//;
      my $rulename = $1;
      return $MISSING_REQUIRED_VALUE if $value eq '';

      # "eval:" (or "rbleval:") introduces a function-call style test.
      my ($fn) = $value =~ /^(?:rbl)?eval:(.*)$/;

      unless (defined $fn) {
        # Detailed parsing in add_test
        return $self->{parser}->add_test ($rulename, $value, $TYPE_HEAD_TESTS);
      }

      # An eval test must look like name(...)
      return $INVALID_VALUE unless $fn =~ /^\w+\(.*\)$/;

      # check_rbl* / check_dns* functions are registered as RBL evals,
      # every other function as a plain header eval.
      my $type = ($fn =~ /^check_(?:rbl|dns)/) ? $TYPE_RBL_EVALS
                                               : $TYPE_HEAD_EVALS;
      return $self->{parser}->add_test ($rulename, $fn, $type);
    }
  };

=item body SYMBOLIC_TEST_NAME /pattern/modifiers

Define a body pattern test.  C<pattern> is a Perl regular expression.  Note:
as per the header tests, C<#> must be escaped (C<\#>) or else it is considered
the beginning of a comment.

The 'body' in this case is the textual parts of the message body; any
non-text MIME parts are stripped, and the message decoded from
Quoted-Printable or Base-64-encoded format if necessary.  Parts declared as
text/html will be rendered from HTML to text.

Each body paragraph (a double-newline-separated block of text) is turned
into a single line, with line breaks removed and whitespace normalized.  Any
lines longer than 2kB are split into shorter separate lines (from a boundary
when possible); this may unexpectedly prevent a pattern from matching.
Patterns are matched independently against each of these lines.

Note that by default the message Subject header is considered part of the
body and becomes the first line when running the rules.  If you don't want
to match Subject along with body text, use "tflags RULENAME nosubject".

=item body SYMBOLIC_TEST_NAME eval:name_of_eval_method([args])

Define a body eval test.  See above.

=cut

  push @cmds, {
    setting     => 'body',
    is_priv     => 1,
    is_frequent => 1,
    code        => sub {
      my ($self, $key, $value, $line) = @_;
      local($1);

      # First token is the rule name, the rest is the pattern or eval call.
      return $INVALID_VALUE unless $value =~ s/^(\S+)\s+//;
      my $rulename = $1;
      return $MISSING_REQUIRED_VALUE if $value eq '';

      my ($fn) = $value =~ /^eval:(.*)$/;

      # Not an eval: register the remainder as a body pattern test.
      return $self->{parser}->add_test ($rulename, $value, $TYPE_BODY_TESTS)
        unless defined $fn;

      # An eval test must look like name(...)
      return $INVALID_VALUE unless $fn =~ /^\w+\(.*\)$/;
      return $self->{parser}->add_test ($rulename, $fn, $TYPE_BODY_EVALS);
    }
  };

=item uri SYMBOLIC_TEST_NAME /pattern/modifiers

Define a uri pattern test.  C<pattern> is a Perl regular expression.  Note: as
per the header tests, C<#> must be escaped (C<\#>) or else it is considered
the beginning of a comment.

The 'uri' in this case is a list of all the URIs in the body of the email,
and the test will be run on each and every one of those URIs, adjusting the
score if a match is found. Use this test instead of one of the body tests
when you need to match a URI, as it is more accurately bound to the start/end
points of the URI, and will also be faster.

=cut

# we don't do URI evals yet - maybe later
#    if (/^uri\s+(\S+)\s+eval:(.*)$/) {
#      $self->{parser}->add_test ($1, $2, $TYPE_URI_EVALS);
#      next;
#    }
  push @cmds, {
    setting => 'uri',
    is_priv => 1,
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      local($1);

      # First token is the rule name; the remainder is handed to add_test
      # unchanged as a URI pattern test (no eval form is recognised here).
      return $INVALID_VALUE unless $value =~ s/^(\S+)\s+//;
      my $rulename = $1;
      return $MISSING_REQUIRED_VALUE if $value eq '';

      return $self->{parser}->add_test ($rulename, $value, $TYPE_URI_TESTS);
    }
  };

=item rawbody SYMBOLIC_TEST_NAME /pattern/modifiers

Define a raw-body pattern test.  C<pattern> is a Perl regular expression.
Note: as per the header tests, C<#> must be escaped (C<\#>) or else it is
considered the beginning of a comment.

The 'raw body' of a message is the raw data inside all textual parts. The
text will be decoded from base64 or quoted-printable encoding, but HTML
tags and line breaks will still be present.  Multiline expressions will
need to be used to match strings that are broken by line breaks.

Note that the text is split into 2-4kB chunks (from a word boundary when
possible), this may unexpectedly prevent pattern from matching.  Patterns
are matched independently against each of these chunks.

=item rawbody SYMBOLIC_TEST_NAME eval:name_of_eval_method([args])

Define a raw-body eval test.  See above.

=cut

  push @cmds, {
    setting     => 'rawbody',
    is_priv     => 1,
    is_frequent => 1,
    code        => sub {
      my ($self, $key, $value, $line) = @_;
      local($1);

      # First token is the rule name, the rest is the pattern or eval call.
      return $INVALID_VALUE unless $value =~ s/^(\S+)\s+//;
      my $rulename = $1;
      return $MISSING_REQUIRED_VALUE if $value eq '';

      my ($fn) = $value =~ /^eval:(.*)$/;

      # Not an eval: register the remainder as a raw-body pattern test.
      return $self->{parser}->add_test ($rulename, $value, $TYPE_RAWBODY_TESTS)
        unless defined $fn;

      # An eval test must look like name(...)
      return $INVALID_VALUE unless $fn =~ /^\w+\(.*\)$/;
      return $self->{parser}->add_test ($rulename, $fn, $TYPE_RAWBODY_EVALS);
    }
  };

=item full SYMBOLIC_TEST_NAME /pattern/modifiers

Define a full message pattern test.  C<pattern> is a Perl regular expression.
Note: as per the header tests, C<#> must be escaped (C<\#>) or else it is
considered the beginning of a comment.

The full message is the pristine message headers plus the pristine message
body, including all MIME data such as images, other attachments, MIME
boundaries, etc.

=item full SYMBOLIC_TEST_NAME eval:name_of_eval_method([args])

Define a full message eval test.  See above.

=cut

  push @cmds, {
    setting => 'full',
    is_priv => 1,
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      local($1);

      # First token is the rule name, the rest is the pattern or eval call.
      return $INVALID_VALUE unless $value =~ s/^(\S+)\s+//;
      my $rulename = $1;
      return $MISSING_REQUIRED_VALUE if $value eq '';

      my ($fn) = $value =~ /^eval:(.*)$/;

      # Not an eval: register the remainder as a full-message pattern test.
      return $self->{parser}->add_test ($rulename, $value, $TYPE_FULL_TESTS)
        unless defined $fn;

      # An eval test must look like name(...)
      return $INVALID_VALUE unless $fn =~ /^\w+\(.*\)$/;
      return $self->{parser}->add_test ($rulename, $fn, $TYPE_FULL_EVALS);
    }
  };

=item meta SYMBOLIC_TEST_NAME boolean expression

Define a boolean expression test in terms of other tests that have
been hit or not hit.
For example: 3259 3260meta META1 TEST1 && !(TEST2 || TEST3) 3261 3262Note that English language operators ("and", "or") will be treated as 3263rule names, and that there is no C<XOR> operator. 3264 3265=item meta SYMBOLIC_TEST_NAME boolean arithmetic expression 3266 3267Can also define an arithmetic expression in terms of other tests, 3268with an unhit test having the value "0" and a hit test having a 3269nonzero value. The value of a hit meta test is that of its arithmetic 3270expression. The value of a hit eval test is that returned by its 3271method. The value of a hit header, body, rawbody, uri, or full test 3272which has the "multiple" tflag is the number of times the test hit. 3273The value of any other type of hit test is "1". 3274 3275For example: 3276 3277meta META2 (3 * TEST1 - 2 * TEST2) E<gt> 0 3278 3279Note that Perl builtins and functions, like C<abs()>, B<can't> be 3280used, and will be treated as rule names. 3281 3282If you want to define a meta-rule, but do not want its individual sub-rules to 3283count towards the final score unless the entire meta-rule matches, give the 3284sub-rules names that start with '__' (two underscores). SpamAssassin will 3285ignore these for scoring. 3286 3287=item meta SYMBOLIC_TEST_NAME ... rules_matching(RULEGLOB) ... 3288 3289Special function that will expand to list of matching rulenames. Can be 3290used anywhere in expressions. Argument supports glob style rulename 3291matching (* = anything, ? = one character). Matching is case-sensitive. 

For example, this will hit if at least two __FOO_* rules hit:

 body __FOO_1  /xxx/
 body __FOO_2  /yyy/
 body __FOO_3  /zzz/
 meta FOO_META  rules_matching(__FOO_*) >= 2

Which would be the same as:

 meta FOO_META  (__FOO_1 + __FOO_2 + __FOO_3) >= 2


=cut

  push (@cmds, {
    setting => 'meta',
    is_frequent => 1,
    is_priv => 1,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      local($1);
      # First token is the rule name; the remainder is the expression.
      if ($value !~ s/^(\S+)\s+//) {
        return $INVALID_VALUE;
      }
      my $rulename = $1;
      if ($value eq '') {
        return $MISSING_REQUIRED_VALUE;
      }
      # Reject '**' (and '* *') up front with an explicit message.
      if ($value =~ /\*\s*\*/) {
        info("config: found invalid '**' or '* *' operator in meta command");
        return $INVALID_VALUE;
      }
      $self->{parser}->add_test ($rulename, $value, $TYPE_META_TESTS);
    }
  });

=item reuse SYMBOLIC_TEST_NAME [ OLD_SYMBOLIC_TEST_NAME_1 ... ]

Defines the name of a test that should be "reused" during the scoring
process. If a message has an X-Spam-Status header that shows a hit for
this rule or any of the old rule names given, a hit will be added for
this rule when B<mass-check --reuse> is used. Examples:

C<reuse SPF_PASS>

C<reuse MY_NET_RULE_V2 MY_NET_RULE_V1>

The actual logic for reuse tests is done by
B<Mail::SpamAssassin::Plugin::Reuse>.

=cut

  push (@cmds, {
    setting => 'reuse',
    is_priv => 1,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      local($1);
      # The whole value must be a whitespace-separated list of \w+ names.
      # The pattern is anchored at BOTH ends: without the leading anchor a
      # malformed line such as "A-B C" was accepted by matching only its
      # tail, and the wrong name ("B") was then registered as the rule.
      if ($value !~ /^\s*(\w+)(?:\s+\w+)*\s*$/) {
        return $INVALID_VALUE;
      }
      # Only the first name is used here; the old names are not examined.
      my $rule_name = $1;
      # don't overwrite tests, just define them so scores, priorities work
      if (!exists $self->{tests}->{$rule_name}) {
        $self->{parser}->add_test($rule_name, undef, $TYPE_EMPTY_TESTS);
      }
    }
  });

=item tflags SYMBOLIC_TEST_NAME flags

Used to set flags on a test.
Parameter is a space-separated list of flag
names or flag name = value pairs.
These flags are used in the score-determination back end system for details
of the test's behaviour.  Please see C<bayes_auto_learn> for more information
about tflag interaction with those systems. The following flags can be set:

=over 4

=item  net

The test is a network test, and will not be run in the mass checking system
or if B<-L> is used, therefore its score should not be modified.

=item  nice

The test is intended to compensate for common false positives, and should be
assigned a negative score.

=item  userconf

The test requires user configuration before it can be used (like
language-specific tests).

=item  learn

The test requires training before it can be used.

=item  noautolearn

The test will explicitly be ignored when calculating the score for
learning systems.

=item  autolearn_force

The test will be subject to less stringent autolearn thresholds.

Normally, SpamAssassin will require 3 points from the header and 3
points from the body to be auto-learned as spam. This option keeps
the threshold at 6 points total but changes it to have no regard to the
source of the points.

=item  noawl

This flag is specific to the AWL plugin.

Normally, the AWL plugin normalizes scores via auto-whitelist. In some
scenarios this works against the system administrator when trying to add
some rules to correct misclassified email. When the AWL plugin searches the
email and finds the noawl flag it will exit without normalizing the score or
storing the value in the db.

=item  multiple

The test will be evaluated multiple times, for use with meta rules.
Only affects header, body, rawbody, uri, and full tests.

=item  maxhits=N

If B<multiple> is specified, limit the number of hits found to N.
If the rule is used in a meta that counts the hits (e.g. __RULENAME E<gt> 5),
this is a way to avoid wasted extra work (use "tflags multiple maxhits=6").

For example:

  uri      __KAM_COUNT_URIS /^./
  tflags   __KAM_COUNT_URIS multiple maxhits=16
  describe __KAM_COUNT_URIS A multiple match used to count URIs in a message

  meta __KAM_HAS_0_URIS (__KAM_COUNT_URIS == 0)
  meta __KAM_HAS_1_URIS (__KAM_COUNT_URIS >= 1)
  meta __KAM_HAS_2_URIS (__KAM_COUNT_URIS >= 2)
  meta __KAM_HAS_3_URIS (__KAM_COUNT_URIS >= 3)
  meta __KAM_HAS_4_URIS (__KAM_COUNT_URIS >= 4)
  meta __KAM_HAS_5_URIS (__KAM_COUNT_URIS >= 5)
  meta __KAM_HAS_10_URIS (__KAM_COUNT_URIS >= 10)
  meta __KAM_HAS_15_URIS (__KAM_COUNT_URIS >= 15)

=item nosubject

Used only for B<body> rules.  If specified, Subject header will not be a
part of the matched body text.  See I<body> for more info.

=item  ips_only

This flag is specific to rules invoking a URIDNSBL plugin,
it is documented there.

=item  domains_only

This flag is specific to rules invoking a URIDNSBL plugin,
it is documented there.

=item  ns

This flag is specific to rules invoking a URIDNSBL plugin,
it is documented there.

=item  a

This flag is specific to rules invoking a URIDNSBL plugin,
it is documented there.

=back

=cut

  # No custom handler: declared as a key/value hash setting only.
  push @cmds, {
    setting     => 'tflags',
    type        => $CONF_TYPE_HASH_KEY_VALUE,
    is_priv     => 1,
    is_frequent => 1,
  };

=item priority SYMBOLIC_TEST_NAME n

Assign a specific priority to a test.  All tests, except for DNS and Meta
tests, are run in increasing priority value order (negative priority values
are run before positive priority values). The default test priority is 0
(zero).

The values C<-99999999999999> and C<-99999999999998> have a special meaning
internally, and should not be used.

=cut

  push @cmds, {
    setting => 'priority',
    type    => $CONF_TYPE_HASH_KEY_VALUE,
    is_priv => 1,
    code    => sub {
      my ($self, $key, $value, $line) = @_;

      # Split into at most two fields: the rule name and its priority.
      my ($rulename, $priority) = split(/\s+/, $value, 2);

      return $MISSING_REQUIRED_VALUE unless defined $priority;
      return $INVALID_VALUE unless $rulename =~ IS_RULENAME;
      return $INVALID_VALUE unless $priority =~ /^-?\d+$/;   # integers only

      # Deliberately the last statement: its value is what the closure
      # returns, exactly as before.
      $self->{priority}->{$rulename} = $priority;
    }
  };

=back

=head1 ADMINISTRATOR SETTINGS

These settings differ from the ones above, in that they are considered 'more
privileged' -- even more than the ones in the B<PRIVILEGED SETTINGS> section.
No matter what C<allow_user_rules> is set to, these can never be set from a
user's C<user_prefs> file when spamc/spamd is being used.  However, all
settings can be used by local programs run directly by the user.

=over 4

=item version_tag string

This tag is appended to the SA version in the X-Spam-Status header. You should
include it when you modify your ruleset, especially if you plan to distribute it.
A good choice for I<string> is your last name or your initials followed by a
number which you increase with each change.

The version_tag will be lowercased, and any non-alphanumeric or period
character will be replaced by an underscore.

e.g.
3531 3532 version_tag myrules1 # version=2.41-myrules1 3533 3534=cut 3535 3536 push (@cmds, { 3537 setting => 'version_tag', 3538 is_admin => 1, 3539 code => sub { 3540 my ($self, $key, $value, $line) = @_; 3541 if ($value eq '') { 3542 return $MISSING_REQUIRED_VALUE; 3543 } 3544 my $tag = lc($value); 3545 $tag =~ tr/a-z0-9./_/c; 3546 foreach (@Mail::SpamAssassin::EXTRA_VERSION) { 3547 if($_ eq $tag) { $tag = undef; last; } 3548 } 3549 push(@Mail::SpamAssassin::EXTRA_VERSION, $tag) if($tag); 3550 } 3551 }); 3552 3553=item test SYMBOLIC_TEST_NAME (ok|fail) Some string to test against 3554 3555Define a regression testing string. You can have more than one regression test 3556string per symbolic test name. Simply specify a string that you wish the test 3557to match. 3558 3559These tests are only run as part of the test suite - they should not affect the 3560general running of SpamAssassin. 3561 3562=cut 3563 3564 push (@cmds, { 3565 setting => 'test', 3566 is_admin => 1, 3567 code => sub { 3568 return unless defined $COLLECT_REGRESSION_TESTS; 3569 my ($self, $key, $value, $line) = @_; 3570 local ($1,$2,$3); 3571 if ($value !~ /^(\S+)\s+(ok|fail)\s+(.*)$/) { return $INVALID_VALUE; } 3572 $self->{parser}->add_regression_test($1, $2, $3); 3573 } 3574 }); 3575 3576=item body_part_scan_size (default: 50000) 3577 3578Per mime-part scan size limit in bytes for "body" type rules. 3579The decoded/stripped mime-part is truncated approx to this size. 3580Helps scanning large messages safely, so it's not necessary to 3581skip them completely. Disabled with 0. 3582 3583=cut 3584 3585 push (@cmds, { 3586 setting => 'body_part_scan_size', 3587 is_admin => 1, 3588 default => 50000, 3589 type => $CONF_TYPE_NUMERIC, 3590 }); 3591 3592 3593=item rawbody_part_scan_size (default: 500000) 3594 3595Like body_part_scan_size, for "rawbody" type rules. 

=cut

  # Numeric, administrator-only setting.
  push @cmds, {
    setting  => 'rawbody_part_scan_size',
    type     => $CONF_TYPE_NUMERIC,
    default  => 500000,
    is_admin => 1,
  };

=item rbl_timeout t [t_min] [zone]		(default: 15 3)

All DNS queries are made at the beginning of a check and we try to read
the results at the end.  This value specifies the maximum period of time
(in seconds) to wait for a DNS query.  If most of the DNS queries have
succeeded for a particular message, then SpamAssassin will not wait for
the full period to avoid wasting time on unresponsive server(s), but will
shrink the timeout according to a percentage of queries already completed.
As the number of queries remaining approaches 0, the timeout value will
gradually approach a t_min value, which is an optional second parameter
and defaults to 0.2 * t.  If t is smaller than t_min, the initial timeout
is set to t_min.  Here is a chart of queries remaining versus the timeout
in seconds, for the default 15 second / 3 second timeout setting:

  queries left  100%  90%  80%  70%  60%  50%  40%  30%  20%  10%   0%
  timeout         15 14.9 14.5 13.9 13.1 12.0 10.7  9.1  7.3  5.3    3

For example, if 20 queries are made at the beginning of a message check
and 16 queries have returned (leaving 20%), the remaining 4 queries should
finish within 7.3 seconds since their query started or they will be timed out.
Note that timed out queries are only aborted when there is nothing else left
for SpamAssassin to do - long evaluation of other rules may grant queries
additional time.

If a parameter 'zone' is specified (it must end with a letter, which
distinguishes it from other numeric parameters), then the setting only
applies to DNS queries against the specified DNS domain (host, domain or
RBL (sub)zone).  Matching is case-insensitive, the actual domain may be a
subdomain of the specified zone.

=cut

  # rbl_timeout: "t [t_min] [zone]".  Both durations accept an optional
  # s/m/h/d/w unit suffix and are stored in seconds, either globally or
  # under {by_zone}{<zone>} when a zone is given.
  push @cmds, {
    setting  => 'rbl_timeout',
    is_admin => 1,
    default  => 15,
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      if (!defined $value || $value =~ /^$/) {
        return $MISSING_REQUIRED_VALUE;
      }

      local ($1,$2,$3);
      $value =~ /^ ( \+? \d+ (?: \. \d*)? [smhdw]? )
                 (?: \s+ ( \+? \d+ (?: \. \d*)? [smhdw]? ) )?
                 (?: \s+ (\S* [a-zA-Z]) )? $/xsi
        or return $INVALID_VALUE;
      my ($timeout, $timeout_min, $zone) = ($1, $2, $3);

      # convert a trailing unit letter into a multiplier (in seconds)
      my %seconds_per = ( s => 1, m => 60, h => 3600,
                          d => 24*3600, w => 7*24*3600 );
      for my $duration ($timeout, $timeout_min) {
        next unless defined $duration;
        if ($duration =~ s/\s*([smhdw])\z//i) {
          $duration *= $seconds_per{lc $1};
        }
      }

      # a global setting lives directly in the config object,
      # a per-zone setting in its own sub-hash
      my $settings = $self;
      if (defined $zone) {
        $zone =~ s/^\.//;  $zone =~ s/\.\z//;  # strip leading and trailing dot
        $zone = lc $zone;
        $settings = ($self->{by_zone}{$zone} ||= { });
      }
      $settings->{rbl_timeout}     = 0 + $timeout;
      $settings->{rbl_timeout_min} = 0 + $timeout_min  if defined $timeout_min;
    },
    type     => $CONF_TYPE_DURATION,
  };

=item util_rb_tld tld1 tld2 ...

This option maintains list of valid TLDs in the RegistryBoundaries code.
TLDs include things like com, net, org, etc.

=cut

  # util_rb_tld: whitespace separated, dot-free labels; each one is
  # recorded lowercased in {valid_tlds}.
  push @cmds, {
    setting  => 'util_rb_tld',
    is_admin => 1,
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      if (!defined $value || $value =~ /^$/) {
        return $MISSING_REQUIRED_VALUE;
      }
      if ($value !~ /^[^\s.]+(?:\s+[^\s.]+)*$/) {
        return $INVALID_VALUE;
      }
      for my $tld (split(/\s+/, $value)) {
        $self->{valid_tlds}{lc $tld} = 1;
      }
    },
  };

=item util_rb_2tld 2tld-1.tld 2tld-2.tld ...

This option maintains list of valid 2nd-level TLDs in the RegistryBoundaries
code.
2TLDs include things like co.uk, fed.us, etc.

=cut

  # util_rb_2tld: whitespace separated "label.label" names; each one is
  # recorded lowercased in {two_level_domains}.
  push (@cmds, {
    setting => 'util_rb_2tld',
    is_admin => 1,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      unless (defined $value && $value !~ /^$/) {
        return $MISSING_REQUIRED_VALUE;
      }
      unless ($value =~ /^[^\s.]+\.[^\s.]+(?:\s+[^\s.]+\.[^\s.]+)*$/) {
        return $INVALID_VALUE;
      }
      foreach (split(/\s+/, $value)) {
        $self->{two_level_domains}{lc $_} = 1;
      }
    }
  });

=item util_rb_3tld 3tld1.some.tld 3tld2.other.tld ...

This option maintains list of valid 3rd-level TLDs in the RegistryBoundaries
code. 3TLDs include things like demon.co.uk, plc.co.im, etc.

=cut

  # util_rb_3tld: whitespace separated "label.label.label" names; each one
  # is recorded lowercased in {three_level_domains}.
  push (@cmds, {
    setting => 'util_rb_3tld',
    is_admin => 1,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      unless (defined $value && $value !~ /^$/) {
        return $MISSING_REQUIRED_VALUE;
      }
      unless ($value =~ /^[^\s.]+\.[^\s.]+\.[^\s.]+(?:\s+[^\s.]+\.[^\s.]+\.[^\s.]+)*$/) {
        return $INVALID_VALUE;
      }
      foreach (split(/\s+/, $value)) {
        $self->{three_level_domains}{lc $_} = 1;
      }
    }
  });

=item clear_util_rb

Empty internal list of valid TLDs (including 2nd and 3rd level) which
RegistryBoundaries code uses. Only useful if you want to override the
standard lists supplied by sa-update.

=cut

  # clear_util_rb: takes no argument; resets all three TLD tables.
  push (@cmds, {
    setting => 'clear_util_rb',
    type => $CONF_TYPE_NOARGS,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      unless (!defined $value || $value eq '') {
        return $INVALID_VALUE;
      }
      # Assign fresh empty hashes.  Assigning "()" to a scalar slot would
      # store undef, leaving the tables missing instead of empty until a
      # later util_rb_* line happens to autovivify them again.
      $self->{valid_tlds} = { };
      $self->{two_level_domains} = { };
      $self->{three_level_domains} = { };
      dbg("config: cleared tld lists");
    }
  });

=item bayes_path /path/filename	(default: ~/.spamassassin/bayes)

This is the directory and filename for Bayes databases.
Several databases
will be created, with this as the base directory and filename, with C<_toks>,
C<_seen>, etc. appended to the base. The default setting results in files
called C<~/.spamassassin/bayes_seen>, C<~/.spamassassin/bayes_toks>, etc.

By default, each user has their own in their C<~/.spamassassin> directory with
mode 0700/0600. For system-wide SpamAssassin use, you may want to reduce disk
space usage by sharing this across all users. However, Bayes appears to be
more effective with individual user databases.

=cut

  # bayes_path: must be non-empty and must not name an existing directory,
  # since it is used as a filename prefix.
  push @cmds, {
    setting  => 'bayes_path',
    is_admin => 1,
    default  => '__userstate__/bayes',
    type     => $CONF_TYPE_STRING,
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      if (!defined $value || $value =~ /^$/) {
        return $MISSING_REQUIRED_VALUE;
      }
      return $INVALID_VALUE if -d $value;
      $self->{bayes_path} = $value;
    },
  };

=item bayes_file_mode		(default: 0700)

The file mode bits used for the Bayesian filtering database files.

Make sure you specify this using the 'x' mode bits set, as it may also be used
to create directories. However, if a file is created, the resulting file will
not have any execute bits set (the umask is set to 111). The argument is a
string of octal digits, it is converted to a numeric value internally.

=cut

  # bayes_file_mode: three octal digits with an optional leading zero;
  # the string is stored untainted.
  push @cmds, {
    setting  => 'bayes_file_mode',
    is_admin => 1,
    default  => '0700',
    type     => $CONF_TYPE_NUMERIC,
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      return $INVALID_VALUE unless $value =~ /^0?[0-7]{3}$/;
      $self->{bayes_file_mode} = untaint_var($value);
    },
  };

=item bayes_store_module Name::Of::BayesStore::Module

If this option is set, the module given will be used as an alternate
to the default bayes storage mechanism.
It must conform to the
published storage specification (see
Mail::SpamAssassin::BayesStore). For example, set this to
Mail::SpamAssassin::BayesStore::SQL to use the generic SQL storage
module.

=cut

  # bayes_store_module: only word characters and colons are accepted;
  # the captured module name is what gets stored.
  push @cmds, {
    setting  => 'bayes_store_module',
    is_admin => 1,
    default  => '',
    type     => $CONF_TYPE_STRING,
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      local ($1);
      $value =~ /^([_A-Za-z0-9:]+)$/
        or return $INVALID_VALUE;
      $self->{bayes_store_module} = $1;
    },
  };

=item bayes_sql_dsn DBI::databasetype:databasename:hostname:port

Used for BayesStore::SQL storage implementation.

This option gives the connect string used to connect to the SQL based Bayes storage.

=cut

  push @cmds, {
    setting  => 'bayes_sql_dsn',
    is_admin => 1,
    default  => '',
    type     => $CONF_TYPE_STRING,
  };

=item bayes_sql_username

Used by BayesStore::SQL storage implementation.

This option gives the username used by the above DSN.

=cut

  push @cmds, {
    setting  => 'bayes_sql_username',
    is_admin => 1,
    default  => '',
    type     => $CONF_TYPE_STRING,
  };

=item bayes_sql_password

Used by BayesStore::SQL storage implementation.

This option gives the password used by the above DSN.

=cut

  push @cmds, {
    setting  => 'bayes_sql_password',
    is_admin => 1,
    default  => '',
    type     => $CONF_TYPE_STRING,
  };

=item bayes_sql_username_authorized ( 0 | 1 )	(default: 0)

Whether to call the services_authorized_for_username plugin hook in BayesSQL.
If the hook does not determine that the user is allowed to use bayes or is
invalid then the database will not be initialized.

NOTE: By default the user is considered invalid until a plugin returns
a true value.
If you enable this, but do not have a proper plugin
loaded, all users will turn up as invalid.

The username passed into the plugin can be affected by the
bayes_sql_override_username config option.

=cut

  push @cmds, {
    setting  => 'bayes_sql_username_authorized',
    is_admin => 1,
    default  => 0,
    type     => $CONF_TYPE_BOOL,
  };

=item user_scores_dsn DBI:databasetype:databasename:hostname:port

If you load user scores from an SQL database, this will set the DSN
used to connect.  Example: C<DBI:mysql:spamassassin:localhost>

If you load user scores from an LDAP directory, this will set the DSN used to
connect. You have to write the DSN as an LDAP URL, the components being the
host and port to connect to, the base DN for the search, the scope of the
search (base, one or sub), the single attribute being the multivalued attribute
used to hold the configuration data (space separated pairs of key and value,
just as in a file) and finally the filter being the expression used to filter
out the wanted username. Note that the filter expression is being used in a
sprintf statement with the username as the only parameter, thus it can hold a
single __USERNAME__ expression. This will be replaced with the username.

Example: C<ldap://localhost:389/dc=koehntopp,dc=de?saconfig?uid=__USERNAME__>

=cut

  push @cmds, {
    setting  => 'user_scores_dsn',
    is_admin => 1,
    default  => '',
    type     => $CONF_TYPE_STRING,
  };

=item user_scores_sql_username username

The authorized username to connect to the above DSN.

=cut

  push @cmds, {
    setting  => 'user_scores_sql_username',
    is_admin => 1,
    default  => '',
    type     => $CONF_TYPE_STRING,
  };

=item user_scores_sql_password password

The password for the database username, for the above DSN.
3957 3958=cut 3959 3960 push (@cmds, { 3961 setting => 'user_scores_sql_password', 3962 is_admin => 1, 3963 default => '', 3964 type => $CONF_TYPE_STRING, 3965 }); 3966 3967=item user_scores_sql_custom_query query 3968 3969This option gives you the ability to create a custom SQL query to 3970retrieve user scores and preferences. In order to work correctly your 3971query should return two values, the preference name and value, in that 3972order. In addition, there are several "variables" that you can use 3973as part of your query, these variables will be substituted for the 3974current values right before the query is run. The current allowed 3975variables are: 3976 3977=over 4 3978 3979=item _TABLE_ 3980 3981The name of the table where user scores and preferences are stored. Currently 3982hardcoded to userpref, to change this value you need to create a new custom 3983query with the new table name. 3984 3985=item _USERNAME_ 3986 3987The current user's username. 3988 3989=item _MAILBOX_ 3990 3991The portion before the @ as derived from the current user's username. 3992 3993=item _DOMAIN_ 3994 3995The portion after the @ as derived from the current user's username, this 3996value may be null. 3997 3998=back 3999 4000The query must be one continuous line in order to parse correctly. 4001 4002Here are several example queries, please note that these are broken up 4003for easy reading, in your config it should be one continuous line. 
4004 4005=over 4 4006 4007=item Current default query: 4008 4009C<SELECT preference, value FROM _TABLE_ WHERE username = _USERNAME_ OR username = '@GLOBAL' ORDER BY username ASC> 4010 4011=item Use global and then domain level defaults: 4012 4013C<SELECT preference, value FROM _TABLE_ WHERE username = _USERNAME_ OR username = '@GLOBAL' OR username = '@~'||_DOMAIN_ ORDER BY username ASC> 4014 4015=item Maybe global prefs should override user prefs: 4016 4017C<SELECT preference, value FROM _TABLE_ WHERE username = _USERNAME_ OR username = '@GLOBAL' ORDER BY username DESC> 4018 4019=back 4020 4021=cut 4022 4023 push (@cmds, { 4024 setting => 'user_scores_sql_custom_query', 4025 is_admin => 1, 4026 default => undef, 4027 type => $CONF_TYPE_STRING, 4028 }); 4029 4030=item user_scores_ldap_username 4031 4032This is the Bind DN used to connect to the LDAP server. It defaults 4033to the empty string (""), allowing anonymous binding to work. 4034 4035Example: C<cn=master,dc=koehntopp,dc=de> 4036 4037=cut 4038 4039 push (@cmds, { 4040 setting => 'user_scores_ldap_username', 4041 is_admin => 1, 4042 default => '', 4043 type => $CONF_TYPE_STRING, 4044 }); 4045 4046=item user_scores_ldap_password 4047 4048This is the password used to connect to the LDAP server. It defaults 4049to the empty string (""). 4050 4051=cut 4052 4053 push (@cmds, { 4054 setting => 'user_scores_ldap_password', 4055 is_admin => 1, 4056 default => '', 4057 type => $CONF_TYPE_STRING, 4058 }); 4059 4060=item user_scores_fallback_to_global (default: 1) 4061 4062Fall back to global scores and settings if userprefs can't be loaded 4063from SQL or LDAP, instead of passing the message through unprocessed. 4064 4065=cut 4066 4067 push (@cmds, { 4068 setting => 'user_scores_fallback_to_global', 4069 is_admin => 1, 4070 default => 1, 4071 type => $CONF_TYPE_BOOL, 4072 }); 4073 4074=item loadplugin [Mail::SpamAssassin::Plugin::]ModuleName [/path/module.pm] 4075 4076Load a SpamAssassin plugin module. 
The C<ModuleName> is the perl module
name, used to create the plugin object itself.

Module naming is strict, name must only contain alphanumeric characters or
underscores.  File must have .pm extension.

C</path/module.pm> is the file to load, containing the module's perl code;
if it's specified as a relative path, it's considered to be relative to the
current configuration file.  If it is omitted, the module will be loaded
using perl's search path (the C<@INC> array).

See C<Mail::SpamAssassin::Plugin> for more details on writing plugins.

=cut

  # loadplugin: "Package::Name [path.pm]"; the package may have up to ten
  # "Word::" prefixes, the optional path must end in ".pm".
  push @cmds, {
    setting  => 'loadplugin',
    is_admin => 1,
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      return $MISSING_REQUIRED_VALUE if $value eq '';

      local ($1,$2);
      $value =~ /^((?:\w+::){0,10}\w+)(?:\s+(\S+\.pm))?$/i
        or return $INVALID_VALUE;
      my ($package, $path) = ($1, $2);

      $self->load_plugin ($package, $path);
    },
  };

=item tryplugin ModuleName [/path/module.pm]

Same as C<loadplugin>, but silently ignored if the .pm file cannot be found in
the filesystem.

=cut

  # tryplugin: same syntax as loadplugin, but load_plugin() is called with
  # a true third argument.
  push @cmds, {
    setting  => 'tryplugin',
    is_admin => 1,
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      return $MISSING_REQUIRED_VALUE if $value eq '';

      local ($1,$2);
      $value =~ /^((?:\w+::){0,10}\w+)(?:\s+(\S+\.pm))?$/i
        or return $INVALID_VALUE;
      my ($package, $path) = ($1, $2);

      $self->load_plugin ($package, $path, 1);
    },
  };

=item ignore_always_matching_regexps         (Default: 0)

Ignore any rule which contains a regexp which always matches.
Currently only catches regexps which contain '||', or which begin or
end with a '|'.  Also ignore rules with C<some> combinatorial explosions.
4141 4142=cut 4143 4144 push (@cmds, { 4145 setting => 'ignore_always_matching_regexps', 4146 is_admin => 1, 4147 default => 0, 4148 type => $CONF_TYPE_BOOL, 4149 }); 4150 4151=back 4152 4153=head1 PREPROCESSING OPTIONS 4154 4155=over 4 4156 4157=item include filename 4158 4159Include configuration lines from C<filename>. Relative paths are considered 4160relative to the current configuration file or user preferences file. 4161 4162=item if (boolean perl expression) 4163 4164Used to support conditional interpretation of the configuration 4165file. Lines between this and a corresponding C<else> or C<endif> line 4166will be ignored unless the expression evaluates as true 4167(in the perl sense; that is, defined and non-0 and non-empty string). 4168 4169The conditional accepts a limited subset of perl for security -- just enough to 4170perform basic arithmetic comparisons. The following input is accepted: 4171 4172=over 4 4173 4174=item numbers, whitespace, arithmetic operations and grouping 4175 4176Namely these characters and ranges: 4177 4178 ( ) - + * / _ . , < = > ! ~ 0-9 whitespace 4179 4180=item version 4181 4182This will be replaced with the version number of the currently-running 4183SpamAssassin engine. Note: The version used is in the internal SpamAssassin 4184version format which is C<x.yyyzzz>, where x is major version, y is minor 4185version, and z is maintenance version. So 3.0.0 is C<3.000000>, and 3.4.80 4186is C<3.004080>. 4187 4188=item perl_version 4189 4190(Introduced in 3.4.1) This will be replaced with the version number of the 4191currently-running perl engine. Note: The version used is in the $] version 4192format which is C<x.yyyzzz>, where x is major version, y is minor version, 4193and z is maintenance version. So 5.8.8 is C<5.008008>, and 5.10.0 is 4194C<5.010000>. Use to protect rules that incorporate RE syntax elements 4195introduced in later versions of perl, such as the C<++> non-backtracking 4196match introduced in perl 5.10. 
For example: 4197 4198 # Avoid lint error on older perl installs 4199 # Check SA version first to avoid warnings on checking perl_version on older SA 4200 if version > 3.004001 && perl_version >= 5.018000 4201 body INVALID_RE_SYNTAX_IN_PERL_BEFORE_5_18 /(?[ \p{Thai} & \p{Digit} ])/ 4202 endif 4203 4204Note that the above will still generate a warning on perl older than 5.10.0; 4205to avoid that warning do this instead: 4206 4207 # Avoid lint error on older perl installs 4208 if can(Mail::SpamAssassin::Conf::perl_min_version_5010000) 4209 body INVALID_RE_SYNTAX_IN_PERL_5_8 /\w++/ 4210 endif 4211 4212Warning: a can() test is only defined for perl 5.10.0! 4213 4214 4215=item plugin(Name::Of::Plugin) 4216 4217This is a function call that returns C<1> if the plugin named 4218C<Name::Of::Plugin> is loaded, or C<undef> otherwise. 4219 4220=item has(Name::Of::Package::function_name) 4221 4222This is a function call that returns C<1> if the perl package named 4223C<Name::Of::Package> includes a function called C<function_name>, or C<undef> 4224otherwise. Note that packages can be SpamAssassin plugins or built-in classes, 4225there's no difference in this respect. Internally this invokes UNIVERSAL::can. 4226 4227=item can(Name::Of::Package::function_name) 4228 4229This is a function call that returns C<1> if the perl package named 4230C<Name::Of::Package> includes a function called C<function_name> 4231B<and> that function returns a true value when called with no arguments, 4232otherwise C<undef> is returned. 4233 4234Is similar to C<has>, except that it also calls the named function, 4235testing its return value (unlike the perl function UNIVERSAL::can). 4236This makes it possible for a 'feature' function to determine its result 4237value at run time. 4238 4239=back 4240 4241If the end of a configuration file is reached while still inside a 4242C<if> scope, a warning will be issued, but parsing will restart on 4243the next file. 

For example:

  if (version > 3.000000)
    header MY_FOO	...
  endif

  loadplugin MyPlugin plugintest.pm

  if plugin (MyPlugin)
    header MY_PLUGIN_FOO	eval:check_for_foo()
    score  MY_PLUGIN_FOO	0.1
  endif

=item ifplugin PluginModuleName

An alias for C<if plugin(PluginModuleName)>.

=item else

Used to support conditional interpretation of the configuration
file. Lines between this and a corresponding C<endif> line,
will be ignored unless the conditional expression evaluates as false
(in the perl sense; that is, undefined, 0, or an empty string).

=item require_version n.nnnnnn

Indicates that the entire file, from this line on, requires a certain
version of SpamAssassin to run.  If a different (older or newer) version
of SpamAssassin tries to read the configuration from this file, it will
output a warning instead, and ignore it.

Note: The version used is in the internal SpamAssassin version format which is
C<x.yyyzzz>, where x is major version, y is minor version, and z is maintenance
version.  So 3.0.0 is C<3.000000>, and 3.4.80 is C<3.004080>.

=cut

  # require_version: registered with a handler that deliberately does
  # nothing and stores nothing.
  # NOTE(review): presumably the version comparison itself is performed
  # elsewhere (by the parser) - confirm.
  push @cmds, {
    setting => 'require_version',
    type    => $CONF_TYPE_STRING,
    code    => sub { },
  };

=back

=head1 TEMPLATE TAGS

The following C<tags> can be used as placeholders in certain options.
They will be replaced by the corresponding value when they are used.

Some tags can take an argument (in parentheses). The argument is
optional, and the default is shown below.
4298 4299 _YESNO_ "Yes" for spam, "No" for nonspam (=ham) 4300 _YESNO(spam_str,ham_str)_ returns the first argument ("Yes" if missing) 4301 for spam, and the second argument ("No" if missing) for ham 4302 _YESNOCAPS_ "YES" for spam, "NO" for nonspam (=ham) 4303 _YESNOCAPS(spam_str,ham_str)_ same as _YESNO(...)_, but uppercased 4304 _SCORE(PAD)_ message score, if PAD is included and is either spaces or 4305 zeroes, then pad scores with that many spaces or zeroes 4306 (default, none) ie: _SCORE(0)_ makes 2.4 become 02.4, 4307 _SCORE(00)_ is 002.4. 12.3 would be 12.3 and 012.3 4308 respectively. 4309 _REQD_ message threshold 4310 _VERSION_ version (eg. 3.0.0 or 3.1.0-r26142-foo1) 4311 _SUBVERSION_ sub-version/code revision date (eg. 2004-01-10) 4312 _RULESVERSION_ comma-separated list of rules versions, retrieved from 4313 an '# UPDATE version' comment in rules files; if there is 4314 more than one set of rules (update channels) the order 4315 is unspecified (currently sorted by names of files); 4316 _HOSTNAME_ hostname of the machine the mail was processed on 4317 _REMOTEHOSTNAME_ hostname of the machine the mail was sent from, only 4318 available with spamd 4319 _REMOTEHOSTADDR_ ip address of the machine the mail was sent from, only 4320 available with spamd 4321 _BAYES_ bayes score 4322 _TOKENSUMMARY_ number of new, neutral, spammy, and hammy tokens found 4323 _BAYESTC_ number of new tokens found 4324 _BAYESTCLEARNED_ number of seen tokens found 4325 _BAYESTCSPAMMY_ number of spammy tokens found 4326 _BAYESTCHAMMY_ number of hammy tokens found 4327 _HAMMYTOKENS(N)_ the N most significant hammy tokens (default, 5) 4328 _SPAMMYTOKENS(N)_ the N most significant spammy tokens (default, 5) 4329 _DATE_ rfc-2822 date of scan 4330 _STARS(*)_ one "*" (use any character) for each full score point 4331 (note: limited to 50 'stars') 4332 _SENDERDOMAIN_ a domain name of the envelope sender address, lowercased 4333 _AUTHORDOMAIN_ a domain name of the author address (the From 
header 4334 field), lowercased; note that RFC 5322 allows a mail 4335 message to have multiple authors - currently only the 4336 domain name of the first email address is returned 4337 _RELAYSTRUSTED_ relays used and deemed to be trusted (see the 4338 'X-Spam-Relays-Trusted' pseudo-header) 4339 _RELAYSUNTRUSTED_ relays used that can not be trusted (see the 4340 'X-Spam-Relays-Untrusted' pseudo-header) 4341 _RELAYSINTERNAL_ relays used and deemed to be internal (see the 4342 'X-Spam-Relays-Internal' pseudo-header) 4343 _RELAYSEXTERNAL_ relays used and deemed to be external (see the 4344 'X-Spam-Relays-External' pseudo-header) 4345 _LASTEXTERNALIP_ IP address of client in the external-to-internal 4346 SMTP handover 4347 _LASTEXTERNALRDNS_ reverse-DNS of client in the external-to-internal 4348 SMTP handover 4349 _LASTEXTERNALHELO_ HELO string used by client in the external-to-internal 4350 SMTP handover 4351 _AUTOLEARN_ autolearn status ("ham", "no", "spam", "disabled", 4352 "failed", "unavailable") 4353 _AUTOLEARNSCORE_ portion of message score used by autolearn 4354 _TESTS(,)_ tests hit separated by "," (or other separator) 4355 _TESTSSCORES(,)_ as above, except with scores appended (eg. AWL=-3.0,...) 
 _SUBTESTS(,)_     subtests (start with "__") hit separated by ","
                   (or other separator)
 _SUBTESTSCOLLAPSED(,)_ subtests (start with "__") hit separated by ","
                   (or other separator) with duplicated rules collapsed
 _DCCB_            DCC's "Brand"
 _DCCR_            DCC's results
 _PYZOR_           Pyzor results
 _RBL_             full results for positive RBL queries in DNS URI format
 _LANGUAGES_       possible languages of mail
 _PREVIEW_         content preview
 _REPORT_          terse report of tests hit (for header reports)
 _SUBJPREFIX_      subject prefix based on rules, to be prepended to Subject
                   header by SpamAssassin caller
 _SUMMARY_         summary of tests hit for standard report (for body reports)
 _CONTACTADDRESS_  contents of the 'report_contact' setting
 _HEADER(NAME)_    includes the value of a message header.  value is the same
                   as is found for header rules (see elsewhere in this doc)
 _TIMING_          timing breakdown report
 _ADDEDHEADERHAM_  resulting header fields as requested by add_header for ham
 _ADDEDHEADERSPAM_ resulting header fields as requested by add_header for spam
 _ADDEDHEADER_     same as ADDEDHEADERHAM for ham or ADDEDHEADERSPAM for spam

If a tag reference uses the name of a tag which is not in this list or defined
by a loaded plugin, the reference will be left intact and not replaced by any
value.
All template tag names should be restricted to the character set [A-Za-z0-9(,)].

Additional, plugin specific, template tags can be found in the documentation for
the following plugins:

 L<Mail::SpamAssassin::Plugin::ASN>
 L<Mail::SpamAssassin::Plugin::AWL>
 L<Mail::SpamAssassin::Plugin::TxRep>

The C<HAMMYTOKENS> and C<SPAMMYTOKENS> tags have an optional second argument
which specifies a format.  See the B<HAMMYTOKENS/SPAMMYTOKENS TAG FORMAT>
section, below, for details.
4393 4394=head2 HAMMYTOKENS/SPAMMYTOKENS TAG FORMAT 4395 4396The C<HAMMYTOKENS> and C<SPAMMYTOKENS> tags have an optional second argument 4397which specifies a format: C<_SPAMMYTOKENS(N,FMT)_>, C<_HAMMYTOKENS(N,FMT)_> 4398The following formats are available: 4399 4400=over 4 4401 4402=item short 4403 4404Only the tokens themselves are listed. 4405I<For example, preference file entry:> 4406 4407C<add_header all Spammy _SPAMMYTOKENS(2,short)_> 4408 4409I<Results in message header:> 4410 4411C<X-Spam-Spammy: remove.php, UD:jpg> 4412 4413Indicating that the top two spammy tokens found are C<remove.php> 4414and C<UD:jpg>. (The token itself follows the last colon, the 4415text before the colon indicates something about the token. 4416C<UD> means the token looks like it might be part of a domain name.) 4417 4418=item compact 4419 4420The token probability, an abbreviated declassification distance (see 4421example), and the token are listed. 4422I<For example, preference file entry:> 4423 4424C<add_header all Spammy _SPAMMYTOKENS(2,compact)_> 4425 4426I<Results in message header:> 4427 4428C<0.989-6--remove.php, 0.988-+--UD:jpg> 4429 4430Indicating that the probabilities of the top two tokens are 0.989 and 44310.988, respectively. The first token has a declassification distance 4432of 6, meaning that if the token had appeared in at least 6 more ham 4433messages it would not be considered spammy. The C<+> for the second 4434token indicates a declassification distance greater than 9. 4435 4436=item long 4437 4438Probability, declassification distance, number of times seen in a ham 4439message, number of times seen in a spam message, age and the token are 4440listed. 
4441 4442I<For example, preference file entry:> 4443 4444C<add_header all Spammy _SPAMMYTOKENS(2,long)_> 4445 4446I<Results in message header:> 4447 4448C<X-Spam-Spammy: 0.989-6--0h-4s--4d--remove.php, 0.988-33--2h-25s--1d--UD:jpg> 4449 4450In addition to the information provided by the compact option, 4451the long option shows that the first token appeared in zero 4452ham messages and four spam messages, and that it was last 4453seen four days ago. The second token appeared in two ham messages, 445425 spam messages and was last seen one day ago. 4455(Unlike the C<compact> option, the long option shows declassification 4456distances that are greater than 9.) 4457 4458=back 4459 4460=cut 4461 4462 return \@cmds; 4463} 4464 4465########################################################################### 4466 4467# settings that were once part of core, but are now in (possibly-optional) 4468# bundled plugins. These will be warned about, but do not generate a fatal 4469# error when "spamassassin --lint" is run like a normal syntax error would. 

our @MIGRATED_SETTINGS = qw{
  ok_languages
};

###########################################################################

# Constructor.  The first argument is the class name (or an existing object,
# whose class is then reused), the second is stored under {main}.  Creates
# the parser, registers the default commands and fills in the initial,
# mostly empty, configuration tables.
sub new {
  my ($proto, $main) = @_;

  my $self = bless {
    main                => $main,
    registered_commands => [],
  }, ref($proto) || $proto;

  my $parser = Mail::SpamAssassin::Conf::Parser->new($self);
  $self->{parser} = $parser;
  $parser->register_commands($self->set_default_commands());

  $self->{errors} = 0;

  # Tables which start out as empty hashes.
  #
  # descriptions: a plain hash.  NOTE: Deprecated usage of TieOneStringHash
  # as of 10/2018, it's an absolute pig, doubling config parse time, while
  # benchmarks indicate no difference in resident memory size!
  #   tie %{$self->{descriptions}}, 'Mail::SpamAssassin::Util::TieOneStringHash'
  #     or warn "tie failed";
  #
  # *_tests / *_evals: after parsing, tests are refiled into these hashes
  # for each test type.  this allows e.g. a full-text test to be rewritten
  # as a body test in the user's user_prefs file.  (uri_evals is not
  # used/implemented yet.)
  #
  # regression_tests: testing stuff.
  my @empty_hash_keys = qw(
    plugins_loaded
    tests test_types tflags source_file
    descriptions subjprefix
    body_tests uri_tests uri_evals head_tests head_evals body_evals
    full_tests full_evals rawbody_tests rawbody_evals meta_tests
    eval_plugins duplicate_rules
    regression_tests
    rewrite_header want_rebuild_for_type user_defined_rules
    bayes_ignore_from bayes_ignore_to
    whitelist_auth def_whitelist_auth whitelist_from
    whitelist_allows_relays blacklist_from
    whitelist_from_rcvd def_whitelist_from_rcvd
    blacklist_to whitelist_to more_spam_to all_spam_to
  );
  foreach my $name (@empty_hash_keys) {
    $self->{$name} = { };
  }

  # Tables which start out as empty lists.
  foreach my $name (qw(headers_spam headers_ham bayes_ignore_headers)) {
    $self->{$name} = [ ];
  }

  # four score sets, the first one selected
  $self->{scoreset} = [ {}, {}, {}, {} ];
  $self->{scoreset_current} = 0;
  $self->set_score_set (0);

  $self->{trusted_networks}  = $self->new_netset('trusted_networks',1);
  $self->{internal_networks} = $self->new_netset('internal_networks',1);
  $self->{msa_networks}      = $self->new_netset('msa_networks',0); # no loopback IP
  $self->{trusted_networks_configured}  = 0;
  $self->{internal_networks_configured} = 0;

  # Make sure we add in X-Spam-Checker-Version; the very same array ref
  # is shared by the spam and the ham header lists.
  my $checker_version = [ "Checker-Version",
           "SpamAssassin _VERSION_ (_SUBVERSION_) on _HOSTNAME_" ];
  foreach my $list (qw(headers_spam headers_ham)) {
    push(@{$self->{$list}}, $checker_version);
  }

  # RFC 6891: A good compromise may be the use of an EDNS maximum payload size
  # of 4096 octets as a starting point.
  $self->{dns_options}->{edns} = 4096;

  # these should potentially be settable by end-users
  # perhaps via plugin?
  $self->{num_check_received}        = 9;
  $self->{bayes_expiry_pct}          = 0.75;
  $self->{bayes_expiry_period}       = 43200;
  $self->{bayes_expiry_max_exponent} = 9;

  $self->{encapsulated_content_description} = 'original message before SpamAssassin';

  return $self;
}

# Combined getter/setter for {mtime}: stores the argument when one is
# passed, and always returns the current value.
sub mtime {
  my ($self, @new_value) = @_;
  $self->{mtime} = $new_value[0] if @new_value;
  return $self->{mtime};
}

###########################################################################

# Parse the given configuration text with the parser's second argument
# set to 1.  The text is handed on through @_ so that it is not copied.
sub parse_scores_only {
  my $self = shift;
  $self->{parser}->parse ($_[0], 1);
}

# Parse the given configuration text with the parser's second argument
# set to 0.  The text is handed on through @_ so that it is not copied.
sub parse_rules {
  my $self = shift;
  $self->{parser}->parse ($_[0], 0);
}

###########################################################################

# Make {scores} point at score set number $set and remember the choice.
sub set_score_set {
  my ($self, $set) = @_;
  @{$self}{qw(scores scoreset_current)} = ($self->{scoreset}->[$set], $set);
  dbg("config: score set $set chosen.");
}

# Returns the number of the currently selected score set.
sub get_score_set {
  return $_[0]->{scoreset_current};
}

# Returns the list held in the file-level @rule_types.
sub get_rule_types {
  return @rule_types;
}

# Returns the rule names stored for $test_type: those of one priority if
# $priority is given, otherwise those of every priority in {priorities}.
sub get_rule_keys {
  my ($self, $test_type, $priority) = @_;

  # special case rbl_evals since they do not have a priority
  return keys(%{$self->{$test_type}}) if $test_type eq 'rbl_evals';

  return keys(%{$self->{$test_type}->{$priority}}) if defined($priority);

  return map { keys(%{$self->{$test_type}->{$_}}) }
             keys(%{$self->{priorities}});
}

# Returns the stored definition of $rulename of type $test_type.  Without
# $priority every priority is searched; nothing is returned when the rule
# is not found.  For rbl_evals the stored array is returned as a list.
sub get_rule_value {
  my ($self, $test_type, $rulename, $priority) = @_;

  # special case rbl_evals since they do not have a priority
  return @{$self->{$test_type}->{$rulename}} if $test_type eq 'rbl_evals';

  return $self->{$test_type}->{$priority}->{$rulename} if defined($priority);

  foreach my $pri (keys(%{$self->{priorities}})) {
    next unless exists($self->{$test_type}->{$pri}->{$rulename});
    return $self->{$test_type}->{$pri}->{$rulename};
  }
  return;  # if we get here we didn't find the rule
}

# Removes $rulename of type $test_type and returns what was stored for it.
# Without $priority the first priority holding the rule is used; nothing
# is returned when the rule is not found.
sub delete_rule {
  my ($self, $test_type, $rulename, $priority) = @_;

  # special case rbl_evals since they do not have a priority
  return delete($self->{$test_type}->{$rulename}) if $test_type eq 'rbl_evals';

  return delete($self->{$test_type}->{$priority}->{$rulename})
    if defined($priority);

  foreach my $pri (keys(%{$self->{priorities}})) {
    next unless exists($self->{$test_type}->{$pri}->{$rulename});
    return delete($self->{$test_type}->{$pri}->{$rulename});
  }
  return;  # if we get here we didn't find the rule
}

# trim_rules ($regexp)
#
# Remove all rules that don't match the given regexp (or are sub-rules of
# meta-tests that match the regexp).
# trim_rules ($regexp)
#
# Remove every rule whose name does not match $regexp.  Rules that a matching
# meta test depends on (directly, or through other meta tests) are kept too.
#
# Dies if $regexp cannot be compiled, or if no rule name matches it at all.

sub trim_rules {
  my ($self, $regexp) = @_;

  my ($rec, $err) = compile_regexp($regexp, 0);
  if (!$rec) {
    die "config: trim_rules: invalid regexp '$regexp': $err";
  }

  # collect the names of every rule of every type
  my @all_rules;
  foreach my $rule_type ($self->get_rule_types()) {
    push(@all_rules, $self->get_rule_keys($rule_type));
  }

  my @rules_to_keep = grep(/$rec/, @all_rules);

  if (@rules_to_keep == 0) {
    die "config: trim_rules: all rules excluded, nothing to test\n";
  }

  # matching meta tests drag in the sub-rules they are built from
  my @meta_tests = grep(/$rec/, $self->get_rule_keys('meta_tests'));
  foreach my $meta (@meta_tests) {
    push(@rules_to_keep, $self->add_meta_depends($meta));
  }

  my %rules_to_keep_hash = map { $_ => 1 } @rules_to_keep;

  foreach my $rule_type ($self->get_rule_types()) {
    foreach my $rulekey ($self->get_rule_keys($rule_type)) {
      $self->delete_rule($rule_type, $rulekey)
                    if (!$rules_to_keep_hash{$rulekey});
    }
  }
} # trim_rules()

# add_meta_depends ($meta)
#
# Return the names of all sub-rules that meta test $meta depends on,
# following nested meta tests recursively.  A name may be returned more than
# once if it is reachable along several paths.
#
# Dies if $meta refers to itself, either directly or through a chain of
# other meta tests (which would otherwise recurse without end).
#
# The third argument is for internal use by the recursion only: a hash
# reference holding the meta tests currently being expanded.

sub add_meta_depends {
  my ($self, $meta, $in_progress) = @_;

  $in_progress ||= {};
  $in_progress->{$meta} = 1;

  my @rules;

  my @tokens = $self->get_rule_value('meta_tests', $meta) =~ m/(\w+)/g;

  @tokens = grep(!/^\d+$/, @tokens);
  # @tokens now only consists of sub-rules

  foreach my $token (@tokens) {
    die "config: meta test $meta depends on itself\n" if $token eq $meta;
    push(@rules, $token);

    # If the sub-rule is a meta-test, recurse
    if ($self->get_rule_value('meta_tests', $token)) {
      # a meta test already being expanded further up this path means
      # the dependency chain loops back on itself
      die "config: meta test $token depends on itself (via $meta)\n"
        if $in_progress->{$token};
      push(@rules, $self->add_meta_depends($token, $in_progress));
    }
  } # foreach my $token (@tokens)

  # only the current path counts; the same meta test may legitimately be
  # reached again through a different branch
  delete $in_progress->{$meta};

  return @rules;
} # add_meta_depends()

# is_rule_active ($test_type, $rulename [, $priority])
#
# Returns 0 if the rule has no true entry under $test_type (at the given
# priority, or at any known priority when none is given).  Otherwise returns
# the rule's entry in the currently selected score table, so a rule whose
# score is 0 or missing is reported as inactive.

sub is_rule_active {
  my ($self, $test_type, $rulename, $priority) = @_;

  # special case rbl_evals since they do not have a priority
  if ($test_type eq 'rbl_evals') {
    return 0 unless ($self->{$test_type}->{$rulename});
    return ($self->{scores}->{$rulename});
  }

  # first determine if the rule is defined
  if (defined($priority)) {
    # we have a specific priority
    return 0 unless ($self->{$test_type}->{$priority}->{$rulename});
  }
  else {
    # no specific priority so we must loop over all currently defined
    # priorities to see if the rule is defined
    my $found_p = 0;
    foreach my $pri (keys %{$self->{priorities}}) {
      if ($self->{$test_type}->{$pri}->{$rulename}) {
        $found_p = 1;
        last;
      }
    }
    return 0 unless ($found_p);
  }

  return ($self->{scores}->{$rulename});
}

###########################################################################

# treats a bitset argument as a bit vector of all possible port numbers (8 kB)
# and sets bit values to $value (0 or 1) in the specified range of port numbers
#
# This is a plain function, not a method: the first argument is a reference
# to the scalar holding the bitset.  The range is clipped to 0..65535.
#
sub set_ports_range {
  my($bitset_ref, $port_range_lo, $port_range_hi, $value) = @_;
  $port_range_lo = 0      if $port_range_lo < 0;
  $port_range_hi = 65535  if $port_range_hi > 65535;
  if (!defined $$bitset_ref) {  # provide a sensible default
    wipe_ports_range($bitset_ref, 1);  # turn on all bits 0..65535
    vec($$bitset_ref,$_,1) = 0  for 0..1023;  # avoid 0 and privileged ports
  } elsif ($$bitset_ref eq '') {  # repopulate the bitset (late configuration)
    wipe_ports_range($bitset_ref, 0);  # turn off all bits 0..65535
  }
  $value = !$value ? 0 : 1;  # normalize any true/false value to 1/0
  for (my $j = $port_range_lo; $j <= $port_range_hi; $j++) {
    vec($$bitset_ref,$j,1) = $value;
  }
}

# Replaces the bitset referenced by $bitset_ref with 8192 bytes that are all
# zero bits (false $value) or all one bits (true $value).
#
sub wipe_ports_range {
  my($bitset_ref, $value) = @_;
  $value = !$value ? "\000" : "\377";
  $$bitset_ref = $value x 8192;  # quickly turn all bits 0..65535 on or off
}

###########################################################################

# The following four methods simply forward their arguments to the method
# of the same name on the parser object held in $self->{parser}.

sub add_to_addrlist {
  my $self = shift; $self->{parser}->add_to_addrlist(@_);
}
sub add_to_addrlist_rcvd {
  my $self = shift; $self->{parser}->add_to_addrlist_rcvd(@_);
}
sub remove_from_addrlist {
  my $self = shift; $self->{parser}->remove_from_addrlist(@_);
}
sub remove_from_addrlist_rcvd {
  my $self = shift; $self->{parser}->remove_from_addrlist_rcvd(@_);
}

###########################################################################

# regression_tests ([$name])
#
# With a symbolic rule name, returns the list stored for that name in
# $self->{regression_tests}, or an empty list if nothing is stored for it.
# Without a name, returns the symbolic names that have an entry.

sub regression_tests {
  my $self = shift;
  if (@_ == 1) {
    # we specified a symbolic name, return the strings
    my $name = shift;
    my $tests = $self->{regression_tests}->{$name};
    # no entry for this name: dereferencing undef would be fatal under
    # "use strict", so hand back an empty list instead
    return @{ $tests || [] };
  }
  else {
    # no name asked for, just return the symbolic names we have tests for
    return keys %{$self->{regression_tests}};
  }
}

###########################################################################

# Forwards to the parser's finish_parsing() with the $user argument.

sub finish_parsing {
  my ($self, $user) = @_;
  $self->{parser}->finish_parsing($user);
}

###########################################################################

# Returns true if $self->{tests} has at least one key.  The answer is
# computed on the first call and cached in $self->{found_any_rules}.

sub found_any_rules {
  my ($self) = @_;
  if (!defined $self->{found_any_rules}) {
    $self->{found_any_rules} = (scalar keys %{$self->{tests}} > 0);
  }
  return $self->{found_any_rules};
}

###########################################################################

# Returns the entry for $rule in $self->{descriptions} (undef if none).

sub get_description_for_rule {
  my ($self, $rule) = @_;
  # as silly as it looks, localized $1 here prevents an outer $1 from getting
  # tainted by the expression or assignment in the next line, bug 6148
  local($1);
  my $rule_descr = $self->{descriptions}->{$rule};
  return $rule_descr;
}

4864########################################################################### 4865 4866sub maybe_header_only { 4867 my($self,$rulename) = @_; 4868 my $type = $self->{test_types}->{$rulename}; 4869 4870 if ($rulename =~ /AUTOLEARNTEST/i) { 4871 dbg("config: auto-learn: $rulename - Test type is $self->{test_types}->{$rulename}."); 4872 } 4873 4874 return 0 if (!defined ($type)); 4875 4876 if (($type == $TYPE_HEAD_TESTS) || ($type == $TYPE_HEAD_EVALS)) { 4877 return 1; 4878 4879 } elsif ($type == $TYPE_META_TESTS) { 4880 my $tflags = $self->{tflags}->{$rulename}; 4881 $tflags ||= ''; 4882 if ($tflags =~ m/\bnet\b/i) { 4883 return 0; 4884 } else { 4885 return 1; 4886 } 4887 } 4888 4889 return 0; 4890} 4891 4892sub maybe_body_only { 4893 my($self,$rulename) = @_; 4894 my $type = $self->{test_types}->{$rulename}; 4895 4896 if ($rulename =~ /AUTOLEARNTEST/i) { 4897 dbg("config: auto-learn: $rulename - Test type is $self->{test_types}->{$rulename}."); 4898 } 4899 4900 return 0 if (!defined ($type)); 4901 4902 if (($type == $TYPE_BODY_TESTS) || ($type == $TYPE_BODY_EVALS) 4903 || ($type == $TYPE_URI_TESTS) || ($type == $TYPE_URI_EVALS)) 4904 { 4905 # some rawbody go off of headers... 
4906 return 1; 4907 4908 } elsif ($type == $TYPE_META_TESTS) { 4909 my $tflags = $self->{tflags}->{$rulename}; $tflags ||= ''; 4910 if ($tflags =~ m/\bnet\b/i) { 4911 return 0; 4912 } else { 4913 return 1; 4914 } 4915 } 4916 4917 return 0; 4918} 4919 4920########################################################################### 4921 4922sub load_plugin { 4923 my ($self, $package, $path, $silent) = @_; 4924 $self->{main}->{plugins}->load_plugin($package, $path, $silent); 4925} 4926 4927sub load_plugin_succeeded { 4928 my ($self, $plugin, $package, $path) = @_; 4929 $self->{plugins_loaded}->{$package} = 1; 4930} 4931 4932sub register_eval_rule { 4933 my ($self, $pluginobj, $nameofsub) = @_; 4934 $self->{eval_plugins}->{$nameofsub} = $pluginobj; 4935} 4936 4937########################################################################### 4938 4939sub clone { 4940 my ($self, $source, $dest) = @_; 4941 4942 unless (defined $source) { 4943 $source = $self; 4944 } 4945 unless (defined $dest) { 4946 $dest = $self; 4947 } 4948 4949 my %done; 4950 4951 # keys that should not be copied in ->clone(). 4952 # bug 4179: include want_rebuild_for_type, so that if a user rule 4953 # is defined, its method will be recompiled for future scans in 4954 # order to *remove* the generated method calls 4955 my @NON_COPIED_KEYS = qw( 4956 main eval_plugins plugins_loaded registered_commands sed_path_cache parser 4957 scoreset scores want_rebuild_for_type 4958 ); 4959 4960 # special cases. 
first, skip anything that cannot be changed 4961 # by users, and the stuff we take care of here 4962 foreach my $var (@NON_COPIED_KEYS) { 4963 $done{$var} = undef; 4964 } 4965 4966 # keys that should can be copied using a ->clone() method, in ->clone() 4967 my @CLONABLE_KEYS = qw( 4968 internal_networks trusted_networks msa_networks 4969 ); 4970 4971 foreach my $key (@CLONABLE_KEYS) { 4972 $dest->{$key} = $source->{$key}->clone(); 4973 $done{$key} = undef; 4974 } 4975 4976 # two-level hashes 4977 foreach my $key (qw(uri_host_lists askdns)) { 4978 my $v = $source->{$key}; 4979 my $dest_key_ref = $dest->{$key} = {}; # must start from scratch! 4980 while(my($k2,$v2) = each %{$v}) { 4981 %{$dest_key_ref->{$k2}} = %{$v2}; 4982 } 4983 $done{$key} = undef; 4984 } 4985 4986 # bug 4179: be smarter about cloning the rule-type structures; 4987 # some are like this: $self->{type}->{priority}->{name} = 'value'; 4988 # which is an extra level that the below code won't deal with 4989 foreach my $t (@rule_types) { 4990 foreach my $k (keys %{$source->{$t}}) { 4991 my $v = $source->{$t}->{$k}; 4992 my $i = ref $v; 4993 if ($i eq 'HASH') { 4994 %{$dest->{$t}->{$k}} = %{$v}; 4995 } 4996 elsif ($i eq 'ARRAY') { 4997 @{$dest->{$t}->{$k}} = @{$v}; 4998 } 4999 else { 5000 $dest->{$t}->{$k} = $v; 5001 } 5002 } 5003 $done{$t} = undef; 5004 } 5005 5006 # and now, copy over all the rest -- the less complex cases. 5007 while(my($k,$v) = each %{$source}) { 5008 next if exists $done{$k}; # we handled it above 5009 $done{$k} = undef; 5010 my $i = ref($v); 5011 5012 # Not a reference, or a scalar? Just copy the value over. 
5013 if ($i eq '') { 5014 $dest->{$k} = $v; 5015 } 5016 elsif ($i eq 'SCALAR') { 5017 $dest->{$k} = $$v; 5018 } 5019 elsif ($i eq 'ARRAY') { 5020 @{$dest->{$k}} = @{$v}; 5021 } 5022 elsif ($i eq 'HASH') { 5023 %{$dest->{$k}} = %{$v}; 5024 } 5025 elsif ($i eq 'Regexp') { 5026 $dest->{$k} = $v; 5027 } 5028 else { 5029 # throw a warning for debugging -- should never happen in normal usage 5030 warn "config: dup unknown type $k, $i\n"; 5031 } 5032 } 5033 5034 foreach my $cmd (@{$self->{registered_commands}}) { 5035 my $k = $cmd->{setting}; 5036 next if exists $done{$k}; # we handled it above 5037 $done{$k} = undef; 5038 $dest->{$k} = $source->{$k}; 5039 } 5040 5041 # scoresets 5042 delete $dest->{scoreset}; 5043 for my $i (0 .. 3) { 5044 %{$dest->{scoreset}->[$i]} = %{$source->{scoreset}->[$i]}; 5045 } 5046 5047 # deal with $conf->{scores}, it needs to be a reference into the scoreset 5048 # hash array dealy. Do it at the end since scoreset_current isn't set 5049 # otherwise. 5050 $dest->{scores} = $dest->{scoreset}->[$dest->{scoreset_current}]; 5051 5052 # ensure we don't copy the path cache from the master 5053 delete $dest->{sed_path_cache}; 5054 5055 return 1; 5056} 5057 5058########################################################################### 5059 5060sub free_uncompiled_rule_source { 5061 my ($self) = @_; 5062 5063 if (!$self->{main}->{keep_config_parsing_metadata} && 5064 !$self->{allow_user_rules}) 5065 { 5066 delete $self->{if_stack}; 5067 #delete $self->{source_file}; 5068 #delete $self->{meta_dependencies}; 5069 } 5070} 5071 5072sub new_netset { 5073 my ($self, $netset_name, $add_loopback) = @_; 5074 my $set = Mail::SpamAssassin::NetSet->new($netset_name); 5075 if ($add_loopback) { 5076 $set->add_cidr('127.0.0.0/8'); 5077 $set->add_cidr('::1'); 5078 } 5079 return $set; 5080} 5081 5082########################################################################### 5083 5084sub finish { 5085 my ($self) = @_; 5086 #untie %{$self->{descriptions}}; 5087 %{$self} 
= (); 5088} 5089 5090########################################################################### 5091 5092sub sa_die { Mail::SpamAssassin::sa_die(@_); } 5093 5094########################################################################### 5095 5096# subroutines available to conditionalize rules, for example: 5097# if (can(Mail::SpamAssassin::Conf::feature_originating_ip_headers)) 5098 5099sub feature_originating_ip_headers { 1 } 5100sub feature_dns_local_ports_permit_avoid { 1 } 5101sub feature_bayes_auto_learn_on_error { 1 } 5102sub feature_uri_host_listed { 1 } 5103sub feature_yesno_takes_args { 1 } 5104sub feature_bug6558_free { 1 } 5105sub feature_edns { 1 } # supports 'dns_options edns' config option 5106sub feature_dns_query_restriction { 1 } # supported config option 5107sub feature_registryboundaries { 1 } # replaces deprecated registrarboundaries 5108sub feature_compile_regexp { 1 } # Util::compile_regexp 5109sub feature_meta_rules_matching { 1 } # meta rules_matching() expression 5110sub feature_subjprefix { 1 } # add subject prefixes rule option 5111sub has_tflags_nosubject { 1 } # tflags nosubject 5112sub perl_min_version_5010000 { return $] >= 5.010000 } # perl version check ("perl_version" not neatly backwards-compatible) 5113 5114########################################################################### 5115 51161; 5117__END__ 5118 5119=head1 LOCALI[SZ]ATION 5120 5121A line starting with the text C<lang xx> will only be interpreted 5122if the user is in that locale, allowing test descriptions and 5123templates to be set for that language. 5124 5125The locales string should specify either both the language and country, e.g. 5126C<lang pt_BR>, or just the language, e.g. C<lang de>. 5127 5128=head1 SEE ALSO 5129 5130Mail::SpamAssassin(3) 5131spamassassin(1) 5132spamd(1) 5133 5134=cut 5135