1# <@LICENSE> 2# Licensed to the Apache Software Foundation (ASF) under one or more 3# contributor license agreements. See the NOTICE file distributed with 4# this work for additional information regarding copyright ownership. 5# The ASF licenses this file to you under the Apache License, Version 2.0 6# (the "License"); you may not use this file except in compliance with 7# the License. You may obtain a copy of the License at: 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16# </@LICENSE> 17 18=head1 NAME 19 20Mail::SpamAssassin::Conf - SpamAssassin configuration file 21 22=head1 SYNOPSIS 23 24 # a comment 25 26 rewrite_header Subject *****SPAM***** 27 28 full PARA_A_2_C_OF_1618 /Paragraph .a.{0,10}2.{0,10}C. of S. 1618/i 29 describe PARA_A_2_C_OF_1618 Claims compliance with senate bill 1618 30 31 header FROM_HAS_MIXED_NUMS From =~ /\d+[a-z]+\d+\S*@/i 32 describe FROM_HAS_MIXED_NUMS From: contains numbers mixed in with letters 33 34 score A_HREF_TO_REMOVE 2.0 35 36 lang es describe FROM_FORGED_HOTMAIL Forzado From: simula ser de hotmail.com 37 38 lang pt_BR report O programa detetor de Spam ZOE [...] 39 40=head1 DESCRIPTION 41 42SpamAssassin is configured using traditional UNIX-style configuration files, 43loaded from the C</usr/share/spamassassin> and C</etc/mail/spamassassin> 44directories. 45 46The following web page lists the most important configuration settings 47used to configure SpamAssassin; novices are encouraged to read it first: 48 49 http://wiki.apache.org/spamassassin/ImportantInitialConfigItems 50 51=head1 FILE FORMAT 52 53The C<#> character starts a comment, which continues until end of line. 
B<NOTE:> if the C<#> character is to be used as part of a rule or
configuration option, it must be escaped with a backslash.  i.e.: C<\#>

Whitespace in the files is not significant, but please note that starting a
line with whitespace is deprecated, as we reserve its use for multi-line rule
definitions, at some point in the future.

Currently, each rule or configuration setting must fit on one line; multi-line
settings are not supported yet.

File and directory paths can use C<~> to refer to the user's home
directory, but no other shell-style path extensions such as globbing or
C<~user/> are supported.

Where appropriate below, default values are listed in parentheses.

Test names ("SYMBOLIC_TEST_NAME") can only contain alphanumerics/underscores,
cannot start with a digit, and must be less than 128 characters.

=head1 USER PREFERENCES

The following options can be used in both site-wide (C<local.cf>) and
user-specific (C<user_prefs>) configuration files to customize how
SpamAssassin handles incoming email messages.

=cut

package Mail::SpamAssassin::Conf;

use strict;
use warnings;
# use bytes;
use re 'taint';

use Mail::SpamAssassin::NetSet;
use Mail::SpamAssassin::Constants qw(:sa :ip);
use Mail::SpamAssassin::Conf::Parser;
use Mail::SpamAssassin::Logger;
use Mail::SpamAssassin::Util qw(untaint_var idn_to_ascii compile_regexp);
use File::Spec;

our @ISA = qw();

our $COLLECT_REGRESSION_TESTS; # Used only for unit tests.

# odd => eval test.  Not constants so they can be shared with Parser
# TODO: move to Constants.pm?
# Internal rule-type codes, declared pairwise: the pattern-test code of a
# family is even and the matching eval-test code is the odd value after it.
our ($TYPE_HEAD_TESTS,    $TYPE_HEAD_EVALS)    = (0x0008, 0x0009);
our ($TYPE_BODY_TESTS,    $TYPE_BODY_EVALS)    = (0x000a, 0x000b);
our ($TYPE_FULL_TESTS,    $TYPE_FULL_EVALS)    = (0x000c, 0x000d);
our ($TYPE_RAWBODY_TESTS, $TYPE_RAWBODY_EVALS) = (0x000e, 0x000f);
our ($TYPE_URI_TESTS,     $TYPE_URI_EVALS)     = (0x0010, 0x0011);
our ($TYPE_META_TESTS,    $TYPE_RBL_EVALS)     = (0x0012, 0x0013);
our $TYPE_EMPTY_TESTS                          = 0x0014;

my @rule_types = qw(
  body_tests uri_tests uri_evals
  head_tests head_evals body_evals full_tests
  full_evals rawbody_tests rawbody_evals
  rbl_evals meta_tests
);

# Map internal ruletype to descriptive ruletype string.  The table is
# written per descriptive string and then inverted into code => string.
our %TYPE_AS_STRING;
{
  my @codes_by_label = (
    header  => [ $TYPE_HEAD_TESTS,    $TYPE_HEAD_EVALS, $TYPE_RBL_EVALS ],
    body    => [ $TYPE_BODY_TESTS,    $TYPE_BODY_EVALS ],
    full    => [ $TYPE_FULL_TESTS,    $TYPE_FULL_EVALS ],
    rawbody => [ $TYPE_RAWBODY_TESTS, $TYPE_RAWBODY_EVALS ],
    uri     => [ $TYPE_URI_TESTS,     $TYPE_URI_EVALS ],
    meta    => [ $TYPE_META_TESTS ],
    empty   => [ $TYPE_EMPTY_TESTS ],
  );
  while (my ($label, $codes) = splice(@codes_by_label, 0, 2)) {
    $TYPE_AS_STRING{$_} = $label  for @$codes;
  }
}

#Removed $VERSION per BUG 6422
#$VERSION = 'bogus';    # avoid CPAN.pm picking up version strings later

# these are variables instead of constants so that other classes can
# access them; if they're constants, they'd have to go in Constants.pm
# TODO: move to Constants.pm?
# Value types a configuration setting can declare in its command table entry.
our ($CONF_TYPE_STRING,         $CONF_TYPE_BOOL)       = (1, 2);
our ($CONF_TYPE_NUMERIC,        $CONF_TYPE_HASH_KEY_VALUE) = (3, 4);
our ($CONF_TYPE_ADDRLIST,       $CONF_TYPE_TEMPLATE)   = (5, 6);
our ($CONF_TYPE_NOARGS,         $CONF_TYPE_STRINGLIST) = (7, 8);
our ($CONF_TYPE_IPADDRLIST,     $CONF_TYPE_DURATION)   = (9, 10);

# Sentinel return values of a setting's code handler; kept as strings.
our $MISSING_REQUIRED_VALUE    = '-99999999999999'; # string expected by parser
our $INVALID_VALUE             = '-99999999999998';
our $INVALID_HEADER_FIELD_NAME = '-99999999999997';

# set to "1" by the test suite code, to record regression tests
# $Mail::SpamAssassin::Conf::COLLECT_REGRESSION_TESTS = 1;

# search for "sub new {" to find the start of the code
###########################################################################

sub set_default_commands {
  my ($self) = @_;

  # see "perldoc Mail::SpamAssassin::Conf::Parser" for details on this fmt.
  # push each config item like this, to avoid a POD bug; it can't just accept
  # ( { ... }, { ... }, { ...} ) otherwise POD parsing dies.
  my @cmds;

=head2 SCORING OPTIONS

=over 4

=item required_score n.nn (default: 5)

Set the score required before a mail is considered spam.  C<n.nn> can
be an integer or a real number.  5.0 is the default setting, and is
quite aggressive; it would be suitable for a single-user setup, but if
you're an ISP installing SpamAssassin, you should probably set the
default to be more conservative, like 8.0 or 10.0.  It is not
recommended to automatically delete or discard messages marked as
spam, as your users B<will> complain, but if you choose to do so, only
delete messages with an exceptionally high score such as 15.0 or
higher. This option was previously known as C<required_hits> and that
name is still accepted, but is deprecated.

=cut

  # Spam threshold; "required_hits" is the pre-rename spelling.
  push (@cmds, {
    setting => 'required_score',
    type => $CONF_TYPE_NUMERIC,
    default => 5,
    aliases => [ qw(required_hits) ],   # backward compatible
  });

=item score SYMBOLIC_TEST_NAME n.nn [ n.nn n.nn n.nn ]

Assign scores (the number of points for a hit) to a given test.
Scores can be positive or negative real numbers or integers.
C<SYMBOLIC_TEST_NAME> is the symbolic name used by SpamAssassin for
that test; for example, 'FROM_ENDS_IN_NUMS'.

If only one valid score is listed, then that score is always used
for a test.

If four valid scores are listed, then the score that is used depends
on how SpamAssassin is being used. The first score is used when
both Bayes and network tests are disabled (score set 0). The second
score is used when Bayes is disabled, but network tests are enabled
(score set 1). The third score is used when Bayes is enabled and
network tests are disabled (score set 2). The fourth score is used
when Bayes is enabled and network tests are enabled (score set 3).

Setting a rule's score to 0 will disable that rule from running.

If any of the score values are surrounded by parenthesis '()', then
all of the scores in the line are considered to be relative to the
already set score.  ie: '(3)' means increase the score for this
rule by 3 points in all score sets.  '(3) (0) (3) (0)' means increase
the score for this rule by 3 in score sets 0 and 2 only.

If no score is given for a test by the end of the configuration,
a default score is assigned: a score of 1.0 is used for all tests,
except those whose names begin with 'T_' (this is used to indicate a
rule in testing) which receive 0.01.

Note that test names which begin with '__' are indirect rules used
to compose meta-match rules and can also act as prerequisites to
other rules.
They are not scored or listed in the 'tests hit'
reports, but assigning a score of 0 to an indirect rule will disable
it from running.

=cut

  push (@cmds, {
    setting => 'score',
    # Handler for "score RULE n.nn [ n.nn n.nn n.nn ]".
    # Writes the resulting score of RULE into each of the four score sets
    # in $self->{scoreset}.  Returns $MISSING_REQUIRED_VALUE when the rule
    # name or the 1-or-4 scores are missing, $INVALID_VALUE for a
    # non-numeric score or a relative score without a previous setting,
    # and nothing on success.
    code => sub {
      my ($self, $key, $value, $line) = @_;

      # split only a defined value, so that an argument-less directive is
      # reported below instead of raising an "uninitialized value" warning
      my($rule, @scores) = defined $value ? split(/\s+/, $value) : ();
      unless (defined $value && $value !~ /^$/ &&
              (scalar @scores == 1 || scalar @scores == 4)) {
        info("config: score: requires a symbolic rule name and 1 or 4 scores");
        return $MISSING_REQUIRED_VALUE;
      }

      # Figure out if we're doing relative scores, remove the parens if we are
      my $relative = 0;
      foreach my $score (@scores) {   # aliased: the parens are stripped in place
        local ($1);
        if ($score =~ s/^\((-?\d+(?:\.\d+)?)\)$/$1/) {
          $relative = 1;
        }
        unless ($score =~ /^-?\d+(?:\.\d+)?$/) {
          info("config: score: the non-numeric score ($score) is not valid, " .
               "a numeric score is required");
          return $INVALID_VALUE;
        }
      }

      if ($relative && !exists $self->{scoreset}->[0]->{$rule}) {
        info("config: score: relative score without previous setting in " .
             "configuration");
        return $INVALID_VALUE;
      }

      # If we're only passed 1 score, copy it to the other scoresets
      # (the count was validated above, so it is either 1 or 4 here)
      if (@scores != 4) {
        @scores = ( $scores[0], $scores[0], $scores[0], $scores[0] );
      }

      # Set the actual scoreset values appropriately
      for my $index (0..3) {
        my $score = $relative ?
          $self->{scoreset}->[$index]->{$rule} + $scores[$index] :
          $scores[$index];

        $self->{scoreset}->[$index]->{$rule} = $score + 0.0;
      }

      # explicit: the value of a sub ending in a loop is unspecified
      return;
    }
  });

=back

=head2 WHITELIST AND BLACKLIST OPTIONS

=over 4

=item welcomelist_from user@example.com

Previously whitelist_from which will work interchangeably until 4.1.

Used to whitelist sender addresses which send mail that is often tagged
(incorrectly) as spam.

Use of this setting is not recommended, since it blindly trusts the message,
which is routinely and easily forged by spammers and phish senders. The
recommended solution is to instead use C<welcomelist_auth> or other authenticated
whitelisting methods, or C<welcomelist_from_rcvd>.

Whitelist and blacklist addresses are now file-glob-style patterns, so
C<friend@somewhere.com>, C<*@isp.com>, or C<*.domain.net> will all work.
Specifically, C<*> and C<?> are allowed, but all other metacharacters
are not. Regular expressions are not used for security reasons.
Matching is case-insensitive.

Multiple addresses per line, separated by spaces, is OK.  Multiple
C<welcomelist_from> lines are also OK.

The headers checked for whitelist addresses are as follows: if C<Resent-From>
is set, use that; otherwise check all addresses taken from the following
set of headers:

  Envelope-Sender
  Resent-Sender
  X-Envelope-From
  From

In addition, the "envelope sender" data, taken from the SMTP envelope data
where this is available, is looked up.  See C<envelope_sender_header>.

e.g.

  welcomelist_from joe@example.com fred@example.com
  welcomelist_from *@example.com

=cut

  # Plain address list; the old directive name is accepted as an alias.
  push (@cmds, {
    setting => 'welcomelist_from',
    aliases => [ qw(whitelist_from) ], # backward compatible - to be removed for 4.1
    type => $CONF_TYPE_ADDRLIST,
  });

=item unwhitelist_from user@example.com

Used to remove a default welcomelist_from (previously whitelist_from) entry, so for example a distribution
welcomelist_from can be overridden in a local.cf file, or an individual user can
override a welcomelist_from entry in their own C<user_prefs> file.
The specified email address has to match exactly (although case-insensitively)
the address previously used in a welcomelist_from line, which implies that a
wildcard only matches literally the same wildcard (not 'any' address).

e.g.

  unwhitelist_from joe@example.com fred@example.com
  unwhitelist_from *@example.com

=cut

  # Removal directive: operates on the 'welcomelist_from' setting through
  # the parser's generic remove_addrlist_value handler.
  push (@cmds, {
    setting => 'welcomelist_from',
    command => 'unwhitelist_from',
    code => \&Mail::SpamAssassin::Conf::Parser::remove_addrlist_value,
    type => $CONF_TYPE_ADDRLIST,
  });

=item welcomelist_from_rcvd addr@lists.sourceforge.net sourceforge.net

Previously whitelist_from_rcvd which will work interchangeably until 4.1.

Works similarly to welcomelist_from (previously whitelist_from), except that in addition to matching
a sender address, a relay's rDNS name or its IP address must match too
for the whitelisting rule to fire. The first parameter is a sender's e-mail
address to whitelist, and the second is a string to match the relay's rDNS,
or its IP address. Matching is case-insensitive.

This second parameter is matched against a TCP-info information field as
provided in a FROM clause of a trace information (i.e. in a Received header
field, see RFC 5321). Only the Received header fields inserted by trusted
hosts are considered. This parameter can either be a full hostname, or a
domain component of that hostname, or an IP address (optionally followed
by a slash and a prefix length) in square brackets. The address prefix
(mask) length with a slash may stand within brackets along with an address,
or may follow the bracketed address. Reverse DNS lookup is done by an MTA,
not by SpamAssassin.

For backward compatibility as an alternative to a CIDR notation, an IPv4
address in brackets may be truncated on classful boundaries to cover whole
subnets, e.g. C<[10.1.2.3]>, C<[10.1.2]>, C<[10.1]>, C<[10]>.
383 384In other words, if the host that connected to your MX had an IP address 385192.0.2.123 that mapped to 'sendinghost.example.org', you should specify 386C<sendinghost.example.org>, or C<example.org>, or C<[192.0.2.123]>, or 387C<[192.0.2.0/24]>, or C<[192.0.2]> here. 388 389Note that this requires that C<internal_networks> be correct. For simple 390cases, it will be, but for a complex network you may get better results 391by setting that parameter. 392 393It also requires that your mail exchangers be configured to perform DNS 394reverse lookups on the connecting host's IP address, and to record the 395result in the generated Received header field according to RFC 5321. 396 397e.g. 398 399 welcomelist_from_rcvd joe@example.com example.com 400 welcomelist_from_rcvd *@* mail.example.org 401 welcomelist_from_rcvd *@axkit.org [192.0.2.123] 402 welcomelist_from_rcvd *@axkit.org [192.0.2.0/24] 403 welcomelist_from_rcvd *@axkit.org [192.0.2.0]/24 404 welcomelist_from_rcvd *@axkit.org [2001:db8:1234::/48] 405 welcomelist_from_rcvd *@axkit.org [2001:db8:1234::]/48 406 407=item def_welcomelist_from_rcvd addr@lists.sourceforge.net sourceforge.net 408 409Previously def_whitelist_from_rcvd which will work interchangeably until 4.1. 410 411Same as C<welcomelist_from_rcvd>, but used for the default welcomelist entries 412in the SpamAssassin distribution. The welcomelist score is lower, because 413these are often targets for spammer spoofing. 

=cut

  # welcomelist_from_rcvd and def_welcomelist_from_rcvd accept exactly two
  # whitespace-separated arguments (sender address pattern, then the relay's
  # rDNS name or bracketed IP) and differ only in the list they feed, so
  # both handlers are produced by this one generator.
  # Each generated handler returns $MISSING_REQUIRED_VALUE for an empty
  # value, $INVALID_VALUE unless there are exactly two arguments, and
  # otherwise the result of the parser's add_to_addrlist_rcvd call.
  my $make_rcvd_pair_handler = sub {
    my ($listname) = @_;
    return sub {
      my ($self, $key, $value, $line) = @_;
      unless (defined $value && $value !~ /^$/) {
        return $MISSING_REQUIRED_VALUE;
      }
      unless ($value =~ /^\S+\s+\S+$/) {
        return $INVALID_VALUE;
      }
      $self->{parser}->add_to_addrlist_rcvd ($listname,
                                        split(/\s+/, $value));
    };
  };

  push (@cmds, {
    setting => 'welcomelist_from_rcvd',
    aliases => ['whitelist_from_rcvd'], # backward compatible - to be removed for 4.1
    type => $CONF_TYPE_ADDRLIST,
    code => $make_rcvd_pair_handler->('welcomelist_from_rcvd'),
  });

  push (@cmds, {
    setting => 'def_welcomelist_from_rcvd',
    aliases => ['def_whitelist_from_rcvd'],
    type => $CONF_TYPE_ADDRLIST,
    code => $make_rcvd_pair_handler->('def_welcomelist_from_rcvd'),
  });

=item whitelist_allows_relays user@example.com

Specify addresses which are in C<welcomelist_from_rcvd> that sometimes
send through a mail relay other than the listed ones. By default mail
with a From address that is in C<welcomelist_from_rcvd> that does not match
the relay will trigger a forgery rule. Including the address in
C<whitelist_allows_relays> prevents that.

Whitelist and blacklist addresses are now file-glob-style patterns, so
C<friend@somewhere.com>, C<*@isp.com>, or C<*.domain.net> will all work.
Specifically, C<*> and C<?> are allowed, but all other metacharacters
are not. Regular expressions are not used for security reasons.
Matching is case-insensitive.

Multiple addresses per line, separated by spaces, is OK.  Multiple
C<whitelist_allows_relays> lines are also OK.

The specified email address does not have to match exactly the address
previously used in a welcomelist_from_rcvd line as it is compared to the
address in the header.

e.g.

  whitelist_allows_relays joe@example.com fred@example.com
  whitelist_allows_relays *@example.com

=cut

  push (@cmds, {
    type => $CONF_TYPE_ADDRLIST,
    setting => 'whitelist_allows_relays',
  });

=item unwelcomelist_from_rcvd user@example.com

Previously unwhitelist_from_rcvd which will work interchangeably until 4.1.

Used to remove a default welcomelist_from_rcvd (previously whitelist_from_rcvd) or def_welcomelist_from_rcvd (previously def_whitelist_from_rcvd)
entry, so for example a distribution welcomelist_from_rcvd can be overridden
in a local.cf file, or an individual user can override a welcomelist_from_rcvd
entry in their own C<user_prefs> file.

The specified email address has to match exactly the address previously
used in a welcomelist_from_rcvd line.

e.g.

  unwelcomelist_from_rcvd joe@example.com fred@example.com
  unwelcomelist_from_rcvd *@axkit.org

=cut

  push (@cmds, {
    setting => 'unwelcomelist_from_rcvd',
    aliases => [ qw(unwhitelist_from_rcvd) ],
    type => $CONF_TYPE_ADDRLIST,
    # Takes one or more addresses and removes each of them from both the
    # regular and the "def_" received-based welcomelist.
    code => sub {
      my ($self, $key, $value, $line) = @_;
      return $MISSING_REQUIRED_VALUE
        if !defined $value || $value =~ /^$/;
      return $INVALID_VALUE
        if $value !~ /^(?:\S+(?:\s+\S+)*)$/;

      my $parser = $self->{parser};
      $parser->remove_from_addrlist_rcvd('welcomelist_from_rcvd',
                                        split (/\s+/, $value));
      $parser->remove_from_addrlist_rcvd('def_welcomelist_from_rcvd',
                                        split (/\s+/, $value));
    }
  });

=item blacklist_from user@example.com

Used to specify addresses which send mail that is often tagged (incorrectly) as
non-spam, but which the user doesn't want.  Same format as C<welcomelist_from>.

=cut

  push (@cmds, {
    type => $CONF_TYPE_ADDRLIST,
    setting => 'blacklist_from',
  });

=item unblacklist_from user@example.com

Used to remove a default blacklist_from entry, so for example a
distribution blacklist_from can be overridden in a local.cf file, or
an individual user can override a blacklist_from entry in their own
C<user_prefs> file. The specified email address has to match exactly
the address previously used in a blacklist_from line.


e.g.

  unblacklist_from joe@example.com fred@example.com
  unblacklist_from *@spammer.com

=cut


  # Removal directive: operates on the 'blacklist_from' setting through
  # the parser's generic remove_addrlist_value handler.
  push (@cmds, {
    setting => 'blacklist_from',
    command => 'unblacklist_from',
    code => \&Mail::SpamAssassin::Conf::Parser::remove_addrlist_value,
    type => $CONF_TYPE_ADDRLIST,
  });


=item welcomelist_to user@example.com

Previously whitelist_to which will work interchangeably until 4.1.

If the given address appears as a recipient in the message headers
(Resent-To, To, Cc, obvious envelope recipient, etc.) the mail will
be listed as allowed.  Useful if you're deploying SpamAssassin system-wide,
and don't want some users to have their mail filtered.  Same format
as C<welcomelist_from>.

There are three levels of To-welcomelisting, C<welcomelist_to>, C<more_spam_to>
and C<all_spam_to>.  Users in the first level may still get some spammish
mails blocked, but users in C<all_spam_to> should never get mail blocked.

The headers checked for welcomelist addresses are as follows: if C<Resent-To> or
C<Resent-Cc> are set, use those; otherwise check all addresses taken from the
following set of headers:

  To
  Cc
  Apparently-To
  Delivered-To
  Envelope-Recipients
  Apparently-Resent-To
  X-Envelope-To
  Envelope-To
  X-Delivered-To
  X-Original-To
  X-Rcpt-To
  X-Real-To

=item more_spam_to user@example.com

See above.

=item all_spam_to user@example.com

See above.

=cut

  # The three To-welcomelisting levels are all plain address lists.
  push (@cmds, {
    setting => 'welcomelist_to',
    aliases => [ qw(whitelist_to) ], # backward compatible - to be removed for 4.1
    type => $CONF_TYPE_ADDRLIST,
  });
  push (@cmds, {
    type => $CONF_TYPE_ADDRLIST,
    setting => 'more_spam_to',
  });
  push (@cmds, {
    type => $CONF_TYPE_ADDRLIST,
    setting => 'all_spam_to',
  });

=item blacklist_to user@example.com

If the given address appears as a recipient in the message headers
(Resent-To, To, Cc, obvious envelope recipient, etc.) the mail will
be blacklisted.  Same format as C<blacklist_from>.

=cut

  push (@cmds, {
    type => $CONF_TYPE_ADDRLIST,
    setting => 'blacklist_to',
  });

=item welcomelist_auth user@example.com

Previously whitelist_auth which will work interchangeably until 4.1.

Used to specify addresses which send mail that is often tagged (incorrectly) as
spam.  This is different from C<welcomelist_from> and C<welcomelist_from_rcvd> in
that it first verifies that the message was sent by an authorized sender for
the address, before whitelisting.

Authorization is performed using one of the installed sender-authorization
schemes: SPF (using C<Mail::SpamAssassin::Plugin::SPF>), or DKIM (using
C<Mail::SpamAssassin::Plugin::DKIM>).  Note that those plugins must be active,
and working, for this to operate.

Using C<welcomelist_auth> is roughly equivalent to specifying duplicate
C<whitelist_from_spf>, C<whitelist_from_dk>, and C<welcomelist_from_dkim> lines
for each of the addresses specified.

e.g.

  welcomelist_auth joe@example.com fred@example.com
  welcomelist_auth *@example.com

=item def_welcomelist_auth user@example.com

Previously def_whitelist_auth which will work interchangeably until 4.1.

Same as C<welcomelist_auth>, but used for the default welcomelist entries
in the SpamAssassin distribution.  The welcomelist score is lower, because
these are often targets for spammer spoofing.

=cut

  push (@cmds, {
    setting => 'welcomelist_auth',
    aliases => ['whitelist_auth'], # backward compatible - to be removed for 4.1
    type => $CONF_TYPE_ADDRLIST,
  });

  push (@cmds, {
    setting => 'def_welcomelist_auth',
    aliases => ['def_whitelist_auth'], # backward compatible - to be removed for 4.1
    type => $CONF_TYPE_ADDRLIST,
  });

=item unwelcomelist_auth user@example.com

Previously unwhitelist_auth which will work interchangeably until 4.1.

Used to remove a C<welcomelist_auth> or C<def_welcomelist_auth> entry. The
specified email address has to match exactly the address previously used.

e.g.

  unwelcomelist_auth joe@example.com fred@example.com
  unwelcomelist_auth *@example.com

=cut

  push (@cmds, {
    setting => 'unwelcomelist_auth',
    aliases => ['unwhitelist_auth'],
    type => $CONF_TYPE_ADDRLIST,
    # Takes one or more addresses and removes each of them from both the
    # welcomelist_auth and the def_welcomelist_auth list.  Returns
    # $MISSING_REQUIRED_VALUE for an empty value and $INVALID_VALUE for a
    # value that is not a whitespace-separated list of words.
    code => sub {
      my ($self, $key, $value, $line) = @_;
      return $MISSING_REQUIRED_VALUE
        if !defined $value || $value =~ /^$/;
      return $INVALID_VALUE
        if $value !~ /^(?:\S+(?:\s+\S+)*)$/;

      $self->{parser}->remove_from_addrlist('welcomelist_auth',
                                        split (/\s+/, $value));
      $self->{parser}->remove_from_addrlist('def_welcomelist_auth',
                                        split (/\s+/, $value));
    }
  });


=item enlist_uri_host (listname) host ...

Adds one or more host names or domain names to a named list of URI domains.
The named list can then be consulted through a check_uri_host_listed()
eval rule implemented by the WLBLEval plugin, which takes the list name as
an argument. Parenthesis around a list name are literal - a required syntax.

Host names may optionally be prefixed by an exclamation mark '!', which
produces false as a result if this entry matches. This makes it easier
to exclude some subdomains when their superdomain is listed, for example:

  enlist_uri_host (MYLIST) !sub1.example.com !sub2.example.com example.com

No wildcards are supported, but subdomains do match implicitly. Lists
are independent. Search for each named list starts by looking up the
full hostname first, then leading fields are progressively stripped off
(e.g.: sub.example.com, example.com, com) until a match is found or we run
out of fields. The first matching entry (the most specific) determines if a
lookup yielded a true (no '!' prefix) or a false (with a '!' prefix) result.

If an URL found in a message contains an IP address in place of a host name,
the given list must specify the exact same IP address (instead of a host name)
in order to match.

Use the delist_uri_host directive to neutralize previous enlist_uri_host
settings.

Enlisting to lists named 'BLACK' and 'WHITE' have their shorthand directives
blocklist_uri_host and welcomelist_uri_host and corresponding default rules,
but the names 'BLACK' and 'WHITE' are otherwise not special or reserved.

=cut

  push (@cmds, {
    command => 'enlist_uri_host',
    setting => 'uri_host_lists',
    type => $CONF_TYPE_HASH_KEY_VALUE,
    # Handler for "enlist_uri_host (listname) host ...": stores every
    # lowercased host under $conf->{uri_host_lists}{listname}, with value 0
    # for a '!'-prefixed host and 1 otherwise.  Returns
    # $MISSING_REQUIRED_VALUE when the "(listname) host" shape is missing.
    code => sub {
      my($conf, $key, $value, $line) = @_;
      local($1,$2);
      if ($value !~ /^ \( (.+?) \) \s+ (.+) \z/sx) {
        return $MISSING_REQUIRED_VALUE;
      }
      my $listname = $1;  # the list name an eval rule is later given
      # note: must not factor out dereferencing, as otherwise
      # subhashes would spring up in a copy and be lost
      foreach my $host ( split(/\s+/, lc $2) ) {
        my $v = $host =~ s/^!// ? 0 : 1;
        $conf->{uri_host_lists}{$listname}{$host} = $v;
      }
      return;
    }
  });

=item delist_uri_host [ (listname) ] host ...

Removes one or more specified host names from a named list of URI domains.
Removing an unlisted name is ignored (is not an error). Listname is optional,
if specified then just the named list is affected, otherwise hosts are
removed from all URI host lists created so far. Parenthesis around a list
name are a required syntax.

Note that directives in configuration files are processed in sequence,
the delist_uri_host only applies to previously listed entries and has
no effect on enlisted entries in yet-to-be-processed directives.

For convenience (similarity to the enlist_uri_host directive) hostnames
may be prefixed by an exclamation mark, which is stripped off from each
name and has no meaning here.

=cut

  push (@cmds, {
    command => 'delist_uri_host',
    setting => 'uri_host_lists',
    type => $CONF_TYPE_HASH_KEY_VALUE,
    # Handler for "delist_uri_host [ (listname) ] host ...": deletes every
    # given host from the named list, or from all lists present in
    # $conf->{uri_host_lists} when no list name is given.  Returns
    # $MISSING_REQUIRED_VALUE when no host is supplied.
    code => sub {
      my($conf, $key, $value, $line) = @_;
      local($1,$2);
      if ($value !~ /^ (?: \( (.+?) \) \s+ )? (.+) \z/sx) {
        return $MISSING_REQUIRED_VALUE;
      }
      my @listnames = defined $1 ? $1 : keys %{$conf->{uri_host_lists}};
      # strip the (meaningless here) '!' prefix once, before the loops,
      # instead of relying on in-place modification during the first pass
      my @hosts = map { my $h = $_; $h =~ s/^!//; $h } split(/\s+/, lc $2);
      foreach my $listname (@listnames) {
        foreach my $host (@hosts) {
          delete $conf->{uri_host_lists}{$listname}{$host};
        }
      }
      return;
    }
  });

=item enlist_addrlist (listname) user@example.com

Adds one or more addresses to a named list of addresses.
The named list can then be consulted through a check_from_in_list() or a
check_to_in_list() eval rule implemented by the WLBLEval plugin, which takes
the list name as an argument. Parenthesis around a list name are literal - a
required syntax.

Listed addresses are file-glob-style patterns, so C<friend@somewhere.com>,
C<*@isp.com>, or C<*.domain.net> will all work.
Specifically, C<*> and C<?> are allowed, but all other metacharacters
are not. Regular expressions are not used for security reasons.
Matching is case-insensitive.

Multiple addresses per line, separated by spaces, is OK.  Multiple
C<enlist_addrlist> lines are also OK.

Enlisting an address to the list named blacklist_to is synonymous to using the
directive blacklist_to

Enlisting an address to the list named blacklist_from is synonymous to using the
directive blacklist_from

Enlisting an address to the list named welcomelist_to is synonymous to using the
directive welcomelist_to

Enlisting an address to the list named welcomelist_from (previously whitelist_from) is synonymous to using the
directive welcomelist_from

e.g.

  enlist_addrlist (PAYPAL_ADDRESS) service@paypal.com
  enlist_addrlist (PAYPAL_ADDRESS) *@paypal.co.uk

=cut

  push (@cmds, {
    setting => 'enlist_addrlist',
    type => $CONF_TYPE_ADDRLIST,
    # Handler for "enlist_addrlist (listname) addr ...": hands the list
    # name and the addresses to the parser's add_to_addrlist.  Returns
    # $MISSING_REQUIRED_VALUE when the "(listname) addr" shape is missing.
    code => sub {
      my($conf, $key, $value, $line) = @_;
      local($1,$2);
      if ($value !~ /^ \( (.+?) \) \s+ (.+) \z/sx) {
        return $MISSING_REQUIRED_VALUE;
      }
      my $listname = $1;  # the list name, without its parentheses
      # Only the text after "(listname)" holds addresses; splitting the
      # whole $value would enlist the "(listname)" token as an address too.
      $conf->{parser}->add_to_addrlist ($listname, split(/\s+/, $2));
    }
  });

=item blocklist_uri_host host-or-domain ...

Previously blacklist_uri_host which will work interchangeably until 4.1.

Is a shorthand for a directive:  enlist_uri_host (BLACK) host ...

Please see directives enlist_uri_host and delist_uri_host for details.

=cut

  push (@cmds, {
    command => 'blocklist_uri_host',
    aliases => [ qw(blacklist_uri_host) ],
    setting => 'uri_host_lists',
    type => $CONF_TYPE_HASH_KEY_VALUE,
    # Files each lowercased host under the 'BLACK' list: 0 when it carried
    # a '!' prefix, 1 otherwise.
    code => sub {
      my($conf, $key, $value, $line) = @_;
      for my $entry ( split(/\s+/, lc $value) ) {
        my $host = $entry;
        my $negated = ($host =~ s/^!//);
        $conf->{uri_host_lists}{'BLACK'}{$host} = $negated ? 0 : 1;
      }
    }
  });

=item welcomelist_uri_host host-or-domain ...

Previously whitelist_uri_host which will work interchangeably until 4.1.

Is a shorthand for a directive:  enlist_uri_host (WHITE) host ...

Please see directives enlist_uri_host and delist_uri_host for details.

=cut

  push (@cmds, {
    command => 'welcomelist_uri_host',
    aliases => [ qw(whitelist_uri_host) ],
    setting => 'uri_host_lists',
    type => $CONF_TYPE_HASH_KEY_VALUE,
    # Files each lowercased host under the 'WHITE' list: 0 when it carried
    # a '!' prefix, 1 otherwise.
    code => sub {
      my($conf, $key, $value, $line) = @_;
      for my $entry ( split(/\s+/, lc $value) ) {
        my $host = $entry;
        my $negated = ($host =~ s/^!//);
        $conf->{uri_host_lists}{'WHITE'}{$host} = $negated ? 0 : 1;
      }
    }
  });

=back

=head2 BASIC MESSAGE TAGGING OPTIONS

=over 4

=item rewrite_header { subject | from | to } STRING

By default, suspected spam messages will not have the C<Subject>,
C<From> or C<To> lines tagged to indicate spam. By setting this option,
the header will be tagged with C<STRING> to indicate that a message is
spam. For the From or To headers, this will take the form of an RFC 2822
comment following the address in parentheses. For the Subject header,
this will be prepended to the original subject. Note that you should
only use the _REQD_ and _SCORE_ tags when rewriting the Subject header
if C<report_safe> is 0. Otherwise, you may not be able to remove
the SpamAssassin markup via the normal methods.  More information
about tags is explained below in the B<TEMPLATE TAGS> section.

Parentheses are not permitted in STRING if rewriting the From or To headers.
(They will be converted to square brackets.)

If C<rewrite_header subject> is used, but the message being rewritten
does not already contain a C<Subject> header, one will be created.

A null value for C<STRING> will remove any existing rewrite for the specified
header.

=cut

  # rewrite_header { subject | from | to } STRING
  # Returns $MISSING_REQUIRED_VALUE when no header name is given and
  # $INVALID_VALUE for any header other than From, Subject or To.  An
  # absent or all-whitespace STRING deletes the stored rewrite for that
  # header; otherwise the string is stored under rewrite_header{Header}.
  push (@cmds, {
    setting => 'rewrite_header',
    type => $CONF_TYPE_HASH_KEY_VALUE,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      my($hdr, $string) = split(/\s+/, $value, 2);
      # An empty $value makes split() return an empty list, leaving $hdr
      # undefined; normalize that to '' so lc() does not emit an
      # "uninitialized value" warning before the check below reports it.
      $hdr = defined $hdr ? ucfirst(lc($hdr)) : '';

      if ($hdr eq '') {
        return $MISSING_REQUIRED_VALUE;
      }
      # We only deal with From, Subject, and To ...
      elsif ($hdr =~ /^(?:From|Subject|To)$/) {
        unless (defined $string && $string =~ /\S/) {
          delete $self->{rewrite_header}->{$hdr};
          return;
        }

        # parentheses are converted to square brackets for From and To
        if ($hdr ne 'Subject') {
          $string =~ tr/()/[]/;
        }
        $self->{rewrite_header}->{$hdr} = $string;
        return;
      }
      else {
        # if we get here, note the issue, then we'll fail through for an error.
        info("config: rewrite_header: ignoring $hdr, not From, Subject, or To");
        return $INVALID_VALUE;
      }
    }
  });

=item subjprefix

Add a prefix to the email's Subject if a rule is matched.
To enable this option "rewrite_header Subject" config
option must be enabled as well.

The check C<if can(Mail::SpamAssassin::Conf::feature_subjprefix)>
should be used to silence warnings in previous
SpamAssassin versions.

To be able to use this feature a C<add_header all Subjprefix _SUBJPREFIX_>
configuration line could be needed when the glue between the MTA and SpamAssassin
rewrites the email content.

Here is an example on how to use this feature:

  rewrite_header Subject *****SPAM*****
  add_header all Subjprefix _SUBJPREFIX_
  body     OLEMACRO_MALICE eval:check_olemacro_malice()
  describe OLEMACRO_MALICE Dangerous Office Macro
  score    OLEMACRO_MALICE 5.0
  if can(Mail::SpamAssassin::Conf::feature_subjprefix)
    subjprefix OLEMACRO_MALICE [VIRUS]
  endif

=cut

  # subjprefix: declarative entry only, no custom code
  push (@cmds, {
    command     => 'subjprefix',
    setting     => 'subjprefix',
    type        => $CONF_TYPE_HASH_KEY_VALUE,
    is_frequent => 1,
  });

=item add_header { spam | ham | all } header_name string

Customized headers can be added to the specified type of messages (spam,
ham, or "all" to add to either). All headers begin with C<X-Spam->
(so a C<header_name> Foo will generate a header called X-Spam-Foo).
header_name is restricted to the character set [A-Za-z0-9_-].

The order of C<add_header> configuration options is preserved, inserted
headers will follow this order of declarations. When combining C<add_header>
with C<clear_headers> and C<remove_header>, keep in mind that C<add_header>
appends a new header to the current list, after first removing any existing
header fields of the same name. Note also that C<add_header>, C<clear_headers>
and C<remove_header> may appear in multiple .cf files, which are interpreted
in alphabetic order.

C<string> can contain tags as explained below in the B<TEMPLATE TAGS> section.
You can also use C<\n> and C<\t> in the header to add newlines and tabulators
as desired. A backslash has to be written as \\, any other escaped chars will
be silently removed.

All headers will be folded if fold_headers is set to C<1>. Note: Manually
adding newlines via C<\n> disables any further automatic wrapping (ie:
long header lines are possible). The lines will still be properly folded
(marked as continuing) though.

You can customize existing headers with B<add_header> (only the specified
subset of messages will be changed).

See also C<clear_headers> and C<remove_header> for removing headers.

Here are some examples (these are the defaults, note that Checker-Version can
not be changed or removed):

  add_header spam Flag _YESNOCAPS_
  add_header all Status _YESNO_, score=_SCORE_ required=_REQD_ tests=_TESTS_ autolearn=_AUTOLEARN_ version=_VERSION_
  add_header all Level _STARS(*)_
  add_header all Checker-Version SpamAssassin _VERSION_ (_SUBVERSION_) on _HOSTNAME_

=cut

  # add_header { spam | ham | all } header_name string
  # Returns $INVALID_VALUE unless the value has that shape.  Otherwise
  # replaces any same-named entry (case-insensitively) in headers_ham
  # and/or headers_spam and appends [name, expanded string] to the list.
  push (@cmds, {
    setting => 'add_header',
    code => sub {
      my ($self, $key, $value, $line) = @_;
      local ($1,$2,$3);
      return $INVALID_VALUE
        unless $value =~ /^(ham|spam|all)\s+([A-Za-z0-9_-]+)\s+(.*?)\s*$/;
      my ($type, $name, $hline) = ($1, $2, $3);

      # drop one pair of enclosing double quotes, if present
      $hline = $1  if $hline =~ /^"(.*)"$/;

      # Split at double backslashes so that escapes are expanded per piece
      # and the pieces are re-joined with a single literal backslash.
      # The appended newline is needed to make trailing backslashes work.
      my @pieces = split(/\\\\/, $hline."\n");
      for my $piece (@pieces) {
        $piece =~ s/\\t/\t/g;   # expand tabs
        $piece =~ s/\\n/\n/g;   # expand newlines
        $piece =~ s/\\.//g;     # purge all other escapes
      }
      $hline = join("\\", @pieces);
      chop($hline);  # remove dummy newline again

      # pick the header lists this directive applies to, ham first
      my @lists;
      push(@lists, 'headers_ham')   if $type eq "ham"  || $type eq "all";
      push(@lists, 'headers_spam')  if $type eq "spam" || $type eq "all";
      for my $list (@lists) {
        $self->{$list} =
          [ grep { lc($_->[0]) ne lc($name) } @{$self->{$list}} ];
        push(@{$self->{$list}}, [$name, $hline]);
      }
    }
  });

=item remove_header { spam | ham | all } header_name

Headers can be removed from the specified type of messages (spam, ham,
or "all" to remove from either).
All headers begin with C<X-Spam->
(so C<header_name> will be appended to C<X-Spam->).

See also C<clear_headers> for removing all the headers at once.

Note that B<X-Spam-Checker-Version> is not removable because the version
information is needed by mail administrators and developers to debug
problems. Without at least one header, it might not even be possible to
determine that SpamAssassin is running.

=cut

  # remove_header { spam | ham | all } header_name
  # Returns $INVALID_VALUE unless the value has that shape.  Otherwise
  # drops every entry with a matching name (compared case-insensitively)
  # from headers_ham and/or headers_spam.  Checker-Version is never removed.
  push (@cmds, {
    setting => 'remove_header',
    code => sub {
      my ($self, $key, $value, $line) = @_;
      local ($1,$2);
      if ($value !~ /^(ham|spam|all)\s+([A-Za-z0-9_-]+)\s*$/) {
        return $INVALID_VALUE;
      }

      my ($type, $name) = ($1, $2);
      # Header names are matched case-insensitively below, so the protected
      # header must be recognized case-insensitively as well; a
      # case-sensitive test let "checker-version" slip through and be removed.
      $name = lc($name);
      return if ( $name eq "checker-version" );

      if (($type eq "ham") || ($type eq "all")) {
        $self->{headers_ham} =
          [ grep { lc($_->[0]) ne $name } @{$self->{headers_ham}} ];
      }
      if (($type eq "spam") || ($type eq "all")) {
        $self->{headers_spam} =
          [ grep { lc($_->[0]) ne $name } @{$self->{headers_spam}} ];
      }
    }
  });

=item clear_headers

Clear the list of headers to be added to messages. You may use this
before any B<add_header> options to prevent the default headers from being
added to the message.

C<add_header>, C<clear_headers> and C<remove_header> may appear in multiple
.cf files, which are interpreted in alphabetic order, so C<clear_headers>
in a later file will remove all added headers from previously interpreted
configuration files, which may or may not be desired.

Note that B<X-Spam-Checker-Version> is not removable because the version
information is needed by mail administrators and developers to debug
problems. Without at least one header, it might not even be possible to
determine that SpamAssassin is running.

=cut

  # clear_headers (no arguments)
  # Returns $INVALID_VALUE if any argument is given.  Otherwise resets both
  # header lists, keeping only the first Checker-Version entry found in
  # headers_ham (if there is one).
  push (@cmds, {
    setting => 'clear_headers',
    type => $CONF_TYPE_NOARGS,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      return $INVALID_VALUE  if defined $value && $value ne '';

      my @kept = grep { lc($_->[0]) eq "checker-version" }
                 @{$self->{headers_ham}};
      # both lists receive the same surviving entry, taken from the ham list
      $self->{headers_ham}  = @kept ? [ $kept[0] ] : [];
      $self->{headers_spam} = @kept ? [ $kept[0] ] : [];
    }
  });

=item report_safe ( 0 | 1 | 2 ) (default: 1)

If this option is set to 1, if an incoming message is tagged as spam,
instead of modifying the original message, SpamAssassin will create a
new report message and attach the original message as a message/rfc822
MIME part (ensuring the original message is completely preserved, not
easily opened, and easier to recover).

If this option is set to 2, then original messages will be attached with
a content type of text/plain instead of message/rfc822. This setting
may be required for safety reasons on certain broken mail clients that
automatically load attachments without any action by the user. This
setting may also make it somewhat more difficult to extract or view the
original message.

If this option is set to 0, incoming spam is only modified by adding
some C<X-Spam-> headers and no changes will be made to the body. In
addition, a header named B<X-Spam-Report> will be added to spam. You
can use the B<remove_header> option to remove that header after setting
B<report_safe> to 0.

See B<report_safe_copy_headers> if you want to copy headers from
the original mail into tagged messages.

=cut

  # report_safe ( 0 | 1 | 2 )
  # Returns $MISSING_REQUIRED_VALUE for an empty value and $INVALID_VALUE
  # for anything other than 0, 1 or 2.  When set to 0, a "Report" header
  # is appended to headers_spam unless one is already present.
  push (@cmds, {
    setting => 'report_safe',
    default => 1,
    type => $CONF_TYPE_NUMERIC,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      return $MISSING_REQUIRED_VALUE  if $value eq '';
      return $INVALID_VALUE  unless $value =~ /^[012]$/;

      $self->{report_safe} = $value+0;
      if (! $self->{report_safe}) {
        # header names are compared case-insensitively
        my $have_report =
          grep { lc($_->[0]) eq "report" } @{$self->{headers_spam}};
        push(@{$self->{headers_spam}}, ["Report", "_REPORT_"])
          if !$have_report;
      }
    }
  });

=item report_wrap_width (default: 75)

This option sets the wrap width for description lines in the X-Spam-Report
header, not accounting for tab width.

=cut

  # report_wrap_width: declarative entry only, no custom code
  push (@cmds, {
    setting => 'report_wrap_width',
    type    => $CONF_TYPE_NUMERIC,
    default => '75',
  });

=back

=head2 LANGUAGE OPTIONS

=over 4

=item ok_locales xx [ yy zz ... ] (default: all)

This option is used to specify which locales are considered OK for
incoming mail. Mail using the B<character sets> that are allowed by
this option will not be marked as possibly being spam in a foreign
language.

If you receive lots of spam in foreign languages, and never get any non-spam in
these languages, this may help. Note that all ISO-8859-* character sets, and
Windows code page character sets, are always permitted by default.

Set this to C<all> to allow all character sets. This is the default.

The rules C<CHARSET_FARAWAY>, C<CHARSET_FARAWAY_BODY>, and
C<CHARSET_FARAWAY_HEADERS> are triggered based on how this is set.

Examples:

  ok_locales all         (allow all locales)
  ok_locales en          (only allow English)
  ok_locales en ja zh    (allow English, Japanese, and Chinese)

Note: if there are multiple ok_locales lines, only the last one is used.

Select the locales to allow from the list below:

=over 4

=item en - Western character sets in general

=item ja - Japanese character sets

=item ko - Korean character sets

=item ru - Cyrillic character sets

=item th - Thai character sets

=item zh - Chinese (both simplified and traditional) character sets

=back

=cut

  # ok_locales: declarative entry only, no custom code
  push (@cmds, {
    setting => 'ok_locales',
    type    => $CONF_TYPE_STRING,
    default => 'all',
  });

=item normalize_charset ( 0 | 1 ) (default: 1)

Whether to decode non-UTF-8 and non-ASCII textual parts and recode them
to UTF-8 before the text is given over to rules processing. The character
set used for attempted decoding is primarily based on a declared character
set in a Content-Type header, but if the decoding attempt fails a module
Encode::Detect::Detector is consulted (if available) to provide a guess
based on the actual text, and decoding is re-attempted. Even if the option
is enabled no unnecessary decoding and re-encoding work is done when
possible (like with an all-ASCII text with a US-ASCII or extended ASCII
character set declaration, e.g. UTF-8 or ISO-8859-nn or Windows-nnnn).

Unicode support in old versions of perl or in a core module Encode is likely
to be buggy in places, so if the normalize_charset function is enabled
it is advised to stick to more recent versions of perl (preferably 5.12
or later). The module Encode::Detect::Detector is optional, when necessary
it will be used if it is available.

=cut

  # normalize_charset ( 0 | 1 )   (also accepts yes / no, any case)
  # Returns $MISSING_REQUIRED_VALUE for an absent or empty value and
  # $INVALID_VALUE for an unrecognized one.  The setting is forced back to 0
  # (with a lint warning and $INVALID_VALUE) when perl is not newer than
  # 5.8.4 or HTML::Parser is older than 3.46.
  push (@cmds, {
    setting => 'normalize_charset',
    default => 1,
    type => $CONF_TYPE_BOOL,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      return $MISSING_REQUIRED_VALUE  if !defined $value || $value =~ /^$/;

      my $lc_value = lc $value;
      if    ($lc_value eq 'yes' || $value eq '1') { $value = 1 }
      elsif ($lc_value eq 'no'  || $value eq '0') { $value = 0 }
      else  { return $INVALID_VALUE }

      $self->{normalize_charset} = $value;

      if ($] <= 5.008004) {
        $self->{parser}->lint_warn("config: normalize_charset requires Perl 5.8.5 or later");
        $self->{normalize_charset} = 0;
        return $INVALID_VALUE;
      }
      require HTML::Parser;
      # eval is used with VERSION so that the version number here is not
      # incorrectly parsed for CPAN
      unless ( eval { HTML::Parser->VERSION(3.46) } ) {
        $self->{parser}->lint_warn("config: normalize_charset requires HTML::Parser 3.46 or later");
        $self->{normalize_charset} = 0;
        return $INVALID_VALUE;
      }
    }
  });

=back

=head2 NETWORK TEST OPTIONS

=over 4

=item trusted_networks IPaddress[/masklen] ... (default: none)

What networks or hosts are 'trusted' in your setup. B<Trusted> in this case
means that relay hosts on these networks are considered to not be potentially
operated by spammers, open relays, or open proxies. A trusted host could
conceivably relay spam, but will not originate it, and will not forge header
data. DNS blacklist checks will never query for hosts on these networks.

See C<http://wiki.apache.org/spamassassin/TrustPath> for more information.

MXes for your domain(s) and internal relays should B<also> be specified using
the C<internal_networks> setting. When there are 'trusted' hosts that
are not MXes or internal relays for your domain(s) they should B<only> be
specified in C<trusted_networks>.

The C<IPaddress> can be an IPv4 address (in a dot-quad form), or an IPv6
address optionally enclosed in square brackets. Scoped link-local IPv6
addresses are syntactically recognized but the interface scope is currently
ignored (e.g. [fe80::1234%eth0] ) and should be avoided.

If a C</masklen> is specified, it is considered a CIDR-style 'netmask' length,
specified in bits. If it is not specified, but less than 4 octets of an IPv4
address are specified with a trailing dot, an implied netmask length covers
all addresses in remaining octets (i.e. implied masklen is /8 or /16 or /24).
If masklen is not specified, and there is no trailing dot, then just a single
IP address specified is used, as if the masklen were C</32> with an IPv4
address, or C</128> in case of an IPv6 address.

If a network or host address is prefaced by a C<!> the matching network or
host will be excluded from the list even if a less specific (shorter netmask
length) subnet is later specified in the list. This allows a subset of
a wider network to be exempt. In case of specifying overlapping subnets,
specify more specific subnets first (tighter matching, i.e. with a longer
netmask length), followed by less specific (shorter netmask length) subnets
to get predictable results regardless of the search algorithm used - when
Net::Patricia module is installed the search finds the tightest matching
entry in the list, while a sequential search as used in absence of the
module Net::Patricia will find the first matching entry in the list.

Note: 127.0.0.0/8 and ::1 are always included in trusted_networks, regardless
of your config.

Examples:

  trusted_networks 192.168.0.0/16        # all in 192.168.*.*
  trusted_networks 192.168.              # all in 192.168.*.*
  trusted_networks 212.17.35.15          # just that host
  trusted_networks !10.0.1.5 10.0.1/24   # all in 10.0.1.* but not 10.0.1.5
  trusted_networks 2001:db8:1::1 !2001:db8:1::/64 2001:db8::/32
    # 2001:db8::/32 and 2001:db8:1::1/128, except the rest of 2001:db8:1::/64

This operates additively, so a C<trusted_networks> line after another one
will append new entries to the list of trusted networks. To clear out the
existing entries, use C<clear_trusted_networks>.

If C<trusted_networks> is not set and C<internal_networks> is, the value
of C<internal_networks> will be used for this parameter.

If neither C<trusted_networks> nor C<internal_networks> is set, a basic
inference algorithm is applied. This works as follows:

=over 4

=item *

If the 'from' host has an IP address in a private (RFC 1918) network range,
then it's trusted

=item *

If there are authentication tokens in the received header, and
the previous host was trusted, then this host is also trusted

=item *

Otherwise this host, and all further hosts, are considered untrusted.

=back

=cut

  # trusted_networks: declarative entry only, no custom code
  push (@cmds, {
    setting => 'trusted_networks',
    type    => $CONF_TYPE_IPADDRLIST,
  });

=item clear_trusted_networks

Empty the list of trusted networks.

=cut

  # clear_trusted_networks (no arguments)
  # Returns $INVALID_VALUE if any argument is given.  Otherwise installs a
  # fresh netset for trusted_networks and resets the "configured" flag.
  push (@cmds, {
    setting => 'clear_trusted_networks',
    type => $CONF_TYPE_NOARGS,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      return $INVALID_VALUE  if defined $value && $value ne '';
      $self->{trusted_networks} = $self->new_netset('trusted_networks',1);
      $self->{trusted_networks_configured} = 0;
    }
  });

=item internal_networks IPaddress[/masklen] ... (default: none)

What networks or hosts are 'internal' in your setup.
B<Internal> means
that relay hosts on these networks are considered to be MXes for your
domain(s), or internal relays. This uses the same syntax as
C<trusted_networks>, above - see there for details.

This value is used when checking 'dial-up' or dynamic IP address
blocklists, in order to detect direct-to-MX spamming.

Trusted relays that accept mail directly from dial-up connections
(i.e. are also performing a role of mail submission agents - MSA)
should not be listed in C<internal_networks>. List them only in
C<trusted_networks>.

If C<trusted_networks> is set and C<internal_networks> is not, the value
of C<trusted_networks> will be used for this parameter.

If neither C<trusted_networks> nor C<internal_networks> is set, no addresses
will be considered local; in other words, any relays past the machine where
SpamAssassin is running will be considered external.

Every entry in C<internal_networks> must appear in C<trusted_networks>; in
other words, C<internal_networks> is always a subset of the trusted set.

Note: 127/8 and ::1 are always included in internal_networks, regardless of
your config.

=cut

  # internal_networks: declarative entry only, no custom code
  push (@cmds, {
    setting => 'internal_networks',
    type    => $CONF_TYPE_IPADDRLIST,
  });

=item clear_internal_networks

Empty the list of internal networks.

=cut

  # clear_internal_networks (no arguments)
  # Returns $INVALID_VALUE if any argument is given.  Otherwise installs a
  # fresh netset for internal_networks and resets the "configured" flag.
  push (@cmds, {
    setting => 'clear_internal_networks',
    type => $CONF_TYPE_NOARGS,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      return $INVALID_VALUE  if defined $value && $value ne '';
      $self->{internal_networks} = $self->new_netset('internal_networks',1);
      $self->{internal_networks_configured} = 0;
    }
  });

=item msa_networks IPaddress[/masklen] ... (default: none)

The networks or hosts which are acting as MSAs in your setup (but not also
as MX relays).
This uses the same syntax as C<trusted_networks>, above - see
there for details.

B<MSA> means that the relay hosts on these networks accept mail from your
own users and authenticate them appropriately. These relays will never
accept mail from hosts that aren't authenticated in some way. Examples of
authentication include IP lists, SMTP AUTH, POP-before-SMTP, etc.

All relays found in the message headers after the MSA relay will take
on the same trusted and internal classifications as the MSA relay itself,
as defined by your I<trusted_networks> and I<internal_networks> configuration.

For example, if the MSA relay is trusted and internal so will all of the
relays that precede it.

When using msa_networks to identify an MSA it is recommended that you treat
that MSA as both trusted and internal. When an MSA is not included in
msa_networks you should treat the MSA as trusted but not internal, however
if the MSA is also acting as an MX or intermediate relay you must always
treat it as both trusted and internal and ensure that the MSA includes
visible auth tokens in its Received header to identify submission clients.

B<Warning:> Never include an MSA that also acts as an MX (or is also an
intermediate relay for an MX) or otherwise accepts mail from
non-authenticated users in msa_networks. Doing so will result in unknown
external relays being trusted.

=cut

  # msa_networks: declarative entry only, no custom code
  push (@cmds, {
    setting => 'msa_networks',
    type    => $CONF_TYPE_IPADDRLIST,
  });

=item clear_msa_networks

Empty the list of msa networks.

=cut

  # clear_msa_networks (no arguments)
  # Returns $INVALID_VALUE if any argument is given.  Otherwise installs a
  # fresh netset for msa_networks and resets the "configured" flag.
  push (@cmds, {
    setting => 'clear_msa_networks',
    type => $CONF_TYPE_NOARGS,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      return $INVALID_VALUE  if defined $value && $value ne '';
      $self->{msa_networks} =
        $self->new_netset('msa_networks',0);  # no loopback IP
      $self->{msa_networks_configured} = 0;
    }
  });

=item originating_ip_headers header ... (default: X-Yahoo-Post-IP X-Originating-IP X-Apparently-From X-SenderIP)

A list of header field names from which an originating IP address can
be obtained. For example, webmail servers may record a client IP address
in X-Originating-IP.

These IP addresses are virtually appended into the Received: chain, so they
are used in RBL checks where appropriate.

Currently the IP addresses are not added into X-Spam-Relays-* header fields,
but they may be in the future.

=cut

  # originating_ip_headers header ...
  # Returns $MISSING_REQUIRED_VALUE for an absent or empty value.  Otherwise
  # appends each whitespace-separated header field name that is not yet in
  # the list.
  push (@cmds, {
    setting => 'originating_ip_headers',
    default => [],
    type => $CONF_TYPE_STRINGLIST,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      return $MISSING_REQUIRED_VALUE  if !defined $value || $value =~ /^$/;

      for my $hfname (split(/\s+/, $value)) {
        # avoid duplicates, consider header field names case-insensitive
        next  if grep(lc($_) eq lc($hfname), @{$self->{originating_ip_headers}});
        push(@{$self->{originating_ip_headers}}, $hfname);
      }
    }
  });

=item clear_originating_ip_headers

Empty the list of 'originating IP address' header field names.

=cut

  # clear_originating_ip_headers (no arguments)
  # Returns $INVALID_VALUE if any argument is given; otherwise replaces the
  # list with an empty one.
  push (@cmds, {
    setting => 'clear_originating_ip_headers',
    type => $CONF_TYPE_NOARGS,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      return $INVALID_VALUE  if defined $value && $value ne '';
      $self->{originating_ip_headers} = [];
    }
  });

=item always_trust_envelope_sender ( 0 | 1 ) (default: 0)

Trust the envelope sender even if the message has been passed through one or
more trusted relays. See also C<envelope_sender_header>.

=cut

  # always_trust_envelope_sender: declarative entry only, no custom code
  push (@cmds, {
    setting => 'always_trust_envelope_sender',
    type    => $CONF_TYPE_BOOL,
    default => 0,
  });

=item skip_rbl_checks ( 0 | 1 ) (default: 0)

Turning on the skip_rbl_checks setting will disable the DNSEval plugin,
which implements Real-time Block List (or: Blackhole List) (RBL) lookups.

By default, SpamAssassin will run RBL checks. Individual blocklists may
be disabled selectively by setting a score of a corresponding rule to 0.

See also a related configuration parameter skip_uribl_checks,
which controls the URIDNSBL plugin (documented in the URIDNSBL man page).

=cut

  # skip_rbl_checks: declarative entry only, no custom code
  push (@cmds, {
    setting => 'skip_rbl_checks',
    type    => $CONF_TYPE_BOOL,
    default => 0,
  });

=item dns_available { yes | no | test[: domain1 domain2...] } (default: yes)

Tells SpamAssassin whether DNS resolving is available or not. A value I<yes>
indicates DNS resolving is available, a value I<no> indicates DNS resolving
is not available - both of these values apply unconditionally and skip initial
DNS tests, which can be slow or unreliable.

When the option value is a I<test> (with or without arguments), SpamAssassin
will query some domain names on the internet during initialization, attempting
to determine if DNS resolving is working or not.
A space-separated list
of domain names may be specified explicitly, or left to a built-in default
of a dozen or so domain names. From an explicit or a default list a subset
of three domain names is picked randomly for checking. The test queries for
NS records of these domains: if at least one query returns a success then
SpamAssassin considers DNS resolving as available, otherwise not.

The problem is that the test can introduce some startup delay if a network
connection is down, and in some cases it can wrongly guess that DNS is
unavailable because a test connection failed, which causes several
DNS-dependent tests to be disabled.

Please note, the DNS test queries for NS records, so specify domain names,
not host names.

Since version 3.4.0 of SpamAssassin a default setting for option
I<dns_available> is I<yes>. A default in older versions was I<test>.

=cut

  # dns_available { yes | no | test[: domain ...] }
  # "yes"/"1" and "no"/"0" are stored as 'yes' and 'no'; a "test" value is
  # stored verbatim, including any domain list.  Anything else returns
  # $INVALID_VALUE.
  push (@cmds, {
    setting => 'dns_available',
    default => 'yes',
    type => $CONF_TYPE_STRING,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      my $setting;
      if    ($value =~ /^test(?::\s*\S.*)?$/) { $setting = $value }
      elsif ($value =~ /^(?:yes|1)$/)         { $setting = 'yes' }
      elsif ($value =~ /^(?:no|0)$/)          { $setting = 'no' }
      else  { return $INVALID_VALUE }
      $self->{dns_available} = $setting;
    }
  });

=item dns_server ip-addr-port (default: entries provided by Net::DNS)

Specifies an IP address of a DNS server, and optionally its port number.
The I<dns_server> directive may be specified multiple times, each entry
adding to a list of available resolving name servers. The I<ip-addr-port>
argument can either be an IPv4 or IPv6 address, optionally enclosed in
brackets, and optionally followed by a colon and a port number. In absence
of a port number a standard port number 53 is assumed.
When an IPv6 address
is specified along with a port number, the address B<must> be enclosed in
brackets to avoid parsing ambiguity regarding a colon separator. A scoped
link-local IP address is allowed (assuming underlying modules allow it).

Examples:

  dns_server 127.0.0.1
  dns_server 127.0.0.1:53
  dns_server [127.0.0.1]:53
  dns_server [::1]:53
  dns_server fe80::1%lo0
  dns_server [fe80::1%lo0]:53

In absence of I<dns_server> directives, the list of name servers is provided
by Net::DNS module, which typically obtains the list from /etc/resolv.conf,
but this may be platform dependent. Please consult the Net::DNS::Resolver
documentation for details.

=cut

  # dns_server ip-addr-port
  # Accepts "addr", "[addr]", "addr:port" or "[addr]:port", where addr may
  # carry a "%scope" suffix.  Returns $INVALID_VALUE when the syntax is not
  # recognized, the address fails IS_IP_ADDRESS or the port is outside
  # 1..65535.  Valid entries are appended to dns_servers as
  # "[addr%scope]:port".
  push (@cmds, {
    setting => 'dns_server',
    type => $CONF_TYPE_STRING,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      my($address,$port);  local($1,$2,$3);
      if ($value =~ /^(?: \[ ([^\]]*) \] | ([^:]*) ) : (\d+) \z/sx) {
        # explicit port; the address is either bracketed or contains no colon
        ($address, $port) = (defined $1 ? $1 : $2, $3);
      } elsif ($value =~ /^(?: \[ ([^\]]*) \] |
                               ([0-9A-F.:]+ (?: %[A-Z0-9._~-]* )? ) ) \z/six) {
        # no port given, assume the standard one
        ($address, $port) = (defined $1 ? $1 : $2, '53');
      } else {
        return $INVALID_VALUE;
      }
      # split a "%scope" suffix off the address before validating it
      my $scope = '';  # scoped IP address?
      $scope = $1  if $address =~ s/ ( % [A-Z0-9._~-]* ) \z//xsi;
      return $INVALID_VALUE
        unless $address =~ IS_IP_ADDRESS && $port >= 1 && $port <= 65535;
      $self->{dns_servers} = []  if !$self->{dns_servers};
      # checked, untainted, stored in a normalized form
      push(@{$self->{dns_servers}}, untaint_var("[$address$scope]:$port"));
    }
  });

=item clear_dns_servers

Empty the list of explicitly configured DNS servers through a I<dns_server>
directive, falling back to Net::DNS -supplied defaults.

=cut

  # clear_dns_servers (no arguments)
  # Returns $INVALID_VALUE if any argument is given; otherwise undefines
  # the dns_servers list.
  push (@cmds, {
    setting => 'clear_dns_servers',
    type => $CONF_TYPE_NOARGS,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      return $INVALID_VALUE  if defined $value && $value ne '';
      undef $self->{dns_servers};
    }
  });

=item dns_local_ports_permit ranges...

Add the specified ports or ports ranges to the set of allowed port numbers
that can be used as local port numbers when sending DNS queries to a resolver.

The argument is a whitespace-separated or a comma-separated list of
single port numbers n, or port number pairs (i.e. m-n) delimited by a '-',
representing a range. Allowed port numbers are between 1 and 65535.

Directives I<dns_local_ports_permit> and I<dns_local_ports_avoid> are processed
in order in which they appear in configuration files. Each directive adds
(or subtracts) its subsets of ports to a current set of available ports.
Whatever is left in the set by the end of configuration processing
is made available to a DNS resolving client code.

If the resulting set of port numbers is empty (see also the directive
I<dns_local_ports_none>), then SpamAssassin does not apply its ports
randomization logic, but instead leaves the operating system to choose
a suitable free local port number.

The initial set consists of all port numbers in the range 1024-65535.
Note that system config files already modify the set and remove all the
IANA registered port numbers and some other ranges, so there is rarely
a need to adjust the ranges by site-specific directives.

See also directives I<dns_local_ports_avoid> and I<dns_local_ports_none>.

=cut

  # dns_local_ports_permit ranges...
  # Parses the complete list before applying anything: any malformed item or
  # port outside 1..65535 returns $INVALID_VALUE and leaves the bitset
  # untouched.  Each accepted range is then passed to set_ports_range()
  # with a flag of 1.
  push (@cmds, {
    setting => 'dns_local_ports_permit',
    type => $CONF_TYPE_STRING,
    is_admin => 1,
    code => sub {
      my($self, $key, $value, $line) = @_;
      my(@port_ranges);  local($1,$2);
      foreach my $range (split(/[ \t,]+/, $value)) {
        my($lo, $hi);
        if ($range =~ /^(\d{1,5})\z/) {
          ($lo, $hi) = ($1, $1);   # single port
        } elsif ($range =~ /^(\d{1,5})-(\d{1,5})\z/) {
          ($lo, $hi) = ($1, $2);   # m-n range
        } else {
          return $INVALID_VALUE;
        }
        # don't allow adding a port number 0
        foreach my $port ($lo, $hi) {
          return $INVALID_VALUE  if $port < 1 || $port > 65535;
        }
        push(@port_ranges, [$lo, $hi]);
      }
      foreach my $pair (@port_ranges) {
        my($lo, $hi) = @$pair;
        undef $self->{dns_available_portscount};  # invalidate derived data
        set_ports_range(\$self->{dns_available_ports_bitset}, $lo, $hi, 1);
      }
    }
  });

=item dns_local_ports_avoid ranges...

Remove specified ports or ports ranges from the set of allowed port numbers
that can be used as local port numbers when sending DNS queries to a resolver.

Please see directive I<dns_local_ports_permit> for details.

=cut

  # dns_local_ports_avoid: parses a list of ports / port ranges separated
  # by spaces, tabs or commas, and turns the corresponding bits off in
  # $self->{dns_available_ports_bitset}.  The whole list is validated first;
  # nothing is applied if any element is malformed or above 65535.
  push (@cmds, {
    setting  => 'dns_local_ports_avoid',
    type     => $CONF_TYPE_STRING,
    is_admin => 1,
    code     => sub {
      my($self, $key, $value, $line) = @_;
      local($1,$2);
      my @port_ranges;
      foreach my $range (split(/[ \t,]+/, $value)) {
        my @bounds;
        if ($range =~ /^(\d{1,5})\z/) {
          @bounds = ($1, $1);   # single port: a range of one
        } elsif ($range =~ /^(\d{1,5})-(\d{1,5})\z/) {
          @bounds = ($1, $2);
        } else {
          return $INVALID_VALUE;
        }
        # don't mind clearing also the port number 0
        return $INVALID_VALUE  if grep($_ > 65535, @bounds);
        push(@port_ranges, \@bounds);
      }
      foreach my $bounds (@port_ranges) {
        undef $self->{dns_available_portscount};  # invalidate derived data
        set_ports_range(\$self->{dns_available_ports_bitset},
                        $bounds->[0], $bounds->[1], 0);
      }
    }
  });

=item dns_local_ports_none

Is a fast shorthand for:

  dns_local_ports_avoid 1-65535

leaving the set of available DNS query local port numbers empty. In all
respects (apart from speed) it is equivalent to the shown directive, and can
be freely mixed with I<dns_local_ports_permit> and I<dns_local_ports_avoid>.

If the resulting set of port numbers is empty, then SpamAssassin does not
apply its ports randomization logic, but instead leaves the operating system
to choose a suitable free local port number.

See also directives I<dns_local_ports_permit> and I<dns_local_ports_avoid>.

=cut

  # dns_local_ports_none: takes no argument; wipes the bitset of available
  # local ports and invalidates the cached port count.
  push (@cmds, {
    setting  => 'dns_local_ports_none',
    type     => $CONF_TYPE_NOARGS,
    is_admin => 1,
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      # the directive accepts no argument; anything non-empty is rejected
      return $INVALID_VALUE  if defined $value && $value ne '';
      undef $self->{dns_available_portscount};  # invalidate derived data
      wipe_ports_range(\$self->{dns_available_ports_bitset}, 0);
    }
  });

=item dns_test_interval n (default: 600 seconds)

If dns_available is set to I<test>, the dns_test_interval time in number
of seconds will tell SpamAssassin how often to retest for working DNS.
A numeric value is optionally suffixed by a time unit (s, m, h, d, w,
indicating seconds (default), minutes, hours, days, weeks).

=cut

  push (@cmds, {
    setting => 'dns_test_interval',
    type    => $CONF_TYPE_DURATION,
    default => 600,
  });

=item dns_options opts (default: v4, v6, norotate, nodns0x20, edns=4096)

Provides a (whitespace or comma -separated) list of options applying to DNS
resolving. Available options are: I<v4>, I<v6>, I<rotate>, I<dns0x20> and
I<edns> (or I<edns0>). Option name may be negated by prepending a I<no>
(e.g. I<norotate>, I<NoEDNS>) to counteract a previously enabled option.
Option names are not case-sensitive. The I<dns_options> directive may
appear in configuration files multiple times, the last setting prevails.

Option I<v4> declares resolver capable of returning IPv4 (A) records.
Option I<v6> declares resolver capable of returning IPv6 (AAAA) records.
One would set I<nov6> if the resolver is filtering AAAA responses. NOTE:
these options only refer to I<resolving capabilities>, there is no other
meaning like whether the IP address of resolver itself is IPv4 or IPv6.

Option I<edns> (or I<edns0>) may take a value which specifies a requestor's
acceptable UDP payload size according to EDNS0 specifications (RFC 6891,
ex RFC 2671) e.g. I<edns=4096>. When EDNS0 is off (I<noedns> or I<edns=512>)
a traditional implied UDP payload size is 512 bytes, which is also a minimum
allowed value for this option. When the option is specified but a value
is not provided, a conservative default of 1220 bytes is implied. It is
recommended to keep I<edns> enabled when using a local recursive DNS server
which supports EDNS0 (like most modern DNS servers do), a suitable setting
in this case is I<edns=4096>, which is also a default. Allowing UDP payload
size larger than 512 bytes can avoid truncation of resource records in large
DNS responses (like in TXT records of some SPF and DKIM responses, or when
an unreasonable number of A records is published by some domain). The option
should be disabled when a recursive DNS server is only reachable through
non- RFC 6891 compliant middleboxes (such as some old-fashioned firewall)
which ban DNS UDP payload sizes larger than 512 bytes. A suitable value
when a non-local recursive DNS server is used and a middlebox B<does> allow
EDNS0 but blocks fragmented IP packets is perhaps 1220 bytes, allowing a
DNS UDP packet to fit within a single IP packet in most cases (a slightly
less conservative range would be 1280-1410 bytes).

Option I<rotate> causes SpamAssassin to choose a DNS server at random
from all servers listed in C</etc/resolv.conf> every I<dns_test_interval>
seconds, effectively spreading the load over all currently available DNS
servers when there are many spamd workers.

Option I<dns0x20> enables randomization of letters in a DNS query label
according to draft-vixie-dnsext-dns0x20, decreasing a chance of collisions
of responses (by chance or by a malicious intent) by increasing spread
as provided by a 16-bit query ID and up to 16 bits of a port number,
with additional bits as encoded by flipping case (upper/lower) of letters
in a query. The number of additional random bits corresponds to the number
of letters in a query label. Should work reliably with all mainstream
DNS servers - do not turn on if you see frequent info messages
"dns: no callback for id:" in the log, or if RBL or URIDNS lookups
do not work for no apparent reason.

=cut

  # dns_options: the argument is lowercased and split on whitespace and/or
  # commas; each resulting option updates one key of $self->{dns_options}.
  # Options are applied in order, so options preceding an invalid one have
  # already taken effect when $INVALID_VALUE is returned.
  push (@cmds, {
    setting => 'dns_options',
    type => $CONF_TYPE_HASH_KEY_VALUE,
    # RFC 6891: A good compromise may be the use of an EDNS maximum payload size
    # of 4096 octets as a starting point.
    default => { 'v4' => 1, 'v6' => 1,
                 'rotate' => 0, 'dns0x20' => 0, 'edns' => 4096 },
    code => sub {
      my ($self, $key, $value, $line) = @_;
      foreach my $option (split (/[\s,]+/, lc $value)) {
        local($1,$2);
        if ($option =~ /^no(rotate|dns0x20|v4|v6)\z/) {
          $self->{dns_options}->{$1} = 0;
        } elsif ($option =~ /^no(edns)0?\z/) {
          $self->{dns_options}->{$1} = 0;
        } elsif ($option =~ /^(rotate|dns0x20|v4|v6)\z/) {
          $self->{dns_options}->{$1} = 1;
        } elsif ($option =~ /^(edns)0? (?: = (\d+) )? \z/x) {
          # RFC 6891 (ex RFC 2671) - EDNS0, value is a requestor's UDP payload
          # size, defaults to some UDP packet size likely to fit into a single
          # IP packet which is more likely to pass firewalls which choke on IP
          # fragments.  RFC 2460: min MTU is 1280 for IPv6, minus 40 bytes for
          # basic header, yielding 1240.  RFC 3226 prescribes a min of 1220 for
          # RFC 2535 compliant servers.  RFC 6891: choosing between 1280 and
          # 1410 bytes for IP (v4 or v6) over Ethernet would be reasonable.
          #
          # A missing value, as well as an explicit 0, selects the default
          # of 1220 bytes.
          my $payload_size = $2 || 1220;
          # Validate before storing: a rejected size must not overwrite
          # the setting currently in effect.
          return $INVALID_VALUE  if $payload_size < 512;
          $self->{dns_options}->{$1} = $payload_size;
        } else {
          return $INVALID_VALUE;
        }
      }
    }
  });

=item dns_query_restriction (allow|deny) domain1 domain2 ...

Option allows disabling of rules which would result in a DNS query to one of
the listed domains. The first argument must be a literal C<allow> or C<deny>,
remaining arguments are domain names.

Most DNS queries (with some exceptions) are subject to dns_query_restriction.
A domain to be queried is successively stripped-off of its leading labels
(thus yielding a series of its parent domains), and on each iteration a
check is made against an associative array generated by dns_query_restriction
options. Search stops at the first match (i.e. the tightest match), and the
matching entry with its C<allow> or C<deny> value then controls whether a
DNS query is allowed to be launched.

If no match is found an implicit default is to allow a query. The purpose of
an explicit C<allow> entry is to be able to override a previously configured
C<deny> on the same domain or to override an entry (possibly yet to be
configured in subsequent config directives) on one of its parent domains.
Thus an 'allow zen.spamhaus.org' with a 'deny spamhaus.org' would permit
DNS queries on a specific DNS BL zone but deny queries to other zones under
the same parent domain.

Domains are matched case-insensitively, no wildcards are recognized,
there should be no leading or trailing dot.

Specifying a block on querying a domain name has a similar effect as setting
a score of corresponding DNSBL and URIBL rules to zero, and can be a handy
alternative to hunting for such rules when a site policy does not allow
certain DNS block lists to be queried.

Special wildcard "dns_query_restriction deny *" is supported to block all
queries except allowed ones.

Example:
  dns_query_restriction deny dnswl.org surbl.org
  dns_query_restriction allow zen.spamhaus.org
  dns_query_restriction deny spamhaus.org mailspike.net spamcop.net

=cut

  # dns_query_restriction: the leading allow/deny keyword (case-insensitive)
  # is stripped off the value; every remaining whitespace-separated domain is
  # stored lowercased in $self->{dns_query_blocked} with 1 (deny) or 0 (allow).
  push (@cmds, {
    setting => 'dns_query_restriction',
    type => $CONF_TYPE_STRING,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      defined $value && $value =~ s/^(allow|deny)\s+//i
        or return $INVALID_VALUE;
      my $blocked = lc($1) eq 'deny' ? 1 : 0;
      foreach my $domain (split(/\s+/, $value)) {
        $domain =~ s/^\.//; $domain =~ s/\.\z//;  # strip dots
        $self->{dns_query_blocked}{lc $domain} = $blocked;
      }
    }
  });

=item clear_dns_query_restriction

The option removes any entries entered by previous 'dns_query_restriction'
options, leaving the list empty, i.e. allowing DNS queries for any domain
(including any DNS BL zone).

=cut

  # clear_dns_query_restriction: takes no argument; removes the whole
  # $self->{dns_query_blocked} table.
  push (@cmds, {
    setting => 'clear_dns_query_restriction',
    aliases => ['clear_dns_query_restrictions'],
    type => $CONF_TYPE_NOARGS,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      return $INVALID_VALUE  if defined $value && $value ne '';
      delete $self->{dns_query_blocked};
    }
  });

=item dns_block_rule RULE domain

If rule named RULE is hit, DNS queries to specified domain are
I<temporarily> blocked. Intended to be used with rules that check
RBL return codes for specific blocked status. For example:

  urirhssub URIBL_BLOCKED multi.uribl.com. A 1
  dns_block_rule URIBL_BLOCKED multi.uribl.com

Block status is maintained across all processes by empty statefile named
"dnsblock_multi.uribl.com" in global state dir:
home_dir_for_helpers/.spamassassin, $HOME/.spamassassin,
/var/lib/spamassassin (localstate), depending which is found and writable.

=cut

  # dns_block_rule: first word of the value is a rule name, the rest is one
  # or more whitespace-separated domains (lowercased, with one leading and
  # one trailing dot stripped).  Each domain is recorded both per rule and
  # in a flat table of blockable domains.
  push (@cmds, {
    setting => 'dns_block_rule',
    is_admin => 1,
    type => $CONF_TYPE_HASH_KEY_VALUE,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      local($1,$2);
      defined $value && $value =~ /^(\S+)\s+(.+)$/
        or return $INVALID_VALUE;
      my $rule = $1;
      foreach my $domain (split(/\s+/, lc($2))) {
        $domain =~ s/^\.//; $domain =~ s/\.\z//;  # strip dots
        # This check is the only gate before the value is untainted and
        # used in a file name, so it is anchored with \z rather than $
        # (which would tolerate a trailing newline).
        if ($domain !~ /^[a-z0-9.-]+\z/) {
          return $INVALID_VALUE;
        }
        # will end up in filename, do not allow / etc in above regex!
        $domain = untaint_var($domain);
        # Check.pm check_main() uses this
        $self->{dns_block_rule}{$rule}{$domain} = 1;
        # bgsend_and_start_lookup() uses this
        $self->{dns_block_rule_domains}{$domain} = $domain;
      }
    }
  });

=item dns_block_time (default: 300)

dns_block_rule query blockage will last this many seconds.

=cut

  push (@cmds, {
    setting => 'dns_block_time',
    is_admin => 1,
    default => 300,
    type => $CONF_TYPE_NUMERIC,
  });

=back

=head2 LEARNING OPTIONS

=over 4

=item use_learner ( 0 | 1 ) (default: 1)

Whether to use any machine-learning classifiers with SpamAssassin, such as the
default 'BAYES_*' rules. Setting this to 0 will disable use of any and all
human-trained classifiers.

=cut

  push (@cmds, {
    setting => 'use_learner',
    default => 1,
    type => $CONF_TYPE_BOOL,
  });

=item use_bayes ( 0 | 1 ) (default: 1)

Whether to use the naive-Bayesian-style classifier built into
SpamAssassin.
This is a master on/off switch for all Bayes-related
operations.

=cut

  push (@cmds, {
    setting => 'use_bayes',
    type    => $CONF_TYPE_BOOL,
    default => 1,
  });

=item use_bayes_rules ( 0 | 1 ) (default: 1)

Whether to use rules using the naive-Bayesian-style classifier built
into SpamAssassin. This allows you to disable the rules while leaving
auto and manual learning enabled.

=cut

  push (@cmds, {
    setting => 'use_bayes_rules',
    type    => $CONF_TYPE_BOOL,
    default => 1,
  });

=item bayes_auto_learn ( 0 | 1 ) (default: 1)

Whether SpamAssassin should automatically feed high-scoring mails (or
low-scoring mails, for non-spam) into its learning systems. The only
learning system supported currently is a naive-Bayesian-style classifier.

See the documentation for the
C<Mail::SpamAssassin::Plugin::AutoLearnThreshold> plugin module
for details on how Bayes auto-learning is implemented by default.

=cut

  push (@cmds, {
    setting => 'bayes_auto_learn',
    type    => $CONF_TYPE_BOOL,
    default => 1,
  });

=item bayes_token_sources (default: header visible invisible uri)

Controls which sources in a mail message can contribute tokens (e.g. words,
phrases, etc.) to a Bayes classifier. The argument is a space-separated list
of keywords (I<header>, I<visible>, I<invisible>, I<uri>, I<mimepart>), each
of which may be prefixed by a I<no> to indicate its exclusion. Additionally
two reserved keywords are allowed: I<all> and I<none> (or: I<noall>). The list
of keywords is processed sequentially: a keyword I<all> adds all available
keywords to a set being built, a I<none> or I<noall> clears the set, other
non-negated keywords are added to the set, and negated keywords are removed
from the set. Keywords are case-insensitive.
2167 2168The default set is: I<header> I<visible> I<invisible> I<uri>, which is 2169equivalent for example to: I<All> I<NoMIMEpart>. The reason why I<mimepart> 2170is not currently in a default set is that it is a newer source (introduced 2171with SpamAssassin version 3.4.1) and not much experience has yet been gathered 2172regarding its usefulness. 2173 2174See also option C<bayes_ignore_header> for a fine-grained control on individual 2175header fields under the umbrella of a more general keyword I<header> here. 2176 2177Keywords imply the following data sources: 2178 2179=over 4 2180 2181=item I<header> - tokens collected from a message header section 2182 2183=item I<visible> - words from visible text (plain or HTML) in a message body 2184 2185=item I<invisible> - hidden/invisible text in HTML parts of a message body 2186 2187=item I<uri> - URIs collected from a message body 2188 2189=item I<mimepart> - digests (hashes) of all MIME parts (textual or non-textual) of a message, computed after Base64 and quoted-printable decoding, suffixed by their Content-Type 2190 2191=item I<all> - adds all the above keywords to the set being assembled 2192 2193=item I<none> or I<noall> - removes all keywords from the set 2194 2195=back 2196 2197The C<bayes_token_sources> directive may appear multiple times, its keywords 2198are interpreted sequentially, adding or removing items from the final set 2199as they appear in their order in C<bayes_token_sources> directive(s). 

=cut

  # bayes_token_sources: the argument is lowercased and split on whitespace;
  # keywords are applied in order to the hash in $self->{bayes_token_sources}
  # (created on demand).  An unknown keyword aborts with $INVALID_VALUE,
  # leaving the effect of the keywords processed before it in place.
  push (@cmds, {
    setting => 'bayes_token_sources',
    type    => $CONF_TYPE_HASH_KEY_VALUE,
    default => { map(($_,1), qw(header visible invisible uri)) },  # mimepart
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      if ($value eq '') {
        return $MISSING_REQUIRED_VALUE;
      }
      my %all_kw = map(($_,1), qw(header visible invisible uri mimepart));
      my $sources = ($self->{bayes_token_sources} ||= {});
      foreach my $kw (split(/\s+/, lc $value)) {
        if ($kw eq 'none' || $kw eq 'noall') {
          %$sources = ();          # start over with an empty set
        } elsif ($kw eq 'all') {
          %$sources = %all_kw;     # every known source enabled
        } elsif ($kw =~ /^(no)?(.+)\z/s && exists $all_kw{$2}) {
          # a "no" prefix switches the named source off, otherwise on
          $sources->{$2} = defined $1 ? 0 : 1;
        } else {
          return $INVALID_VALUE;
        }
      }
    }
  });

=item bayes_ignore_header header_name

If you receive mail filtered by upstream mail systems, like
a spam-filtering ISP or mailing list, and that service adds
new headers (as most of them do), these headers may provide
inappropriate cues to the Bayesian classifier, allowing it
to take a "short cut". To avoid this, list the headers using this
setting. Example:

  bayes_ignore_header X-Upstream-Spamfilter
  bayes_ignore_header X-Upstream-SomethingElse

=cut

  # bayes_ignore_header: appends the whitespace-separated header names to
  # the list in $self->{bayes_ignore_headers} (note the plural key).
  push (@cmds, {
    setting => 'bayes_ignore_header',
    type    => $CONF_TYPE_STRINGLIST,
    default => [],
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      return $MISSING_REQUIRED_VALUE  if $value eq '';
      push (@{$self->{bayes_ignore_headers}}, split(/\s+/, $value));
    }
  });

=item bayes_ignore_from user@example.com

Bayesian classification and autolearning will not be performed on mail
from the listed addresses. Program C<sa-learn> will also ignore the
listed addresses if it is invoked using the C<--use-ignores> option.
One or more addresses can be listed, see C<welcomelist_from>.

Spam messages from certain senders may contain many words that
frequently occur in ham. For example, one might read messages from a
preferred bookstore but also get unwanted spam messages from other
bookstores. If the unwanted messages are learned as spam then any
messages discussing books, including the preferred bookstore and
antiquarian messages would be in danger of being marked as spam. The
addresses of the annoying bookstores would be listed. (Assuming they
were halfway legitimate and didn't send you mail through myriad
affiliates.)

Those who have pieces of spam in legitimate messages or otherwise
receive ham messages containing potentially spammy words might fear
that some spam messages might be in danger of being marked as ham.
The addresses of the spam mailing lists, correspondents, etc. would
be listed.

=cut

  push (@cmds, {
    type    => $CONF_TYPE_ADDRLIST,
    setting => 'bayes_ignore_from',
  });

=item bayes_ignore_to user@example.com

Bayesian classification and autolearning will not be performed on mail
to the listed addresses. See C<bayes_ignore_from> for details.

=cut

  push (@cmds, {
    type    => $CONF_TYPE_ADDRLIST,
    setting => 'bayes_ignore_to',
  });

=item bayes_min_ham_num (Default: 200)

=item bayes_min_spam_num (Default: 200)

To be accurate, the Bayes system does not activate until a certain number of
ham (non-spam) and spam have been learned. The default is 200 of each ham and
spam, but you can tune these up or down with these two settings.

=cut

  push (@cmds, {
    setting => 'bayes_min_ham_num',
    type    => $CONF_TYPE_NUMERIC,
    default => 200,
  });
  push (@cmds, {
    setting => 'bayes_min_spam_num',
    type    => $CONF_TYPE_NUMERIC,
    default => 200,
  });

=item bayes_learn_during_report (Default: 1)

The Bayes system will, by default, learn any reported messages
(C<spamassassin -r>) as spam. If you do not want this to happen, set
this option to 0.

=cut

  push (@cmds, {
    setting => 'bayes_learn_during_report',
    type    => $CONF_TYPE_BOOL,
    default => 1,
  });

=item bayes_sql_override_username

Used by BayesStore::SQL storage implementation.

If this option is set the BayesStore::SQL module will override the set
username with the value given. This could be useful for implementing global or
group bayes databases.

=cut

  push (@cmds, {
    setting => 'bayes_sql_override_username',
    type    => $CONF_TYPE_STRING,
    default => '',
  });

=item bayes_use_hapaxes (default: 1)

Should the Bayesian classifier use hapaxes (words/tokens that occur only
once) when classifying? This produces significantly better hit-rates.

=cut

  push (@cmds, {
    setting => 'bayes_use_hapaxes',
    type    => $CONF_TYPE_BOOL,
    default => 1,
  });

=item bayes_journal_max_size (default: 102400)

SpamAssassin will opportunistically sync the journal and the database.
It will do so once a day, but will sync more often if the journal file
size goes above this setting, in bytes. If set to 0, opportunistic
syncing will not occur.

=cut

  push (@cmds, {
    setting => 'bayes_journal_max_size',
    type    => $CONF_TYPE_NUMERIC,
    default => 102400,
  });

=item bayes_expiry_max_db_size (default: 150000)

What should be the maximum size of the Bayes tokens database?
When expiry
occurs, the Bayes system will keep either 75% of the maximum value, or
100,000 tokens, whichever has a larger value. 150,000 tokens is roughly
equivalent to an 8Mb database file.

=cut

  push (@cmds, {
    setting => 'bayes_expiry_max_db_size',
    type    => $CONF_TYPE_NUMERIC,
    default => 150000,
  });

=item bayes_auto_expire (default: 1)

If enabled, the Bayes system will try to automatically expire old tokens
from the database. Auto-expiry occurs when the number of tokens in the
database surpasses the bayes_expiry_max_db_size value. If a bayes datastore
backend does not implement individual key/value expirations, the setting
is silently ignored.

=cut

  push (@cmds, {
    setting => 'bayes_auto_expire',
    type    => $CONF_TYPE_BOOL,
    default => 1,
  });

=item bayes_token_ttl (default: 3w, i.e. 3 weeks)

Time-to-live / expiration time in seconds for tokens kept in a Bayes database.
A numeric value is optionally suffixed by a time unit (s, m, h, d, w,
indicating seconds (default), minutes, hours, days, weeks).

If bayes_auto_expire is true and a Bayes datastore backend supports it
(currently only Redis), this setting controls deletion of expired tokens
from a bayes database. The value is observed on a best-effort basis, exact
timing promises are not necessarily kept. If a bayes datastore backend
does not implement individual key/value expirations, the setting is silently
ignored.

=cut

  push (@cmds, {
    setting => 'bayes_token_ttl',
    type    => $CONF_TYPE_DURATION,
    default => 3*7*24*60*60,  # seconds (3 weeks)
  });

=item bayes_seen_ttl (default: 8d, i.e. 8 days)

Time-to-live / expiration time in seconds for 'seen' entries
(i.e. mail message digests with their status) kept in a Bayes database.
A numeric value is optionally suffixed by a time unit (s, m, h, d, w,
indicating seconds (default), minutes, hours, days, weeks).

If bayes_auto_expire is true and a Bayes datastore backend supports it
(currently only Redis), this setting controls deletion of expired 'seen'
entries from a bayes database. The value is observed on a best-effort basis,
exact timing promises are not necessarily kept. If a bayes datastore backend
does not implement individual key/value expirations, the setting is silently
ignored.

=cut

  push (@cmds, {
    setting => 'bayes_seen_ttl',
    type    => $CONF_TYPE_DURATION,
    default => 8*24*60*60,  # seconds (8 days)
  });

=item bayes_learn_to_journal (default: 0)

If this option is set, whenever SpamAssassin does Bayes learning, it
will put the information into the journal instead of directly into the
database. This lowers contention for locking the database to execute
an update, but will also cause more access to the journal and cause a
delay before the updates are actually committed to the Bayes database.

=cut

  push (@cmds, {
    setting => 'bayes_learn_to_journal',
    type    => $CONF_TYPE_BOOL,
    default => 0,
  });

=back

=head2 MISCELLANEOUS OPTIONS

=over 4

=item time_limit n (default: 300)

Specifies a limit on elapsed time in seconds that SpamAssassin is allowed
to spend before providing a result. The value may be fractional and must
not be negative, zero is interpreted as unlimited. The default is 300
seconds for consistency with the spamd default setting of --timeout-child .

This is a best-effort advisory setting, processing will not be abruptly
aborted at an arbitrary point in processing when the time limit is exceeded,
but only on reaching one of locations in the program flow equipped with a
time test.
Currently equipped with the test are the main checking loop, 2481asynchronous DNS lookups, plugins which are calling external programs. 2482Rule evaluation is guarded by starting a timer (alarm) on each set of 2483compiled rules. 2484 2485When a message is passed to Mail::SpamAssassin::parse, a deadline time 2486is established as a sum of current time and the C<time_limit> setting. 2487 2488This deadline may also be specified by a caller through an option 2489'master_deadline' in $suppl_attrib on a call to parse(), possibly providing 2490a more accurate deadline taking into account past and expected future 2491processing of a message in a mail filtering setup. If both the config 2492option as well as a 'master_deadline' option in a call are provided, 2493the shorter time limit of the two is used (since version 3.3.2). 2494Note that spamd (and possibly third-party callers of SpamAssassin) will 2495supply the 'master_deadline' option in a call based on its --timeout-child 2496option (or equivalent), unlike the command line C<spamassassin>, which has 2497no such command line option. 2498 2499When a time limit is exceeded, most of the remaining tests will be skipped, 2500as well as auto-learning. Whatever tests fired so far will determine the 2501final score. The behaviour is similar to short-circuiting with attribute 'on', 2502as implemented by a Shortcircuit plugin. A synthetic hit on a rule named 2503TIME_LIMIT_EXCEEDED with a near-zero default score is generated, so that 2504the report will reflect the event. A score for TIME_LIMIT_EXCEEDED may 2505be provided explicitly in a configuration file, for example to achieve 2506whitelisting or blacklisting effect for messages with long processing times. 2507 2508The C<time_limit> option is a useful protection against excessive processing 2509time on certain degenerate or unusually long or complex mail messages, as well 2510as against some DoS attacks. 
It is also needed in time-critical pre-queue
filtering setups (e.g. milter, proxy, integration with MTA), where message
processing must finish before a SMTP client times out. RFC 5321 prescribes
in section 4.5.3.2.6 the 'DATA Termination' time limit of 10 minutes,
although it is not unusual to see some SMTP clients abort sooner on waiting
for a response. A sensible C<time_limit> for a pre-queue filtering setup is
maybe 50 seconds, assuming that clients are willing to wait at least a minute.

=cut

  push (@cmds, {
    setting => 'time_limit',
    type    => $CONF_TYPE_DURATION,
    default => 300,
  });

=item lock_method type

Select the file-locking method used to protect database files on-disk. By
default, SpamAssassin uses an NFS-safe locking method on UNIX; however, if you
are sure that the database files you'll be using for Bayes and AWL storage will
never be accessed over NFS, a non-NFS-safe locking system can be selected.

This will be quite a bit faster, but may risk file corruption if the files are
ever accessed by multiple clients at once, and one or more of them is accessing
them through an NFS filesystem.

Note that different platforms require different locking systems.

The supported locking systems for C<type> are as follows:

=over 4

=item I<nfssafe> - an NFS-safe locking system

=item I<flock> - simple UNIX C<flock()> locking

=item I<win32> - Win32 locking using C<sysopen (..., O_CREAT|O_EXCL)>.

=back

nfssafe and flock are only available on UNIX, and win32 is only available
on Windows. By default, SpamAssassin will choose either nfssafe or
win32 depending on the platform in use.

=cut

  # lock_method: accepts one of nfssafe, flock or win32, stores it in
  # $self->{lock_method} and asks the main object to rebuild its locker.
  push (@cmds, {
    setting => 'lock_method',
    type    => $CONF_TYPE_STRING,
    default => '',
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      return $INVALID_VALUE  unless $value =~ /^(nfssafe|flock|win32)$/;

      $self->{lock_method} = $value;
      # the locker object depends on the method, so recreate it
      $self->{main}->create_locker();
    }
  });

=item fold_headers ( 0 | 1 ) (default: 1)

By default, headers added by SpamAssassin will be whitespace folded.
In other words, they will be broken up into multiple lines instead of
one very long one and each continuation line will have a tabulator
prepended to mark it as a continuation of the preceding one.

The automatic wrapping can be disabled here. Note that this can generate very
long lines. RFC 2822 required that header lines do not exceed 998 characters
(not counting the final CRLF).

=cut

  push (@cmds, {
    setting => 'fold_headers',
    type    => $CONF_TYPE_BOOL,
    default => 1,
  });

=item report_safe_copy_headers header_name ...

If using C<report_safe>, a few of the headers from the original message
are copied into the wrapper header (From, To, Cc, Subject, Date, etc.)
If you want to have other headers copied as well, you can add them
using this option. You can specify multiple headers on the same line,
separated by spaces, or you can just use multiple lines.

=cut

  # report_safe_copy_headers: appends the whitespace-separated header names
  # to the list in $self->{report_safe_copy_headers}.
  push (@cmds, {
    setting => 'report_safe_copy_headers',
    type    => $CONF_TYPE_STRINGLIST,
    default => [],
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      return $MISSING_REQUIRED_VALUE  if $value eq '';
      push(@{$self->{report_safe_copy_headers}}, split(/\s+/, $value));
    }
  });

=item envelope_sender_header Name-Of-Header

SpamAssassin will attempt to discover the address used in the 'MAIL FROM:'
phase of the SMTP transaction that delivered this message, if this data has
been made available by the SMTP server. This is used in the C<EnvelopeFrom>
pseudo-header, and for various rules such as SPF checking.

By default, various MTAs will use different headers, such as the following:

    X-Envelope-From
    Envelope-Sender
    X-Sender
    Return-Path

SpamAssassin will attempt to use these, if some heuristics (such as the header
placement in the message, or the absence of fetchmail signatures) appear to
indicate that they are safe to use. However, it may choose the wrong headers
in some mailserver configurations. (More discussion of this can be found
in bug 2142 and bug 4747 in the SpamAssassin BugZilla.)

To avoid this heuristic failure, the C<envelope_sender_header> setting may be
helpful. Name the header that your MTA or MDA adds to messages containing the
address used at the MAIL FROM step of the SMTP transaction.

If the header in question contains C<E<lt>> or C<E<gt>> characters at the start
and end of the email address in the right-hand side, as in the SMTP
transaction, these will be stripped.

If the header is not found in a message, or if its value does not contain an
C<@> sign, SpamAssassin will issue a warning in the logs and fall back to its
default heuristics.

(Note for MTA developers: we would prefer if the use of a single header be
avoided in future, since that precludes 'downstream' spam scanning.
C<http://wiki.apache.org/spamassassin/EnvelopeSenderInReceived> details a
better proposal, storing the envelope sender at each hop in the C<Received>
header.)

example:

    envelope_sender_header X-SA-Exim-Mail-From

=cut

  # envelope_sender_header: plain string setting, undefined unless configured.
  push @cmds, {
    setting => 'envelope_sender_header',
    default => undef,
    type    => $CONF_TYPE_STRING,
  };

=item describe SYMBOLIC_TEST_NAME description ...

Used to describe a test. This text is shown to users in the detailed report.

Note that test names which begin with '__' are reserved for meta-match
sub-rules, and are not scored or listed in the 'tests hit' reports.

Also note that by convention, rule descriptions should be limited in
length to no more than 50 characters.

=cut

  # describe: the 'describe' command is stored under the 'descriptions'
  # setting as key/value pairs.
  push @cmds, {
    command => 'describe',
    setting => 'descriptions',
    type    => $CONF_TYPE_HASH_KEY_VALUE,
  };

=item report_charset CHARSET (default: UTF-8)

Set the MIME Content-Type charset used for the text/plain report which
is attached to spam mail messages.

=cut

  # report_charset: plain string setting.
  push @cmds, {
    setting => 'report_charset',
    default => 'UTF-8',
    type    => $CONF_TYPE_STRING,
  };

=item report ...some text for a report...

Set the report template which is attached to spam mail messages. See the
C<10_default_prefs.cf> configuration file in C</usr/share/spamassassin> for an
example.

If you change this, try to keep it under 78 columns. Each C<report>
line appends to the existing template, so use C<clear_report_template>
to restart.

Tags can be included as explained above.

=cut

  # report: the 'report' command feeds the 'report_template' setting.
  push @cmds, {
    command => 'report',
    setting => 'report_template',
    default => '',
    type    => $CONF_TYPE_TEMPLATE,
  };

=item clear_report_template

Clear the report template.

=cut

  # clear_report_template: argument-less command handled by the parser's
  # set_template_clear routine.
  push @cmds, {
    command => 'clear_report_template',
    setting => 'report_template',
    type    => $CONF_TYPE_NOARGS,
    code    => \&Mail::SpamAssassin::Conf::Parser::set_template_clear
  };

=item report_contact ...text of contact address...

Set what _CONTACTADDRESS_ is replaced with in the above report text.
By default, this is 'the administrator of that system', since the hostname
of the system the scanner is running on is also included.

=cut

  # report_contact: plain string setting.
  push @cmds, {
    setting => 'report_contact',
    default => 'the administrator of that system',
    type    => $CONF_TYPE_STRING,
  };

=item report_hostname ...hostname to use...

Set what _HOSTNAME_ is replaced with in the above report text.
By default, this is determined dynamically as whatever the host running
SpamAssassin calls itself.

=cut

  # report_hostname: plain string setting, empty by default.
  push @cmds, {
    setting => 'report_hostname',
    default => '',
    type    => $CONF_TYPE_STRING,
  };

=item unsafe_report ...some text for a report...

Set the report template which is attached to spam mail messages which contain a
non-text/plain part. See the C<10_default_prefs.cf> configuration file in
C</usr/share/spamassassin> for an example.

Each C<unsafe_report> line appends to the existing template, so use
C<clear_unsafe_report_template> to restart.

Tags can be used in this template (see above for details).

=cut

  # unsafe_report: the 'unsafe_report' command feeds the
  # 'unsafe_report_template' setting.
  push @cmds, {
    command => 'unsafe_report',
    setting => 'unsafe_report_template',
    default => '',
    type    => $CONF_TYPE_TEMPLATE,
  };

=item clear_unsafe_report_template

Clear the unsafe_report template.

=cut

  # clear_unsafe_report_template: argument-less command handled by the
  # parser's set_template_clear routine.
  push @cmds, {
    command => 'clear_unsafe_report_template',
    setting => 'unsafe_report_template',
    type    => $CONF_TYPE_NOARGS,
    code    => \&Mail::SpamAssassin::Conf::Parser::set_template_clear
  };

=item mbox_format_from_regex

Set a specific regular expression to be used for mbox file From separators.

For example, this setting will allow sa-learn to process emails stored in
a kmail 2 mbox:

  mbox_format_from_regex /^From \S+ ?[[:upper:]][[:lower:]]{2}(?:, \d\d [[:upper:]][[:lower:]]{2} \d{4} [0-2]\d:\d\d:\d\d [+-]\d{4}| [[:upper:]][[:lower:]]{2} [ 1-3]\d [ 0-2]\d:\d\d:\d\d \d{4})/

=cut

  # mbox_format_from_regex: plain string setting with no default.
  push @cmds, {
    setting => 'mbox_format_from_regex',
    type    => $CONF_TYPE_STRING
  };


=item parse_dkim_uris ( 0 | 1 ) (default: 1)

If this option is set to 1 and the message contains DKIM headers, the headers
will be parsed for URIs to process alongside URIs found in the body with some
rules and modules (e.g. URIDNSBL).

=cut

  # parse_dkim_uris: plain boolean setting.
  push @cmds, {
    setting => 'parse_dkim_uris',
    default => 1,
    type    => $CONF_TYPE_BOOL,
  };

=back

=head1 RULE DEFINITIONS AND PRIVILEGED SETTINGS

These settings differ from the ones above, in that they are considered
'privileged'. Only users running C<spamassassin> from their procmailrc's or
forward files, or sysadmins editing a file in C</etc/mail/spamassassin>, can
use them. C<spamd> users cannot use them in their C<user_prefs> files, for
security and efficiency reasons, unless C<allow_user_rules> is enabled (and
then, they may only add rules from below).

=over 4

=item allow_user_rules ( 0 | 1 ) (default: 0)

This setting allows users to create rules (and only rules) in their
C<user_prefs> files for use with C<spamd>. It defaults to off, because
this could be a severe security hole. It may be possible for users to
gain root level access if C<spamd> is run as root. It is NOT a good
idea, unless you have some other way of ensuring that users' tests are
safe. Don't use this unless you are certain you know what you are
doing. Furthermore, this option causes spamassassin to recompile all
the tests each time it processes a message for a user with a rule in
his/her C<user_prefs> file, which could have a significant effect on
server load. It is not recommended.

Note that it is not currently possible to use C<allow_user_rules> to modify an
existing system rule from a C<user_prefs> file with C<spamd>.

=cut

  # allow_user_rules: accepts only a literal 0 or 1, stores it as a number
  # and logs the resulting state at debug level.
  push @cmds, {
    setting => 'allow_user_rules',
    is_priv => 1,
    default => 0,
    type    => $CONF_TYPE_BOOL,
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      return $MISSING_REQUIRED_VALUE if $value eq '';
      return $INVALID_VALUE unless $value =~ /^[01]$/;

      my $enabled = $value+0;
      $self->{allow_user_rules} = $enabled;

      my $state = $enabled ? "allowing" : "not allowing";
      dbg("config: " . $state . " user rules!");
    }
  };

=item redirector_pattern /pattern/modifiers

A regex pattern that matches both the redirector site portion, and
the target site portion of a URI.

Note: The target URI portion must be surrounded in parentheses and
      no other part of the pattern may create a backreference.

Example: http://chkpt.zdnet.com/chkpt/whatever/spammer.domain/yo/dude

  redirector_pattern /^https?:\/\/(?:opt\.)?chkpt\.zdnet\.com\/chkpt\/\w+\/(.*)$/i

=cut

  # redirector_pattern: compiles the given pattern and appends the compiled
  # regexp to the 'redirector_patterns' list kept on the main object's conf.
  push @cmds, {
    setting => 'redirector_pattern',
    is_priv => 1,
    default => [],
    type    => $CONF_TYPE_STRINGLIST,
    code    => sub {
      my ($self, $key, $value, $line) = @_;

      $value =~ s/^\s+//;
      return $MISSING_REQUIRED_VALUE if $value eq '';

      my ($rec, $err) = compile_regexp($value, 1);
      unless ($rec) {
        dbg("config: invalid redirector_pattern '$value': $err");
        return $INVALID_VALUE;
      }

      push @{ $self->{main}->{conf}->{redirector_patterns} }, $rec;
    }
  };

=item header SYMBOLIC_TEST_NAME header op /pattern/modifiers [if-unset: STRING]

Define a test. C<SYMBOLIC_TEST_NAME> is a symbolic test name, such as
'FROM_ENDS_IN_NUMS'. C<header> is the name of a mail header field,
such as 'Subject', 'To', 'From', etc. Header field names are matched
case-insensitively (conforming to RFC 5322 section 1.2.2), except for
all-capitals metaheader fields such as ALL, MESSAGEID, ALL-TRUSTED.

Appending a modifier C<:raw> to a header field name will inhibit decoding of
quoted-printable or base-64 encoded strings, and will preserve all whitespace
inside the header string. The C<:raw> may also be applied to pseudo-headers
e.g. C<ALL:raw> will return a pristine (unmodified) header section.

Appending a modifier C<:addr> to a header field name will cause everything
except the first email address to be removed from the header field. It is
mainly applicable to header fields 'From', 'Sender', 'To', 'Cc' along with
their 'Resent-*' counterparts, and the 'Return-Path'.

Appending a modifier C<:name> to a header field name will cause everything
except the first display name to be removed from the header field.
It is
mainly applicable to header fields containing a single mail address: 'From',
'Sender', along with their 'Resent-From' and 'Resent-Sender' counterparts.

It is syntactically permitted to append more than one modifier to a header
field name, although currently most combinations achieve no additional effect,
for example C<From:addr:raw> or C<From:raw:addr> is currently the same as
C<From:addr> .

For example, appending C<:addr> to a header name will result in example@foo
in all of the following cases:

=over 4

=item example@foo

=item example@foo (Foo Blah)

=item example@foo, example@bar

=item display: example@foo (Foo Blah), example@bar ;

=item Foo Blah E<lt>example@fooE<gt>

=item "Foo Blah" E<lt>example@fooE<gt>

=item "'Foo Blah'" E<lt>example@fooE<gt>

=back

For example, appending C<:name> to a header name will result in "Foo Blah"
(without quotes) in all of the following cases:

=over 4

=item example@foo (Foo Blah)

=item example@foo (Foo Blah), example@bar

=item display: example@foo (Foo Blah), example@bar ;

=item Foo Blah E<lt>example@fooE<gt>

=item "Foo Blah" E<lt>example@fooE<gt>

=item "'Foo Blah'" E<lt>example@fooE<gt>

=back

There are several special pseudo-headers that can be specified:

=over 4

=item C<ALL> can be used to mean the text of all the message's headers.
Note that all whitespace inside the headers, at line folds, is currently
compressed into a single space (' ') character. To obtain a pristine
(unmodified) header section, use C<ALL:raw> - the :raw modifier is documented
above. There are also similar pseudo-headers that return the headers added by
specific relays: ALL-TRUSTED, ALL-INTERNAL, ALL-UNTRUSTED, ALL-EXTERNAL.

=item C<ToCc> can be used to mean the contents of both the 'To' and 'Cc'
headers.
2986 2987=item C<EnvelopeFrom> is the address used in the 'MAIL FROM:' phase of the SMTP 2988transaction that delivered this message, if this data has been made available 2989by the SMTP server. See C<envelope_sender_header> for more information 2990on how to set this. 2991 2992=item C<MESSAGEID> is a symbol meaning all Message-Id's found in the message; 2993some mailing list software moves the real 'Message-Id' to 'Resent-Message-Id' 2994or to 'X-Message-Id', then uses its own one in the 'Message-Id' header. 2995The value returned for this symbol is the text from all 3 headers, separated 2996by newlines. 2997 2998=item C<X-Spam-Relays-Untrusted>, C<X-Spam-Relays-Trusted>, 2999C<X-Spam-Relays-Internal> and C<X-Spam-Relays-External> represent a portable, 3000pre-parsed representation of the message's network path, as recorded in the 3001Received headers, divided into 'trusted' vs 'untrusted' and 'internal' vs 3002'external' sets. See C<http://wiki.apache.org/spamassassin/TrustedRelays> for 3003more details. 3004 3005=back 3006 3007C<op> is either C<=~> (contains regular expression) or C<!~> (does not contain 3008regular expression), and C<pattern> is a valid Perl regular expression, with 3009C<modifiers> as regexp modifiers in the usual style. Note that multi-line 3010rules are not supported, even if you use C<x> as a modifier. Also note that 3011the C<#> character must be escaped (C<\#>) or else it will be considered to be 3012the start of a comment and not part of the regexp. 3013 3014If the header specified matches multiple headers, their text will be 3015concatenated with embedded \n's. Therefore you may wish to use C</m> if you 3016use C<^> or C<$> in your regular expression. 3017 3018If the C<[if-unset: STRING]> tag is present, then C<STRING> will 3019be used if the header is not found in the mail message. 3020 3021Test names must not start with a number, and must contain only 3022alphanumerics and underscores. 
It is suggested that lower-case characters 3023not be used, and names have a length of no more than 22 characters, 3024as an informal convention. Dashes are not allowed. 3025 3026Note that test names which begin with '__' are reserved for meta-match 3027sub-rules, and are not scored or listed in the 'tests hit' reports. 3028Test names which begin with 'T_' are reserved for tests which are 3029undergoing QA, and these are given a very low score. 3030 3031If you add or modify a test, please be sure to run a sanity check afterwards 3032by running C<spamassassin --lint>. This will avoid confusing error 3033messages, or other tests being skipped as a side-effect. 3034 3035=item header SYMBOLIC_TEST_NAME exists:header_field_name 3036 3037Define a header field existence test. C<header_field_name> is the name 3038of a header field to test for existence. Not to be confused with a 3039test for a nonempty header field body, which can be implemented by a 3040C<header SYMBOLIC_TEST_NAME header =~ /\S/> rule as described above. 3041 3042=item header SYMBOLIC_TEST_NAME eval:name_of_eval_method([arguments]) 3043 3044Define a header eval test. C<name_of_eval_method> is the name of 3045a method registered by a C<Mail::SpamAssassin::Plugin> object. 3046C<arguments> are optional arguments to the function call. 3047 3048=item header SYMBOLIC_TEST_NAME eval:check_rbl('set', 'zone' [, 'sub-test']) 3049 3050Check a DNSBL (a DNS blacklist or whitelist). This will retrieve Received: 3051headers from the message, extract the IP addresses, select which ones are 3052'untrusted' based on the C<trusted_networks> logic, and query that DNSBL 3053zone. There's a few things to note: 3054 3055=over 4 3056 3057=item duplicated or private IPs 3058 3059Duplicated IPs are only queried once and reserved IPs are not queried. 
3060Private IPs are those listed in 3061C<https://www.iana.org/assignments/ipv4-address-space>, 3062C<http://duxcw.com/faq/network/privip.htm>, 3063C<http://duxcw.com/faq/network/autoip.htm>, or 3064C<https://tools.ietf.org/html/rfc5735> as private. 3065 3066=item the 'set' argument 3067 3068This is used as a 'zone ID'. If you want to look up a multiple-meaning zone 3069like SORBS, you can then query the results from that zone using it; 3070but all check_rbl_sub() calls must use that zone ID. 3071 3072Also, if more than one IP address gets a DNSBL hit for a particular rule, it 3073does not affect the score because rules only trigger once per message. 3074 3075=item the 'zone' argument 3076 3077This is the root zone of the DNSBL. 3078 3079The domain name is considered to be a fully qualified domain name 3080(i.e. not subject to DNS resolver's search or default domain options). 3081No trailing period is needed, and will be removed if specified. 3082 3083=item the 'sub-test' argument 3084 3085This optional argument behaves the same as the sub-test argument in 3086C<check_rbl_sub()> below. 3087 3088=item selecting all IPs except for the originating one 3089 3090This is accomplished by placing '-notfirsthop' at the end of the set name. 3091This is useful for querying against DNS lists which list dialup IP 3092addresses; the first hop may be a dialup, but as long as there is at least 3093one more hop, via their outgoing SMTP server, that's legitimate, and so 3094should not gain points. If there is only one hop, that will be queried 3095anyway, as it should be relaying via its outgoing SMTP server instead of 3096sending directly to your MX (mail exchange). 3097 3098=item selecting IPs by whether they are trusted 3099 3100When checking a 'nice' DNSBL (a DNS whitelist), you cannot trust the IP 3101addresses in Received headers that were not added by trusted relays. To 3102test the first IP address that can be trusted, place '-firsttrusted' at the 3103end of the set name. 
That should test the IP address of the relay that 3104connected to the most remote trusted relay. 3105 3106Note that this requires that SpamAssassin know which relays are trusted. For 3107simple cases, SpamAssassin can make a good estimate. For complex cases, you 3108may get better results by setting C<trusted_networks> manually. 3109 3110In addition, you can test all untrusted IP addresses by placing '-untrusted' 3111at the end of the set name. Important note -- this does NOT include the 3112IP address from the most recent 'untrusted line', as used in '-firsttrusted' 3113above. That's because we're talking about the trustworthiness of the 3114IP address data, not the source header line, here; and in the case of 3115the most recent header (the 'firsttrusted'), that data can be trusted. 3116See the Wiki page at C<http://wiki.apache.org/spamassassin/TrustedRelays> 3117for more information on this. 3118 3119=item Selecting just the last external IP 3120 3121By using '-lastexternal' at the end of the set name, you can select only 3122the external host that connected to your internal network, or at least 3123the last external host with a public IP. 3124 3125=back 3126 3127=item header SYMBOLIC_TEST_NAME eval:check_rbl_txt('set', 'zone') 3128 3129Same as check_rbl(), except querying using IN TXT instead of IN A records. 3130If the zone supports it, it will result in a line of text describing 3131why the IP is listed, typically a hyperlink to a database entry. 3132 3133=item header SYMBOLIC_TEST_NAME eval:check_rbl_sub('set', 'sub-test') 3134 3135Create a sub-test for 'set'. If you want to look up a multi-meaning zone 3136like relays.osirusoft.com, you can then query the results from that zone 3137using the zone ID from the original query. 
The sub-test may either be an
IPv4 dotted address for RBLs that return multiple A records, or a
non-negative decimal number to specify a bitmask for RBLs that return a
single A record containing a bitmask of results, or a regular expression.

Note: the set name must be exactly the same as for the main query rule,
including selections like '-notfirsthop' appearing at the end of the set
name.

=cut

  # header: splits "RULENAME definition" and registers the rule with the
  # parser.  "eval:" / "rbleval:" definitions become eval tests (RBL evals
  # when the function name starts with check_rbl or check_dns); anything
  # else is handed over as an ordinary header test.
  push @cmds, {
    setting => 'header',
    is_priv => 1,
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      local($1);

      # peel the rule name off the front; what remains is the definition
      return $INVALID_VALUE unless $value =~ s/^(\S+)\s+//;
      my $rulename = $1;
      return $MISSING_REQUIRED_VALUE if $value eq '';

      my ($fn) = $value =~ /^(?:rbl)?eval:(.*)$/;
      if (!defined $fn) {
        # Detailed parsing in add_test
        return $self->{parser}->add_test ($rulename, $value, $TYPE_HEAD_TESTS);
      }

      # an eval must look like a function call: name(...)
      return $INVALID_VALUE unless $fn =~ /^\w+\(.*\)$/;

      my $eval_type = ($fn =~ /^check_(?:rbl|dns)/) ? $TYPE_RBL_EVALS
                                                    : $TYPE_HEAD_EVALS;
      $self->{parser}->add_test ($rulename, $fn, $eval_type);
    }
  };

=item body SYMBOLIC_TEST_NAME /pattern/modifiers

Define a body pattern test. C<pattern> is a Perl regular expression. Note:
as per the header tests, C<#> must be escaped (C<\#>) or else it is considered
the beginning of a comment.

The 'body' in this case is the textual parts of the message body; any
non-text MIME parts are stripped, and the message decoded from
Quoted-Printable or Base-64-encoded format if necessary. Parts declared as
text/html will be rendered from HTML to text.

Body is processed as a raw byte string, which means Unicode-specific regex
features like \p{} can NOT be used for matching. The normalize_charset
setting will also affect how raw bytes are presented.
Rules in .cf files
should be written portably - to match "a with umlaut" character, look for
both LATIN1 and UTF8 raw byte variants: /(?:\xE4|\xC3\xA4)/

All body paragraphs (double-newline-separated blocks of text) are each turned
into a single line, with line breaks removed and whitespace normalized. Any
lines longer than 2kB are split into shorter separate lines (from a boundary
when possible), this may unexpectedly prevent pattern from matching. Patterns
are matched independently against each of these lines.

Note that by default the message Subject header is considered part of the
body and becomes the first line when running the rules. If you don't want
to match Subject along with body text, use "tflags RULENAME nosubject".

=item body SYMBOLIC_TEST_NAME eval:name_of_eval_method([args])

Define a body eval test. See above.

=cut

  # body: splits "RULENAME definition" and registers either a body eval
  # test ("eval:name(...)") or a body pattern test with the parser.
  push @cmds, {
    setting => 'body',
    is_priv => 1,
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      local($1);

      # peel the rule name off the front; what remains is the definition
      return $INVALID_VALUE unless $value =~ s/^(\S+)\s+//;
      my $rulename = $1;
      return $MISSING_REQUIRED_VALUE if $value eq '';

      my ($fn) = $value =~ /^eval:(.*)$/;
      if (!defined $fn) {
        return $self->{parser}->add_test ($rulename, $value, $TYPE_BODY_TESTS);
      }

      # an eval must look like a function call: name(...)
      return $INVALID_VALUE unless $fn =~ /^\w+\(.*\)$/;
      $self->{parser}->add_test ($rulename, $fn, $TYPE_BODY_EVALS);
    }
  };

=item uri SYMBOLIC_TEST_NAME /pattern/modifiers

Define a uri pattern test. C<pattern> is a Perl regular expression. Note: as
per the header tests, C<#> must be escaped (C<\#>) or else it is considered
the beginning of a comment.

The 'uri' in this case is a list of all the URIs in the body of the email,
and the test will be run on each and every one of those URIs, adjusting the
score if a match is found.
Use this test instead of one of the body tests
when you need to match a URI, as it is more accurately bound to the start/end
points of the URI, and will also be faster.

=cut

# we don't do URI evals yet - maybe later
#    if (/^uri\s+(\S+)\s+eval:(.*)$/) {
#      $self->{parser}->add_test ($1, $2, $TYPE_URI_EVALS);
#      next;
#    }

  # uri: splits "RULENAME pattern" and registers a URI pattern test with
  # the parser; there is no eval form for this rule type.
  push @cmds, {
    setting => 'uri',
    is_priv => 1,
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      local($1);

      # peel the rule name off the front; what remains is the pattern
      return $INVALID_VALUE unless $value =~ s/^(\S+)\s+//;
      my $rulename = $1;
      return $MISSING_REQUIRED_VALUE if $value eq '';

      $self->{parser}->add_test ($rulename, $value, $TYPE_URI_TESTS);
    }
  };

=item rawbody SYMBOLIC_TEST_NAME /pattern/modifiers

Define a raw-body pattern test. C<pattern> is a Perl regular expression.
Note: as per the header tests, C<#> must be escaped (C<\#>) or else it is
considered the beginning of a comment.

The 'raw body' of a message is the raw data inside all textual parts. The
text will be decoded from base64 or quoted-printable encoding, but HTML
tags and line breaks will still be present. Multiline expressions will
need to be used to match strings that are broken by line breaks.

Note that the text is split into 2-4kB chunks (from a word boundary when
possible), this may unexpectedly prevent pattern from matching. Patterns
are matched independently against each of these chunks.

=item rawbody SYMBOLIC_TEST_NAME eval:name_of_eval_method([args])

Define a raw-body eval test. See above.

=cut

  # rawbody: splits "RULENAME definition" and registers either a raw-body
  # eval test ("eval:name(...)") or a raw-body pattern test with the parser.
  push @cmds, {
    setting => 'rawbody',
    is_priv => 1,
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      local($1);

      # peel the rule name off the front; what remains is the definition
      return $INVALID_VALUE unless $value =~ s/^(\S+)\s+//;
      my $rulename = $1;
      return $MISSING_REQUIRED_VALUE if $value eq '';

      my ($fn) = $value =~ /^eval:(.*)$/;
      if (!defined $fn) {
        return $self->{parser}->add_test ($rulename, $value, $TYPE_RAWBODY_TESTS);
      }

      # an eval must look like a function call: name(...)
      return $INVALID_VALUE unless $fn =~ /^\w+\(.*\)$/;
      $self->{parser}->add_test ($rulename, $fn, $TYPE_RAWBODY_EVALS);
    }
  };

=item full SYMBOLIC_TEST_NAME /pattern/modifiers

Define a full message pattern test. C<pattern> is a Perl regular expression.
Note: as per the header tests, C<#> must be escaped (C<\#>) or else it is
considered the beginning of a comment.

The full message is the pristine message headers plus the pristine message
body, including all MIME data such as images, other attachments, MIME
boundaries, etc.

Note that CRLF/LF line endings are matched as the original message has them.
For any full rules that match newlines, it's recommended to use \r?$ instead
of plain $, so it works on all systems.

=item full SYMBOLIC_TEST_NAME eval:name_of_eval_method([args])

Define a full message eval test. See above.

=cut

  # full: splits "RULENAME definition" and registers either a full-message
  # eval test ("eval:name(...)") or a full-message pattern test.
  push @cmds, {
    setting => 'full',
    is_priv => 1,
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      local($1);

      # peel the rule name off the front; what remains is the definition
      return $INVALID_VALUE unless $value =~ s/^(\S+)\s+//;
      my $rulename = $1;
      return $MISSING_REQUIRED_VALUE if $value eq '';

      my ($fn) = $value =~ /^eval:(.*)$/;
      if (!defined $fn) {
        return $self->{parser}->add_test ($rulename, $value, $TYPE_FULL_TESTS);
      }

      # an eval must look like a function call: name(...)
      return $INVALID_VALUE unless $fn =~ /^\w+\(.*\)$/;
      $self->{parser}->add_test ($rulename, $fn, $TYPE_FULL_EVALS);
    }
  };

=item meta SYMBOLIC_TEST_NAME boolean expression

Define a boolean expression test in terms of other tests that have
been hit or not hit. For example:

meta META1 TEST1 && !(TEST2 || TEST3)

Note that English language operators ("and", "or") will be treated as
rule names, and that there is no C<XOR> operator.

=item meta SYMBOLIC_TEST_NAME boolean arithmetic expression

Can also define an arithmetic expression in terms of other tests,
with an unhit test having the value "0" and a hit test having a
nonzero value. The value of a hit meta test is that of its arithmetic
expression. The value of a hit eval test is that returned by its
method. The value of a hit header, body, rawbody, uri, or full test
which has the "multiple" tflag is the number of times the test hit.
The value of any other type of hit test is "1".

For example:

meta META2 (3 * TEST1 - 2 * TEST2) E<gt> 0

Note that Perl builtins and functions, like C<abs()>, B<can't> be
used, and will be treated as rule names.

If you want to define a meta-rule, but do not want its individual sub-rules to
count towards the final score unless the entire meta-rule matches, give the
sub-rules names that start with '__' (two underscores). SpamAssassin will
ignore these for scoring.

=item meta SYMBOLIC_TEST_NAME ... rules_matching(RULEGLOB) ...

Special function that will expand to list of matching rulenames. Can be
used anywhere in expressions. Argument supports glob style rulename
matching (* = anything, ? = one character). Matching is case-sensitive.

For example, this will hit if at least two __FOO_* rules hit:

  body __FOO_1 /xxx/
  body __FOO_2 /yyy/
  body __FOO_3 /zzz/
  meta FOO_META rules_matching(__FOO_*) >= 2

Which would be the same as:

  meta FOO_META (__FOO_1 + __FOO_2 + __FOO_3) >= 2


=cut

  # meta: splits "RULENAME expression", rejects expressions containing a
  # doubled '*' operator, and registers the meta test with the parser.
  push @cmds, {
    setting => 'meta',
    is_priv => 1,
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      local($1);

      # peel the rule name off the front; what remains is the expression
      return $INVALID_VALUE unless $value =~ s/^(\S+)\s+//;
      my $rulename = $1;
      return $MISSING_REQUIRED_VALUE if $value eq '';

      my $has_double_star = $value =~ /\*\s*\*/;
      if ($has_double_star) {
        info("config: found invalid '**' or '* *' operator in meta command");
        return $INVALID_VALUE;
      }

      $self->{parser}->add_test ($rulename, $value, $TYPE_META_TESTS);
    }
  };

=item reuse SYMBOLIC_TEST_NAME [ OLD_SYMBOLIC_TEST_NAME_1 ... ]

Defines the name of a test that should be "reused" during the scoring
process. If a message has an X-Spam-Status header that shows a hit for
this rule or any of the old rule names given, a hit will be added for
this rule when B<mass-check --reuse> is used. Examples:

C<reuse SPF_PASS>

C<reuse MY_NET_RULE_V2 MY_NET_RULE_V1>

The actual logic for reuse tests is done by
B<Mail::SpamAssassin::Plugin::Reuse>.

=cut

  # reuse: takes "RULENAME [OLD_NAME ...]" and makes sure RULENAME exists as
  # a test (an empty one if it is not defined yet).  Only the first name is
  # used here; the old names are validated for syntax but not stored by
  # this handler.
  push (@cmds, {
    setting => 'reuse',
    is_priv => 1,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      local($1);
      if ($value eq '') {
        return $MISSING_REQUIRED_VALUE;
      }
      # The pattern must be anchored at the start as well as the end:
      # without '^' a value such as "FOO BAR-BAZ" matched on its tail and
      # registered "BAZ" instead of being rejected.
      if ($value !~ /^\s*(\w+)(?:\s+\w+)*\s*$/) {
        return $INVALID_VALUE;
      }
      my $rule_name = $1;
      # don't overwrite tests, just define them so scores, priorities work
      if (!exists $self->{tests}->{$rule_name}) {
        $self->{parser}->add_test($rule_name, undef, $TYPE_EMPTY_TESTS);
      }
    }
  });

=item tflags SYMBOLIC_TEST_NAME flags

Used to set flags on a test. Parameter is a space-separated list of flag
names or flag name = value pairs.
These flags are used in the score-determination back end system for details
of the test's behaviour. Please see C<bayes_auto_learn> for more information
about tflag interaction with those systems. The following flags can be set:

=over 4

=item net

The test is a network test, and will not be run in the mass checking system
or if B<-L> is used, therefore its score should not be modified.

=item nice

The test is intended to compensate for common false positives, and should be
assigned a negative score.

=item userconf

The test requires user configuration before it can be used (like
language-specific tests).

=item learn

The test requires training before it can be used.

=item noautolearn

The test will explicitly be ignored when calculating the score for
learning systems.

=item autolearn_force

The test will be subject to less stringent autolearn thresholds.

Normally, SpamAssassin will require 3 points from the header and 3
points from the body to be auto-learned as spam. This option keeps
the threshold at 6 points total but changes it to have no regard to the
source of the points.

=item noawl

This flag is specific to the AWL plugin.

Normally, the AWL plugin normalizes scores via auto-whitelist. In some
scenarios it works against the system administrator when trying to add some
rules to correct misclassified email. When the AWL plugin searches the email
and finds the noawl flag it will exit without normalizing the score or storing
the value in the database.

=item multiple

The test will be evaluated multiple times, for use with meta rules.
Only affects header, body, rawbody, uri, and full tests.

=item maxhits=N

If B<multiple> is specified, limit the number of hits found to N.
If the rule is used in a meta that counts the hits (e.g. __RULENAME E<gt> 5),
this is a way to avoid wasted extra work (use "tflags multiple maxhits=6").

For example:

  uri __KAM_COUNT_URIS /^./
  tflags __KAM_COUNT_URIS multiple maxhits=16
  describe __KAM_COUNT_URIS A multiple match used to count URIs in a message

  meta __KAM_HAS_0_URIS (__KAM_COUNT_URIS == 0)
  meta __KAM_HAS_1_URIS (__KAM_COUNT_URIS >= 1)
  meta __KAM_HAS_2_URIS (__KAM_COUNT_URIS >= 2)
  meta __KAM_HAS_3_URIS (__KAM_COUNT_URIS >= 3)
  meta __KAM_HAS_4_URIS (__KAM_COUNT_URIS >= 4)
  meta __KAM_HAS_5_URIS (__KAM_COUNT_URIS >= 5)
  meta __KAM_HAS_10_URIS (__KAM_COUNT_URIS >= 10)
  meta __KAM_HAS_15_URIS (__KAM_COUNT_URIS >= 15)

=item nosubject

Used only for B<body> rules. If specified, Subject header will not be a
part of the matched body text. See I<body> for more info.

=item ips_only

This flag is specific to rules invoking an URIDNSBL plugin,
it is documented there.

=item domains_only

This flag is specific to rules invoking an URIDNSBL plugin,
it is documented there.

=item ns

This flag is specific to rules invoking an URIDNSBL plugin,
it is documented there.

=item a

This flag is specific to rules invoking an URIDNSBL plugin,
it is documented there.

=item notrim

This flag is specific to rules invoking an URIDNSBL plugin,
it is documented there.

=item nolog

This flag will hide (sensitive) rule information from reports.

=back

=cut

  # tflags: no handler of its own, only a declared value type.
  push @cmds, {
    setting => 'tflags',
    type    => $CONF_TYPE_HASH_KEY_VALUE,
    is_priv => 1,
  };

=item priority SYMBOLIC_TEST_NAME n

Assign a specific priority to a test.  All tests, except for DNS and Meta
tests, are run in increasing priority value order (negative priority values
are run before positive priority values). The default test priority is 0
(zero).

The values C<-99999999999999> and C<-99999999999998> have a special meaning
internally, and should not be used.

=cut

  push @cmds, {
    setting => 'priority',
    type    => $CONF_TYPE_HASH_KEY_VALUE,
    is_priv => 1,
    # Value is "RULENAME N": the rule name, whitespace, then an optionally
    # negative integer.  Stores N in $self->{priority}{RULENAME}.
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      # Split on the first run of whitespace only; the remainder is the number.
      my ($test_name, $rank) = split(/\s+/, $value, 2);
      return $MISSING_REQUIRED_VALUE if !defined $rank;
      return $INVALID_VALUE          if $test_name !~ IS_RULENAME;
      return $INVALID_VALUE          if $rank !~ /^-?\d+$/;
      # Kept as the final statement: its value is what the handler returns.
      $self->{priority}->{$test_name} = $rank;
    },
  };

=back

=head1 ADMINISTRATOR SETTINGS

These settings differ from the ones above, in that they are considered 'more
privileged' -- even more than the ones in the B<PRIVILEGED SETTINGS> section.
No matter what C<allow_user_rules> is set to, these can never be set from a
user's C<user_prefs> file when spamc/spamd is being used.  However, all
settings can be used by local programs run directly by the user.

=over 4

=item version_tag string

This tag is appended to the SA version in the X-Spam-Status header. You should
include it when you modify your ruleset, especially if you plan to distribute it.
A good choice for I<string> is your last name or your initials followed by a
number which you increase with each change.

The version_tag will be lowercased, and any character other than a letter,
a digit or a period will be replaced by an underscore.  A tag that has
already been recorded is not added a second time.

e.g.

  version_tag myrules1    # version=2.41-myrules1

=cut

  push @cmds, {
    setting  => 'version_tag',
    is_admin => 1,
    # Normalizes the tag and appends it to @Mail::SpamAssassin::EXTRA_VERSION
    # unless an identical tag is already present.
    # Returns $MISSING_REQUIRED_VALUE for an empty value.
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      if ($value eq '') {
        return $MISSING_REQUIRED_VALUE;
      }
      my $tag = lc($value);
      $tag =~ tr/a-z0-9./_/c;   # complement: everything outside a-z0-9. => _
      # $tag cannot be empty here, so the duplicate check alone decides.
      # (Testing the tag's truthiness instead would silently drop a tag
      # of "0".)
      unless (grep { $_ eq $tag } @Mail::SpamAssassin::EXTRA_VERSION) {
        push(@Mail::SpamAssassin::EXTRA_VERSION, $tag);
      }
    },
  };

=item test SYMBOLIC_TEST_NAME (ok|fail) Some string to test against

Define a regression testing string. You can have more than one regression test
string per symbolic test name. Simply specify a string that you wish the test
to match.

These tests are only run as part of the test suite - they should not affect the
general running of SpamAssassin.

=cut

  push @cmds, {
    setting  => 'test',
    is_admin => 1,
    # Does nothing unless $COLLECT_REGRESSION_TESTS is defined.  Otherwise
    # the value must be "NAME ok|fail STRING" and is handed to the parser's
    # add_regression_test(); anything else yields $INVALID_VALUE.
    code     => sub {
      return unless defined $COLLECT_REGRESSION_TESTS;
      my ($self, $key, $value, $line) = @_;
      local ($1,$2,$3);
      if ($value !~ /^(\S+)\s+(ok|fail)\s+(.*)$/) { return $INVALID_VALUE; }
      $self->{parser}->add_regression_test($1, $2, $3);
    },
  };

=item body_part_scan_size   (default: 50000)

Per mime-part scan size limit in bytes for "body" type rules.
The decoded/stripped mime-part is truncated approx to this size.
Helps scanning large messages safely, so it's not necessary to
skip them completely. Disabled with 0.

=cut

  # Numeric setting with a default; no custom handler.
  push @cmds, {
    setting  => 'body_part_scan_size',
    type     => $CONF_TYPE_NUMERIC,
    default  => 50000,
    is_admin => 1,
  };


=item rawbody_part_scan_size   (default: 500000)

Like body_part_scan_size, for "rawbody" type rules.

=cut

  # Numeric setting with a default; no custom handler.
  push @cmds, {
    setting  => 'rawbody_part_scan_size',
    type     => $CONF_TYPE_NUMERIC,
    default  => 500000,
    is_admin => 1,
  };

=item rbl_timeout t [t_min] [zone]		(default: 15 3)

All DNS queries are made at the beginning of a check and we try to read
the results at the end.  This value specifies the maximum period of time
(in seconds) to wait for a DNS query.  If most of the DNS queries have
succeeded for a particular message, then SpamAssassin will not wait for
the full period to avoid wasting time on unresponsive server(s), but will
shrink the timeout according to a percentage of queries already completed.
As the number of queries remaining approaches 0, the timeout value will
gradually approach a t_min value, which is an optional second parameter
and defaults to 0.2 * t.  If t is smaller than t_min, the initial timeout
is set to t_min.  Here is a chart of queries remaining versus the timeout
in seconds, for the default 15 second / 3 second timeout setting:

  queries left  100%  90%  80%  70%  60%  50%  40%  30%  20%  10%   0%
  timeout        15   14.9 14.5 13.9 13.1 12.0 10.7  9.1  7.3  5.3   3

For example, if 20 queries are made at the beginning of a message check
and 16 queries have returned (leaving 20%), the remaining 4 queries should
finish within 7.3 seconds since their query started or they will be timed out.
Note that timed out queries are only aborted when there is nothing else left
for SpamAssassin to do - long evaluation of other rules may grant queries
additional time.

If a parameter 'zone' is specified (it must end with a letter, which
distinguishes it from other numeric parameters), then the setting only
applies to DNS queries against the specified DNS domain (host, domain or
RBL (sub)zone).  Matching is case-insensitive, the actual domain may be a
subdomain of the specified zone.

=cut

  push @cmds, {
    setting  => 'rbl_timeout',
    is_admin => 1,
    default  => 15,
    # Value is "t [t_min] [zone]".  t and t_min are non-negative numbers with
    # an optional unit suffix (s, m, h, d, w; case-insensitive) and are stored
    # in seconds.  Without a zone they go to $self->{rbl_timeout} and
    # $self->{rbl_timeout_min}; with one they go under
    # $self->{by_zone}{<zone>}.
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      if (!defined $value || $value =~ /^$/) {
        return $MISSING_REQUIRED_VALUE;
      }
      local ($1,$2,$3);
      if ($value !~ /^ ( \+? \d+ (?: \. \d*)? [smhdw]? )
                       (?: \s+ ( \+? \d+ (?: \. \d*)? [smhdw]? ) )?
                       (?: \s+ (\S* [a-zA-Z]) )? $/xsi) {
        return $INVALID_VALUE;
      }
      my ($timeout, $timeout_min, $zone) = ($1, $2, $3);

      # Strip a unit suffix, if any, and scale the number to seconds.  The
      # loop variable aliases $timeout / $timeout_min, so both are updated
      # in place.
      my %seconds_per = (
        s => 1, m => 60, h => 3600, d => 24*3600, w => 7*24*3600,
      );
      for my $duration ($timeout, $timeout_min) {
        next if !defined $duration;
        if ($duration =~ s/\s*([smhdw])\z//i) {
          $duration *= $seconds_per{lc $1};
        }
      }

      # Pick where the values are stored: the object itself for a global
      # setting, or the per-zone hash (created on demand) otherwise.
      my $slot = $self;
      if (defined $zone) {
        $zone =~ s/^\.//;  $zone =~ s/\.\z//;  # strip leading and trailing dot
        $zone = lc $zone;
        $slot = ($self->{by_zone}{$zone} ||= {});
      }
      $slot->{rbl_timeout} = 0 + $timeout;
      $slot->{rbl_timeout_min} = 0 + $timeout_min  if defined $timeout_min;
    },
    type     => $CONF_TYPE_DURATION,
  };

=item util_rb_tld tld1 tld2 ...

=encoding utf8

This option maintains a list of valid TLDs in the RegistryBoundaries code.
Top level domains (TLD) include things like com, net, org, xn--p1ai, рф, ...
International domain names may be specified in ASCII-compatible encoding (ACE),
e.g. xn--p1ai, xn--qxam, or with Unicode labels encoded as UTF-8 octets,
e.g. рф, ελ.

=cut

  push @cmds, {
    setting  => 'util_rb_tld',
    is_admin => 1,
    # Value is a whitespace-separated list of single labels (no dots).
    # Each label is passed through idn_to_ascii() and recorded as a key of
    # $self->{valid_tlds}.
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      return $MISSING_REQUIRED_VALUE  if !defined $value || $value =~ /^$/;
      return $INVALID_VALUE  if $value !~ /^[^\s.]+(?:\s+[^\s.]+)*$/;
      for my $tld (split(/\s+/, $value)) {
        $self->{valid_tlds}{idn_to_ascii($tld)} = 1;
      }
    },
  };

=item util_rb_2tld 2tld-1.tld 2tld-2.tld ...

This option maintains a list of valid 2nd-level TLDs in the RegistryBoundaries
code.  2TLDs include things like co.uk, fed.us, etc.  International domain
names may be specified in ASCII-compatible encoding (ACE), or with Unicode
labels encoded as UTF-8 octets.

=cut

  push @cmds, {
    setting  => 'util_rb_2tld',
    is_admin => 1,
    # Value is a whitespace-separated list of names with exactly two
    # dot-separated labels.  Each is passed through idn_to_ascii() and
    # recorded as a key of $self->{two_level_domains}.
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      return $MISSING_REQUIRED_VALUE  if !defined $value || $value =~ /^$/;
      return $INVALID_VALUE
        if $value !~ /^[^\s.]+\.[^\s.]+(?:\s+[^\s.]+\.[^\s.]+)*$/;
      for my $tld (split(/\s+/, $value)) {
        $self->{two_level_domains}{idn_to_ascii($tld)} = 1;
      }
    },
  };

=item util_rb_3tld 3tld1.some.tld 3tld2.other.tld ...

This option maintains a list of valid 3rd-level TLDs in the RegistryBoundaries
code.  3TLDs include things like demon.co.uk, plc.co.im, etc.  International
domain names may be specified in ASCII-compatible encoding (ACE), or with
Unicode labels encoded as UTF-8 octets.

=cut

  push @cmds, {
    setting  => 'util_rb_3tld',
    is_admin => 1,
    # Value is a whitespace-separated list of names with exactly three
    # dot-separated labels.  Each is passed through idn_to_ascii() and
    # recorded as a key of $self->{three_level_domains}.
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      return $MISSING_REQUIRED_VALUE  if !defined $value || $value =~ /^$/;
      return $INVALID_VALUE
        if $value !~ /^[^\s.]+\.[^\s.]+\.[^\s.]+(?:\s+[^\s.]+\.[^\s.]+\.[^\s.]+)*$/;
      for my $tld (split(/\s+/, $value)) {
        $self->{three_level_domains}{idn_to_ascii($tld)} = 1;
      }
    },
  };

=item clear_util_rb

Empty internal list of valid TLDs (including 2nd and 3rd level) which
RegistryBoundaries code uses.  Only useful if you want to override the
standard lists supplied by sa-update.

=cut

  # NOTE(review): unlike the util_rb_* entries this one carries no is_admin
  # flag although it is listed under ADMINISTRATOR SETTINGS - confirm whether
  # that is intentional.
  push @cmds, {
    setting => 'clear_util_rb',
    type    => $CONF_TYPE_NOARGS,
    # Takes no argument; any non-empty value yields $INVALID_VALUE.
    # Resets the three TLD tables to undef.
    code    => sub {
      my ($self, $key, $value, $line) = @_;
      if (defined $value && $value ne '') {
        return $INVALID_VALUE;
      }
      undef $self->{$_}
        for qw(valid_tlds two_level_domains three_level_domains);
      dbg("config: cleared tld lists");
    },
  };

=item bayes_path /path/filename	(default: ~/.spamassassin/bayes)

This is the directory and filename for Bayes databases.  Several databases
will be created, with this as the base directory and filename, with C<_toks>,
C<_seen>, etc. appended to the base.  The default setting results in files
called C<~/.spamassassin/bayes_seen>, C<~/.spamassassin/bayes_toks>, etc.

By default, each user has their own in their C<~/.spamassassin> directory with
mode 0700/0600.  For system-wide SpamAssassin use, you may want to reduce disk
space usage by sharing this across all users.  However, Bayes appears to be
more effective with individual user databases.

=cut

  push @cmds, {
    setting  => 'bayes_path',
    type     => $CONF_TYPE_STRING,
    default  => '__userstate__/bayes',
    is_admin => 1,
    # The value is a path prefix, so an existing directory is rejected
    # with $INVALID_VALUE; an empty value is $MISSING_REQUIRED_VALUE.
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      return $MISSING_REQUIRED_VALUE  if !defined $value || $value =~ /^$/;
      return $INVALID_VALUE  if -d $value;
      $self->{bayes_path} = $value;
    },
  };

=item bayes_file_mode		(default: 0700)

The file mode bits used for the Bayesian filtering database files.

Make sure you specify this using the 'x' mode bits set, as it may also be used
to create directories.  However, if a file is created, the resulting file will
not have any execute bits set (the umask is set to 111).  The argument is a
string of octal digits, it is converted to a numeric value internally.

=cut

  push @cmds, {
    setting  => 'bayes_file_mode',
    type     => $CONF_TYPE_NUMERIC,
    default  => '0700',
    is_admin => 1,
    # Accepts exactly three octal digits with an optional leading 0 and
    # stores the untainted string as given.
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      return $INVALID_VALUE  unless $value =~ /^0?[0-7]{3}$/;
      $self->{bayes_file_mode} = untaint_var($value);
    },
  };

=item bayes_store_module Name::Of::BayesStore::Module

If this option is set, the module given will be used as an alternate
to the default bayes storage mechanism.  It must conform to the
published storage specification (see
Mail::SpamAssassin::BayesStore). For example, set this to
Mail::SpamAssassin::BayesStore::SQL to use the generic SQL storage
module.

=cut

  push @cmds, {
    setting  => 'bayes_store_module',
    type     => $CONF_TYPE_STRING,
    default  => '',
    is_admin => 1,
    # Only letters, digits, underscore and colon are allowed; the stored
    # value is taken from the regexp capture rather than from $value.
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      local ($1);
      if ($value =~ /^([_A-Za-z0-9:]+)$/) {
        $self->{bayes_store_module} = $1;
      }
      else {
        return $INVALID_VALUE;
      }
    },
  };

=item bayes_sql_dsn DBI:databasetype:databasename:hostname:port

Used for BayesStore::SQL storage implementation.

This option gives the connect string used to connect to the SQL based Bayes
storage.

=cut

  # Plain string setting; no custom handler.
  push @cmds, {
    setting  => 'bayes_sql_dsn',
    type     => $CONF_TYPE_STRING,
    default  => '',
    is_admin => 1,
  };

=item bayes_sql_username

Used by BayesStore::SQL storage implementation.

This option gives the username used by the above DSN.

=cut

  # Plain string setting; no custom handler.
  push @cmds, {
    setting  => 'bayes_sql_username',
    type     => $CONF_TYPE_STRING,
    default  => '',
    is_admin => 1,
  };

=item bayes_sql_password

Used by BayesStore::SQL storage implementation.

This option gives the password used by the above DSN.

=cut

  # Plain string setting; no custom handler.
  push @cmds, {
    setting  => 'bayes_sql_password',
    type     => $CONF_TYPE_STRING,
    default  => '',
    is_admin => 1,
  };

=item bayes_sql_username_authorized ( 0 | 1 )  (default: 0)

Whether to call the services_authorized_for_username plugin hook in BayesSQL.
If the hook does not determine that the user is allowed to use bayes or is
invalid then the database will not be initialized.

NOTE: By default the user is considered invalid until a plugin returns
a true value.  If you enable this, but do not have a proper plugin
loaded, all users will turn up as invalid.

The username passed into the plugin can be affected by the
bayes_sql_override_username config option.

=cut

  # Boolean setting; no custom handler.
  push @cmds, {
    setting  => 'bayes_sql_username_authorized',
    type     => $CONF_TYPE_BOOL,
    default  => 0,
    is_admin => 1,
  };

=item user_scores_dsn DBI:databasetype:databasename:hostname:port

If you load user scores from an SQL database, this will set the DSN
used to connect.  Example: C<DBI:mysql:spamassassin:localhost>

If you load user scores from an LDAP directory, this will set the DSN used to
connect. You have to write the DSN as an LDAP URL, the components being the
host and port to connect to, the base DN for the search, the scope of the
search (base, one or sub), the single attribute being the multivalued attribute
used to hold the configuration data (space separated pairs of key and value,
just as in a file) and finally the filter being the expression used to filter
out the wanted username. Note that the filter expression is being used in a
sprintf statement with the username as the only parameter, thus it can hold a
single __USERNAME__ expression.  This will be replaced with the username.

Example: C<ldap://localhost:389/dc=koehntopp,dc=de?saconfig?uid=__USERNAME__>

=cut

  # Plain string setting; no custom handler.
  push @cmds, {
    setting  => 'user_scores_dsn',
    type     => $CONF_TYPE_STRING,
    default  => '',
    is_admin => 1,
  };

=item user_scores_sql_username username

The authorized username to connect to the above DSN.

=cut

  # Plain string setting; no custom handler.
  push @cmds, {
    setting  => 'user_scores_sql_username',
    type     => $CONF_TYPE_STRING,
    default  => '',
    is_admin => 1,
  };

=item user_scores_sql_password password

The password for the database username, for the above DSN.

=cut

  # Plain string setting; no custom handler.
  push @cmds, {
    setting  => 'user_scores_sql_password',
    type     => $CONF_TYPE_STRING,
    default  => '',
    is_admin => 1,
  };

=item user_scores_sql_custom_query query

This option gives you the ability to create a custom SQL query to
retrieve user scores and preferences.  In order to work correctly your
query should return two values, the preference name and value, in that
order.  In addition, there are several "variables" that you can use
as part of your query, these variables will be substituted for the
current values right before the query is run.  The current allowed
variables are:

=over 4

=item _TABLE_

The name of the table where user scores and preferences are stored. Currently
hardcoded to userpref, to change this value you need to create a new custom
query with the new table name.

=item _USERNAME_

The current user's username.

=item _MAILBOX_

The portion before the @ as derived from the current user's username.

=item _DOMAIN_

The portion after the @ as derived from the current user's username, this
value may be null.

=back

The query must be one continuous line in order to parse correctly.

Here are several example queries, please note that these are broken up
for easy reading, in your config it should be one continuous line.

=over 4

=item Current default query:

C<SELECT preference, value FROM _TABLE_ WHERE username = _USERNAME_ OR username = '@GLOBAL' ORDER BY username ASC>

=item Use global and then domain level defaults:

C<SELECT preference, value FROM _TABLE_ WHERE username = _USERNAME_ OR username = '@GLOBAL' OR username = '@~'||_DOMAIN_ ORDER BY username ASC>

=item Maybe global prefs should override user prefs:

C<SELECT preference, value FROM _TABLE_ WHERE username = _USERNAME_ OR username = '@GLOBAL' ORDER BY username DESC>

=back

=cut

  # Plain string setting without a default value; no custom handler.
  push @cmds, {
    setting  => 'user_scores_sql_custom_query',
    type     => $CONF_TYPE_STRING,
    default  => undef,
    is_admin => 1,
  };

=item user_scores_ldap_username

This is the Bind DN used to connect to the LDAP server.  It defaults
to the empty string (""), allowing anonymous binding to work.

Example: C<cn=master,dc=koehntopp,dc=de>

=cut

  # Plain string setting; no custom handler.
  push @cmds, {
    setting  => 'user_scores_ldap_username',
    type     => $CONF_TYPE_STRING,
    default  => '',
    is_admin => 1,
  };

=item user_scores_ldap_password

This is the password used to connect to the LDAP server.  It defaults
to the empty string ("").

=cut

  # Plain string setting; no custom handler.
  push @cmds, {
    setting  => 'user_scores_ldap_password',
    type     => $CONF_TYPE_STRING,
    default  => '',
    is_admin => 1,
  };

=item user_scores_fallback_to_global        (default: 1)

Fall back to global scores and settings if userprefs can't be loaded
from SQL or LDAP, instead of passing the message through unprocessed.

=cut

  # Boolean setting; no custom handler.
  push @cmds, {
    setting  => 'user_scores_fallback_to_global',
    type     => $CONF_TYPE_BOOL,
    default  => 1,
    is_admin => 1,
  };

=item loadplugin [Mail::SpamAssassin::Plugin::]ModuleName [/path/module.pm]

Load a SpamAssassin plugin module.
The C<ModuleName> is the perl module
name, used to create the plugin object itself.

Module naming is strict, name must only contain alphanumeric characters or
underscores.  File must have .pm extension.

C</path/module.pm> is the file to load, containing the module's perl code;
if it's specified as a relative path, it's considered to be relative to the
current configuration file.  If it is omitted, the module will be loaded
using perl's search path (the C<@INC> array).

See C<Mail::SpamAssassin::Plugin> for more details on writing plugins.

=cut

  push @cmds, {
    setting  => 'loadplugin',
    is_admin => 1,
    # Value is "Package::Name [file.pm]": a package name of word characters
    # with at most ten "::"-separated prefixes, optionally followed by a
    # whitespace-free path ending in .pm.  Both parts are handed to
    # load_plugin().
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      return $MISSING_REQUIRED_VALUE  if $value eq '';
      local ($1,$2);
      return $INVALID_VALUE
        unless $value =~ /^((?:\w+::){0,10}\w+)(?:\s+(\S+\.pm))?$/i;
      my ($module, $file) = ($1, $2);
      $self->load_plugin ($module, $file);
    },
  };

=item tryplugin ModuleName [/path/module.pm]

Same as C<loadplugin>, but silently ignored if the .pm file cannot be found in
the filesystem.

=cut

  push @cmds, {
    setting  => 'tryplugin',
    is_admin => 1,
    # Same syntax as loadplugin; differs only in passing a true third
    # argument to load_plugin().
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      return $MISSING_REQUIRED_VALUE  if $value eq '';
      local ($1,$2);
      return $INVALID_VALUE
        unless $value =~ /^((?:\w+::){0,10}\w+)(?:\s+(\S+\.pm))?$/i;
      my ($module, $file) = ($1, $2);
      $self->load_plugin ($module, $file, 1);
    },
  };

=item ignore_always_matching_regexps         (Default: 0)

Ignore any rule which contains a regexp which always matches.
Currently only catches regexps which contain '||', or which begin or
end with a '|'.  Also ignore rules with C<some> combinatorial explosions.

=cut

  # Boolean setting; no custom handler.
  push @cmds, {
    setting  => 'ignore_always_matching_regexps',
    type     => $CONF_TYPE_BOOL,
    default  => 0,
    is_admin => 1,
  };

=item geodb_module STRING

This option tells SpamAssassin which geolocation module to use.
If not specified, all supported ones are tried in this order:

 MaxMind::DB::Reader   (same as GeoIP2::Database::Reader)
 Geo::IP
 IP::Country::DB_File  (not used unless geodb_options path set)
 IP::Country::Fast

The name is matched case-insensitively; the short forms C<geoip2>, C<geoip>,
C<db_file> and C<fast> are accepted as well.

Plugins can override this internally if required.

=cut

  push @cmds, {
    setting  => 'geodb_module',
    is_admin => 1,
    default  => undef,
    type     => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
    # Maps the (lowercased) module name or short alias to the internal
    # identifier stored in $self->{geodb}{module}; unknown names yield
    # $INVALID_VALUE.
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      $value = lc $value;
      if ($value eq 'maxmind::db::reader' ||
          $value eq 'geoip2::database::reader' || $value eq 'geoip2') {
        $self->{geodb}->{module} = 'geoip2';
      } elsif ($value eq 'geo::ip' || $value eq 'geoip') {
        $self->{geodb}->{module} = 'geoip';
      } elsif ($value eq 'ip::country::db_file' || $value eq 'db_file') {
        $self->{geodb}->{module} = 'dbfile';
      } elsif ($value eq 'ip::country::fast' || $value eq 'fast') {
        $self->{geodb}->{module} = 'fast';
      } else {
        return $Mail::SpamAssassin::Conf::INVALID_VALUE;
      }
    },
  };

  # support deprecated RelayCountry setting
  push @cmds, {
    setting  => 'country_db_type',
    is_admin => 1,
    default  => undef,
    type     => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
    # Always warns about the deprecation, then maps the old value onto
    # $self->{geodb}{module} by case-insensitive substring match.
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      warn("config: deprecated setting used, change country_db_type to geodb_module\n");
      # Order matters: "GeoIP2" also matches /Geo/, so it is tested first.
      if ($value =~ /GeoIP2/i) {
        $self->{geodb}->{module} = 'geoip2';
      } elsif ($value =~ /Geo/i) {
        $self->{geodb}->{module} = 'geoip';
      } elsif ($value =~ /Fast/i) {
        $self->{geodb}->{module} = 'fast';
      } else {
        return $Mail::SpamAssassin::Conf::INVALID_VALUE;
      }
    },
  };

=item geodb_options dbtype:/path/to/db ...

Supported dbtypes:

I<city> - use City database
I<country> - use Country database
I<isp> - try loading ISP database
I<asn> - try loading ASN database

Append full database path with colon, for example:
I<isp:/opt/geoip/isp.mmdb>

The special option I<reset> discards all options set so far.

Plugins can internally request all types they require, geodb_options is only
needed if the default location search (described below) does not work.

GeoIP/GeoIP2 searches these files/directories:

 country:
   GeoIP2-Country.mmdb, GeoLite2-Country.mmdb
   GeoIP.dat (and v6 version)
 city:
   GeoIP2-City.mmdb, GeoLite2-City.mmdb
   GeoIPCity.dat, GeoLiteCity.dat (and v6 versions)
 isp:
   GeoIP2-ISP.mmdb
   GeoIPISP.dat, GeoLiteISP.dat (and v6 versions)
 directories:
   /usr/local/share/GeoIP
   /usr/share/GeoIP
   /var/lib/GeoIP
   /opt/share/GeoIP

=cut

  push @cmds, {
    setting  => 'geodb_options',
    is_admin => 1,
    type     => $CONF_TYPE_HASH_KEY_VALUE,
    default  => {},
    # Value is a whitespace-separated list of "dbtype[:path]" items.  Each
    # known dbtype is stored in $self->{geodb}{options}{<dbtype>} with its
    # path (undef when no path is given); "reset" empties the options hash.
    # The first unknown dbtype aborts with $INVALID_VALUE; items seen before
    # it stay applied.
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      # 'asn' is accepted here because the documentation above lists it as a
      # supported dbtype; previously it was rejected as invalid.
      # NOTE(review): confirm the geolocation backend reads options->{asn}.
      my %is_dbtype = map { $_ => 1 } qw(country city isp asn);
      foreach my $item (split (/\s+/, $value)) {
        my ($dbtype, $db) = split(/:/, $item, 2);
        $dbtype = lc($dbtype);
        if ($dbtype eq 'reset') {
          $self->{geodb}->{options} = {};
        } elsif ($is_dbtype{$dbtype}) {
          $self->{geodb}->{options}->{$dbtype} = $db || undef;
        } else {
          return $INVALID_VALUE;
        }
      }
    },
  };

=item geodb_search_path /path/to/GeoIP ...

Alternative to geodb_options. Overrides the default list of directories to
search for default filenames.

=cut

  push @cmds, {
    setting  => 'geodb_search_path',
    type     => $CONF_TYPE_STRINGLIST,
    default  => [],
    is_admin => 1,
    # "reset" empties $self->{geodb}{geodb_search_path}; any other non-empty
    # value is split on whitespace and appended to that list.
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      return $MISSING_REQUIRED_VALUE  if $value eq '';
      if ($value eq 'reset') {
        $self->{geodb}->{geodb_search_path} = [];
      }
      else {
        push(@{$self->{geodb}->{geodb_search_path}}, split(/\s+/, $value));
      }
    },
  };

  # support deprecated RelayCountry setting
  push @cmds, {
    setting  => 'country_db_path',
    type     => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
    default  => undef,
    is_admin => 1,
    # Warns about the deprecation (even for an invalid value), then stores
    # the path as the geodb 'country' option.
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      warn("config: deprecated setting used, change country_db_path to geodb_options\n");
      return $Mail::SpamAssassin::Conf::INVALID_VALUE  if $value eq '';
      $self->{geodb}->{options}->{country} = $value;
    },
  };
  # support deprecated URILocalBL setting
  push @cmds, {
    setting  => 'uri_country_db_path',
    type     => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
    default  => undef,
    is_admin => 1,
    # Warns about the deprecation (even for an invalid value), then stores
    # the path as the geodb 'country' option.
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      warn("config: deprecated setting used, change uri_country_db_path to geodb_options\n");
      return $Mail::SpamAssassin::Conf::INVALID_VALUE  if $value eq '';
      $self->{geodb}->{options}->{country} = $value;
    },
  };
  # support deprecated URILocalBL setting
  push @cmds, {
    setting  => 'uri_country_db_isp_path',
    type     => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
    default  => undef,
    is_admin => 1,
    # Warns about the deprecation (even for an invalid value), then stores
    # the path as the geodb 'isp' option.
    code     => sub {
      my ($self, $key, $value, $line) = @_;
      warn("config: deprecated setting used, change uri_country_db_isp_path to geodb_options\n");
      return $Mail::SpamAssassin::Conf::INVALID_VALUE  if $value eq '';
      $self->{geodb}->{options}->{isp} = $value;
    },
  };

=back

=head1 PREPROCESSING OPTIONS

=over 4

=item include filename

Include configuration lines from C<filename>.   Relative paths are considered
relative to the current configuration file or user preferences file.

=item if (boolean perl expression)

Used to support conditional interpretation of the configuration
file. Lines between this and a corresponding C<else> or C<endif> line
will be ignored unless the expression evaluates as true
(in the perl sense; that is, defined and non-0 and non-empty string).

The conditional accepts a limited subset of perl for security -- just enough to
perform basic arithmetic comparisons.  The following input is accepted:

=over 4

=item numbers, whitespace, arithmetic operations and grouping

Namely these characters and ranges:

  ( ) - + * / _ . , < = > ! ~ 0-9 whitespace

=item version

This will be replaced with the version number of the currently-running
SpamAssassin engine.  Note: The version used is in the internal SpamAssassin
version format which is C<x.yyyzzz>, where x is major version, y is minor
version, and z is maintenance version.  So 3.0.0 is C<3.000000>, and 3.4.80
is C<3.004080>.

=item perl_version

(Introduced in 3.4.1)  This will be replaced with the version number of the
currently-running perl engine.  Note: The version used is in the $] version
format which is C<x.yyyzzz>, where x is major version, y is minor version,
and z is maintenance version.  So 5.8.8 is C<5.008008>, and 5.10.0 is
C<5.010000>. Use to protect rules that incorporate RE syntax elements
introduced in later versions of perl, such as the C<++> non-backtracking
match introduced in perl 5.10.
For example: 4513 4514 # Avoid lint error on older perl installs 4515 # Check SA version first to avoid warnings on checking perl_version on older SA 4516 if version > 3.004001 && perl_version >= 5.018000 4517 body INVALID_RE_SYNTAX_IN_PERL_BEFORE_5_18 /(?[ \p{Thai} & \p{Digit} ])/ 4518 endif 4519 4520Note that the above will still generate a warning on perl older than 5.10.0; 4521to avoid that warning do this instead: 4522 4523 # Avoid lint error on older perl installs 4524 if can(Mail::SpamAssassin::Conf::perl_min_version_5010000) 4525 body INVALID_RE_SYNTAX_IN_PERL_5_8 /\w++/ 4526 endif 4527 4528Warning: a can() test is only defined for perl 5.10.0! 4529 4530 4531=item plugin(Name::Of::Plugin) 4532 4533This is a function call that returns C<1> if the plugin named 4534C<Name::Of::Plugin> is loaded, or C<undef> otherwise. 4535 4536=item has(Name::Of::Package::function_name) 4537 4538This is a function call that returns C<1> if the perl package named 4539C<Name::Of::Package> includes a function called C<function_name>, or C<undef> 4540otherwise. Note that packages can be SpamAssassin plugins or built-in classes, 4541there's no difference in this respect. Internally this invokes UNIVERSAL::can. 4542 4543=item can(Name::Of::Package::function_name) 4544 4545This is a function call that returns C<1> if the perl package named 4546C<Name::Of::Package> includes a function called C<function_name> 4547B<and> that function returns a true value when called with no arguments, 4548otherwise C<undef> is returned. 4549 4550Is similar to C<has>, except that it also calls the named function, 4551testing its return value (unlike the perl function UNIVERSAL::can). 4552This makes it possible for a 'feature' function to determine its result 4553value at run time. 4554 4555=back 4556 4557If the end of a configuration file is reached while still inside a 4558C<if> scope, a warning will be issued, but parsing will restart on 4559the next file. 

For example:

    if (version > 3.000000)
      header MY_FOO	...
    endif

    loadplugin MyPlugin plugintest.pm

    if plugin (MyPlugin)
      header MY_PLUGIN_FOO	eval:check_for_foo()
      score  MY_PLUGIN_FOO	0.1
    endif

=item ifplugin PluginModuleName

An alias for C<if plugin(PluginModuleName)>.

=item else

Used to support conditional interpretation of the configuration
file. Lines between this and a corresponding C<endif> line,
will be ignored unless the conditional expression evaluates as false
(in the perl sense; that is, undefined, 0, or the empty string).

=item require_version n.nnnnnn

Indicates that the entire file, from this line on, requires a certain
version of SpamAssassin to run.  If a different (older or newer) version
of SpamAssassin tries to read the configuration from this file, it will
output a warning instead, and ignore it.

Note: The version used is in the internal SpamAssassin version format which is
C<x.yyyzzz>, where x is major version, y is minor version, and z is maintenance
version.  So 3.0.0 is C<3.000000>, and 3.4.80 is C<3.004080>.

=cut

  push @cmds, {
    setting => 'require_version',
    type    => $CONF_TYPE_STRING,
    code    => sub {
      # Deliberately empty: this handler does nothing with the value.
      # NOTE(review): presumably the version check happens while the file is
      # preprocessed elsewhere - confirm.
    },
  };

=back

=head1 TEMPLATE TAGS

The following C<tags> can be used as placeholders in certain options.
They will be replaced by the corresponding value when they are used.

Some tags can take an argument (in parentheses). The argument is
optional, and the default is shown below.
4614 4615 _YESNO_ "Yes" for spam, "No" for nonspam (=ham) 4616 _YESNO(spam_str,ham_str)_ returns the first argument ("Yes" if missing) 4617 for spam, and the second argument ("No" if missing) for ham 4618 _YESNOCAPS_ "YES" for spam, "NO" for nonspam (=ham) 4619 _YESNOCAPS(spam_str,ham_str)_ same as _YESNO(...)_, but uppercased 4620 _SCORE(PAD)_ message score, if PAD is included and is either spaces or 4621 zeroes, then pad scores with that many spaces or zeroes 4622 (default, none) ie: _SCORE(0)_ makes 2.4 become 02.4, 4623 _SCORE(00)_ is 002.4. 12.3 would be 12.3 and 012.3 4624 respectively. 4625 _REQD_ message threshold 4626 _VERSION_ version (eg. 3.0.0 or 3.1.0-r26142-foo1) 4627 _SUBVERSION_ sub-version/code revision date (eg. 2004-01-10) 4628 _RULESVERSION_ comma-separated list of rules versions, retrieved from 4629 an '# UPDATE version' comment in rules files; if there is 4630 more than one set of rules (update channels) the order 4631 is unspecified (currently sorted by names of files); 4632 _HOSTNAME_ hostname of the machine the mail was processed on 4633 _REMOTEHOSTNAME_ hostname of the machine the mail was sent from, only 4634 available with spamd 4635 _REMOTEHOSTADDR_ ip address of the machine the mail was sent from, only 4636 available with spamd 4637 _BAYES_ bayes score 4638 _TOKENSUMMARY_ number of new, neutral, spammy, and hammy tokens found 4639 _BAYESTC_ number of new tokens found 4640 _BAYESTCLEARNED_ number of seen tokens found 4641 _BAYESTCSPAMMY_ number of spammy tokens found 4642 _BAYESTCHAMMY_ number of hammy tokens found 4643 _HAMMYTOKENS(N)_ the N most significant hammy tokens (default, 5) 4644 _SPAMMYTOKENS(N)_ the N most significant spammy tokens (default, 5) 4645 _DATE_ rfc-2822 date of scan 4646 _STARS(*)_ one "*" (use any character) for each full score point 4647 (note: limited to 50 'stars') 4648 _SENDERDOMAIN_ a domain name of the envelope sender address, lowercased 4649 _AUTHORDOMAIN_ a domain name of the author address (the From 
header 4650 field), lowercased; note that RFC 5322 allows a mail 4651 message to have multiple authors - currently only the 4652 domain name of the first email address is returned 4653 _RELAYSTRUSTED_ relays used and deemed to be trusted (see the 4654 'X-Spam-Relays-Trusted' pseudo-header) 4655 _RELAYSUNTRUSTED_ relays used that can not be trusted (see the 4656 'X-Spam-Relays-Untrusted' pseudo-header) 4657 _RELAYSINTERNAL_ relays used and deemed to be internal (see the 4658 'X-Spam-Relays-Internal' pseudo-header) 4659 _RELAYSEXTERNAL_ relays used and deemed to be external (see the 4660 'X-Spam-Relays-External' pseudo-header) 4661 _FIRSTTRUSTEDIP_ IP address of first trusted client (see RELAYSTRUSTED) 4662 _FIRSTTRUSTEDREVIP_ IP address of first trusted client (in reversed 4663 format suitable for RBL queries) 4664 _LASTEXTERNALIP_ IP address of client in the external-to-internal 4665 SMTP handover 4666 _LASTEXTERNALREVIP_ IP address of client in the external-to-internal 4667 SMTP handover (in reversed format suitable for RBL 4668 queries) 4669 _LASTEXTERNALRDNS_ reverse-DNS of client in the external-to-internal 4670 SMTP handover 4671 _LASTEXTERNALHELO_ HELO string used by client in the external-to-internal 4672 SMTP handover 4673 _AUTOLEARN_ autolearn status ("ham", "no", "spam", "disabled", 4674 "failed", "unavailable") 4675 _AUTOLEARNSCORE_ portion of message score used by autolearn 4676 _TESTS(,)_ tests hit separated by "," (or other separator) 4677 _TESTSSCORES(,)_ as above, except with scores appended (eg. AWL=-3.0,...) 
 4678 _SUBTESTS(,)_ subtests (start with "__") hit separated by "," 4679 (or other separator) 4680 _SUBTESTSCOLLAPSED(,)_ subtests (start with "__") hit separated by "," 4681 (or other separator) with duplicated rules collapsed 4682 _DCCB_ DCC's "Brand" 4683 _DCCR_ DCC's results 4684 _PYZOR_ Pyzor results 4685 _RBL_ full results for positive RBL queries in DNS URI format 4686 _LANGUAGES_ possible languages of mail 4687 _PREVIEW_ content preview 4688 _REPORT_ terse report of tests hit (for header reports) 4689 _SUBJPREFIX_ subject prefix based on rules, to be prepended to Subject 4690 header by SpamAssassin caller 4691 _SUMMARY_ summary of tests hit for standard report (for body reports) 4692 _CONTACTADDRESS_ contents of the 'report_contact' setting 4693 _HEADER(NAME)_ includes the value of a message header. value is the same 4694 as is found for header rules (see elsewhere in this doc) 4695 _TIMING_ timing breakdown report 4696 _ADDEDHEADERHAM_ resulting header fields as requested by add_header for ham 4697 _ADDEDHEADERSPAM_ resulting header fields as requested by add_header for spam 4698 _ADDEDHEADER_ same as ADDEDHEADERHAM for ham or ADDEDHEADERSPAM for spam 4699 4700If a tag reference uses the name of a tag which is not in this list or defined 4701by a loaded plugin, the reference will be left intact and not replaced by any 4702value. 4703All template tag names should be restricted to the character set [A-Za-z0-9(,)]. 4704 4705Additional, plugin specific, template tags can be found in the documentation for 4706the following plugins: 4707 4708 L<Mail::SpamAssassin::Plugin::ASN> 4709 L<Mail::SpamAssassin::Plugin::AWL> 4710 L<Mail::SpamAssassin::Plugin::TxRep> 4711 4712The C<HAMMYTOKENS> and C<SPAMMYTOKENS> tags have an optional second argument 4713which specifies a format. See the B<HAMMYTOKENS/SPAMMYTOKENS TAG FORMAT> 4714section, below, for details. 
4715 4716=head2 HAMMYTOKENS/SPAMMYTOKENS TAG FORMAT 4717 4718The C<HAMMYTOKENS> and C<SPAMMYTOKENS> tags have an optional second argument 4719which specifies a format: C<_SPAMMYTOKENS(N,FMT)_>, C<_HAMMYTOKENS(N,FMT)_> 4720The following formats are available: 4721 4722=over 4 4723 4724=item short 4725 4726Only the tokens themselves are listed. 4727I<For example, preference file entry:> 4728 4729C<add_header all Spammy _SPAMMYTOKENS(2,short)_> 4730 4731I<Results in message header:> 4732 4733C<X-Spam-Spammy: remove.php, UD:jpg> 4734 4735Indicating that the top two spammy tokens found are C<remove.php> 4736and C<UD:jpg>. (The token itself follows the last colon, the 4737text before the colon indicates something about the token. 4738C<UD> means the token looks like it might be part of a domain name.) 4739 4740=item compact 4741 4742The token probability, an abbreviated declassification distance (see 4743example), and the token are listed. 4744I<For example, preference file entry:> 4745 4746C<add_header all Spammy _SPAMMYTOKENS(2,compact)_> 4747 4748I<Results in message header:> 4749 4750C<0.989-6--remove.php, 0.988-+--UD:jpg> 4751 4752Indicating that the probabilities of the top two tokens are 0.989 and 47530.988, respectively. The first token has a declassification distance 4754of 6, meaning that if the token had appeared in at least 6 more ham 4755messages it would not be considered spammy. The C<+> for the second 4756token indicates a declassification distance greater than 9. 4757 4758=item long 4759 4760Probability, declassification distance, number of times seen in a ham 4761message, number of times seen in a spam message, age and the token are 4762listed. 
4763 4764I<For example, preference file entry:> 4765 4766C<add_header all Spammy _SPAMMYTOKENS(2,long)_> 4767 4768I<Results in message header:> 4769 4770C<X-Spam-Spammy: 0.989-6--0h-4s--4d--remove.php, 0.988-33--2h-25s--1d--UD:jpg> 4771 4772In addition to the information provided by the compact option, 4773the long option shows that the first token appeared in zero 4774ham messages and four spam messages, and that it was last 4775seen four days ago. The second token appeared in two ham messages, 477625 spam messages and was last seen one day ago. 4777(Unlike the C<compact> option, the long option shows declassification 4778distances that are greater than 9.) 4779 4780=back 4781 4782=cut 4783 4784 return \@cmds; 4785} 4786 4787########################################################################### 4788 4789# settings that were once part of core, but are now in (possibly-optional) 4790# bundled plugins. These will be warned about, but do not generate a fatal 4791# error when "spamassassin --lint" is run like a normal syntax error would. 

our @MIGRATED_SETTINGS = qw{
  ok_languages
};

###########################################################################

# Constructor.  Takes the owning main object as its only argument, creates
# the configuration parser, registers the default command set and
# initialises every per-configuration container to an empty value.
# Returns the new, blessed configuration object.
sub new {
  my $class = shift;
  $class = ref($class) || $class;
  my $self = {
    main => shift,
    registered_commands => [],
  }; bless ($self, $class);

  $self->{parser} = Mail::SpamAssassin::Conf::Parser->new($self);
  $self->{parser}->register_commands($self->set_default_commands());

  $self->{errors} = 0;
  $self->{plugins_loaded} = { };

  $self->{tests} = { };
  $self->{test_types} = { };
  $self->{scoreset} = [ {}, {}, {}, {} ];
  $self->{scoreset_current} = 0;
  $self->set_score_set (0);
  $self->{tflags} = { };
  $self->{source_file} = { };

  # keep descriptions in a slow but space-efficient single-string
  # data structure
  # NOTE: Deprecated usage of TieOneStringHash as of 10/2018, it's an
  # absolute pig, doubling config parsing time, while benchmarks indicate
  # no difference in resident memory size!
  $self->{descriptions} = { };
  #tie %{$self->{descriptions}}, 'Mail::SpamAssassin::Util::TieOneStringHash'
  #  or warn "tie failed";
  $self->{subjprefix} = { };

  # after parsing, tests are refiled into these hashes for each test type.
  # this allows e.g. a full-text test to be rewritten as a body test in
  # the user's user_prefs file.
  $self->{body_tests} = { };
  $self->{uri_tests} = { };
  $self->{uri_evals} = { }; # not used/implemented yet
  $self->{head_tests} = { };
  $self->{head_evals} = { };
  $self->{body_evals} = { };
  $self->{full_tests} = { };
  $self->{full_evals} = { };
  $self->{rawbody_tests} = { };
  $self->{rawbody_evals} = { };
  $self->{meta_tests} = { };
  $self->{eval_plugins} = { };
  $self->{eval_plugins_types} = { };

  # meta dependencies
  $self->{meta_dependencies} = {};

  # map eval function names to rulenames
  $self->{eval_to_rule} = {};

  # testing stuff
  $self->{regression_tests} = { };

  $self->{rewrite_header} = { };
  $self->{want_rebuild_for_type} = { };
  $self->{user_defined_rules} = { };
  $self->{headers_spam} = [ ];
  $self->{headers_ham} = [ ];

  $self->{bayes_ignore_headers} = [ ];
  $self->{bayes_ignore_from} = { };
  $self->{bayes_ignore_to} = { };

  $self->{welcomelist_auth} = { };
  $self->{def_welcomelist_auth} = { };
  $self->{welcomelist_from} = { };
  $self->{whitelist_allows_relays} = { };
  $self->{blacklist_from} = { };
  $self->{welcomelist_from_rcvd} = { };
  $self->{def_welcomelist_from_rcvd} = { };

  $self->{blacklist_to} = { };
  $self->{welcomelist_to} = { };
  $self->{more_spam_to} = { };
  $self->{all_spam_to} = { };

  $self->{trusted_networks} = $self->new_netset('trusted_networks',1);
  $self->{internal_networks} = $self->new_netset('internal_networks',1);
  $self->{msa_networks} = $self->new_netset('msa_networks',0); # no loopback IP
  $self->{trusted_networks_configured} = 0;
  $self->{internal_networks_configured} = 0;

  # Make sure we add in X-Spam-Checker-Version
  { my $r = [ "Checker-Version",
              "SpamAssassin _VERSION_ (_SUBVERSION_) on _HOSTNAME_" ];
    push(@{$self->{headers_spam}}, $r);
    push(@{$self->{headers_ham}}, $r);
  }

  # these should potentially be settable by end-users
  # perhaps via plugin?
  $self->{num_check_received} = 9;
  $self->{bayes_expiry_pct} = 0.75;
  $self->{bayes_expiry_period} = 43200;
  $self->{bayes_expiry_max_exponent} = 9;

  $self->{encapsulated_content_description} = 'original message before SpamAssassin';

  $self;
}

# Combined getter/setter for the stored 'mtime' value: when called with an
# argument that value is stored first; the current value is always returned.
sub mtime {
  my $self = shift;
  if (@_) {
    $self->{mtime} = shift;
  }
  return $self->{mtime};
}

###########################################################################

# Hand the configuration text to the parser with the second parser argument
# set to 1 (scores-only mode).  The text is passed on as $_[1] rather than
# through a named lexical, so no copy of it is made here.
sub parse_scores_only {
  my ($self) = @_;
  $self->{parser}->parse ($_[1], 1);
}

# As parse_scores_only(), but with the second parser argument set to 0.
sub parse_rules {
  my ($self) = @_;
  $self->{parser}->parse ($_[1], 0);
}

###########################################################################

# Select score set number $set: {scores} is re-pointed at the matching
# entry of {scoreset} and the choice is remembered in {scoreset_current}.
sub set_score_set {
  my ($self, $set) = @_;
  $self->{scores} = $self->{scoreset}->[$set];
  $self->{scoreset_current} = $set;
  dbg("config: score set $set chosen.");
}

# Return the number of the currently selected score set.
sub get_score_set {
  my($self) = @_;
  return $self->{scoreset_current};
}

# Return the list of rule type names held in @rule_types.
sub get_rule_types {
  my ($self) = @_;
  return @rule_types;
}

# Return the rule names stored under $test_type.  With $priority given only
# that priority is consulted, otherwise the names from every priority listed
# in {priorities} are collected.
sub get_rule_keys {
  my ($self, $test_type, $priority) = @_;

  # special case rbl_evals since they do not have a priority
  if ($test_type eq 'rbl_evals') {
    return keys(%{$self->{$test_type}});
  }

  if (defined($priority)) {
    return keys(%{$self->{$test_type}->{$priority}});
  }
  else {
    my @rules;
    foreach my $pri (keys(%{$self->{priorities}})) {
      push(@rules, keys(%{$self->{$test_type}->{$pri}}));
    }
    return @rules;
  }
}

# Return the stored value of rule $rulename of type $test_type.
# For 'rbl_evals' the stored array reference is returned as a list; for all
# other types the stored scalar is returned.  With no $priority, every
# priority listed in {priorities} is searched.  When the rule is not found
# a bare return is executed (undef in scalar context, empty list in list
# context).
sub get_rule_value {
  my ($self, $test_type, $rulename, $priority) = @_;

  # special case rbl_evals since they do not have a priority
  if ($test_type eq 'rbl_evals') {
    my $value = $self->{$test_type}->{$rulename};
    # an unknown rule must not be dereferenced: under strict refs that
    # would die instead of reporting "not found" like the branches below
    return unless defined $value;
    return @{$value};
  }

  if (defined($priority)) {
    return $self->{$test_type}->{$priority}->{$rulename};
  }
  else {
    foreach my $pri (keys(%{$self->{priorities}})) {
      if (exists($self->{$test_type}->{$pri}->{$rulename})) {
        return $self->{$test_type}->{$pri}->{$rulename};
      }
    }
    return;   # if we get here we didn't find the rule
  }
}

# Delete rule $rulename of type $test_type and return the deleted value.
# With no $priority, the first priority (in hash order of {priorities})
# that holds the rule is used; nothing is returned if the rule is absent.
sub delete_rule {
  my ($self, $test_type, $rulename, $priority) = @_;

  # special case rbl_evals since they do not have a priority
  if ($test_type eq 'rbl_evals') {
    return delete($self->{$test_type}->{$rulename});
  }

  if (defined($priority)) {
    return delete($self->{$test_type}->{$priority}->{$rulename});
  }
  else {
    foreach my $pri (keys(%{$self->{priorities}})) {
      if (exists($self->{$test_type}->{$pri}->{$rulename})) {
        return delete($self->{$test_type}->{$pri}->{$rulename});
      }
    }
    return;   # if we get here we didn't find the rule
  }
}

# trim_rules ($regexp)
#
# Remove all rules that don't match the given regexp (or are sub-rules of
# meta-tests that match the regexp).

# Keep only the rules whose name matches $regexp, plus every rule that a
# matching meta rule depends on (directly or indirectly); all other rules
# are deleted.  Dies if the regexp cannot be compiled or if no rule name
# matches it.
sub trim_rules {
  my ($self, $regexp) = @_;

  my ($rec, $err) = compile_regexp($regexp, 0);
  if (!$rec) {
    die "config: trim_rules: invalid regexp '$regexp': $err";
  }

  my @all_rules;

  foreach my $rule_type ($self->get_rule_types()) {
    push(@all_rules, $self->get_rule_keys($rule_type));
  }

  # no /o modifier here: /o would freeze the pattern of the very first call
  # for the lifetime of the process, so a later call with another regexp
  # would silently filter with the old one
  my @rules_to_keep = grep(/$rec/, @all_rules);

  if (@rules_to_keep == 0) {
    die "config: trim_rules: all rules excluded, nothing to test\n";
  }

  my @meta_tests = grep(/$rec/, $self->get_rule_keys('meta_tests'));
  foreach my $meta (@meta_tests) {
    push(@rules_to_keep, $self->add_meta_depends($meta))
  }

  my %rules_to_keep_hash;

  foreach my $rule (@rules_to_keep) {
    $rules_to_keep_hash{$rule} = 1;
  }

  foreach my $rule_type ($self->get_rule_types()) {
    foreach my $rulekey ($self->get_rule_keys($rule_type)) {
      $self->delete_rule($rule_type, $rulekey)
                    if (!$rules_to_keep_hash{$rulekey});
    }
  }
} # trim_rules()

# Return the names of all rules referenced by meta rule $meta, recursing
# into referenced rules that are themselves meta rules.  Dies if a meta
# rule references itself directly.
sub add_meta_depends {
  my ($self, $meta) = @_;

  my @rules;
  my @tokens = $self->get_rule_value('meta_tests', $meta) =~ m/(\w+)/g;

  @tokens = grep(!/^\d+$/, @tokens);
  # @tokens now only consists of sub-rules

  foreach my $token (@tokens) {
    die "config: meta test $meta depends on itself\n" if $token eq $meta;
    push(@rules, $token);

    # If the sub-rule is a meta-test, recurse
    if ($self->get_rule_value('meta_tests', $token)) {
      push(@rules, $self->add_meta_depends($token));
    }
  } # foreach my $token (@tokens)

  return @rules;
} # add_meta_depends()

# Return 0 if rule $rulename of type $test_type is not defined (at the
# given $priority, or at any priority listed in {priorities} when none is
# given); otherwise return the rule's entry in the current {scores} hash,
# i.e. a false value for a rule without a (non-zero) score.
sub is_rule_active {
  my ($self, $test_type, $rulename, $priority) = @_;

  # special case rbl_evals since they do not have a priority
  if ($test_type eq 'rbl_evals') {
    return 0 unless ($self->{$test_type}->{$rulename});
    return ($self->{scores}->{$rulename});
  }

  # first determine if the rule is defined
  if (defined($priority)) {
    # we have a specific priority
    return 0 unless ($self->{$test_type}->{$priority}->{$rulename});
  }
  else {
    # no specific priority so we must loop over all currently defined
    # priorities to see if the rule is defined
    my $found_p = 0;
    foreach my $pri (keys %{$self->{priorities}}) {
      if ($self->{$test_type}->{$pri}->{$rulename}) {
        $found_p = 1;
        last;
      }
    }
    return 0 unless ($found_p);
  }

  return ($self->{scores}->{$rulename});
}

###########################################################################

# treats a bitset argument as a bit vector of all possible port numbers (8 kB)
# and sets bit values to $value (0 or 1) in the specified range of port numbers
#
# $bitset_ref is a reference to the scalar holding the bit vector.  The
# range is clamped to 0..65535.  An undefined bitset is first initialised
# to "all ports except 0..1023"; an empty-string bitset is first initialised
# to "no ports".
sub set_ports_range {
  my($bitset_ref, $port_range_lo, $port_range_hi, $value) = @_;
  $port_range_lo = 0      if $port_range_lo < 0;
  $port_range_hi = 65535  if $port_range_hi > 65535;
  if (!defined $$bitset_ref) {  # provide a sensible default
    wipe_ports_range($bitset_ref, 1);  # turn on all bits 0..65535
    vec($$bitset_ref,$_,1) = 0  for 0..1023;  # avoid 0 and privileged ports
  } elsif ($$bitset_ref eq '') {  # repopulate the bitset (late configuration)
    wipe_ports_range($bitset_ref, 0);  # turn off all bits 0..65535
  }
  $value = !$value ? 0 : 1;
  for (my $j = $port_range_lo; $j <= $port_range_hi; $j++) {
    vec($$bitset_ref,$j,1) = $value;
  }
}

# Overwrite the bit vector referenced by $bitset_ref with 8192 bytes of
# all-ones (true $value) or all-zeroes (false $value).
sub wipe_ports_range {
  my($bitset_ref, $value) = @_;
  $value = !$value ? "\000" : "\377";
  $$bitset_ref = $value x 8192;  # quickly turn all bits 0..65535 on or off
}

###########################################################################

# The following four methods forward their arguments unchanged to the
# parser method of the same name.
sub add_to_addrlist {
  my $self = shift; $self->{parser}->add_to_addrlist(@_);
}
sub add_to_addrlist_rcvd {
  my $self = shift; $self->{parser}->add_to_addrlist_rcvd(@_);
}
sub remove_from_addrlist {
  my $self = shift; $self->{parser}->remove_from_addrlist(@_);
}
sub remove_from_addrlist_rcvd {
  my $self = shift; $self->{parser}->remove_from_addrlist_rcvd(@_);
}

###########################################################################

# With one argument (a symbolic rule name): return the list of regression
# test entries stored for that name, or nothing if there are none.
# Without arguments: return the names for which entries are stored.
sub regression_tests {
  my $self = shift;
  if (@_ == 1) {
    # we specified a symbolic name, return the strings
    my $name = shift;
    my $tests = $self->{regression_tests}->{$name};
    # a name without registered tests must not be dereferenced: under
    # strict refs that dies instead of returning an empty list
    return unless defined $tests;
    return @$tests;
  }
  else {
    # no name asked for, just return the symbolic names we have tests for
    return keys %{$self->{regression_tests}};
  }
}

###########################################################################

# Forward to the parser's finish_parsing(), passing $user through.
sub finish_parsing {
  my ($self, $user) = @_;
  $self->{parser}->finish_parsing($user);
}

###########################################################################

# Return true if {tests} holds at least one entry.  The answer is computed
# on the first call and cached in {found_any_rules}.
sub found_any_rules {
  my ($self) = @_;
  if (!defined $self->{found_any_rules}) {
    $self->{found_any_rules} = (scalar keys %{$self->{tests}} > 0);
  }
  return $self->{found_any_rules};
}

###########################################################################

# Return the description stored for rule $rule (undef if there is none).
sub get_description_for_rule {
  my ($self, $rule) = @_;
  # as silly as it looks, localized $1 here prevents an outer $1 from getting
  # tainted by the expression or assignment in the next line, bug 6148
  local($1);
  my $rule_descr = $self->{descriptions}->{$rule};
  return $rule_descr;
}

###########################################################################

# Return 1 if rule $rulename is a header test or header eval, or a meta
# rule whose tflags do not contain the word 'net'; return 0 otherwise,
# including when the rule has no recorded test type.
sub maybe_header_only {
  my($self,$rulename) = @_;
  my $type = $self->{test_types}->{$rulename};

  # debug aid: rules whose name starts with AUTOLEARNTEST get their type logged
  if (index($rulename, 'AUTOLEARNTEST') == 0) {
    dbg("config: auto-learn: $rulename - Test type is $self->{test_types}->{$rulename}.");
  }

  return 0 if (!defined ($type));

  if (($type == $TYPE_HEAD_TESTS) || ($type == $TYPE_HEAD_EVALS)) {
    return 1;

  } elsif ($type == $TYPE_META_TESTS) {
    if (($self->{tflags}->{$rulename}||'') =~ /\bnet\b/) {
      return 0;
    } else {
      return 1;
    }
  }

  return 0;
}

# Return 1 if rule $rulename is a body or URI test/eval, or a meta rule
# whose tflags do not contain the word 'net'; return 0 otherwise, including
# when the rule has no recorded test type.
sub maybe_body_only {
  my($self,$rulename) = @_;
  my $type = $self->{test_types}->{$rulename};

  # debug aid: rules whose name starts with AUTOLEARNTEST get their type logged
  if (index($rulename, 'AUTOLEARNTEST') == 0) {
    dbg("config: auto-learn: $rulename - Test type is $self->{test_types}->{$rulename}.");
  }

  return 0 if (!defined ($type));

  if (($type == $TYPE_BODY_TESTS) || ($type == $TYPE_BODY_EVALS)
        || ($type == $TYPE_URI_TESTS) || ($type == $TYPE_URI_EVALS))
  {
    # some rawbody go off of headers...
    return 1;

  } elsif ($type == $TYPE_META_TESTS) {
    if (($self->{tflags}->{$rulename}||'') =~ /\bnet\b/) {
      return 0;
    } else {
      return 1;
    }
  }

  return 0;
}

###########################################################################

# Forward a plugin load request to the plugin handler of the main object.
sub load_plugin {
  my ($self, $package, $path, $silent) = @_;
  $self->{main}->{plugins}->load_plugin($package, $path, $silent);
}

# Record that plugin package $package has been loaded.  $plugin and $path
# are accepted but not used here.
sub load_plugin_succeeded {
  my ($self, $plugin, $package, $path) = @_;
  $self->{plugins_loaded}->{$package} = 1;
}

# Register eval function $nameofsub as being provided by $pluginobj.  A
# warning is emitted when the name was already registered (the new plugin
# replaces the old one).  If $ruletype is given it is recorded as well,
# provided it is a key of %TYPE_AS_STRING; otherwise a lint warning is issued.
sub register_eval_rule {
  my ($self, $pluginobj, $nameofsub, $ruletype) = @_;
  if (exists $self->{eval_plugins}->{$nameofsub}) {
    warn("config: eval function '$nameofsub' already exists, overwriting\n");
  }
  $self->{eval_plugins}->{$nameofsub} = $pluginobj;
  if (defined $ruletype) {
    if (defined $TYPE_AS_STRING{$ruletype}) {
      $self->{eval_plugins_types}->{$nameofsub} = $ruletype;
    } else {
      $self->{parser}->lint_warn("config: invalid ruletype for eval $nameofsub");
    }
  }
}

###########################################################################

# Copy the configuration held in $source into $dest; each of the two
# defaults to $self when undefined.  Keys listed in @NON_COPIED_KEYS are
# skipped, the network sets are copied through their own clone() method,
# and hashes/arrays are copied one (or, for the listed structures, two)
# levels deep.  Score sets are copied last and {scores} is re-pointed into
# the destination's own score set.  Always returns 1.
sub clone {
  my ($self, $source, $dest) = @_;

  unless (defined $source) {
    $source = $self;
  }
  unless (defined $dest) {
    $dest = $self;
  }

  # keys already dealt with (or deliberately skipped); only key existence
  # is tested, hence the undef values
  my %done;

  # keys that should not be copied in ->clone().
  # bug 4179: include want_rebuild_for_type, so that if a user rule
  # is defined, its method will be recompiled for future scans in
  # order to *remove* the generated method calls
  my @NON_COPIED_KEYS = qw(
    main eval_plugins eval_plugins_types plugins_loaded registered_commands
    sed_path_cache parser scoreset scores want_rebuild_for_type
  );

  # special cases.  first, skip anything that cannot be changed
  # by users, and the stuff we take care of here
  foreach my $var (@NON_COPIED_KEYS) {
    $done{$var} = undef;
  }

  # keys that can be copied using a ->clone() method, in ->clone()
  my @CLONABLE_KEYS = qw(
    internal_networks trusted_networks msa_networks
  );

  foreach my $key (@CLONABLE_KEYS) {
    $dest->{$key} = $source->{$key}->clone();
    $done{$key} = undef;
  }

  # two-level hashes
  foreach my $key (qw(uri_host_lists askdns)) {
    my $v = $source->{$key};
    my $dest_key_ref = $dest->{$key} = {};  # must start from scratch!
    while(my($k2,$v2) = each %{$v}) {
      %{$dest_key_ref->{$k2}} = %{$v2};
    }
    $done{$key} = undef;
  }

  # bug 4179: be smarter about cloning the rule-type structures;
  # some are like this: $self->{type}->{priority}->{name} = 'value';
  # which is an extra level that the below code won't deal with
  foreach my $t (@rule_types) {
    foreach my $k (keys %{$source->{$t}}) {
      my $v = $source->{$t}->{$k};
      my $i = ref $v;
      if ($i eq 'HASH') {
        %{$dest->{$t}->{$k}} = %{$v};
      }
      elsif ($i eq 'ARRAY') {
        @{$dest->{$t}->{$k}} = @{$v};
      }
      else {
        $dest->{$t}->{$k} = $v;
      }
    }
    $done{$t} = undef;
  }

  # and now, copy over all the rest -- the less complex cases.
  while(my($k,$v) = each %{$source}) {
    next if exists $done{$k};   # we handled it above
    $done{$k} = undef;
    my $i = ref($v);

    # Not a reference, or a scalar?  Just copy the value over.
    if ($i eq '') {
      $dest->{$k} = $v;
    }
    elsif ($i eq 'SCALAR') {
      # note: the referenced value itself is stored, not a new reference
      $dest->{$k} = $$v;
    }
    elsif ($i eq 'ARRAY') {
      @{$dest->{$k}} = @{$v};
    }
    elsif ($i eq 'HASH') {
      %{$dest->{$k}} = %{$v};
    }
    elsif ($i eq 'Regexp') {
      $dest->{$k} = $v;
    }
    else {
      # throw a warning for debugging -- should never happen in normal usage
      warn "config: dup unknown type $k, $i\n";
    }
  }

  # settings of registered commands that were not seen as keys above are
  # copied by plain assignment
  foreach my $cmd (@{$self->{registered_commands}}) {
    my $k = $cmd->{setting};
    next if exists $done{$k};   # we handled it above
    $done{$k} = undef;
    $dest->{$k} = $source->{$k};
  }

  # scoresets
  delete $dest->{scoreset};
  for my $i (0 .. 3) {
    %{$dest->{scoreset}->[$i]} = %{$source->{scoreset}->[$i]};
  }

  # deal with $conf->{scores}, it needs to be a reference into the scoreset
  # hash array dealy.  Do it at the end since scoreset_current isn't set
  # otherwise.
  $dest->{scores} = $dest->{scoreset}->[$dest->{scoreset_current}];

  # ensure we don't copy the path cache from the master
  delete $dest->{sed_path_cache};

  return 1;
}

###########################################################################

# Currently a no-op: both deletions inside the condition are commented out.
sub free_uncompiled_rule_source {
  my ($self) = @_;

  if (!$self->{main}->{keep_config_parsing_metadata} &&
        !$self->{allow_user_rules})
  {
    #delete $self->{if_stack}; # it's Parser not Conf?
    #delete $self->{source_file};
  }
}

# Create and return a new Mail::SpamAssassin::NetSet named $netset_name;
# when $add_loopback is true, 127.0.0.0/8 and ::1 are added to it.
sub new_netset {
  my ($self, $netset_name, $add_loopback) = @_;
  my $set = Mail::SpamAssassin::NetSet->new($netset_name);
  if ($add_loopback) {
    $set->add_cidr('127.0.0.0/8');
    $set->add_cidr('::1');
  }
  return $set;
}

###########################################################################

# Empty the object's hash, dropping every value it holds.
sub finish {
  my ($self) = @_;
  #untie %{$self->{descriptions}};
  %{$self} = ();
}

###########################################################################

# Thin wrapper: pass all arguments on to Mail::SpamAssassin::sa_die().
sub sa_die { Mail::SpamAssassin::sa_die(@_); }

###########################################################################

# subroutines available to conditionalize rules, for example:
#   if (can(Mail::SpamAssassin::Conf::feature_originating_ip_headers))

sub feature_originating_ip_headers { 1 }
sub feature_dns_local_ports_permit_avoid { 1 }
sub feature_bayes_auto_learn_on_error { 1 }
sub feature_uri_host_listed { 1 }
sub feature_yesno_takes_args { 1 }
sub feature_bug6558_free { 1 }
sub feature_edns { 1 }  # supports 'dns_options edns' config option
sub feature_dns_query_restriction { 1 }  # supported config option
sub feature_registryboundaries { 1 } # replaces deprecated registrarboundaries
sub feature_geodb { 1 } # if needed for some reason
sub feature_dns_block_rule { 1 } # supports 'dns_block_rule' config option
sub feature_compile_regexp { 1 } # Util::compile_regexp
sub feature_meta_rules_matching { 1 } # meta rules_matching() expression
sub feature_subjprefix { 1 } # add subject prefixes rule option
sub feature_bayes_stopwords { 1 } # multi language stopwords in Bayes
sub feature_get_host { 1 } # $pms->get() :host :domain :ip :revip # was implemented together with AskDNS::has_tag_header # Bug 7734
sub feature_blocklist_welcomelist { 1 } # bz 7826
sub feature_header_address_parser { 1 } # improved header address parsing using Email::Address::XS, $pms->get() list context
sub feature_local_tests_only { 1 } # Config parser supports "if (local_tests_only)"
sub has_tflags_nosubject { 1 } # tflags nosubject
sub has_tflags_nolog { 1 } # tflags nolog
sub perl_min_version_5010000 { return $] >= 5.010000 }  # perl version check ("perl_version" not neatly backwards-compatible)

###########################################################################

1;
__END__

=head1 LOCALISATION

A line starting with the text C<lang xx> will only be interpreted if
SpamAssassin is running in that locale, allowing test descriptions and
templates to be set for that language.

Current locale is determined from LANGUAGE, LC_ALL, LC_MESSAGES or LANG
environment variables, first found is used.

The locales string should specify either both the language and country, e.g.
C<lang pt_BR>, or just the language, e.g. C<lang de>.

Example:

  lang de describe EXAMPLE_RULE Beispielregel

=head1 SEE ALSO

Mail::SpamAssassin(3)
spamassassin(1)
spamd(1)

=cut
