1# <@LICENSE>
2# Licensed to the Apache Software Foundation (ASF) under one or more
3# contributor license agreements.  See the NOTICE file distributed with
4# this work for additional information regarding copyright ownership.
5# The ASF licenses this file to you under the Apache License, Version 2.0
6# (the "License"); you may not use this file except in compliance with
7# the License.  You may obtain a copy of the License at:
8#
9#     http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16# </@LICENSE>
17
18=head1 NAME
19
20Mail::SpamAssassin::Conf - SpamAssassin configuration file
21
22=head1 SYNOPSIS
23
24  # a comment
25
26  rewrite_header Subject          *****SPAM*****
27
28  full PARA_A_2_C_OF_1618         /Paragraph .a.{0,10}2.{0,10}C. of S. 1618/i
29  describe PARA_A_2_C_OF_1618     Claims compliance with senate bill 1618
30
31  header FROM_HAS_MIXED_NUMS      From =~ /\d+[a-z]+\d+\S*@/i
32  describe FROM_HAS_MIXED_NUMS    From: contains numbers mixed in with letters
33
34  score A_HREF_TO_REMOVE          2.0
35
36  lang es describe FROM_FORGED_HOTMAIL Forzado From: simula ser de hotmail.com
37
38  lang pt_BR report O programa detetor de Spam ZOE [...]
39
40=head1 DESCRIPTION
41
42SpamAssassin is configured using traditional UNIX-style configuration files,
43loaded from the C</usr/share/spamassassin> and C</etc/mail/spamassassin>
44directories.
45
46The following web page lists the most important configuration settings
47used to configure SpamAssassin; novices are encouraged to read it first:
48
49  http://wiki.apache.org/spamassassin/ImportantInitialConfigItems
50
51=head1 FILE FORMAT
52
53The C<#> character starts a comment, which continues until end of line.
54B<NOTE:> if the C<#> character is to be used as part of a rule or
55configuration option, it must be escaped with a backslash.  i.e.: C<\#>
56
57Whitespace in the files is not significant, but please note that starting a
58line with whitespace is deprecated, as we reserve its use for multi-line rule
59definitions, at some point in the future.
60
Currently, each rule or configuration setting must fit on one line; multi-line
settings are not supported yet.
63
File and directory paths can use C<~> to refer to the user's home
directory, but no other shell-style path extensions such as globbing or
C<~user/> are supported.
67
68Where appropriate below, default values are listed in parentheses.
69
Test names ("SYMBOLIC_TEST_NAME") can only contain alphanumerics/underscores,
cannot start with a digit, and must be less than 128 characters.
72
73=head1 USER PREFERENCES
74
75The following options can be used in both site-wide (C<local.cf>) and
76user-specific (C<user_prefs>) configuration files to customize how
77SpamAssassin handles incoming email messages.
78
79=cut
80
81package Mail::SpamAssassin::Conf;
82
83use strict;
84use warnings;
85# use bytes;
86use re 'taint';
87
88use Mail::SpamAssassin::NetSet;
89use Mail::SpamAssassin::Constants qw(:sa :ip);
90use Mail::SpamAssassin::Conf::Parser;
91use Mail::SpamAssassin::Logger;
92use Mail::SpamAssassin::Util qw(untaint_var idn_to_ascii compile_regexp);
93use File::Spec;
94
# This class has no parent classes.
our @ISA = ();

# Unit-test hook; used only for unit tests.
our $COLLECT_REGRESSION_TESTS;
98
99# odd => eval test.  Not constants so they can be shared with Parser
100# TODO: move to Constants.pm?
# Rule-type codes.  Every *_EVALS code below is its *_TESTS sibling with the
# low bit set, which is what makes the eval types odd.
our $TYPE_HEAD_TESTS    = 0x0008;
our $TYPE_HEAD_EVALS    = $TYPE_HEAD_TESTS    | 1;   # 0x0009
our $TYPE_BODY_TESTS    = 0x000a;
our $TYPE_BODY_EVALS    = $TYPE_BODY_TESTS    | 1;   # 0x000b
our $TYPE_FULL_TESTS    = 0x000c;
our $TYPE_FULL_EVALS    = $TYPE_FULL_TESTS    | 1;   # 0x000d
our $TYPE_RAWBODY_TESTS = 0x000e;
our $TYPE_RAWBODY_EVALS = $TYPE_RAWBODY_TESTS | 1;   # 0x000f
our $TYPE_URI_TESTS     = 0x0010;
our $TYPE_URI_EVALS     = $TYPE_URI_TESTS     | 1;   # 0x0011
our $TYPE_META_TESTS    = 0x0012;
our $TYPE_RBL_EVALS     = 0x0013;
our $TYPE_EMPTY_TESTS   = 0x0014;
114
# Rule category names, kept in their original order.
my @rule_types = qw(
  body_tests uri_tests uri_evals
  head_tests head_evals body_evals full_tests
  full_evals rawbody_tests rawbody_evals
  rbl_evals meta_tests
);
119
120# Map internal ruletype to descriptive ruletype string
# Keys are the numeric rule-type codes; values are the descriptive labels.
# Codes that share a label are grouped together.
our %TYPE_AS_STRING = (
  (map { ($_ => 'header')  } $TYPE_HEAD_TESTS, $TYPE_HEAD_EVALS,
                             $TYPE_RBL_EVALS),
  (map { ($_ => 'body')    } $TYPE_BODY_TESTS, $TYPE_BODY_EVALS),
  (map { ($_ => 'full')    } $TYPE_FULL_TESTS, $TYPE_FULL_EVALS),
  (map { ($_ => 'rawbody') } $TYPE_RAWBODY_TESTS, $TYPE_RAWBODY_EVALS),
  (map { ($_ => 'uri')     } $TYPE_URI_TESTS, $TYPE_URI_EVALS),
  $TYPE_META_TESTS  => 'meta',
  $TYPE_EMPTY_TESTS => 'empty',
);
136
137#Removed $VERSION per BUG 6422
138#$VERSION = 'bogus';     # avoid CPAN.pm picking up version strings later
139
140# these are variables instead of constants so that other classes can
141# access them; if they're constants, they'd have to go in Constants.pm
142# TODO: move to Constants.pm?
# Setting value types, numbered consecutively from 1.
our (
  $CONF_TYPE_STRING,            #  1
  $CONF_TYPE_BOOL,              #  2
  $CONF_TYPE_NUMERIC,           #  3
  $CONF_TYPE_HASH_KEY_VALUE,    #  4
  $CONF_TYPE_ADDRLIST,          #  5
  $CONF_TYPE_TEMPLATE,          #  6
  $CONF_TYPE_NOARGS,            #  7
  $CONF_TYPE_STRINGLIST,        #  8
  $CONF_TYPE_IPADDRLIST,        #  9
  $CONF_TYPE_DURATION,          # 10
) = (1 .. 10);

# Sentinel return values for setting handlers; these are strings,
# as expected by the parser.
our $MISSING_REQUIRED_VALUE     = '-99999999999999';
our $INVALID_VALUE              = '-99999999999998';
our $INVALID_HEADER_FIELD_NAME  = '-99999999999997';
156
157# set to "1" by the test suite code, to record regression tests
158# $Mail::SpamAssassin::Conf::COLLECT_REGRESSION_TESTS = 1;
159
160# search for "sub new {" to find the start of the code
161###########################################################################
162
163sub set_default_commands {
164  my($self) = @_;
165
166  # see "perldoc Mail::SpamAssassin::Conf::Parser" for details on this fmt.
167  # push each config item like this, to avoid a POD bug; it can't just accept
168  # ( { ... }, { ... }, { ...} ) otherwise POD parsing dies.
169  my @cmds;
170
171=head2 SCORING OPTIONS
172
173=over 4
174
175=item required_score n.nn (default: 5)
176
177Set the score required before a mail is considered spam.  C<n.nn> can
178be an integer or a real number.  5.0 is the default setting, and is
179quite aggressive; it would be suitable for a single-user setup, but if
180you're an ISP installing SpamAssassin, you should probably set the
181default to be more conservative, like 8.0 or 10.0.  It is not
182recommended to automatically delete or discard messages marked as
183spam, as your users B<will> complain, but if you choose to do so, only
184delete messages with an exceptionally high score such as 15.0 or
185higher. This option was previously known as C<required_hits> and that
186name is still accepted, but is deprecated.
187
188=cut
189
190  push (@cmds, {
191    setting => 'required_score',
192    aliases => ['required_hits'],       # backward compatible
193    default => 5,
194    type => $CONF_TYPE_NUMERIC,
195  });
196
197=item score SYMBOLIC_TEST_NAME n.nn [ n.nn n.nn n.nn ]
198
199Assign scores (the number of points for a hit) to a given test.
200Scores can be positive or negative real numbers or integers.
201C<SYMBOLIC_TEST_NAME> is the symbolic name used by SpamAssassin for
202that test; for example, 'FROM_ENDS_IN_NUMS'.
203
204If only one valid score is listed, then that score is always used
205for a test.
206
207If four valid scores are listed, then the score that is used depends
208on how SpamAssassin is being used. The first score is used when
209both Bayes and network tests are disabled (score set 0). The second
210score is used when Bayes is disabled, but network tests are enabled
211(score set 1). The third score is used when Bayes is enabled and
212network tests are disabled (score set 2). The fourth score is used
213when Bayes is enabled and network tests are enabled (score set 3).
214
215Setting a rule's score to 0 will disable that rule from running.
216
If any of the score values are surrounded by parentheses '()', then
all of the scores in the line are considered to be relative to the
already set score.  i.e.: '(3)' means increase the score for this
rule by 3 points in all score sets.  '(3) (0) (3) (0)' means increase
the score for this rule by 3 in score sets 0 and 2 only.
222
223If no score is given for a test by the end of the configuration,
224a default score is assigned: a score of 1.0 is used for all tests,
225except those whose names begin with 'T_' (this is used to indicate a
226rule in testing) which receive 0.01.
227
228Note that test names which begin with '__' are indirect rules used
229to compose meta-match rules and can also act as prerequisites to
230other rules.  They are not scored or listed in the 'tests hit'
231reports, but assigning a score of 0 to an indirect rule will disable
232it from running.
233
234=cut
235
236  push (@cmds, {
237    setting => 'score',
238    code => sub {
239      my ($self, $key, $value, $line) = @_;
240      my($rule, @scores) = split(/\s+/, $value);
241      unless (defined $value && $value !~ /^$/ &&
242		(scalar @scores == 1 || scalar @scores == 4)) {
243	info("config: score: requires a symbolic rule name and 1 or 4 scores");
244	return $MISSING_REQUIRED_VALUE;
245      }
246
247      # Figure out if we're doing relative scores, remove the parens if we are
248      my $relative = 0;
249      foreach (@scores) {
250        local ($1);
251        if (s/^\((-?\d+(?:\.\d+)?)\)$/$1/) {
252	  $relative = 1;
253	}
254	unless (/^-?\d+(?:\.\d+)?$/) {
255	  info("config: score: the non-numeric score ($_) is not valid, " .
256	    "a numeric score is required");
257	  return $INVALID_VALUE;
258	}
259      }
260
261      if ($relative && !exists $self->{scoreset}->[0]->{$rule}) {
262        info("config: score: relative score without previous setting in " .
263	  "configuration");
264        return $INVALID_VALUE;
265      }
266
267      # If we're only passed 1 score, copy it to the other scoresets
268      if (@scores) {
269        if (@scores != 4) {
270          @scores = ( $scores[0], $scores[0], $scores[0], $scores[0] );
271        }
272
273        # Set the actual scoreset values appropriately
274        for my $index (0..3) {
275          my $score = $relative ?
276            $self->{scoreset}->[$index]->{$rule} + $scores[$index] :
277            $scores[$index];
278
279          $self->{scoreset}->[$index]->{$rule} = $score + 0.0;
280        }
281      }
282    }
283  });
284
285=back
286
287=head2 WHITELIST AND BLACKLIST OPTIONS
288
289=over 4
290
291=item welcomelist_from user@example.com
292
293Previously whitelist_from which will work interchangeably until 4.1.
294
295Used to whitelist sender addresses which send mail that is often tagged
296(incorrectly) as spam.
297
298Use of this setting is not recommended, since it blindly trusts the message,
299which is routinely and easily forged by spammers and phish senders. The
300recommended solution is to instead use C<welcomelist_auth> or other authenticated
301whitelisting methods, or C<welcomelist_from_rcvd>.
302
303Whitelist and blacklist addresses are now file-glob-style patterns, so
304C<friend@somewhere.com>, C<*@isp.com>, or C<*.domain.net> will all work.
305Specifically, C<*> and C<?> are allowed, but all other metacharacters
306are not. Regular expressions are not used for security reasons.
307Matching is case-insensitive.
308
309Multiple addresses per line, separated by spaces, is OK.  Multiple
310C<welcomelist_from> lines are also OK.
311
312The headers checked for whitelist addresses are as follows: if C<Resent-From>
313is set, use that; otherwise check all addresses taken from the following
314set of headers:
315
316	Envelope-Sender
317	Resent-Sender
318	X-Envelope-From
319	From
320
321In addition, the "envelope sender" data, taken from the SMTP envelope data
322where this is available, is looked up.  See C<envelope_sender_header>.
323
324e.g.
325
326  welcomelist_from joe@example.com fred@example.com
327  welcomelist_from *@example.com
328
329=cut
330
331  push (@cmds, {
332    setting => 'welcomelist_from',
333    type => $CONF_TYPE_ADDRLIST,
334    aliases => ['whitelist_from'],     # backward compatible - to be removed for 4.1
335  });
336
337=item unwhitelist_from user@example.com
338
339Used to remove a default welcomelist_from (previously whitelist_from) entry, so for example a distribution
340welcomelist_from can be overridden in a local.cf file, or an individual user can
341override a welcomelist_from entry in their own C<user_prefs> file.
342The specified email address has to match exactly (although case-insensitively)
343the address previously used in a welcomelist_from line, which implies that a
344wildcard only matches literally the same wildcard (not 'any' address).
345
346e.g.
347
348  unwhitelist_from joe@example.com fred@example.com
349  unwhitelist_from *@example.com
350
351=cut
352
353  push (@cmds, {
354    command => 'unwhitelist_from',
355    setting => 'welcomelist_from',
356    type => $CONF_TYPE_ADDRLIST,
357    code => \&Mail::SpamAssassin::Conf::Parser::remove_addrlist_value
358  });
359
360=item welcomelist_from_rcvd addr@lists.sourceforge.net sourceforge.net
361
362Previously whitelist_from_rcvd which will work interchangeably until 4.1.
363
364Works similarly to welcomelist_from (previously whitelist_from), except that in addition to matching
365a sender address, a relay's rDNS name or its IP address must match too
366for the whitelisting rule to fire. The first parameter is a sender's e-mail
367address to whitelist, and the second is a string to match the relay's rDNS,
368or its IP address. Matching is case-insensitive.
369
370This second parameter is matched against a TCP-info information field as
371provided in a FROM clause of a trace information (i.e. in a Received header
372field, see RFC 5321). Only the Received header fields inserted by trusted
373hosts are considered. This parameter can either be a full hostname, or a
374domain component of that hostname, or an IP address (optionally followed
375by a slash and a prefix length) in square brackets. The address prefix
376(mask) length with a slash may stand within brackets along with an address,
377or may follow the bracketed address. Reverse DNS lookup is done by an MTA,
378not by SpamAssassin.
379
380For backward compatibility as an alternative to a CIDR notation, an IPv4
381address in brackets may be truncated on classful boundaries to cover whole
382subnets, e.g. C<[10.1.2.3]>, C<[10.1.2]>, C<[10.1]>, C<[10]>.
383
384In other words, if the host that connected to your MX had an IP address
385192.0.2.123 that mapped to 'sendinghost.example.org', you should specify
386C<sendinghost.example.org>, or C<example.org>, or C<[192.0.2.123]>, or
387C<[192.0.2.0/24]>, or C<[192.0.2]> here.
388
389Note that this requires that C<internal_networks> be correct.  For simple
390cases, it will be, but for a complex network you may get better results
391by setting that parameter.
392
393It also requires that your mail exchangers be configured to perform DNS
394reverse lookups on the connecting host's IP address, and to record the
395result in the generated Received header field according to RFC 5321.
396
397e.g.
398
399  welcomelist_from_rcvd joe@example.com  example.com
400  welcomelist_from_rcvd *@*              mail.example.org
401  welcomelist_from_rcvd *@axkit.org      [192.0.2.123]
402  welcomelist_from_rcvd *@axkit.org      [192.0.2.0/24]
403  welcomelist_from_rcvd *@axkit.org      [192.0.2.0]/24
404  welcomelist_from_rcvd *@axkit.org      [2001:db8:1234::/48]
405  welcomelist_from_rcvd *@axkit.org      [2001:db8:1234::]/48
406
407=item def_welcomelist_from_rcvd addr@lists.sourceforge.net sourceforge.net
408
409Previously def_whitelist_from_rcvd which will work interchangeably until 4.1.
410
411Same as C<welcomelist_from_rcvd>, but used for the default welcomelist entries
412in the SpamAssassin distribution.  The welcomelist score is lower, because
413these are often targets for spammer spoofing.
414
415=cut
416
417  push (@cmds, {
418    setting => 'welcomelist_from_rcvd',
419    aliases => ['whitelist_from_rcvd'], # backward compatible - to be removed for 4.1
420    type => $CONF_TYPE_ADDRLIST,
421    code => sub {
422      my ($self, $key, $value, $line) = @_;
423      unless (defined $value && $value !~ /^$/) {
424	return $MISSING_REQUIRED_VALUE;
425      }
426      unless ($value =~ /^\S+\s+\S+$/) {
427	return $INVALID_VALUE;
428      }
429      $self->{parser}->add_to_addrlist_rcvd ('welcomelist_from_rcvd',
430                                        split(/\s+/, $value));
431    }
432  });
433
434  push (@cmds, {
435    setting => 'def_welcomelist_from_rcvd',
436    aliases => ['def_whitelist_from_rcvd'],
437    type => $CONF_TYPE_ADDRLIST,
438    code => sub {
439      my ($self, $key, $value, $line) = @_;
440      unless (defined $value && $value !~ /^$/) {
441	return $MISSING_REQUIRED_VALUE;
442      }
443      unless ($value =~ /^\S+\s+\S+$/) {
444	return $INVALID_VALUE;
445      }
446      $self->{parser}->add_to_addrlist_rcvd ('def_welcomelist_from_rcvd',
447                                        split(/\s+/, $value));
448    }
449  });
450
451=item whitelist_allows_relays user@example.com
452
Specify addresses which are in C<welcomelist_from_rcvd> that sometimes
send through a mail relay other than the listed ones. By default mail
with a From address that is in C<welcomelist_from_rcvd> that does not match
the relay will trigger a forgery rule. Including the address in
C<whitelist_allows_relays> prevents that.
458
459Whitelist and blacklist addresses are now file-glob-style patterns, so
460C<friend@somewhere.com>, C<*@isp.com>, or C<*.domain.net> will all work.
461Specifically, C<*> and C<?> are allowed, but all other metacharacters
462are not. Regular expressions are not used for security reasons.
463Matching is case-insensitive.
464
465Multiple addresses per line, separated by spaces, is OK.  Multiple
466C<whitelist_allows_relays> lines are also OK.
467
468The specified email address does not have to match exactly the address
469previously used in a welcomelist_from_rcvd line as it is compared to the
470address in the header.
471
472e.g.
473
474  whitelist_allows_relays joe@example.com fred@example.com
475  whitelist_allows_relays *@example.com
476
477=cut
478
479  push (@cmds, {
480    setting => 'whitelist_allows_relays',
481    type => $CONF_TYPE_ADDRLIST,
482  });
483
484=item unwelcomelist_from_rcvd user@example.com
485
486Previously unwhitelist_from_rcvd which will work interchangeably until 4.1.
487
488Used to remove a default welcomelist_from_rcvd (previously whitelist_from_rcvd) or def_welcomelist_from_rcvd (previously def_whitelist_from_rcvd)
489entry, so for example a distribution welcomelist_from_rcvd can be overridden
490in a local.cf file, or an individual user can override a welcomelist_from_rcvd
491entry in their own C<user_prefs> file.
492
493The specified email address has to match exactly the address previously
494used in a welcomelist_from_rcvd line.
495
496e.g.
497
498  unwelcomelist_from_rcvd joe@example.com fred@example.com
499  unwelcomelist_from_rcvd *@axkit.org
500
501=cut
502
503  push (@cmds, {
504    setting => 'unwelcomelist_from_rcvd',
505    aliases => ['unwhitelist_from_rcvd'],
506    type => $CONF_TYPE_ADDRLIST,
507    code => sub {
508      my ($self, $key, $value, $line) = @_;
509      unless (defined $value && $value !~ /^$/) {
510	return $MISSING_REQUIRED_VALUE;
511      }
512      unless ($value =~ /^(?:\S+(?:\s+\S+)*)$/) {
513	return $INVALID_VALUE;
514      }
515      $self->{parser}->remove_from_addrlist_rcvd('welcomelist_from_rcvd',
516                                        split (/\s+/, $value));
517      $self->{parser}->remove_from_addrlist_rcvd('def_welcomelist_from_rcvd',
518                                        split (/\s+/, $value));
519    }
520  });
521
522=item blacklist_from user@example.com
523
524Used to specify addresses which send mail that is often tagged (incorrectly) as
525non-spam, but which the user doesn't want.  Same format as C<welcomelist_from>.
526
527=cut
528
529  push (@cmds, {
530    setting => 'blacklist_from',
531    type => $CONF_TYPE_ADDRLIST,
532  });
533
534=item unblacklist_from user@example.com
535
536Used to remove a default blacklist_from entry, so for example a
537distribution blacklist_from can be overridden in a local.cf file, or
538an individual user can override a blacklist_from entry in their own
539C<user_prefs> file. The specified email address has to match exactly
540the address previously used in a blacklist_from line.
541
542
543e.g.
544
545  unblacklist_from joe@example.com fred@example.com
546  unblacklist_from *@spammer.com
547
548=cut
549
550
551  push (@cmds, {
552    command => 'unblacklist_from',
553    setting => 'blacklist_from',
554    type => $CONF_TYPE_ADDRLIST,
555    code => \&Mail::SpamAssassin::Conf::Parser::remove_addrlist_value
556  });
557
558
559=item welcomelist_to user@example.com
560
561Previously whitelist_to which will work interchangeably until 4.1.
562
563If the given address appears as a recipient in the message headers
564(Resent-To, To, Cc, obvious envelope recipient, etc.) the mail will
565be listed as allowed.  Useful if you're deploying SpamAssassin system-wide,
566and don't want some users to have their mail filtered.  Same format
567as C<welcomelist_from>.
568
569There are three levels of To-welcomelisting, C<welcomelist_to>, C<more_spam_to>
570and C<all_spam_to>.  Users in the first level may still get some spammish
571mails blocked, but users in C<all_spam_to> should never get mail blocked.
572
573The headers checked for welcomelist addresses are as follows: if C<Resent-To> or
574C<Resent-Cc> are set, use those; otherwise check all addresses taken from the
575following set of headers:
576
577        To
578        Cc
579        Apparently-To
580        Delivered-To
581        Envelope-Recipients
582        Apparently-Resent-To
583        X-Envelope-To
584        Envelope-To
585        X-Delivered-To
586        X-Original-To
587        X-Rcpt-To
588        X-Real-To
589
590=item more_spam_to user@example.com
591
592See above.
593
594=item all_spam_to user@example.com
595
596See above.
597
598=cut
599
600  push (@cmds, {
601    setting => 'welcomelist_to',
602    type => $CONF_TYPE_ADDRLIST,
603    aliases => ['whitelist_to'],       # backward compatible - to be removed for 4.1
604  });
605  push (@cmds, {
606    setting => 'more_spam_to',
607    type => $CONF_TYPE_ADDRLIST,
608  });
609  push (@cmds, {
610    setting => 'all_spam_to',
611    type => $CONF_TYPE_ADDRLIST,
612  });
613
614=item blacklist_to user@example.com
615
616If the given address appears as a recipient in the message headers
617(Resent-To, To, Cc, obvious envelope recipient, etc.) the mail will
618be blacklisted.  Same format as C<blacklist_from>.
619
620=cut
621
622  push (@cmds, {
623    setting => 'blacklist_to',
624    type => $CONF_TYPE_ADDRLIST,
625  });
626
627=item welcomelist_auth user@example.com
628
629Previously whitelist_auth which will work interchangeably until 4.1.
630
631Used to specify addresses which send mail that is often tagged (incorrectly) as
632spam.  This is different from C<welcomelist_from> and C<welcomelist_from_rcvd> in
633that it first verifies that the message was sent by an authorized sender for
634the address, before whitelisting.
635
636Authorization is performed using one of the installed sender-authorization
637schemes: SPF (using C<Mail::SpamAssassin::Plugin::SPF>), or DKIM (using
638C<Mail::SpamAssassin::Plugin::DKIM>).  Note that those plugins must be active,
639and working, for this to operate.
640
641Using C<welcomelist_auth> is roughly equivalent to specifying duplicate
642C<whitelist_from_spf>, C<whitelist_from_dk>, and C<welcomelist_from_dkim> lines
643for each of the addresses specified.
644
645e.g.
646
647  welcomelist_auth joe@example.com fred@example.com
648  welcomelist_auth *@example.com
649
650=item def_welcomelist_auth user@example.com
651
652Previously def_whitelist_auth which will work interchangeably until 4.1.
653
654Same as C<welcomelist_auth>, but used for the default welcomelist entries
655in the SpamAssassin distribution.  The welcomelist score is lower, because
656these are often targets for spammer spoofing.
657
658=cut
659
660  push (@cmds, {
661    setting => 'welcomelist_auth',
662    aliases => ['whitelist_auth'], # backward compatible - to be removed for 4.1
663    type => $CONF_TYPE_ADDRLIST,
664  });
665
666  push (@cmds, {
667    setting => 'def_welcomelist_auth',
668    aliases => ['def_whitelist_auth'], # backward compatible - to be removed for 4.1
669    type => $CONF_TYPE_ADDRLIST,
670  });
671
=item unwelcomelist_auth user@example.com
673
674Previously unwhitelist_auth which will work interchangeably until 4.1.
675
676Used to remove a C<welcomelist_auth> or C<def_welcomelist_auth> entry. The
677specified email address has to match exactly the address previously used.
678
679e.g.
680
681  unwelcomelist_auth joe@example.com fred@example.com
682  unwelcomelist_auth *@example.com
683
684=cut
685
686  push (@cmds, {
687    setting => 'unwelcomelist_auth',
688    aliases => ['unwhitelist_auth'],
689    type => $CONF_TYPE_ADDRLIST,
690    code => sub {
691      my ($self, $key, $value, $line) = @_;
692      unless (defined $value && $value !~ /^$/) {
693        return $MISSING_REQUIRED_VALUE;
694      }
695      unless ($value =~ /^(?:\S+(?:\s+\S+)*)$/) {
696        return $INVALID_VALUE;
697      }
698      $self->{parser}->remove_from_addrlist('welcomelist_auth',
699                                        split (/\s+/, $value));
700      $self->{parser}->remove_from_addrlist('def_welcomelist_auth',
701                                        split (/\s+/, $value));
702    }
703  });
704
705
706=item enlist_uri_host (listname) host ...
707
Adds one or more host names or domain names to a named list of URI domains.
The named list can then be consulted through a check_uri_host_listed()
eval rule implemented by the WLBLEval plugin, which takes the list name as
an argument. Parentheses around a list name are literal - a required syntax.
712
713Host names may optionally be prefixed by an exclamation mark '!', which
714produces false as a result if this entry matches. This makes it easier
715to exclude some subdomains when their superdomain is listed, for example:
716
717  enlist_uri_host (MYLIST) !sub1.example.com !sub2.example.com example.com
718
719No wildcards are supported, but subdomains do match implicitly. Lists
720are independent. Search for each named list starts by looking up the
721full hostname first, then leading fields are progressively stripped off
722(e.g.: sub.example.com, example.com, com) until a match is found or we run
723out of fields. The first matching entry (the most specific) determines if a
724lookup yielded a true (no '!' prefix) or a false (with a '!' prefix) result.
725
If a URL found in a message contains an IP address in place of a host name,
the given list must specify the exact same IP address (instead of a host name)
in order to match.
729
730Use the delist_uri_host directive to neutralize previous enlist_uri_host
731settings.
732
The lists named 'BLACK' and 'WHITE' have shorthand directives,
blocklist_uri_host and welcomelist_uri_host, and corresponding default rules,
but the names 'BLACK' and 'WHITE' are otherwise not special or reserved.
736
737=cut
738
739  push (@cmds, {
740    command => 'enlist_uri_host',
741    setting => 'uri_host_lists',
742    type => $CONF_TYPE_HASH_KEY_VALUE,
743    code => sub {
744      my($conf, $key, $value, $line) = @_;
745      local($1,$2);
746      if ($value !~ /^ \( (.+?) \) \s+ (.+) \z/sx) {
747        return $MISSING_REQUIRED_VALUE;
748      }
749      my $listname = $1;  # corresponds to arg in check_uri_host_in_wblist()
750      # note: must not factor out dereferencing, as otherwise
751      # subhashes would spring up in a copy and be lost
752      foreach my $host ( split(/\s+/, lc $2) ) {
753        my $v = $host =~ s/^!// ? 0 : 1;
754        $conf->{uri_host_lists}{$listname}{$host} = $v;
755      }
756    }
757  });
758
759=item delist_uri_host [ (listname) ] host ...
760
Removes one or more specified host names from a named list of URI domains.
Removing an unlisted name is ignored (is not an error). Listname is optional,
if specified then just the named list is affected, otherwise hosts are
removed from all URI host lists created so far. Parentheses around a list
name are a required syntax.
766
767Note that directives in configuration files are processed in sequence,
768the delist_uri_host only applies to previously listed entries and has
769no effect on enlisted entries in yet-to-be-processed directives.
770
For convenience (similarity to the enlist_uri_host directive) hostnames
may be prefixed by an exclamation mark, which is stripped off from each
name and has no meaning here.
774
775=cut
776
777  push (@cmds, {
778    command => 'delist_uri_host',
779    setting => 'uri_host_lists',
780    type => $CONF_TYPE_HASH_KEY_VALUE,
781    code => sub {
782      my($conf, $key, $value, $line) = @_;
783      local($1,$2);
784      if ($value !~ /^ (?: \( (.+?) \) \s+ )? (.+) \z/sx) {
785        return $MISSING_REQUIRED_VALUE;
786      }
787      my @listnames = defined $1 ? $1 : keys %{$conf->{uri_host_lists}};
788      my @args = split(/\s+/, lc $2);
789      foreach my $listname (@listnames) {
790        foreach my $host (@args) {
791          my $v = $host =~ s/^!// ? 0 : 1;
792          delete $conf->{uri_host_lists}{$listname}{$host};
793        }
794      }
795    }
796  });
797
798=item enlist_addrlist (listname) user@example.com
799
Adds one or more addresses to a named list of addresses.
The named list can then be consulted through a check_from_in_list() or a
check_to_in_list() eval rule implemented by the WLBLEval plugin, which takes
the list name as an argument. Parentheses around a list name are literal - a
required syntax.
805
806Listed addresses are file-glob-style patterns, so C<friend@somewhere.com>,
807C<*@isp.com>, or C<*.domain.net> will all work.
808Specifically, C<*> and C<?> are allowed, but all other metacharacters
809are not. Regular expressions are not used for security reasons.
810Matching is case-insensitive.
811
812Multiple addresses per line, separated by spaces, is OK.  Multiple
813C<enlist_addrlist> lines are also OK.
814
Enlisting an address to the list named blacklist_to is synonymous to using the
directive blacklist_to.

Enlisting an address to the list named blacklist_from is synonymous to using the
directive blacklist_from.

Enlisting an address to the list named welcomelist_to is synonymous to using the
directive welcomelist_to.

Enlisting an address to the list named welcomelist_from (previously whitelist_from)
is synonymous to using the directive welcomelist_from.
826
827e.g.
828
829  enlist_addrlist (PAYPAL_ADDRESS) service@paypal.com
830  enlist_addrlist (PAYPAL_ADDRESS) *@paypal.co.uk
831
832=cut
833
834  push (@cmds, {
835    setting => 'enlist_addrlist',
836    type => $CONF_TYPE_ADDRLIST,
837    code => sub {
838      my($conf, $key, $value, $line) = @_;
839      local($1,$2);
840      if ($value !~ /^ \( (.+?) \) \s+ (.+) \z/sx) {
841        return $MISSING_REQUIRED_VALUE;
842      }
843      my $listname = $1;  # corresponds to arg in check_uri_host_in_wblist()
844      # note: must not factor out dereferencing, as otherwise
845      # subhashes would spring up in a copy and be lost
846      $conf->{parser}->add_to_addrlist ($listname, split(/\s+/, $value));
847    }
848  });
849
850=item blocklist_uri_host host-or-domain ...
851
852Previously blacklist_uri_host which will work interchangeably until 4.1.
853
854Is a shorthand for a directive:  enlist_uri_host (BLACK) host ...
855
856Please see directives enlist_uri_host and delist_uri_host for details.
857
858=cut
859
860  push (@cmds, {
861    command => 'blocklist_uri_host',
862    aliases => ['blacklist_uri_host'],
863    setting => 'uri_host_lists',
864    type => $CONF_TYPE_HASH_KEY_VALUE,
865    code => sub {
866      my($conf, $key, $value, $line) = @_;
867      foreach my $host ( split(/\s+/, lc $value) ) {
868        my $v = $host =~ s/^!// ? 0 : 1;
869        $conf->{uri_host_lists}{'BLACK'}{$host} = $v;
870      }
871    }
872  });
873
874=item welcomelist_uri_host host-or-domain ...
875
876Previously whitelist_uri_host which will work interchangeably until 4.1.
877
878Is a shorthand for a directive:  enlist_uri_host (WHITE) host ...
879
880Please see directives enlist_uri_host and delist_uri_host for details.
881
882=cut
883
884  push (@cmds, {
885    command => 'welcomelist_uri_host',
886    aliases => ['whitelist_uri_host'],
887    setting => 'uri_host_lists',
888    type => $CONF_TYPE_HASH_KEY_VALUE,
889    code => sub {
890      my($conf, $key, $value, $line) = @_;
891      foreach my $host ( split(/\s+/, lc $value) ) {
892        my $v = $host =~ s/^!// ? 0 : 1;
893        $conf->{uri_host_lists}{'WHITE'}{$host} = $v;
894      }
895    }
896  });
897
898=back
899
900=head2 BASIC MESSAGE TAGGING OPTIONS
901
902=over 4
903
904=item rewrite_header { subject | from | to } STRING
905
906By default, suspected spam messages will not have the C<Subject>,
907C<From> or C<To> lines tagged to indicate spam. By setting this option,
908the header will be tagged with C<STRING> to indicate that a message is
909spam. For the From or To headers, this will take the form of an RFC 2822
910comment following the address in parentheses. For the Subject header,
911this will be prepended to the original subject. Note that you should
912only use the _REQD_ and _SCORE_ tags when rewriting the Subject header
913if C<report_safe> is 0. Otherwise, you may not be able to remove
914the SpamAssassin markup via the normal methods.  More information
915about tags is explained below in the B<TEMPLATE TAGS> section.
916
917Parentheses are not permitted in STRING if rewriting the From or To headers.
918(They will be converted to square brackets.)
919
920If C<rewrite_header subject> is used, but the message being rewritten
921does not already contain a C<Subject> header, one will be created.
922
923A null value for C<STRING> will remove any existing rewrite for the specified
924header.
925
926=cut
927
928  push (@cmds, {
929    setting => 'rewrite_header',
930    type => $CONF_TYPE_HASH_KEY_VALUE,
931    code => sub {
932      my ($self, $key, $value, $line) = @_;
933      my($hdr, $string) = split(/\s+/, $value, 2);
934      $hdr = ucfirst(lc($hdr));
935
936      if ($hdr =~ /^$/) {
937	return $MISSING_REQUIRED_VALUE;
938      }
939      # We only deal with From, Subject, and To ...
940      elsif ($hdr =~ /^(?:From|Subject|To)$/) {
941	unless (defined $string && $string =~ /\S/) {
942	  delete $self->{rewrite_header}->{$hdr};
943	  return;
944	}
945
946	if ($hdr ne 'Subject') {
947          $string =~ tr/()/[]/;
948	}
949        $self->{rewrite_header}->{$hdr} = $string;
950        return;
951      }
952      else {
953	# if we get here, note the issue, then we'll fail through for an error.
954	info("config: rewrite_header: ignoring $hdr, not From, Subject, or To");
955	return $INVALID_VALUE;
956      }
957    }
958  });
959
960=item subjprefix
961
Add a prefix to the email's Subject if a rule is matched.
963To enable this option "rewrite_header Subject" config
964option must be enabled as well.
965
966The check C<if can(Mail::SpamAssassin::Conf::feature_subjprefix)>
967should be used to silence warnings in previous
968SpamAssassin versions.
969
970To be able to use this feature a C<add_header all Subjprefix _SUBJPREFIX_>
971configuration line could be needed when the glue between the MTA and SpamAssassin
972rewrites the email content.
973
974Here is an example on how to use this feature:
975
976	rewrite_header Subject *****SPAM*****
977	add_header all Subjprefix _SUBJPREFIX_
978	body     OLEMACRO_MALICE eval:check_olemacro_malice()
979	describe OLEMACRO_MALICE Dangerous Office Macro
980	score    OLEMACRO_MALICE 5.0
981	if can(Mail::SpamAssassin::Conf::feature_subjprefix)
982	  subjprefix OLEMACRO_MALICE [VIRUS]
983	endif
984
985=cut
986
987  push (@cmds, {
988    command => 'subjprefix',
989    setting => 'subjprefix',
990    is_frequent => 1,
991    type => $CONF_TYPE_HASH_KEY_VALUE,
992  });
993
994=item add_header { spam | ham | all } header_name string
995
996Customized headers can be added to the specified type of messages (spam,
997ham, or "all" to add to either).  All headers begin with C<X-Spam->
998(so a C<header_name> Foo will generate a header called X-Spam-Foo).
999header_name is restricted to the character set [A-Za-z0-9_-].
1000
1001The order of C<add_header> configuration options is preserved, inserted
1002headers will follow this order of declarations. When combining C<add_header>
1003with C<clear_headers> and C<remove_header>, keep in mind that C<add_header>
1004appends a new header to the current list, after first removing any existing
1005header fields of the same name. Note also that C<add_header>, C<clear_headers>
1006and C<remove_header> may appear in multiple .cf files, which are interpreted
1007in alphabetic order.
1008
1009C<string> can contain tags as explained below in the B<TEMPLATE TAGS> section.
1010You can also use C<\n> and C<\t> in the header to add newlines and tabulators
1011as desired.  A backslash has to be written as \\, any other escaped chars will
1012be silently removed.
1013
1014All headers will be folded if fold_headers is set to C<1>. Note: Manually
1015adding newlines via C<\n> disables any further automatic wrapping (ie:
1016long header lines are possible). The lines will still be properly folded
1017(marked as continuing) though.
1018
1019You can customize existing headers with B<add_header> (only the specified
1020subset of messages will be changed).
1021
1022See also C<clear_headers> and C<remove_header> for removing headers.
1023
1024Here are some examples (these are the defaults, note that Checker-Version can
1025not be changed or removed):
1026
1027  add_header spam Flag _YESNOCAPS_
1028  add_header all Status _YESNO_, score=_SCORE_ required=_REQD_ tests=_TESTS_ autolearn=_AUTOLEARN_ version=_VERSION_
1029  add_header all Level _STARS(*)_
1030  add_header all Checker-Version SpamAssassin _VERSION_ (_SUBVERSION_) on _HOSTNAME_
1031
1032=cut
1033
1034  push (@cmds, {
1035    setting => 'add_header',
1036    code => sub {
1037      my ($self, $key, $value, $line) = @_;
1038      local ($1,$2,$3);
1039      if ($value !~ /^(ham|spam|all)\s+([A-Za-z0-9_-]+)\s+(.*?)\s*$/) {
1040        return $INVALID_VALUE;
1041      }
1042
1043      my ($type, $name, $hline) = ($1, $2, $3);
1044      if ($hline =~ /^"(.*)"$/) {
1045        $hline = $1;
1046      }
1047      my @line = split(
1048                  /\\\\/,     # split at double backslashes,
1049                  $hline."\n" # newline needed to make trailing backslashes work
1050                );
1051      foreach (@line) {
1052        s/\\t/\t/g; # expand tabs
1053        s/\\n/\n/g; # expand newlines
1054        s/\\.//g;   # purge all other escapes
1055      };
1056      $hline = join("\\", @line);
1057      chop($hline);  # remove dummy newline again
1058      if (($type eq "ham") || ($type eq "all")) {
1059        $self->{headers_ham} =
1060          [ grep { lc($_->[0]) ne lc($name) } @{$self->{headers_ham}} ];
1061        push(@{$self->{headers_ham}}, [$name, $hline]);
1062      }
1063      if (($type eq "spam") || ($type eq "all")) {
1064        $self->{headers_spam} =
1065          [ grep { lc($_->[0]) ne lc($name) } @{$self->{headers_spam}} ];
1066        push(@{$self->{headers_spam}}, [$name, $hline]);
1067      }
1068    }
1069  });
1070
1071=item remove_header { spam | ham | all } header_name
1072
1073Headers can be removed from the specified type of messages (spam, ham,
1074or "all" to remove from either).  All headers begin with C<X-Spam->
1075(so C<header_name> will be appended to C<X-Spam->).
1076
1077See also C<clear_headers> for removing all the headers at once.
1078
1079Note that B<X-Spam-Checker-Version> is not removable because the version
1080information is needed by mail administrators and developers to debug
1081problems.  Without at least one header, it might not even be possible to
1082determine that SpamAssassin is running.
1083
1084=cut
1085
1086  push (@cmds, {
1087    setting => 'remove_header',
1088    code => sub {
1089      my ($self, $key, $value, $line) = @_;
1090      local ($1,$2);
1091      if ($value !~ /^(ham|spam|all)\s+([A-Za-z0-9_-]+)\s*$/) {
1092        return $INVALID_VALUE;
1093      }
1094
1095      my ($type, $name) = ($1, $2);
1096      return if ( $name eq "Checker-Version" );
1097
1098      $name = lc($name);
1099      if (($type eq "ham") || ($type eq "all")) {
1100        $self->{headers_ham} =
1101          [ grep { lc($_->[0]) ne $name } @{$self->{headers_ham}} ];
1102      }
1103      if (($type eq "spam") || ($type eq "all")) {
1104        $self->{headers_spam} =
1105          [ grep { lc($_->[0]) ne $name } @{$self->{headers_spam}} ];
1106      }
1107    }
1108  });
1109
1110=item clear_headers
1111
1112Clear the list of headers to be added to messages.  You may use this
1113before any B<add_header> options to prevent the default headers from being
1114added to the message.
1115
1116C<add_header>, C<clear_headers> and C<remove_header> may appear in multiple
1117.cf files, which are interpreted in alphabetic order, so C<clear_headers>
1118in a later file will remove all added headers from previously interpreted
1119configuration files, which may or may not be desired.
1120
1121Note that B<X-Spam-Checker-Version> is not removable because the version
1122information is needed by mail administrators and developers to debug
1123problems.  Without at least one header, it might not even be possible to
1124determine that SpamAssassin is running.
1125
1126=cut
1127
1128  push (@cmds, {
1129    setting => 'clear_headers',
1130    type => $CONF_TYPE_NOARGS,
1131    code => sub {
1132      my ($self, $key, $value, $line) = @_;
1133      unless (!defined $value || $value eq '') {
1134        return $INVALID_VALUE;
1135      }
1136      my @h = grep { lc($_->[0]) eq "checker-version" }
1137                   @{$self->{headers_ham}};
1138      $self->{headers_ham}  = !@h ? [] : [ $h[0] ];
1139      $self->{headers_spam} = !@h ? [] : [ $h[0] ];
1140    }
1141  });
1142
1143=item report_safe ( 0 | 1 | 2 )	(default: 1)
1144
If this option is set to 1 and an incoming message is tagged as spam,
1146instead of modifying the original message, SpamAssassin will create a
1147new report message and attach the original message as a message/rfc822
1148MIME part (ensuring the original message is completely preserved, not
1149easily opened, and easier to recover).
1150
1151If this option is set to 2, then original messages will be attached with
1152a content type of text/plain instead of message/rfc822.  This setting
1153may be required for safety reasons on certain broken mail clients that
1154automatically load attachments without any action by the user.  This
1155setting may also make it somewhat more difficult to extract or view the
1156original message.
1157
1158If this option is set to 0, incoming spam is only modified by adding
1159some C<X-Spam-> headers and no changes will be made to the body.  In
1160addition, a header named B<X-Spam-Report> will be added to spam.  You
1161can use the B<remove_header> option to remove that header after setting
1162B<report_safe> to 0.
1163
1164See B<report_safe_copy_headers> if you want to copy headers from
1165the original mail into tagged messages.
1166
1167=cut
1168
1169  push (@cmds, {
1170    setting => 'report_safe',
1171    default => 1,
1172    type => $CONF_TYPE_NUMERIC,
1173    code => sub {
1174      my ($self, $key, $value, $line) = @_;
1175      if ($value eq '') {
1176        return $MISSING_REQUIRED_VALUE;
1177      }
1178      elsif ($value !~ /^[012]$/) {
1179        return $INVALID_VALUE;
1180      }
1181
1182      $self->{report_safe} = $value+0;
1183      if (! $self->{report_safe} &&
1184          ! (grep { lc($_->[0]) eq "report" } @{$self->{headers_spam}}) ) {
1185        push(@{$self->{headers_spam}}, ["Report", "_REPORT_"]);
1186      }
1187    }
1188  });
1189
1190=item report_wrap_width (default: 75)
1191
1192This option sets the wrap width for description lines in the X-Spam-Report
1193header, not accounting for tab width.
1194
1195=cut
1196
1197  push (@cmds, {
1198    setting => 'report_wrap_width',
1199    default => '75',
1200    type => $CONF_TYPE_NUMERIC,
1201  });
1202
1203=back
1204
1205=head2 LANGUAGE OPTIONS
1206
1207=over 4
1208
1209=item ok_locales xx [ yy zz ... ]		(default: all)
1210
1211This option is used to specify which locales are considered OK for
1212incoming mail.  Mail using the B<character sets> that are allowed by
1213this option will not be marked as possibly being spam in a foreign
1214language.
1215
1216If you receive lots of spam in foreign languages, and never get any non-spam in
1217these languages, this may help.  Note that all ISO-8859-* character sets, and
1218Windows code page character sets, are always permitted by default.
1219
1220Set this to C<all> to allow all character sets.  This is the default.
1221
1222The rules C<CHARSET_FARAWAY>, C<CHARSET_FARAWAY_BODY>, and
1223C<CHARSET_FARAWAY_HEADERS> are triggered based on how this is set.
1224
1225Examples:
1226
1227  ok_locales all         (allow all locales)
1228  ok_locales en          (only allow English)
1229  ok_locales en ja zh    (allow English, Japanese, and Chinese)
1230
1231Note: if there are multiple ok_locales lines, only the last one is used.
1232
1233Select the locales to allow from the list below:
1234
1235=over 4
1236
1237=item en	- Western character sets in general
1238
1239=item ja	- Japanese character sets
1240
1241=item ko	- Korean character sets
1242
1243=item ru	- Cyrillic character sets
1244
1245=item th	- Thai character sets
1246
1247=item zh	- Chinese (both simplified and traditional) character sets
1248
1249=back
1250
1251=cut
1252
1253  push (@cmds, {
1254    setting => 'ok_locales',
1255    default => 'all',
1256    type => $CONF_TYPE_STRING,
1257  });
1258
1259=item normalize_charset ( 0 | 1 )        (default: 1)
1260
Whether to decode non-UTF-8 and non-ASCII textual parts and recode them
1262to UTF-8 before the text is given over to rules processing. The character
1263set used for attempted decoding is primarily based on a declared character
1264set in a Content-Type header, but if the decoding attempt fails a module
1265Encode::Detect::Detector is consulted (if available) to provide a guess
1266based on the actual text, and decoding is re-attempted. Even if the option
1267is enabled no unnecessary decoding and re-encoding work is done when
1268possible (like with an all-ASCII text with a US-ASCII or extended ASCII
1269character set declaration, e.g. UTF-8 or ISO-8859-nn or Windows-nnnn).
1270
1271Unicode support in old versions of perl or in a core module Encode is likely
1272to be buggy in places, so if the normalize_charset function is enabled
1273it is advised to stick to more recent versions of perl (preferably 5.12
1274or later). The module Encode::Detect::Detector is optional, when necessary
1275it will be used if it is available.
1276
1277=cut
1278
1279  push (@cmds, {
1280    setting => 'normalize_charset',
1281    default => 1,
1282    type => $CONF_TYPE_BOOL,
1283    code => sub {
1284	my ($self, $key, $value, $line) = @_;
1285	unless (defined $value && $value !~ /^$/) {
1286	    return $MISSING_REQUIRED_VALUE;
1287	}
1288        if    (lc $value eq 'yes' || $value eq '1') { $value = 1 }
1289        elsif (lc $value eq 'no'  || $value eq '0') { $value = 0 }
1290        else { return $INVALID_VALUE }
1291
1292	$self->{normalize_charset} = $value;
1293
1294	unless ($] > 5.008004) {
1295	    $self->{parser}->lint_warn("config: normalize_charset requires Perl 5.8.5 or later");
1296	    $self->{normalize_charset} = 0;
1297	    return $INVALID_VALUE;
1298	}
1299	require HTML::Parser;
1300        #changed to eval to use VERSION so that this version was not incorrectly parsed for CPAN
1301	unless ( eval { HTML::Parser->VERSION(3.46) } ) {
1302	    $self->{parser}->lint_warn("config: normalize_charset requires HTML::Parser 3.46 or later");
1303	    $self->{normalize_charset} = 0;
1304	    return $INVALID_VALUE;
1305	}
1306    }
1307  });
1308
1309=back
1310
1311=head2 NETWORK TEST OPTIONS
1312
1313=over 4
1314
1315=item trusted_networks IPaddress[/masklen] ...   (default: none)
1316
1317What networks or hosts are 'trusted' in your setup.  B<Trusted> in this case
1318means that relay hosts on these networks are considered to not be potentially
1319operated by spammers, open relays, or open proxies.  A trusted host could
1320conceivably relay spam, but will not originate it, and will not forge header
1321data. DNS blacklist checks will never query for hosts on these networks.
1322
1323See C<http://wiki.apache.org/spamassassin/TrustPath> for more information.
1324
1325MXes for your domain(s) and internal relays should B<also> be specified using
1326the C<internal_networks> setting. When there are 'trusted' hosts that
1327are not MXes or internal relays for your domain(s) they should B<only> be
1328specified in C<trusted_networks>.
1329
1330The C<IPaddress> can be an IPv4 address (in a dot-quad form), or an IPv6
1331address optionally enclosed in square brackets. Scoped link-local IPv6
1332addresses are syntactically recognized but the interface scope is currently
1333ignored (e.g. [fe80::1234%eth0] ) and should be avoided.
1334
1335If a C</masklen> is specified, it is considered a CIDR-style 'netmask' length,
1336specified in bits.  If it is not specified, but less than 4 octets of an IPv4
1337address are specified with a trailing dot, an implied netmask length covers
1338all addresses in remaining octets (i.e. implied masklen is /8 or /16 or /24).
If masklen is not specified, and there is no trailing dot, then just a single
1340IP address specified is used, as if the masklen were C</32> with an IPv4
1341address, or C</128> in case of an IPv6 address.
1342
1343If a network or host address is prefaced by a C<!> the matching network or
1344host will be excluded from the list even if a less specific (shorter netmask
1345length) subnet is later specified in the list. This allows a subset of
1346a wider network to be exempt. In case of specifying overlapping subnets,
1347specify more specific subnets first (tighter matching, i.e. with a longer
1348netmask length), followed by less specific (shorter netmask length) subnets
1349to get predictable results regardless of the search algorithm used - when
1350Net::Patricia module is installed the search finds the tightest matching
1351entry in the list, while a sequential search as used in absence of the
1352module Net::Patricia will find the first matching entry in the list.
1353
1354Note: 127.0.0.0/8 and ::1 are always included in trusted_networks, regardless
1355of your config.
1356
1357Examples:
1358
1359   trusted_networks 192.168.0.0/16        # all in 192.168.*.*
1360   trusted_networks 192.168.              # all in 192.168.*.*
1361   trusted_networks 212.17.35.15          # just that host
1362   trusted_networks !10.0.1.5 10.0.1/24   # all in 10.0.1.* but not 10.0.1.5
1363   trusted_networks 2001:db8:1::1 !2001:db8:1::/64 2001:db8::/32
1364     # 2001:db8::/32 and 2001:db8:1::1/128, except the rest of 2001:db8:1::/64
1365
1366This operates additively, so a C<trusted_networks> line after another one
1367will append new entries to the list of trusted networks.  To clear out the
1368existing entries, use C<clear_trusted_networks>.
1369
1370If C<trusted_networks> is not set and C<internal_networks> is, the value
1371of C<internal_networks> will be used for this parameter.
1372
If neither C<trusted_networks> nor C<internal_networks> is set, a basic
1374inference algorithm is applied.  This works as follows:
1375
1376=over 4
1377
1378=item *
1379
1380If the 'from' host has an IP address in a private (RFC 1918) network range,
1381then it's trusted
1382
1383=item *
1384
1385If there are authentication tokens in the received header, and
1386the previous host was trusted, then this host is also trusted
1387
1388=item *
1389
Otherwise this host, and all further hosts, are considered untrusted.
1391
1392=back
1393
1394=cut
1395
1396  push (@cmds, {
1397    setting => 'trusted_networks',
1398    type => $CONF_TYPE_IPADDRLIST,
1399  });
1400
1401=item clear_trusted_networks
1402
1403Empty the list of trusted networks.
1404
1405=cut
1406
1407  push (@cmds, {
1408    setting => 'clear_trusted_networks',
1409    type => $CONF_TYPE_NOARGS,
1410    code => sub {
1411      my ($self, $key, $value, $line) = @_;
1412      unless (!defined $value || $value eq '') {
1413        return $INVALID_VALUE;
1414      }
1415      $self->{trusted_networks} = $self->new_netset('trusted_networks',1);
1416      $self->{trusted_networks_configured} = 0;
1417    }
1418  });
1419
1420=item internal_networks IPaddress[/masklen] ...   (default: none)
1421
1422What networks or hosts are 'internal' in your setup.   B<Internal> means
1423that relay hosts on these networks are considered to be MXes for your
1424domain(s), or internal relays.  This uses the same syntax as
1425C<trusted_networks>, above - see there for details.
1426
1427This value is used when checking 'dial-up' or dynamic IP address
1428blocklists, in order to detect direct-to-MX spamming.
1429
1430Trusted relays that accept mail directly from dial-up connections
1431(i.e. are also performing a role of mail submission agents - MSA)
1432should not be listed in C<internal_networks>. List them only in
1433C<trusted_networks>.
1434
1435If C<trusted_networks> is set and C<internal_networks> is not, the value
1436of C<trusted_networks> will be used for this parameter.
1437
1438If neither C<trusted_networks> nor C<internal_networks> is set, no addresses
1439will be considered local; in other words, any relays past the machine where
1440SpamAssassin is running will be considered external.
1441
1442Every entry in C<internal_networks> must appear in C<trusted_networks>; in
1443other words, C<internal_networks> is always a subset of the trusted set.
1444
1445Note: 127/8 and ::1 are always included in internal_networks, regardless of
1446your config.
1447
1448=cut
1449
1450  push (@cmds, {
1451    setting => 'internal_networks',
1452    type => $CONF_TYPE_IPADDRLIST,
1453  });
1454
1455=item clear_internal_networks
1456
1457Empty the list of internal networks.
1458
1459=cut
1460
1461  push (@cmds, {
1462    setting => 'clear_internal_networks',
1463    type => $CONF_TYPE_NOARGS,
1464    code => sub {
1465      my ($self, $key, $value, $line) = @_;
1466      unless (!defined $value || $value eq '') {
1467        return $INVALID_VALUE;
1468      }
1469      $self->{internal_networks} = $self->new_netset('internal_networks',1);
1470      $self->{internal_networks_configured} = 0;
1471    }
1472  });
1473
1474=item msa_networks IPaddress[/masklen] ...   (default: none)
1475
1476The networks or hosts which are acting as MSAs in your setup (but not also
1477as MX relays). This uses the same syntax as C<trusted_networks>, above - see
1478there for details.
1479
B<MSA> means that the relay hosts on these networks accept mail from your
own users and authenticate them appropriately.  These relays will never
accept mail from hosts that aren't authenticated in some way. Examples of
authentication include IP lists, SMTP AUTH, POP-before-SMTP, etc.
1484
1485All relays found in the message headers after the MSA relay will take
1486on the same trusted and internal classifications as the MSA relay itself,
1487as defined by your I<trusted_networks> and I<internal_networks> configuration.
1488
1489For example, if the MSA relay is trusted and internal so will all of the
1490relays that precede it.
1491
1492When using msa_networks to identify an MSA it is recommended that you treat
1493that MSA as both trusted and internal.  When an MSA is not included in
1494msa_networks you should treat the MSA as trusted but not internal, however
1495if the MSA is also acting as an MX or intermediate relay you must always
1496treat it as both trusted and internal and ensure that the MSA includes
1497visible auth tokens in its Received header to identify submission clients.
1498
1499B<Warning:> Never include an MSA that also acts as an MX (or is also an
1500intermediate relay for an MX) or otherwise accepts mail from
1501non-authenticated users in msa_networks.  Doing so will result in unknown
1502external relays being trusted.
1503
1504=cut
1505
1506  push (@cmds, {
1507    setting => 'msa_networks',
1508    type => $CONF_TYPE_IPADDRLIST,
1509  });
1510
1511=item clear_msa_networks
1512
1513Empty the list of msa networks.
1514
1515=cut
1516
1517  push (@cmds, {
1518    setting => 'clear_msa_networks',
1519    type => $CONF_TYPE_NOARGS,
1520    code => sub {
1521      my ($self, $key, $value, $line) = @_;
1522      unless (!defined $value || $value eq '') {
1523        return $INVALID_VALUE;
1524      }
1525      $self->{msa_networks} =
1526        $self->new_netset('msa_networks',0);  # no loopback IP
1527      $self->{msa_networks_configured} = 0;
1528    }
1529  });
1530
1531=item originating_ip_headers header ...   (default: X-Yahoo-Post-IP X-Originating-IP X-Apparently-From X-SenderIP)
1532
1533A list of header field names from which an originating IP address can
1534be obtained. For example, webmail servers may record a client IP address
1535in X-Originating-IP.
1536
1537These IP addresses are virtually appended into the Received: chain, so they
1538are used in RBL checks where appropriate.
1539
1540Currently the IP addresses are not added into X-Spam-Relays-* header fields,
1541but they may be in the future.
1542
1543=cut
1544
1545  push (@cmds, {
1546    setting => 'originating_ip_headers',
1547    default => [],
1548    type => $CONF_TYPE_STRINGLIST,
1549    code => sub {
1550      my ($self, $key, $value, $line) = @_;
1551      unless (defined $value && $value !~ /^$/) {
1552	return $MISSING_REQUIRED_VALUE;
1553      }
1554      foreach my $hfname (split(/\s+/, $value)) {
1555        # avoid duplicates, consider header field names case-insensitive
1556        push(@{$self->{originating_ip_headers}}, $hfname)
1557          if !grep(lc($_) eq lc($hfname), @{$self->{originating_ip_headers}});
1558      }
1559    }
1560  });
1561
1562=item clear_originating_ip_headers
1563
1564Empty the list of 'originating IP address' header field names.
1565
1566=cut
1567
1568  push (@cmds, {
1569    setting => 'clear_originating_ip_headers',
1570    type => $CONF_TYPE_NOARGS,
1571    code => sub {
1572      my ($self, $key, $value, $line) = @_;
1573      unless (!defined $value || $value eq '') {
1574        return $INVALID_VALUE;
1575      }
1576      $self->{originating_ip_headers} = [];
1577    }
1578  });
1579
1580=item always_trust_envelope_sender ( 0 | 1 )   (default: 0)
1581
1582Trust the envelope sender even if the message has been passed through one or
1583more trusted relays.  See also C<envelope_sender_header>.
1584
1585=cut
1586
1587  push (@cmds, {
1588    setting => 'always_trust_envelope_sender',
1589    default => 0,
1590    type => $CONF_TYPE_BOOL,
1591  });
1592
1593=item skip_rbl_checks ( 0 | 1 )   (default: 0)
1594
1595Turning on the skip_rbl_checks setting will disable the DNSEval plugin,
1596which implements Real-time Block List (or: Blackhole List) (RBL) lookups.
1597
1598By default, SpamAssassin will run RBL checks. Individual blocklists may
1599be disabled selectively by setting a score of a corresponding rule to 0.
1600
1601See also a related configuration parameter skip_uribl_checks,
1602which controls the URIDNSBL plugin (documented in the URIDNSBL man page).
1603
1604=cut
1605
1606  push (@cmds, {
1607    setting => 'skip_rbl_checks',
1608    default => 0,
1609    type => $CONF_TYPE_BOOL,
1610  });
1611
1612=item dns_available { yes | no | test[: domain1 domain2...] }   (default: yes)
1613
1614Tells SpamAssassin whether DNS resolving is available or not. A value I<yes>
1615indicates DNS resolving is available, a value I<no> indicates DNS resolving
1616is not available - both of these values apply unconditionally and skip initial
1617DNS tests, which can be slow or unreliable.
1618
1619When the option value is a I<test> (with or without arguments), SpamAssassin
1620will query some domain names on the internet during initialization, attempting
1621to determine if DNS resolving is working or not. A space-separated list
1622of domain names may be specified explicitly, or left to a built-in default
1623of a dozen or so domain names. From an explicit or a default list a subset
1624of three domain names is picked randomly for checking. The test queries for
NS records of these domains: if at least one query returns a success then
1626SpamAssassin considers DNS resolving as available, otherwise not.
1627
1628The problem is that the test can introduce some startup delay if a network
1629connection is down, and in some cases it can wrongly guess that DNS is
unavailable because a test connection failed, which causes several
DNS-dependent tests to be disabled.
1632
1633Please note, the DNS test queries for NS records, so specify domain names,
1634not host names.
1635
1636Since version 3.4.0 of SpamAssassin a default setting for option
1637I<dns_available> is I<yes>. A default in older versions was I<test>.
1638
1639=cut
1640
1641  push (@cmds, {
1642    setting => 'dns_available',
1643    default => 'yes',
1644    type => $CONF_TYPE_STRING,
1645    code => sub {
1646      my ($self, $key, $value, $line) = @_;
1647      if ($value =~ /^test(?::\s*\S.*)?$/) {
1648        $self->{dns_available} = $value;
1649      }
1650      elsif ($value =~ /^(?:yes|1)$/) {
1651        $self->{dns_available} = 'yes';
1652      }
1653      elsif ($value =~ /^(?:no|0)$/) {
1654        $self->{dns_available} = 'no';
1655      }
1656      else {
1657        return $INVALID_VALUE;
1658      }
1659    }
1660  });
1661
1662=item dns_server ip-addr-port  (default: entries provided by Net::DNS)
1663
1664Specifies an IP address of a DNS server, and optionally its port number.
1665The I<dns_server> directive may be specified multiple times, each entry
1666adding to a list of available resolving name servers. The I<ip-addr-port>
1667argument can either be an IPv4 or IPv6 address, optionally enclosed in
1668brackets, and optionally followed by a colon and a port number. In absence
1669of a port number a standard port number 53 is assumed. When an IPv6 address
1670is specified along with a port number, the address B<must> be enclosed in
1671brackets to avoid parsing ambiguity regarding a colon separator. A scoped
1672link-local IP address is allowed (assuming underlying modules allow it).
1673
1674Examples :
1675 dns_server 127.0.0.1
1676 dns_server 127.0.0.1:53
1677 dns_server [127.0.0.1]:53
1678 dns_server [::1]:53
1679 dns_server fe80::1%lo0
1680 dns_server [fe80::1%lo0]:53
1681
1682In absence of I<dns_server> directives, the list of name servers is provided
1683by Net::DNS module, which typically obtains the list from /etc/resolv.conf,
1684but this may be platform dependent. Please consult the Net::DNS::Resolver
1685documentation for details.
1686
1687=cut
1688
1689  push (@cmds, {
1690    setting => 'dns_server',
1691    type => $CONF_TYPE_STRING,
1692    code => sub {
1693      my ($self, $key, $value, $line) = @_;
1694      my($address,$port); local($1,$2,$3);
1695      if ($value =~ /^(?: \[ ([^\]]*) \] | ([^:]*) ) : (\d+) \z/sx) {
1696        $address = defined $1 ? $1 : $2;  $port = $3;
1697      } elsif ($value =~ /^(?: \[ ([^\]]*) \] |
1698                               ([0-9A-F.:]+ (?: %[A-Z0-9._~-]* )? ) ) \z/six) {
1699        $address = defined $1 ? $1 : $2;  $port = '53';
1700      } else {
1701        return $INVALID_VALUE;
1702      }
1703      my $scope = '';  # scoped IP address?
1704      $scope = $1  if $address =~ s/ ( % [A-Z0-9._~-]* ) \z//xsi;
1705      if ($address =~ IS_IP_ADDRESS && $port >= 1 && $port <= 65535) {
1706        $self->{dns_servers} = []  if !$self->{dns_servers};
1707        # checked, untainted, stored in a normalized form
1708        push(@{$self->{dns_servers}}, untaint_var("[$address$scope]:$port"));
1709      } else {
1710        return $INVALID_VALUE;
1711      }
1712    }
1713  });
1714
1715=item clear_dns_servers
1716
1717Empty the list of explicitly configured DNS servers through a I<dns_server>
directive, falling back to Net::DNS-supplied defaults.
1719
1720=cut
1721
1722  push (@cmds, {
1723    setting => 'clear_dns_servers',
1724    type => $CONF_TYPE_NOARGS,
1725    code => sub {
1726      my ($self, $key, $value, $line) = @_;
1727      unless (!defined $value || $value eq '') {
1728        return $INVALID_VALUE;
1729      }
1730      undef $self->{dns_servers};
1731    }
1732  });
1733
1734=item dns_local_ports_permit ranges...
1735
Add the specified ports or port ranges to the set of allowed port numbers
1737that can be used as local port numbers when sending DNS queries to a resolver.
1738
1739The argument is a whitespace-separated or a comma-separated list of
1740single port numbers n, or port number pairs (i.e. m-n) delimited by a '-',
1741representing a range. Allowed port numbers are between 1 and 65535.
1742
1743Directives I<dns_local_ports_permit> and I<dns_local_ports_avoid> are processed
1744in order in which they appear in configuration files. Each directive adds
1745(or subtracts) its subsets of ports to a current set of available ports.
1746Whatever is left in the set by the end of configuration processing
1747is made available to a DNS resolving client code.
1748
1749If the resulting set of port numbers is empty (see also the directive
1750I<dns_local_ports_none>), then SpamAssassin does not apply its ports
1751randomization logic, but instead leaves the operating system to choose
1752a suitable free local port number.
1753
1754The initial set consists of all port numbers in the range 1024-65535.
1755Note that system config files already modify the set and remove all the
1756IANA registered port numbers and some other ranges, so there is rarely
1757a need to adjust the ranges by site-specific directives.
1758
See also directives I<dns_local_ports_avoid> and I<dns_local_ports_none>.
1760
1761=cut
1762
1763  push (@cmds, {
1764    setting => 'dns_local_ports_permit',
1765    type => $CONF_TYPE_STRING,
1766    is_admin => 1,
1767    code => sub {
1768      my($self, $key, $value, $line) = @_;
1769      my(@port_ranges); local($1,$2);
1770      foreach my $range (split(/[ \t,]+/, $value)) {
1771        if ($range =~ /^(\d{1,5})\z/) {
1772          # don't allow adding a port number 0
1773          if ($1 < 1 || $1 > 65535) { return $INVALID_VALUE }
1774          push(@port_ranges, [$1,$1]);
1775        } elsif ($range =~ /^(\d{1,5})-(\d{1,5})\z/) {
1776          if ($1 < 1 || $1 > 65535) { return $INVALID_VALUE }
1777          if ($2 < 1 || $2 > 65535) { return $INVALID_VALUE }
1778          push(@port_ranges, [$1,$2]);
1779        } else {
1780          return $INVALID_VALUE;
1781        }
1782      }
1783      foreach my $p (@port_ranges) {
1784        undef $self->{dns_available_portscount};  # invalidate derived data
1785        set_ports_range(\$self->{dns_available_ports_bitset},
1786                        $p->[0], $p->[1], 1);
1787      }
1788    }
1789  });
1790
1791=item dns_local_ports_avoid ranges...
1792
Remove specified ports or port ranges from the set of allowed port numbers
1794that can be used as local port numbers when sending DNS queries to a resolver.
1795
1796Please see directive I<dns_local_ports_permit> for details.
1797
1798=cut
1799
1800  push (@cmds, {
1801    setting => 'dns_local_ports_avoid',
1802    type => $CONF_TYPE_STRING,
1803    is_admin => 1,
1804    code => sub {
1805      my($self, $key, $value, $line) = @_;
1806      my(@port_ranges); local($1,$2);
1807      foreach my $range (split(/[ \t,]+/, $value)) {
1808        if ($range =~ /^(\d{1,5})\z/) {
1809          if ($1 > 65535) { return $INVALID_VALUE }
1810          # don't mind clearing also the port number 0
1811          push(@port_ranges, [$1,$1]);
1812        } elsif ($range =~ /^(\d{1,5})-(\d{1,5})\z/) {
1813          if ($1 > 65535 || $2 > 65535) { return $INVALID_VALUE }
1814          push(@port_ranges, [$1,$2]);
1815        } else {
1816          return $INVALID_VALUE;
1817        }
1818      }
1819      foreach my $p (@port_ranges) {
1820        undef $self->{dns_available_portscount};  # invalidate derived data
1821        set_ports_range(\$self->{dns_available_ports_bitset},
1822                        $p->[0], $p->[1], 0);
1823      }
1824    }
1825  });
1826
1827=item dns_local_ports_none
1828
1829Is a fast shorthand for:
1830
1831  dns_local_ports_avoid 1-65535
1832
1833leaving the set of available DNS query local port numbers empty. In all
1834respects (apart from speed) it is equivalent to the shown directive, and can
1835be freely mixed with I<dns_local_ports_permit> and I<dns_local_ports_avoid>.
1836
1837If the resulting set of port numbers is empty, then SpamAssassin does not
1838apply its ports randomization logic, but instead leaves the operating system
1839to choose a suitable free local port number.
1840
1841See also directives I<dns_local_ports_permit> and I<dns_local_ports_avoid>.
1842
1843=cut
1844
1845  push (@cmds, {
1846    setting => 'dns_local_ports_none',
1847    type => $CONF_TYPE_NOARGS,
1848    is_admin => 1,
1849    code => sub {
1850      my ($self, $key, $value, $line) = @_;
1851      unless (!defined $value || $value eq '') {
1852        return $INVALID_VALUE;
1853      }
1854      undef $self->{dns_available_portscount};  # invalidate derived data
1855      wipe_ports_range(\$self->{dns_available_ports_bitset}, 0);
1856    }
1857  });
1858
1859=item dns_test_interval n   (default: 600 seconds)
1860
1861If dns_available is set to I<test>, the dns_test_interval time in number
1862of seconds will tell SpamAssassin how often to retest for working DNS.
1863A numeric value is optionally suffixed by a time unit (s, m, h, d, w,
1864indicating seconds (default), minutes, hours, days, weeks).
1865
1866=cut
1867
1868  push (@cmds, {
1869    setting => 'dns_test_interval',
1870    default => 600,
1871    type => $CONF_TYPE_DURATION,
1872  });
1873
1874=item dns_options opts   (default: v4, v6, norotate, nodns0x20, edns=4096)
1875
1876Provides a (whitespace or comma -separated) list of options applying to DNS
1877resolving.  Available options are: I<v4>, I<v6>, I<rotate>, I<dns0x20> and
1878I<edns> (or I<edns0>).  Option name may be negated by prepending a I<no>
1879(e.g.  I<norotate>, I<NoEDNS>) to counteract a previously enabled option.
1880Option names are not case-sensitive.  The I<dns_options> directive may
1881appear in configuration files multiple times, the last setting prevails.
1882
1883Option I<v4> declares resolver capable of returning IPv4 (A) records.
1884Option I<v6> declares resolver capable of returning IPv6 (AAAA) records.
1885One would set I<nov6> if the resolver is filtering AAAA responses.  NOTE:
these options only refer to I<resolving capabilities>, there is no other
1887meaning like whether the IP address of resolver itself is IPv4 or IPv6.
1888
Option I<edns> (or I<edns0>) may take a value which specifies a requestor's
1890acceptable UDP payload size according to EDNS0 specifications (RFC 6891,
1891ex RFC 2671) e.g. I<edns=4096>. When EDNS0 is off (I<noedns> or I<edns=512>)
1892a traditional implied UDP payload size is 512 bytes, which is also a minimum
1893allowed value for this option. When the option is specified but a value
1894is not provided, a conservative default of 1220 bytes is implied. It is
1895recommended to keep I<edns> enabled when using a local recursive DNS server
1896which supports EDNS0 (like most modern DNS servers do), a suitable setting
1897in this case is I<edns=4096>, which is also a default. Allowing UDP payload
1898size larger than 512 bytes can avoid truncation of resource records in large
1899DNS responses (like in TXT records of some SPF and DKIM responses, or when
1900an unreasonable number of A records is published by some domain). The option
1901should be disabled when a recursive DNS server is only reachable through
1902non- RFC 6891 compliant middleboxes (such as some old-fashioned firewall)
1903which bans DNS UDP payload sizes larger than 512 bytes. A suitable value
1904when a non-local recursive DNS server is used and a middlebox B<does> allow
1905EDNS0 but blocks fragmented IP packets is perhaps 1220 bytes, allowing a
1906DNS UDP packet to fit within a single IP packet in most cases (a slightly
1907less conservative range would be 1280-1410 bytes).
1908
1909Option I<rotate> causes SpamAssassin to choose a DNS server at random
1910from all servers listed in C</etc/resolv.conf> every I<dns_test_interval>
1911seconds, effectively spreading the load over all currently available DNS
1912servers when there are many spamd workers.
1913
1914Option I<dns0x20> enables randomization of letters in a DNS query label
1915according to draft-vixie-dnsext-dns0x20, decreasing a chance of collisions
1916of responses (by chance or by a malicious intent) by increasing spread
1917as provided by a 16-bit query ID and up to 16 bits of a port number,
1918with additional bits as encoded by flipping case (upper/lower) of letters
1919in a query. The number of additional random bits corresponds to the number
1920of letters in a query label. Should work reliably with all mainstream
1921DNS servers - do not turn on if you see frequent info messages
1922"dns: no callback for id:" in the log, or if RBL or URIDNS lookups
1923do not work for no apparent reason.
1924
1925=cut
1926
1927  push (@cmds, {
1928    setting => 'dns_options',
1929    type => $CONF_TYPE_HASH_KEY_VALUE,
1930    # RFC 6891: A good compromise may be the use of an EDNS maximum payload size
1931    # of 4096 octets as a starting point.
1932    default => { 'v4' => 1, 'v6' => 1,
1933                 'rotate' => 0, 'dns0x20' => 0, 'edns' => 4096 },
1934    code => sub {
1935      my ($self, $key, $value, $line) = @_;
1936      foreach my $option (split (/[\s,]+/, lc $value)) {
1937        local($1,$2);
1938        if ($option =~ /^no(rotate|dns0x20|v4|v6)\z/) {
1939          $self->{dns_options}->{$1} = 0;
1940        } elsif ($option =~ /^no(edns)0?\z/) {
1941          $self->{dns_options}->{$1} = 0;
1942        } elsif ($option =~ /^(rotate|dns0x20|v4|v6)\z/) {
1943          $self->{dns_options}->{$1} = 1;
1944        } elsif ($option =~ /^(edns)0? (?: = (\d+) )? \z/x) {
1945          # RFC 6891 (ex RFC 2671) - EDNS0, value is a requestor's UDP payload
1946          # size, defaults to some UDP packet size likely to fit into a single
1947          # IP packet which is more likely to pass firewalls which choke on IP
1948          # fragments.  RFC 2460: min MTU is 1280 for IPv6, minus 40 bytes for
1949          # basic header, yielding 1240.  RFC 3226 prescribes a min of 1220 for
1950          # RFC 2535 compliant servers.  RFC 6891: choosing between 1280 and
1951          # 1410 bytes for IP (v4 or v6) over Ethernet would be reasonable.
1952          #
1953          $self->{dns_options}->{$1} = $2 || 1220;
1954          return $INVALID_VALUE  if $self->{dns_options}->{$1} < 512;
1955        } else {
1956          return $INVALID_VALUE;
1957        }
1958      }
1959    }
1960  });
1961
1962=item dns_query_restriction (allow|deny) domain1 domain2 ...
1963
1964Option allows disabling of rules which would result in a DNS query to one of
1965the listed domains. The first argument must be a literal C<allow> or C<deny>,
remaining arguments are domain names.
1967
1968Most DNS queries (with some exceptions) are subject to dns_query_restriction.
1969A domain to be queried is successively stripped-off of its leading labels
1970(thus yielding a series of its parent domains), and on each iteration a
1971check is made against an associative array generated by dns_query_restriction
1972options. Search stops at the first match (i.e. the tightest match), and the
1973matching entry with its C<allow> or C<deny> value then controls whether a
1974DNS query is allowed to be launched.
1975
1976If no match is found an implicit default is to allow a query. The purpose of
1977an explicit C<allow> entry is to be able to override a previously configured
1978C<deny> on the same domain or to override an entry (possibly yet to be
1979configured in subsequent config directives) on one of its parent domains.
1980Thus an 'allow zen.spamhaus.org' with a 'deny spamhaus.org' would permit
1981DNS queries on a specific DNS BL zone but deny queries to other zones under
1982the same parent domain.
1983
1984Domains are matched case-insensitively, no wildcards are recognized,
1985there should be no leading or trailing dot.
1986
1987Specifying a block on querying a domain name has a similar effect as setting
1988a score of corresponding DNSBL and URIBL rules to zero, and can be a handy
1989alternative to hunting for such rules when a site policy does not allow
1990certain DNS block lists to be queried.
1991
1992Special wildcard "dns_query_restriction deny *" is supported to block all
1993queries except allowed ones.
1994
1995Example:
1996  dns_query_restriction deny  dnswl.org surbl.org
1997  dns_query_restriction allow zen.spamhaus.org
1998  dns_query_restriction deny  spamhaus.org mailspike.net spamcop.net
1999
2000=cut
2001
2002  push (@cmds, {
2003    setting => 'dns_query_restriction',
2004    type => $CONF_TYPE_STRING,
2005    code => sub {
2006      my ($self, $key, $value, $line) = @_;
2007      defined $value && $value =~ s/^(allow|deny)\s+//i
2008        or return $INVALID_VALUE;
2009      my $blocked = lc($1) eq 'deny' ? 1 : 0;
2010      foreach my $domain (split(/\s+/, $value)) {
2011        $domain =~ s/^\.//; $domain =~ s/\.\z//;  # strip dots
2012        $self->{dns_query_blocked}{lc $domain} = $blocked;
2013      }
2014    }
2015  });
2016
2017=item clear_dns_query_restriction
2018
2019The option removes any entries entered by previous 'dns_query_restriction'
2020options, leaving the list empty, i.e. allowing DNS queries for any domain
2021(including any DNS BL zone).
2022
2023=cut
2024
2025  push (@cmds, {
2026    setting =>  'clear_dns_query_restriction',
2027    aliases => ['clear_dns_query_restrictions'],
2028    type => $CONF_TYPE_NOARGS,
2029    code => sub {
2030      my ($self, $key, $value, $line) = @_;
2031      return $INVALID_VALUE  if defined $value && $value ne '';
2032      delete $self->{dns_query_blocked};
2033    }
2034  });
2035
2036=item dns_block_rule RULE domain
2037
2038If rule named RULE is hit, DNS queries to specified domain are
2039I<temporarily> blocked. Intended to be used with rules that check
2040RBL return codes for specific blocked status.  For example:
2041
2042  urirhssub URIBL_BLOCKED multi.uribl.com. A 1
2043  dns_block_rule URIBL_BLOCKED multi.uribl.com
2044
2045Block status is maintained across all processes by empty statefile named
2046"dnsblock_multi.uribl.com" in global state dir:
2047home_dir_for_helpers/.spamassassin, $HOME/.spamassassin,
2048/var/lib/spamassassin (localstate), depending which is found and writable.
2049
2050=cut
2051
2052  push (@cmds, {
2053    setting => 'dns_block_rule',
2054    is_admin => 1,
2055    type => $CONF_TYPE_HASH_KEY_VALUE,
2056    code => sub {
2057      my ($self, $key, $value, $line) = @_;
2058      local($1,$2);
2059      defined $value && $value =~ /^(\S+)\s+(.+)$/
2060        or return $INVALID_VALUE;
2061      my $rule = $1;
2062      foreach my $domain (split(/\s+/, lc($2))) {
2063        $domain =~ s/^\.//; $domain =~ s/\.\z//;  # strip dots
2064        if ($domain !~ /^[a-z0-9.-]+$/) {
2065          return $INVALID_VALUE;
2066        }
2067        # will end up in filename, do not allow / etc in above regex!
2068        $domain = untaint_var($domain);
2069        # Check.pm check_main() uses this
2070        $self->{dns_block_rule}{$rule}{$domain} = 1;
2071        # bgsend_and_start_lookup() uses this
2072        $self->{dns_block_rule_domains}{$domain} = $domain;
2073      }
2074    }
2075  });
2076
2077=item dns_block_time   (default: 300)
2078
2079dns_block_rule query blockage will last this many seconds.
2080
2081=cut
2082
2083  push (@cmds, {
2084    setting => 'dns_block_time',
2085    is_admin => 1,
2086    default => 300,
2087    type => $CONF_TYPE_NUMERIC,
2088  });
2089
2090=back
2091
2092=head2 LEARNING OPTIONS
2093
2094=over 4
2095
2096=item use_learner ( 0 | 1 )		(default: 1)
2097
2098Whether to use any machine-learning classifiers with SpamAssassin, such as the
2099default 'BAYES_*' rules.  Setting this to 0 will disable use of any and all
2100human-trained classifiers.
2101
2102=cut
2103
2104  push (@cmds, {
2105    setting => 'use_learner',
2106    default => 1,
2107    type => $CONF_TYPE_BOOL,
2108  });
2109
2110=item use_bayes ( 0 | 1 )		(default: 1)
2111
2112Whether to use the naive-Bayesian-style classifier built into
2113SpamAssassin.  This is a master on/off switch for all Bayes-related
2114operations.
2115
2116=cut
2117
2118  push (@cmds, {
2119    setting => 'use_bayes',
2120    default => 1,
2121    type => $CONF_TYPE_BOOL,
2122  });
2123
2124=item use_bayes_rules ( 0 | 1 )		(default: 1)
2125
2126Whether to use rules using the naive-Bayesian-style classifier built
2127into SpamAssassin.  This allows you to disable the rules while leaving
2128auto and manual learning enabled.
2129
2130=cut
2131
2132  push (@cmds, {
2133    setting => 'use_bayes_rules',
2134    default => 1,
2135    type => $CONF_TYPE_BOOL,
2136  });
2137
2138=item bayes_auto_learn ( 0 | 1 )      (default: 1)
2139
2140Whether SpamAssassin should automatically feed high-scoring mails (or
2141low-scoring mails, for non-spam) into its learning systems.  The only
2142learning system supported currently is a naive-Bayesian-style classifier.
2143
2144See the documentation for the
2145C<Mail::SpamAssassin::Plugin::AutoLearnThreshold> plugin module
2146for details on how Bayes auto-learning is implemented by default.
2147
2148=cut
2149
2150  push (@cmds, {
2151    setting => 'bayes_auto_learn',
2152    default => 1,
2153    type => $CONF_TYPE_BOOL,
2154  });
2155
2156=item bayes_token_sources  (default: header visible invisible uri)
2157
2158Controls which sources in a mail message can contribute tokens (e.g. words,
2159phrases, etc.) to a Bayes classifier. The argument is a space-separated list
of keywords (I<header>, I<visible>, I<invisible>, I<uri>, I<mimepart>), each
2161of which may be prefixed by a I<no> to indicate its exclusion. Additionally
2162two reserved keywords are allowed: I<all> and I<none> (or: I<noall>). The list
2163of keywords is processed sequentially: a keyword I<all> adds all available
2164keywords to a set being built, a I<none> or I<noall> clears the set, other
2165non-negated keywords are added to the set, and negated keywords are removed
2166from the set. Keywords are case-insensitive.
2167
2168The default set is: I<header> I<visible> I<invisible> I<uri>, which is
2169equivalent for example to: I<All> I<NoMIMEpart>. The reason why I<mimepart>
2170is not currently in a default set is that it is a newer source (introduced
2171with SpamAssassin version 3.4.1) and not much experience has yet been gathered
2172regarding its usefulness.
2173
2174See also option C<bayes_ignore_header> for a fine-grained control on individual
2175header fields under the umbrella of a more general keyword I<header> here.
2176
2177Keywords imply the following data sources:
2178
2179=over 4
2180
2181=item I<header> - tokens collected from a message header section
2182
2183=item I<visible> - words from visible text (plain or HTML) in a message body
2184
2185=item I<invisible> - hidden/invisible text in HTML parts of a message body
2186
2187=item I<uri> - URIs collected from a message body
2188
2189=item I<mimepart> - digests (hashes) of all MIME parts (textual or non-textual) of a message, computed after Base64 and quoted-printable decoding, suffixed by their Content-Type
2190
2191=item I<all> - adds all the above keywords to the set being assembled
2192
2193=item I<none> or I<noall> - removes all keywords from the set
2194
2195=back
2196
2197The C<bayes_token_sources> directive may appear multiple times, its keywords
2198are interpreted sequentially, adding or removing items from the final set
2199as they appear in their order in C<bayes_token_sources> directive(s).
2200
2201=cut
2202
2203  push (@cmds, {
2204    setting => 'bayes_token_sources',
2205    default => { map(($_,1), qw(header visible invisible uri)) },  # mimepart
2206    type => $CONF_TYPE_HASH_KEY_VALUE,
2207    code => sub {
2208      my ($self, $key, $value, $line) = @_;
2209      return $MISSING_REQUIRED_VALUE  if $value eq '';
2210      my $h = ($self->{bayes_token_sources} ||= {});
2211      my %all_kw = map(($_,1), qw(header visible invisible uri mimepart));
2212      foreach (split(/\s+/, lc $value)) {
2213        if (/^(none|noall)\z/) {
2214          %$h = ();
2215        } elsif ($_ eq 'all') {
2216          %$h = %all_kw;
2217        } elsif (/^(no)?(.+)\z/s && exists $all_kw{$2}) {
2218          $h->{$2} = defined $1 ? 0 : 1;
2219        } else {
2220          return $INVALID_VALUE;
2221        }
2222      }
2223    }
2224  });
2225
2226=item bayes_ignore_header header_name
2227
2228If you receive mail filtered by upstream mail systems, like
2229a spam-filtering ISP or mailing list, and that service adds
2230new headers (as most of them do), these headers may provide
2231inappropriate cues to the Bayesian classifier, allowing it
2232to take a "short cut". To avoid this, list the headers using this
2233setting.  Example:
2234
2235        bayes_ignore_header X-Upstream-Spamfilter
2236        bayes_ignore_header X-Upstream-SomethingElse
2237
2238=cut
2239
2240  push (@cmds, {
2241    setting => 'bayes_ignore_header',
2242    default => [],
2243    type => $CONF_TYPE_STRINGLIST,
2244    code => sub {
2245      my ($self, $key, $value, $line) = @_;
2246      if ($value eq '') {
2247        return $MISSING_REQUIRED_VALUE;
2248      }
2249      push (@{$self->{bayes_ignore_headers}}, split(/\s+/, $value));
2250    }
2251  });
2252
2253=item bayes_ignore_from user@example.com
2254
2255Bayesian classification and autolearning will not be performed on mail
2256from the listed addresses.  Program C<sa-learn> will also ignore the
2257listed addresses if it is invoked using the C<--use-ignores> option.
2258One or more addresses can be listed, see C<welcomelist_from>.
2259
2260Spam messages from certain senders may contain many words that
2261frequently occur in ham.  For example, one might read messages from a
2262preferred bookstore but also get unwanted spam messages from other
2263bookstores.  If the unwanted messages are learned as spam then any
2264messages discussing books, including the preferred bookstore and
2265antiquarian messages would be in danger of being marked as spam.  The
2266addresses of the annoying bookstores would be listed.  (Assuming they
2267were halfway legitimate and didn't send you mail through myriad
2268affiliates.)
2269
2270Those who have pieces of spam in legitimate messages or otherwise
2271receive ham messages containing potentially spammy words might fear
2272that some spam messages might be in danger of being marked as ham.
2273The addresses of the spam mailing lists, correspondents, etc.  would
2274be listed.
2275
2276=cut
2277
2278  push (@cmds, {
2279    setting => 'bayes_ignore_from',
2280    type => $CONF_TYPE_ADDRLIST,
2281  });
2282
2283=item bayes_ignore_to user@example.com
2284
2285Bayesian classification and autolearning will not be performed on mail
2286to the listed addresses.  See C<bayes_ignore_from> for details.
2287
2288=cut
2289
2290  push (@cmds, {
2291    setting => 'bayes_ignore_to',
2292    type => $CONF_TYPE_ADDRLIST,
2293  });
2294
2295=item bayes_min_ham_num			(Default: 200)
2296
2297=item bayes_min_spam_num		(Default: 200)
2298
2299To be accurate, the Bayes system does not activate until a certain number of
2300ham (non-spam) and spam have been learned.  The default is 200 of each ham and
2301spam, but you can tune these up or down with these two settings.
2302
2303=cut
2304
2305  push (@cmds, {
2306    setting => 'bayes_min_ham_num',
2307    default => 200,
2308    type => $CONF_TYPE_NUMERIC,
2309  });
2310  push (@cmds, {
2311    setting => 'bayes_min_spam_num',
2312    default => 200,
2313    type => $CONF_TYPE_NUMERIC,
2314  });
2315
2316=item bayes_learn_during_report         (Default: 1)
2317
2318The Bayes system will, by default, learn any reported messages
2319(C<spamassassin -r>) as spam.  If you do not want this to happen, set
2320this option to 0.
2321
2322=cut
2323
2324  push (@cmds, {
2325    setting => 'bayes_learn_during_report',
2326    default => 1,
2327    type => $CONF_TYPE_BOOL,
2328  });
2329
2330=item bayes_sql_override_username
2331
2332Used by BayesStore::SQL storage implementation.
2333
If this option is set, the BayesStore::SQL module will override the set
2335username with the value given.  This could be useful for implementing global or
2336group bayes databases.
2337
2338=cut
2339
2340  push (@cmds, {
2341    setting => 'bayes_sql_override_username',
2342    default => '',
2343    type => $CONF_TYPE_STRING,
2344  });
2345
2346=item bayes_use_hapaxes		(default: 1)
2347
2348Should the Bayesian classifier use hapaxes (words/tokens that occur only
2349once) when classifying?  This produces significantly better hit-rates.
2350
2351=cut
2352
2353  push (@cmds, {
2354    setting => 'bayes_use_hapaxes',
2355    default => 1,
2356    type => $CONF_TYPE_BOOL,
2357  });
2358
2359=item bayes_journal_max_size		(default: 102400)
2360
2361SpamAssassin will opportunistically sync the journal and the database.
2362It will do so once a day, but will sync more often if the journal file
2363size goes above this setting, in bytes.  If set to 0, opportunistic
2364syncing will not occur.
2365
2366=cut
2367
2368  push (@cmds, {
2369    setting => 'bayes_journal_max_size',
2370    default => 102400,
2371    type => $CONF_TYPE_NUMERIC,
2372  });
2373
2374=item bayes_expiry_max_db_size		(default: 150000)
2375
2376What should be the maximum size of the Bayes tokens database?  When expiry
2377occurs, the Bayes system will keep either 75% of the maximum value, or
2378100,000 tokens, whichever has a larger value.  150,000 tokens is roughly
equivalent to an 8Mb database file.
2380
2381=cut
2382
2383  push (@cmds, {
2384    setting => 'bayes_expiry_max_db_size',
2385    default => 150000,
2386    type => $CONF_TYPE_NUMERIC,
2387  });
2388
2389=item bayes_auto_expire       		(default: 1)
2390
2391If enabled, the Bayes system will try to automatically expire old tokens
2392from the database.  Auto-expiry occurs when the number of tokens in the
2393database surpasses the bayes_expiry_max_db_size value. If a bayes datastore
2394backend does not implement individual key/value expirations, the setting
2395is silently ignored.
2396
2397=cut
2398
2399  push (@cmds, {
2400    setting => 'bayes_auto_expire',
2401    default => 1,
2402    type => $CONF_TYPE_BOOL,
2403  });
2404
2405=item bayes_token_ttl       		(default: 3w, i.e. 3 weeks)
2406
2407Time-to-live / expiration time in seconds for tokens kept in a Bayes database.
2408A numeric value is optionally suffixed by a time unit (s, m, h, d, w,
2409indicating seconds (default), minutes, hours, days, weeks).
2410
2411If bayes_auto_expire is true and a Bayes datastore backend supports it
2412(currently only Redis), this setting controls deletion of expired tokens
2413from a bayes database. The value is observed on a best-effort basis, exact
2414timing promises are not necessarily kept. If a bayes datastore backend
2415does not implement individual key/value expirations, the setting is silently
2416ignored.
2417
2418=cut
2419
2420  push (@cmds, {
2421    setting => 'bayes_token_ttl',
2422    default => 3*7*24*60*60,  # seconds (3 weeks)
2423    type => $CONF_TYPE_DURATION,
2424  });
2425
2426=item bayes_seen_ttl       		(default: 8d, i.e. 8 days)
2427
2428Time-to-live / expiration time in seconds for 'seen' entries
2429(i.e. mail message digests with their status) kept in a Bayes database.
2430A numeric value is optionally suffixed by a time unit (s, m, h, d, w,
2431indicating seconds (default), minutes, hours, days, weeks).
2432
2433If bayes_auto_expire is true and a Bayes datastore backend supports it
2434(currently only Redis), this setting controls deletion of expired 'seen'
2435entries from a bayes database. The value is observed on a best-effort basis,
2436exact timing promises are not necessarily kept. If a bayes datastore backend
2437does not implement individual key/value expirations, the setting is silently
2438ignored.
2439
2440=cut
2441
2442  push (@cmds, {
2443    setting => 'bayes_seen_ttl',
2444    default => 8*24*60*60,  # seconds (8 days)
2445    type => $CONF_TYPE_DURATION,
2446  });
2447
2448=item bayes_learn_to_journal  	(default: 0)
2449
2450If this option is set, whenever SpamAssassin does Bayes learning, it
2451will put the information into the journal instead of directly into the
2452database.  This lowers contention for locking the database to execute
2453an update, but will also cause more access to the journal and cause a
2454delay before the updates are actually committed to the Bayes database.
2455
2456=cut
2457
2458  push (@cmds, {
2459    setting => 'bayes_learn_to_journal',
2460    default => 0,
2461    type => $CONF_TYPE_BOOL,
2462  });
2463
2464=back
2465
2466=head2 MISCELLANEOUS OPTIONS
2467
2468=over 4
2469
2470=item time_limit n   (default: 300)
2471
2472Specifies a limit on elapsed time in seconds that SpamAssassin is allowed
2473to spend before providing a result. The value may be fractional and must
2474not be negative, zero is interpreted as unlimited. The default is 300
2475seconds for consistency with the spamd default setting of --timeout-child .
2476
2477This is a best-effort advisory setting, processing will not be abruptly
2478aborted at an arbitrary point in processing when the time limit is exceeded,
2479but only on reaching one of locations in the program flow equipped with a
2480time test. Currently equipped with the test are the main checking loop,
2481asynchronous DNS lookups, plugins which are calling external programs.
2482Rule evaluation is guarded by starting a timer (alarm) on each set of
2483compiled rules.
2484
2485When a message is passed to Mail::SpamAssassin::parse, a deadline time
2486is established as a sum of current time and the C<time_limit> setting.
2487
2488This deadline may also be specified by a caller through an option
2489'master_deadline' in $suppl_attrib on a call to parse(), possibly providing
2490a more accurate deadline taking into account past and expected future
2491processing of a message in a mail filtering setup. If both the config
2492option as well as a 'master_deadline' option in a call are provided,
2493the shorter time limit of the two is used (since version 3.3.2).
2494Note that spamd (and possibly third-party callers of SpamAssassin) will
2495supply the 'master_deadline' option in a call based on its --timeout-child
2496option (or equivalent), unlike the command line C<spamassassin>, which has
2497no such command line option.
2498
2499When a time limit is exceeded, most of the remaining tests will be skipped,
2500as well as auto-learning. Whatever tests fired so far will determine the
2501final score. The behaviour is similar to short-circuiting with attribute 'on',
2502as implemented by a Shortcircuit plugin. A synthetic hit on a rule named
2503TIME_LIMIT_EXCEEDED with a near-zero default score is generated, so that
2504the report will reflect the event. A score for TIME_LIMIT_EXCEEDED may
2505be provided explicitly in a configuration file, for example to achieve
2506whitelisting or blacklisting effect for messages with long processing times.
2507
2508The C<time_limit> option is a useful protection against excessive processing
2509time on certain degenerate or unusually long or complex mail messages, as well
2510as against some DoS attacks. It is also needed in time-critical pre-queue
2511filtering setups (e.g. milter, proxy, integration with MTA), where message
2512processing must finish before a SMTP client times out.  RFC 5321 prescribes
2513in section 4.5.3.2.6 the 'DATA Termination' time limit of 10 minutes,
2514although it is not unusual to see some SMTP clients abort sooner on waiting
2515for a response. A sensible C<time_limit> for a pre-queue filtering setup is
2516maybe 50 seconds, assuming that clients are willing to wait at least a minute.
2517
2518=cut
2519
2520  push (@cmds, {
2521    setting => 'time_limit',
2522    default => 300,
2523    type => $CONF_TYPE_DURATION,
2524  });
2525
2526=item lock_method type
2527
2528Select the file-locking method used to protect database files on-disk. By
2529default, SpamAssassin uses an NFS-safe locking method on UNIX; however, if you
2530are sure that the database files you'll be using for Bayes and AWL storage will
2531never be accessed over NFS, a non-NFS-safe locking system can be selected.
2532
2533This will be quite a bit faster, but may risk file corruption if the files are
2534ever accessed by multiple clients at once, and one or more of them is accessing
2535them through an NFS filesystem.
2536
2537Note that different platforms require different locking systems.
2538
2539The supported locking systems for C<type> are as follows:
2540
2541=over 4
2542
2543=item I<nfssafe> - an NFS-safe locking system
2544
2545=item I<flock> - simple UNIX C<flock()> locking
2546
2547=item I<win32> - Win32 locking using C<sysopen (..., O_CREAT|O_EXCL)>.
2548
2549=back
2550
2551nfssafe and flock are only available on UNIX, and win32 is only available
2552on Windows.  By default, SpamAssassin will choose either nfssafe or
2553win32 depending on the platform in use.
2554
2555=cut
2556
2557  push (@cmds, {
2558    setting => 'lock_method',
2559    default => '',
2560    type => $CONF_TYPE_STRING,
2561    code => sub {
2562      my ($self, $key, $value, $line) = @_;
2563      if ($value !~ /^(nfssafe|flock|win32)$/) {
2564        return $INVALID_VALUE;
2565      }
2566
2567      $self->{lock_method} = $value;
2568      # recreate the locker
2569      $self->{main}->create_locker();
2570    }
2571  });
2572
2573=item fold_headers ( 0 | 1 )        (default: 1)
2574
2575By default, headers added by SpamAssassin will be whitespace folded.
2576In other words, they will be broken up into multiple lines instead of
2577one very long one and each continuation line will have a tabulator
2578prepended to mark it as a continuation of the preceding one.
2579
2580The automatic wrapping can be disabled here.  Note that this can generate very
2581long lines.  RFC 2822 required that header lines do not exceed 998 characters
2582(not counting the final CRLF).
2583
2584=cut
2585
2586  push (@cmds, {
2587    setting => 'fold_headers',
2588    default => 1,
2589    type => $CONF_TYPE_BOOL,
2590  });
2591
2592=item report_safe_copy_headers header_name ...
2593
2594If using C<report_safe>, a few of the headers from the original message
2595are copied into the wrapper header (From, To, Cc, Subject, Date, etc.)
2596If you want to have other headers copied as well, you can add them
2597using this option.  You can specify multiple headers on the same line,
2598separated by spaces, or you can just use multiple lines.
2599
2600=cut
2601
2602  push (@cmds, {
2603    setting => 'report_safe_copy_headers',
2604    default => [],
2605    type => $CONF_TYPE_STRINGLIST,
2606    code => sub {
2607      my ($self, $key, $value, $line) = @_;
2608      if ($value eq '') {
2609        return $MISSING_REQUIRED_VALUE;
2610      }
2611      push(@{$self->{report_safe_copy_headers}}, split(/\s+/, $value));
2612    }
2613  });
2614
2615=item envelope_sender_header Name-Of-Header
2616
2617SpamAssassin will attempt to discover the address used in the 'MAIL FROM:'
2618phase of the SMTP transaction that delivered this message, if this data has
2619been made available by the SMTP server.  This is used in the C<EnvelopeFrom>
2620pseudo-header, and for various rules such as SPF checking.
2621
2622By default, various MTAs will use different headers, such as the following:
2623
2624    X-Envelope-From
2625    Envelope-Sender
2626    X-Sender
2627    Return-Path
2628
2629SpamAssassin will attempt to use these, if some heuristics (such as the header
2630placement in the message, or the absence of fetchmail signatures) appear to
2631indicate that they are safe to use.  However, it may choose the wrong headers
2632in some mailserver configurations.  (More discussion of this can be found
2633in bug 2142 and bug 4747 in the SpamAssassin BugZilla.)
2634
2635To avoid this heuristic failure, the C<envelope_sender_header> setting may be
2636helpful.  Name the header that your MTA or MDA adds to messages containing the
2637address used at the MAIL FROM step of the SMTP transaction.
2638
2639If the header in question contains C<E<lt>> or C<E<gt>> characters at the start
2640and end of the email address in the right-hand side, as in the SMTP
2641transaction, these will be stripped.
2642
If the header is not found in a message, or if its value does not contain an
2644C<@> sign, SpamAssassin will issue a warning in the logs and fall back to its
2645default heuristics.
2646
2647(Note for MTA developers: we would prefer if the use of a single header be
2648avoided in future, since that precludes 'downstream' spam scanning.
2649C<http://wiki.apache.org/spamassassin/EnvelopeSenderInReceived> details a
2650better proposal, storing the envelope sender at each hop in the C<Received>
2651header.)
2652
2653example:
2654
2655    envelope_sender_header X-SA-Exim-Mail-From
2656
2657=cut
2658
2659  push (@cmds, {
2660    setting => 'envelope_sender_header',
2661    default => undef,
2662    type => $CONF_TYPE_STRING,
2663  });
2664
2665=item describe SYMBOLIC_TEST_NAME description ...
2666
2667Used to describe a test.  This text is shown to users in the detailed report.
2668
2669Note that test names which begin with '__' are reserved for meta-match
2670sub-rules, and are not scored or listed in the 'tests hit' reports.
2671
2672Also note that by convention, rule descriptions should be limited in
2673length to no more than 50 characters.
2674
2675=cut
2676
2677  push (@cmds, {
2678    command => 'describe',
2679    setting => 'descriptions',
2680    type => $CONF_TYPE_HASH_KEY_VALUE,
2681  });
2682
2683=item report_charset CHARSET		(default: UTF-8)
2684
2685Set the MIME Content-Type charset used for the text/plain report which
2686is attached to spam mail messages.
2687
2688=cut
2689
2690  push (@cmds, {
2691    setting => 'report_charset',
2692    default => 'UTF-8',
2693    type => $CONF_TYPE_STRING,
2694  });
2695
2696=item report ...some text for a report...
2697
2698Set the report template which is attached to spam mail messages.  See the
2699C<10_default_prefs.cf> configuration file in C</usr/share/spamassassin> for an
2700example.
2701
2702If you change this, try to keep it under 78 columns. Each C<report>
2703line appends to the existing template, so use C<clear_report_template>
2704to restart.
2705
2706Tags can be included as explained above.
2707
2708=cut
2709
2710  push (@cmds, {
2711    command => 'report',
2712    setting => 'report_template',
2713    default => '',
2714    type => $CONF_TYPE_TEMPLATE,
2715  });
2716
2717=item clear_report_template
2718
2719Clear the report template.
2720
2721=cut
2722
2723  push (@cmds, {
2724    command => 'clear_report_template',
2725    setting => 'report_template',
2726    type => $CONF_TYPE_NOARGS,
2727    code => \&Mail::SpamAssassin::Conf::Parser::set_template_clear
2728  });
2729
2730=item report_contact ...text of contact address...
2731
2732Set what _CONTACTADDRESS_ is replaced with in the above report text.
2733By default, this is 'the administrator of that system', since the hostname
2734of the system the scanner is running on is also included.
2735
2736=cut
2737
2738  push (@cmds, {
2739    setting => 'report_contact',
2740    default => 'the administrator of that system',
2741    type => $CONF_TYPE_STRING,
2742  });
2743
2744=item report_hostname ...hostname to use...
2745
2746Set what _HOSTNAME_ is replaced with in the above report text.
2747By default, this is determined dynamically as whatever the host running
2748SpamAssassin calls itself.
2749
2750=cut
2751
2752  push (@cmds, {
2753    setting => 'report_hostname',
2754    default => '',
2755    type => $CONF_TYPE_STRING,
2756  });
2757
2758=item unsafe_report ...some text for a report...
2759
2760Set the report template which is attached to spam mail messages which contain a
2761non-text/plain part.  See the C<10_default_prefs.cf> configuration file in
2762C</usr/share/spamassassin> for an example.
2763
Each C<unsafe_report> line appends to the existing template, so use
2765C<clear_unsafe_report_template> to restart.
2766
2767Tags can be used in this template (see above for details).
2768
2769=cut
2770
2771  push (@cmds, {
2772    command => 'unsafe_report',
2773    setting => 'unsafe_report_template',
2774    default => '',
2775    type => $CONF_TYPE_TEMPLATE,
2776  });
2777
2778=item clear_unsafe_report_template
2779
2780Clear the unsafe_report template.
2781
2782=cut
2783
2784  push (@cmds, {
2785    command => 'clear_unsafe_report_template',
2786    setting => 'unsafe_report_template',
2787    type => $CONF_TYPE_NOARGS,
2788    code => \&Mail::SpamAssassin::Conf::Parser::set_template_clear
2789  });
2790
2791=item mbox_format_from_regex
2792
2793Set a specific regular expression to be used for mbox file From separators.
2794
2795For example, this setting will allow sa-learn to process emails stored in
2796a kmail 2 mbox:
2797
  mbox_format_from_regex /^From \S+  ?[[:upper:]][[:lower:]]{2}(?:, \d\d [[:upper:]][[:lower:]]{2} \d{4} [0-2]\d:\d\d:\d\d [+-]\d{4}| [[:upper:]][[:lower:]]{2} [ 1-3]\d [ 0-2]\d:\d\d:\d\d \d{4})/
2799
2800
2801=cut
2802
2803  push (@cmds, {
2804    setting => 'mbox_format_from_regex',
2805    type => $CONF_TYPE_STRING
2806  });
2807
2808
2809=item parse_dkim_uris ( 0 | 1 ) (default: 1)
2810
If this option is set to 1 and the message contains DKIM headers, the
headers will be parsed for URIs to process alongside URIs found in the body
with some rules and modules (e.g. URIDNSBL).
2812
2813=cut
2814
2815  push (@cmds, {
2816    setting => 'parse_dkim_uris',
2817    default => 1,
2818    type => $CONF_TYPE_BOOL,
2819  });
2820
2821=back
2822
2823=head1 RULE DEFINITIONS AND PRIVILEGED SETTINGS
2824
2825These settings differ from the ones above, in that they are considered
2826'privileged'.  Only users running C<spamassassin> from their procmailrc's or
2827forward files, or sysadmins editing a file in C</etc/mail/spamassassin>, can
2828use them.   C<spamd> users cannot use them in their C<user_prefs> files, for
2829security and efficiency reasons, unless C<allow_user_rules> is enabled (and
2830then, they may only add rules from below).
2831
2832=over 4
2833
2834=item allow_user_rules ( 0 | 1 )		(default: 0)
2835
2836This setting allows users to create rules (and only rules) in their
2837C<user_prefs> files for use with C<spamd>. It defaults to off, because
2838this could be a severe security hole. It may be possible for users to
2839gain root level access if C<spamd> is run as root. It is NOT a good
2840idea, unless you have some other way of ensuring that users' tests are
2841safe. Don't use this unless you are certain you know what you are
2842doing. Furthermore, this option causes spamassassin to recompile all
2843the tests each time it processes a message for a user with a rule in
2844his/her C<user_prefs> file, which could have a significant effect on
2845server load. It is not recommended.
2846
2847Note that it is not currently possible to use C<allow_user_rules> to modify an
2848existing system rule from a C<user_prefs> file with C<spamd>.
2849
2850=cut
2851
2852  push (@cmds, {
2853    setting => 'allow_user_rules',
2854    is_priv => 1,
2855    default => 0,
2856    type => $CONF_TYPE_BOOL,
2857    code => sub {
2858      my ($self, $key, $value, $line) = @_;
2859      if ($value eq '') {
2860        return $MISSING_REQUIRED_VALUE;
2861      }
2862      elsif ($value !~ /^[01]$/) {
2863        return $INVALID_VALUE;
2864      }
2865
2866      $self->{allow_user_rules} = $value+0;
2867      dbg("config: " . ($self->{allow_user_rules} ? "allowing":"not allowing") . " user rules!");
2868    }
2869  });
2870
2871=item redirector_pattern	/pattern/modifiers
2872
2873A regex pattern that matches both the redirector site portion, and
2874the target site portion of a URI.
2875
2876Note: The target URI portion must be surrounded in parentheses and
2877      no other part of the pattern may create a backreference.
2878
2879Example: http://chkpt.zdnet.com/chkpt/whatever/spammer.domain/yo/dude
2880
2881  redirector_pattern	/^https?:\/\/(?:opt\.)?chkpt\.zdnet\.com\/chkpt\/\w+\/(.*)$/i
2882
2883=cut
2884
2885  push (@cmds, {
2886    setting => 'redirector_pattern',
2887    is_priv => 1,
2888    default => [],
2889    type => $CONF_TYPE_STRINGLIST,
2890    code => sub {
2891      my ($self, $key, $value, $line) = @_;
2892      $value =~ s/^\s+//;
2893      if ($value eq '') {
2894	return $MISSING_REQUIRED_VALUE;
2895      }
2896      my ($rec, $err) = compile_regexp($value, 1);
2897      if (!$rec) {
2898        dbg("config: invalid redirector_pattern '$value': $err");
2899	return $INVALID_VALUE;
2900      }
2901      push @{$self->{main}->{conf}->{redirector_patterns}}, $rec;
2902    }
2903  });
2904
2905=item header SYMBOLIC_TEST_NAME header op /pattern/modifiers	[if-unset: STRING]
2906
2907Define a test.  C<SYMBOLIC_TEST_NAME> is a symbolic test name, such as
2908'FROM_ENDS_IN_NUMS'.  C<header> is the name of a mail header field,
2909such as 'Subject', 'To', 'From', etc.  Header field names are matched
2910case-insensitively (conforming to RFC 5322 section 1.2.2), except for
2911all-capitals metaheader fields such as ALL, MESSAGEID, ALL-TRUSTED.
2912
2913Appending a modifier C<:raw> to a header field name will inhibit decoding of
2914quoted-printable or base-64 encoded strings, and will preserve all whitespace
2915inside the header string.  The C<:raw> may also be applied to pseudo-headers
2916e.g. C<ALL:raw> will return a pristine (unmodified) header section.
2917
2918Appending a modifier C<:addr> to a header field name will cause everything
2919except the first email address to be removed from the header field.  It is
2920mainly applicable to header fields 'From', 'Sender', 'To', 'Cc' along with
2921their 'Resent-*' counterparts, and the 'Return-Path'.
2922
2923Appending a modifier C<:name> to a header field name will cause everything
2924except the first display name to be removed from the header field. It is
2925mainly applicable to header fields containing a single mail address: 'From',
2926'Sender', along with their 'Resent-From' and 'Resent-Sender' counterparts.
2927
2928It is syntactically permitted to append more than one modifier to a header
2929field name, although currently most combinations achieve no additional effect,
2930for example C<From:addr:raw> or C<From:raw:addr> is currently the same as
2931C<From:addr> .
2932
2933For example, appending C<:addr> to a header name will result in example@foo
2934in all of the following cases:
2935
2936=over 4
2937
2938=item example@foo
2939
2940=item example@foo (Foo Blah)
2941
2942=item example@foo, example@bar
2943
2944=item display: example@foo (Foo Blah), example@bar ;
2945
2946=item Foo Blah E<lt>example@fooE<gt>
2947
2948=item "Foo Blah" E<lt>example@fooE<gt>
2949
2950=item "'Foo Blah'" E<lt>example@fooE<gt>
2951
2952=back
2953
2954For example, appending C<:name> to a header name will result in "Foo Blah"
2955(without quotes) in all of the following cases:
2956
2957=over 4
2958
2959=item example@foo (Foo Blah)
2960
2961=item example@foo (Foo Blah), example@bar
2962
2963=item display: example@foo (Foo Blah), example@bar ;
2964
2965=item Foo Blah E<lt>example@fooE<gt>
2966
2967=item "Foo Blah" E<lt>example@fooE<gt>
2968
2969=item "'Foo Blah'" E<lt>example@fooE<gt>
2970
2971=back
2972
2973There are several special pseudo-headers that can be specified:
2974
2975=over 4
2976
2977=item C<ALL> can be used to mean the text of all the message's headers.
2978Note that all whitespace inside the headers, at line folds, is currently
2979compressed into a single space (' ') character. To obtain a pristine
2980(unmodified) header section, use C<ALL:raw> - the :raw modifier is documented
above. Similar pseudo-headers return the headers added by specific relays:
ALL-TRUSTED, ALL-INTERNAL, ALL-UNTRUSTED, ALL-EXTERNAL.
2983
2984=item C<ToCc> can be used to mean the contents of both the 'To' and 'Cc'
2985headers.
2986
2987=item C<EnvelopeFrom> is the address used in the 'MAIL FROM:' phase of the SMTP
2988transaction that delivered this message, if this data has been made available
2989by the SMTP server.  See C<envelope_sender_header> for more information
2990on how to set this.
2991
2992=item C<MESSAGEID> is a symbol meaning all Message-Id's found in the message;
2993some mailing list software moves the real 'Message-Id' to 'Resent-Message-Id'
2994or to 'X-Message-Id', then uses its own one in the 'Message-Id' header.
2995The value returned for this symbol is the text from all 3 headers, separated
2996by newlines.
2997
2998=item C<X-Spam-Relays-Untrusted>, C<X-Spam-Relays-Trusted>,
2999C<X-Spam-Relays-Internal> and C<X-Spam-Relays-External> represent a portable,
3000pre-parsed representation of the message's network path, as recorded in the
3001Received headers, divided into 'trusted' vs 'untrusted' and 'internal' vs
3002'external' sets.  See C<http://wiki.apache.org/spamassassin/TrustedRelays> for
3003more details.
3004
3005=back
3006
3007C<op> is either C<=~> (contains regular expression) or C<!~> (does not contain
3008regular expression), and C<pattern> is a valid Perl regular expression, with
3009C<modifiers> as regexp modifiers in the usual style.   Note that multi-line
3010rules are not supported, even if you use C<x> as a modifier.  Also note that
3011the C<#> character must be escaped (C<\#>) or else it will be considered to be
3012the start of a comment and not part of the regexp.
3013
3014If the header specified matches multiple headers, their text will be
3015concatenated with embedded \n's. Therefore you may wish to use C</m> if you
3016use C<^> or C<$> in your regular expression.
3017
3018If the C<[if-unset: STRING]> tag is present, then C<STRING> will
3019be used if the header is not found in the mail message.
3020
3021Test names must not start with a number, and must contain only
3022alphanumerics and underscores.  It is suggested that lower-case characters
3023not be used, and names have a length of no more than 22 characters,
3024as an informal convention.  Dashes are not allowed.
3025
3026Note that test names which begin with '__' are reserved for meta-match
3027sub-rules, and are not scored or listed in the 'tests hit' reports.
3028Test names which begin with 'T_' are reserved for tests which are
3029undergoing QA, and these are given a very low score.
3030
3031If you add or modify a test, please be sure to run a sanity check afterwards
3032by running C<spamassassin --lint>.  This will avoid confusing error
3033messages, or other tests being skipped as a side-effect.
3034
3035=item header SYMBOLIC_TEST_NAME exists:header_field_name
3036
3037Define a header field existence test.  C<header_field_name> is the name
3038of a header field to test for existence.  Not to be confused with a
3039test for a nonempty header field body, which can be implemented by a
3040C<header SYMBOLIC_TEST_NAME header =~ /\S/> rule as described above.
3041
3042=item header SYMBOLIC_TEST_NAME eval:name_of_eval_method([arguments])
3043
3044Define a header eval test.  C<name_of_eval_method> is the name of
3045a method registered by a C<Mail::SpamAssassin::Plugin> object.
3046C<arguments> are optional arguments to the function call.
3047
3048=item header SYMBOLIC_TEST_NAME eval:check_rbl('set', 'zone' [, 'sub-test'])
3049
3050Check a DNSBL (a DNS blacklist or whitelist).  This will retrieve Received:
3051headers from the message, extract the IP addresses, select which ones are
3052'untrusted' based on the C<trusted_networks> logic, and query that DNSBL
zone.  There are a few things to note:
3054
3055=over 4
3056
3057=item duplicated or private IPs
3058
3059Duplicated IPs are only queried once and reserved IPs are not queried.
3060Private IPs are those listed in
3061C<https://www.iana.org/assignments/ipv4-address-space>,
3062C<http://duxcw.com/faq/network/privip.htm>,
3063C<http://duxcw.com/faq/network/autoip.htm>, or
3064C<https://tools.ietf.org/html/rfc5735> as private.
3065
3066=item the 'set' argument
3067
3068This is used as a 'zone ID'.  If you want to look up a multiple-meaning zone
3069like SORBS, you can then query the results from that zone using it;
3070but all check_rbl_sub() calls must use that zone ID.
3071
3072Also, if more than one IP address gets a DNSBL hit for a particular rule, it
3073does not affect the score because rules only trigger once per message.
3074
3075=item the 'zone' argument
3076
3077This is the root zone of the DNSBL.
3078
3079The domain name is considered to be a fully qualified domain name
3080(i.e. not subject to DNS resolver's search or default domain options).
3081No trailing period is needed, and will be removed if specified.
3082
3083=item the 'sub-test' argument
3084
3085This optional argument behaves the same as the sub-test argument in
3086C<check_rbl_sub()> below.
3087
3088=item selecting all IPs except for the originating one
3089
3090This is accomplished by placing '-notfirsthop' at the end of the set name.
3091This is useful for querying against DNS lists which list dialup IP
3092addresses; the first hop may be a dialup, but as long as there is at least
3093one more hop, via their outgoing SMTP server, that's legitimate, and so
3094should not gain points.  If there is only one hop, that will be queried
3095anyway, as it should be relaying via its outgoing SMTP server instead of
3096sending directly to your MX (mail exchange).
3097
3098=item selecting IPs by whether they are trusted
3099
3100When checking a 'nice' DNSBL (a DNS whitelist), you cannot trust the IP
3101addresses in Received headers that were not added by trusted relays.  To
3102test the first IP address that can be trusted, place '-firsttrusted' at the
3103end of the set name.  That should test the IP address of the relay that
3104connected to the most remote trusted relay.
3105
3106Note that this requires that SpamAssassin know which relays are trusted.  For
3107simple cases, SpamAssassin can make a good estimate.  For complex cases, you
3108may get better results by setting C<trusted_networks> manually.
3109
3110In addition, you can test all untrusted IP addresses by placing '-untrusted'
3111at the end of the set name.   Important note -- this does NOT include the
3112IP address from the most recent 'untrusted line', as used in '-firsttrusted'
3113above.  That's because we're talking about the trustworthiness of the
3114IP address data, not the source header line, here; and in the case of
3115the most recent header (the 'firsttrusted'), that data can be trusted.
3116See the Wiki page at C<http://wiki.apache.org/spamassassin/TrustedRelays>
3117for more information on this.
3118
3119=item Selecting just the last external IP
3120
3121By using '-lastexternal' at the end of the set name, you can select only
3122the external host that connected to your internal network, or at least
3123the last external host with a public IP.
3124
3125=back
3126
3127=item header SYMBOLIC_TEST_NAME eval:check_rbl_txt('set', 'zone')
3128
3129Same as check_rbl(), except querying using IN TXT instead of IN A records.
3130If the zone supports it, it will result in a line of text describing
3131why the IP is listed, typically a hyperlink to a database entry.
3132
3133=item header SYMBOLIC_TEST_NAME eval:check_rbl_sub('set', 'sub-test')
3134
3135Create a sub-test for 'set'.  If you want to look up a multi-meaning zone
3136like relays.osirusoft.com, you can then query the results from that zone
3137using the zone ID from the original query.  The sub-test may either be an
3138IPv4 dotted address for RBLs that return multiple A records, or a
3139non-negative decimal number to specify a bitmask for RBLs that return a
3140single A record containing a bitmask of results, or a regular expression.
3141
Note: the set name must be exactly the same as for the main query rule,
3143including selections like '-notfirsthop' appearing at the end of the set
3144name.
3145
3146=cut
3147
3148  push (@cmds, {
3149    setting => 'header',
3150    is_priv => 1,
3151    code => sub {
3152      my ($self, $key, $value, $line) = @_;
3153      local($1);
3154      if ($value !~ s/^(\S+)\s+//) {
3155        return $INVALID_VALUE;
3156      }
3157      my $rulename = $1;
3158      if ($value eq '') {
3159        return $MISSING_REQUIRED_VALUE;
3160      }
3161      if ($value =~ /^(?:rbl)?eval:(.*)$/) {
3162        my $fn = $1;
3163        if ($fn !~ /^\w+\(.*\)$/) {
3164          return $INVALID_VALUE;
3165        }
3166        if ($fn =~ /^check_(?:rbl|dns)/) {
3167          $self->{parser}->add_test ($rulename, $fn, $TYPE_RBL_EVALS);
3168        }
3169        else {
3170          $self->{parser}->add_test ($rulename, $fn, $TYPE_HEAD_EVALS);
3171        }
3172      }
3173      else {
3174        # Detailed parsing in add_test
3175        $self->{parser}->add_test ($rulename, $value, $TYPE_HEAD_TESTS);
3176      }
3177    }
3178  });
3179
3180=item body SYMBOLIC_TEST_NAME /pattern/modifiers
3181
3182Define a body pattern test.  C<pattern> is a Perl regular expression.  Note:
3183as per the header tests, C<#> must be escaped (C<\#>) or else it is considered
3184the beginning of a comment.
3185
3186The 'body' in this case is the textual parts of the message body; any
3187non-text MIME parts are stripped, and the message decoded from
3188Quoted-Printable or Base-64-encoded format if necessary.  Parts declared as
3189text/html will be rendered from HTML to text.
3190
3191Body is processed as a raw byte string, which means Unicode-specific regex
3192features like \p{} can NOT be used for matching.  The normalize_charset
3193setting will also affect how raw bytes are presented.  Rules in .cf files
3194should be written portably - to match "a with umlaut" character, look for
3195both LATIN1 and UTF8 raw byte variants: /(?:\xE4|\xC3\xA4)/
3196
Each body paragraph (a double-newline-separated block of text) is turned
into a single line, with line breaks removed and whitespace normalized.  Any
lines longer than 2kB are split into shorter separate lines (at a boundary
when possible), which may unexpectedly prevent a pattern from matching.
Patterns are matched independently against each of these lines.
3202
3203Note that by default the message Subject header is considered part of the
3204body and becomes the first line when running the rules. If you don't want
3205to match Subject along with body text, use "tflags RULENAME nosubject".
3206
3207=item body SYMBOLIC_TEST_NAME eval:name_of_eval_method([args])
3208
3209Define a body eval test.  See above.
3210
3211=cut
3212
3213  push (@cmds, {
3214    setting => 'body',
3215    is_priv => 1,
3216    code => sub {
3217      my ($self, $key, $value, $line) = @_;
3218      local($1);
3219      if ($value !~ s/^(\S+)\s+//) {
3220        return $INVALID_VALUE;
3221      }
3222      my $rulename = $1;
3223      if ($value eq '') {
3224        return $MISSING_REQUIRED_VALUE;
3225      }
3226      if ($value =~ /^eval:(.*)$/) {
3227        my $fn = $1;
3228        if ($fn !~ /^\w+\(.*\)$/) {
3229          return $INVALID_VALUE;
3230        }
3231        $self->{parser}->add_test ($rulename, $fn, $TYPE_BODY_EVALS);
3232      } else {
3233        $self->{parser}->add_test ($rulename, $value, $TYPE_BODY_TESTS);
3234      }
3235    }
3236  });
3237
3238=item uri SYMBOLIC_TEST_NAME /pattern/modifiers
3239
3240Define a uri pattern test.  C<pattern> is a Perl regular expression.  Note: as
3241per the header tests, C<#> must be escaped (C<\#>) or else it is considered
3242the beginning of a comment.
3243
3244The 'uri' in this case is a list of all the URIs in the body of the email,
3245and the test will be run on each and every one of those URIs, adjusting the
3246score if a match is found. Use this test instead of one of the body tests
3247when you need to match a URI, as it is more accurately bound to the start/end
3248points of the URI, and will also be faster.
3249
3250=cut
3251
3252# we don't do URI evals yet - maybe later
3253#    if (/^uri\s+(\S+)\s+eval:(.*)$/) {
3254#      $self->{parser}->add_test ($1, $2, $TYPE_URI_EVALS);
3255#      next;
3256#    }
3257  push (@cmds, {
3258    setting => 'uri',
3259    is_priv => 1,
3260    code => sub {
3261      my ($self, $key, $value, $line) = @_;
3262      local($1);
3263      if ($value !~ s/^(\S+)\s+//) {
3264        return $INVALID_VALUE;
3265      }
3266      my $rulename = $1;
3267      if ($value eq '') {
3268        return $MISSING_REQUIRED_VALUE;
3269      }
3270      $self->{parser}->add_test ($rulename, $value, $TYPE_URI_TESTS);
3271    }
3272  });
3273
3274=item rawbody SYMBOLIC_TEST_NAME /pattern/modifiers
3275
3276Define a raw-body pattern test.  C<pattern> is a Perl regular expression.
3277Note: as per the header tests, C<#> must be escaped (C<\#>) or else it is
3278considered the beginning of a comment.
3279
3280The 'raw body' of a message is the raw data inside all textual parts. The
3281text will be decoded from base64 or quoted-printable encoding, but HTML
3282tags and line breaks will still be present.  Multiline expressions will
3283need to be used to match strings that are broken by line breaks.
3284
Note that the text is split into 2-4kB chunks (at a word boundary when
possible), which may unexpectedly prevent a pattern from matching.  Patterns
are matched independently against each of these chunks.
3288
3289=item rawbody SYMBOLIC_TEST_NAME eval:name_of_eval_method([args])
3290
3291Define a raw-body eval test.  See above.
3292
3293=cut
3294
3295  push (@cmds, {
3296    setting => 'rawbody',
3297    is_priv => 1,
3298    code => sub {
3299      my ($self, $key, $value, $line) = @_;
3300      local($1);
3301      if ($value !~ s/^(\S+)\s+//) {
3302        return $INVALID_VALUE;
3303      }
3304      my $rulename = $1;
3305      if ($value eq '') {
3306        return $MISSING_REQUIRED_VALUE;
3307      }
3308      if ($value =~ /^eval:(.*)$/) {
3309        my $fn = $1;
3310        if ($fn !~ /^\w+\(.*\)$/) {
3311          return $INVALID_VALUE;
3312        }
3313        $self->{parser}->add_test ($rulename, $fn, $TYPE_RAWBODY_EVALS);
3314      } else {
3315        $self->{parser}->add_test ($rulename, $value, $TYPE_RAWBODY_TESTS);
3316      }
3317    }
3318  });
3319
3320=item full SYMBOLIC_TEST_NAME /pattern/modifiers
3321
3322Define a full message pattern test.  C<pattern> is a Perl regular expression.
3323Note: as per the header tests, C<#> must be escaped (C<\#>) or else it is
3324considered the beginning of a comment.
3325
3326The full message is the pristine message headers plus the pristine message
3327body, including all MIME data such as images, other attachments, MIME
3328boundaries, etc.
3329
3330Note that CRLF/LF line endings are matched as the original message has them.
3331For any full rules that match newlines, it's recommended to use \r?$ instead
3332of plain $, so it works on all systems.
3333
3334=item full SYMBOLIC_TEST_NAME eval:name_of_eval_method([args])
3335
3336Define a full message eval test.  See above.
3337
3338=cut
3339
3340  push (@cmds, {
3341    setting => 'full',
3342    is_priv => 1,
3343    code => sub {
3344      my ($self, $key, $value, $line) = @_;
3345      local($1);
3346      if ($value !~ s/^(\S+)\s+//) {
3347        return $INVALID_VALUE;
3348      }
3349      my $rulename = $1;
3350      if ($value eq '') {
3351        return $MISSING_REQUIRED_VALUE;
3352      }
3353      if ($value =~ /^eval:(.*)$/) {
3354        my $fn = $1;
3355        if ($fn !~ /^\w+\(.*\)$/) {
3356          return $INVALID_VALUE;
3357        }
3358        $self->{parser}->add_test ($rulename, $fn, $TYPE_FULL_EVALS);
3359      } else {
3360        $self->{parser}->add_test ($rulename, $value, $TYPE_FULL_TESTS);
3361      }
3362    }
3363  });
3364
3365=item meta SYMBOLIC_TEST_NAME boolean expression
3366
3367Define a boolean expression test in terms of other tests that have
3368been hit or not hit.  For example:
3369
3370meta META1        TEST1 && !(TEST2 || TEST3)
3371
3372Note that English language operators ("and", "or") will be treated as
3373rule names, and that there is no C<XOR> operator.
3374
3375=item meta SYMBOLIC_TEST_NAME boolean arithmetic expression
3376
3377Can also define an arithmetic expression in terms of other tests,
3378with an unhit test having the value "0" and a hit test having a
3379nonzero value.  The value of a hit meta test is that of its arithmetic
3380expression.  The value of a hit eval test is that returned by its
3381method.  The value of a hit header, body, rawbody, uri, or full test
3382which has the "multiple" tflag is the number of times the test hit.
3383The value of any other type of hit test is "1".
3384
3385For example:
3386
3387meta META2        (3 * TEST1 - 2 * TEST2) E<gt> 0
3388
3389Note that Perl builtins and functions, like C<abs()>, B<can't> be
3390used, and will be treated as rule names.
3391
3392If you want to define a meta-rule, but do not want its individual sub-rules to
3393count towards the final score unless the entire meta-rule matches, give the
3394sub-rules names that start with '__' (two underscores).  SpamAssassin will
3395ignore these for scoring.
3396
3397=item meta SYMBOLIC_TEST_NAME ... rules_matching(RULEGLOB) ...
3398
3399Special function that will expand to list of matching rulenames.  Can be
3400used anywhere in expressions.  Argument supports glob style rulename
3401matching (* = anything, ? = one character).  Matching is case-sensitive.
3402
For example, this will hit if at least two __FOO_* rules hit:
3404
3405 body __FOO_1  /xxx/
3406 body __FOO_2  /yyy/
3407 body __FOO_3  /zzz/
3408 meta FOO_META  rules_matching(__FOO_*) >= 2
3409
3410Which would be the same as:
3411
3412 meta FOO_META  (__FOO_1 + __FOO_2 + __FOO_3) >= 2
3413
3414
3415=cut
3416
3417  push (@cmds, {
3418    setting => 'meta',
3419    is_priv => 1,
3420    code => sub {
3421      my ($self, $key, $value, $line) = @_;
3422      local($1);
3423      if ($value !~ s/^(\S+)\s+//) {
3424        return $INVALID_VALUE;
3425      }
3426      my $rulename = $1;
3427      if ($value eq '') {
3428        return $MISSING_REQUIRED_VALUE;
3429      }
3430      if ($value =~ /\*\s*\*/) {
3431	info("config: found invalid '**' or '* *' operator in meta command");
3432        return $INVALID_VALUE;
3433      }
3434      $self->{parser}->add_test ($rulename, $value, $TYPE_META_TESTS);
3435    }
3436  });
3437
3438=item reuse SYMBOLIC_TEST_NAME [ OLD_SYMBOLIC_TEST_NAME_1 ... ]
3439
3440Defines the name of a test that should be "reused" during the scoring
3441process. If a message has an X-Spam-Status header that shows a hit for
3442this rule or any of the old rule names given, a hit will be added for
3443this rule when B<mass-check --reuse> is used. Examples:
3444
3445C<reuse SPF_PASS>
3446
3447C<reuse MY_NET_RULE_V2 MY_NET_RULE_V1>
3448
3449The actual logic for reuse tests is done by
3450B<Mail::SpamAssassin::Plugin::Reuse>.
3451
3452=cut
3453
3454  push (@cmds, {
3455    setting => 'reuse',
3456    is_priv => 1,
3457    code => sub {
3458      my ($self, $key, $value, $line) = @_;
3459      if ($value !~ /\s*(\w+)(?:\s+(?:\w+(?:\s+\w+)*))?\s*$/) {
3460        return $INVALID_VALUE;
3461      }
3462      my $rule_name = $1;
3463      # don't overwrite tests, just define them so scores, priorities work
3464      if (!exists $self->{tests}->{$rule_name}) {
3465        $self->{parser}->add_test($rule_name, undef, $TYPE_EMPTY_TESTS);
3466      }
3467    }
3468  });
3469
3470=item tflags SYMBOLIC_TEST_NAME flags
3471
3472Used to set flags on a test. Parameter is a space-separated list of flag
3473names or flag name = value pairs.
3474These flags are used in the score-determination back end system for details
3475of the test's behaviour.  Please see C<bayes_auto_learn> for more information
3476about tflag interaction with those systems. The following flags can be set:
3477
3478=over 4
3479
3480=item  net
3481
3482The test is a network test, and will not be run in the mass checking system
3483or if B<-L> is used, therefore its score should not be modified.
3484
3485=item  nice
3486
3487The test is intended to compensate for common false positives, and should be
3488assigned a negative score.
3489
3490=item  userconf
3491
3492The test requires user configuration before it can be used (like
3493language-specific tests).
3494
3495=item  learn
3496
3497The test requires training before it can be used.
3498
3499=item  noautolearn
3500
3501The test will explicitly be ignored when calculating the score for
3502learning systems.
3503
3504=item  autolearn_force
3505
3506The test will be subject to less stringent autolearn thresholds.
3507
3508Normally, SpamAssassin will require 3 points from the header and 3
3509points from the body to be auto-learned as spam. This option keeps
3510the threshold at 6 points total but changes it to have no regard to the
3511source of the points.
3512
3513=item  noawl
3514
This flag is specific to the AWL plugin.
3516
Normally, the AWL plugin normalizes scores via auto-whitelist. In some
scenarios this works against the system administrator when trying to add
rules to correct misclassified email. When the AWL plugin scans the email and
finds the noawl flag, it will exit without normalizing the score or storing
the value in the db.
3522
3523=item  multiple
3524
3525The test will be evaluated multiple times, for use with meta rules.
3526Only affects header, body, rawbody, uri, and full tests.
3527
3528=item  maxhits=N
3529
3530If B<multiple> is specified, limit the number of hits found to N.
3531If the rule is used in a meta that counts the hits (e.g. __RULENAME E<gt> 5),
3532this is a way to avoid wasted extra work (use "tflags multiple maxhits=6").
3533
3534For example:
3535
3536  uri      __KAM_COUNT_URIS /^./
3537  tflags   __KAM_COUNT_URIS multiple maxhits=16
3538  describe __KAM_COUNT_URIS A multiple match used to count URIs in a message
3539
3540  meta __KAM_HAS_0_URIS (__KAM_COUNT_URIS == 0)
3541  meta __KAM_HAS_1_URIS (__KAM_COUNT_URIS >= 1)
3542  meta __KAM_HAS_2_URIS (__KAM_COUNT_URIS >= 2)
3543  meta __KAM_HAS_3_URIS (__KAM_COUNT_URIS >= 3)
3544  meta __KAM_HAS_4_URIS (__KAM_COUNT_URIS >= 4)
3545  meta __KAM_HAS_5_URIS (__KAM_COUNT_URIS >= 5)
3546  meta __KAM_HAS_10_URIS (__KAM_COUNT_URIS >= 10)
3547  meta __KAM_HAS_15_URIS (__KAM_COUNT_URIS >= 15)
3548
3549=item  nosubject
3550
3551Used only for B<body> rules.  If specified, Subject header will not be a
3552part of the matched body text.  See I<body> for more info.
3553
3554=item  ips_only
3555
3556This flag is specific to rules invoking an URIDNSBL plugin,
3557it is documented there.
3558
3559=item  domains_only
3560
3561This flag is specific to rules invoking an URIDNSBL plugin,
3562it is documented there.
3563
3564=item  ns
3565
3566This flag is specific to rules invoking an URIDNSBL plugin,
3567it is documented there.
3568
3569=item  a
3570
3571This flag is specific to rules invoking an URIDNSBL plugin,
3572it is documented there.
3573
3574=item  notrim
3575
3576This flag is specific to rules invoking an URIDNSBL plugin,
3577it is documented there.
3578
3579=item nolog
3580
This flag will hide (sensitive) rule information from reports.
3582
3583=back
3584
3585=cut
3586
3587  push (@cmds, {
3588    setting => 'tflags',
3589    is_priv => 1,
3590    type => $CONF_TYPE_HASH_KEY_VALUE,
3591  });
3592
3593=item priority SYMBOLIC_TEST_NAME n
3594
3595Assign a specific priority to a test.  All tests, except for DNS and Meta
3596tests, are run in increasing priority value order (negative priority values
3597are run before positive priority values). The default test priority is 0
3598(zero).
3599
3600The values C<-99999999999999> and C<-99999999999998> have a special meaning
3601internally, and should not be used.
3602
3603=cut
3604
3605  push (@cmds, {
3606    setting => 'priority',
3607    is_priv => 1,
3608    type => $CONF_TYPE_HASH_KEY_VALUE,
3609    code => sub {
3610      my ($self, $key, $value, $line) = @_;
3611      my ($rulename, $priority) = split(/\s+/, $value, 2);
3612      unless (defined $priority) {
3613        return $MISSING_REQUIRED_VALUE;
3614      }
3615      unless ($rulename =~ IS_RULENAME) {
3616        return $INVALID_VALUE;
3617      }
3618      unless ($priority =~ /^-?\d+$/) {
3619        return $INVALID_VALUE;
3620      }
3621      $self->{priority}->{$rulename} = $priority;
3622    }
3623  });
3624
3625=back
3626
3627=head1 ADMINISTRATOR SETTINGS
3628
3629These settings differ from the ones above, in that they are considered 'more
3630privileged' -- even more than the ones in the B<PRIVILEGED SETTINGS> section.
3631No matter what C<allow_user_rules> is set to, these can never be set from a
3632user's C<user_prefs> file when spamc/spamd is being used.  However, all
3633settings can be used by local programs run directly by the user.
3634
3635=over 4
3636
3637=item version_tag string
3638
3639This tag is appended to the SA version in the X-Spam-Status header. You should
3640include it when you modify your ruleset, especially if you plan to distribute it.
3641A good choice for I<string> is your last name or your initials followed by a
3642number which you increase with each change.
3643
3644The version_tag will be lowercased, and any non-alphanumeric or period
3645character will be replaced by an underscore.
3646
3647e.g.
3648
3649  version_tag myrules1    # version=2.41-myrules1
3650
3651=cut
3652
3653  push (@cmds, {
3654    setting => 'version_tag',
3655    is_admin => 1,
3656    code => sub {
3657      my ($self, $key, $value, $line) = @_;
3658      if ($value eq '') {
3659        return $MISSING_REQUIRED_VALUE;
3660      }
3661      my $tag = lc($value);
3662      $tag =~ tr/a-z0-9./_/c;
3663      foreach (@Mail::SpamAssassin::EXTRA_VERSION) {
3664        if($_ eq $tag) { $tag = undef; last; }
3665      }
3666      push(@Mail::SpamAssassin::EXTRA_VERSION, $tag) if($tag);
3667    }
3668  });
3669
3670=item test SYMBOLIC_TEST_NAME (ok|fail) Some string to test against
3671
3672Define a regression testing string. You can have more than one regression test
3673string per symbolic test name. Simply specify a string that you wish the test
3674to match.
3675
3676These tests are only run as part of the test suite - they should not affect the
3677general running of SpamAssassin.
3678
3679=cut
3680
3681  push (@cmds, {
3682    setting => 'test',
3683    is_admin => 1,
3684    code => sub {
3685      return unless defined $COLLECT_REGRESSION_TESTS;
3686      my ($self, $key, $value, $line) = @_;
3687      local ($1,$2,$3);
3688      if ($value !~ /^(\S+)\s+(ok|fail)\s+(.*)$/) { return $INVALID_VALUE; }
3689      $self->{parser}->add_regression_test($1, $2, $3);
3690    }
3691  });
3692
3693=item body_part_scan_size               (default: 50000)
3694
3695Per mime-part scan size limit in bytes for "body" type rules.
3696The decoded/stripped mime-part is truncated approx to this size.
3697Helps scanning large messages safely, so it's not necessary to
3698skip them completely. Disabled with 0.
3699
3700=cut
3701
3702  push (@cmds, {
3703    setting => 'body_part_scan_size',
3704    is_admin => 1,
3705    default => 50000,
3706    type => $CONF_TYPE_NUMERIC,
3707  });
3708
3709
3710=item rawbody_part_scan_size               (default: 500000)
3711
3712Like body_part_scan_size, for "rawbody" type rules.
3713
3714=cut
3715
3716  push (@cmds, {
3717    setting => 'rawbody_part_scan_size',
3718    is_admin => 1,
3719    default => 500000,
3720    type => $CONF_TYPE_NUMERIC,
3721  });
3722
3723=item rbl_timeout t [t_min] [zone]		(default: 15 3)
3724
3725All DNS queries are made at the beginning of a check and we try to read
3726the results at the end.  This value specifies the maximum period of time
3727(in seconds) to wait for a DNS query.  If most of the DNS queries have
3728succeeded for a particular message, then SpamAssassin will not wait for
3729the full period to avoid wasting time on unresponsive server(s), but will
3730shrink the timeout according to a percentage of queries already completed.
3731As the number of queries remaining approaches 0, the timeout value will
3732gradually approach a t_min value, which is an optional second parameter
3733and defaults to 0.2 * t.  If t is smaller than t_min, the initial timeout
3734is set to t_min.  Here is a chart of queries remaining versus the timeout
3735in seconds, for the default 15 second / 3 second timeout setting:
3736
3737  queries left  100%  90%  80%  70%  60%  50%  40%  30%  20%  10%   0%
3738  timeout        15   14.9 14.5 13.9 13.1 12.0 10.7  9.1  7.3  5.3  3
3739
3740For example, if 20 queries are made at the beginning of a message check
3741and 16 queries have returned (leaving 20%), the remaining 4 queries should
3742finish within 7.3 seconds since their query started or they will be timed out.
3743Note that timed out queries are only aborted when there is nothing else left
3744for SpamAssassin to do - long evaluation of other rules may grant queries
3745additional time.
3746
If a parameter 'zone' is specified (it must end with a letter, which
distinguishes it from the other, numeric parameters), then the setting only
applies to DNS queries against the specified DNS domain (host, domain or
RBL (sub)zone).  Matching is case-insensitive, and the actual domain may be a
subdomain of the specified zone.
3752
3753=cut
3754
3755  push (@cmds, {
3756    setting => 'rbl_timeout',
3757    is_admin => 1,
3758    default => 15,
3759    code => sub {
3760      my ($self, $key, $value, $line) = @_;
3761      unless (defined $value && $value !~ /^$/) {
3762	return $MISSING_REQUIRED_VALUE;
3763      }
3764      local ($1,$2,$3);
3765      unless ($value =~ /^        ( \+? \d+ (?: \. \d*)? [smhdw]? )
3766                          (?: \s+ ( \+? \d+ (?: \. \d*)? [smhdw]? ) )?
3767                          (?: \s+ (\S* [a-zA-Z]) )? $/xsi) {
3768	return $INVALID_VALUE;
3769      }
3770      my($timeout, $timeout_min, $zone) = ($1, $2, $3);
3771      foreach ($timeout, $timeout_min) {
3772        if (defined $_ && s/\s*([smhdw])\z//i) {
3773          $_ *= { s => 1, m => 60, h => 3600,
3774                  d => 24*3600, w => 7*24*3600 }->{lc $1};
3775        }
3776      }
3777      if (!defined $zone) {  # a global setting
3778        $self->{rbl_timeout}     = 0 + $timeout;
3779        $self->{rbl_timeout_min} = 0 + $timeout_min  if defined $timeout_min;
3780      }
3781      else {  # per-zone settings
3782        $zone =~ s/^\.//;  $zone =~ s/\.\z//;  # strip leading and trailing dot
3783        $zone = lc $zone;
3784        $self->{by_zone}{$zone}{rbl_timeout} = 0 + $timeout;
3785        $self->{by_zone}{$zone}{rbl_timeout_min} =
3786                                     0 + $timeout_min  if defined $timeout_min;
3787      }
3788    },
3789    type => $CONF_TYPE_DURATION,
3790  });
3791
3792=item util_rb_tld tld1 tld2 ...
3793
3794=encoding utf8
3795
3796This option maintains a list of valid TLDs in the RegistryBoundaries code.
3797Top level domains (TLD) include things like com, net, org, xn--p1ai, рф, ...
3798International domain names may be specified in ASCII-compatible encoding (ACE),
3799e.g. xn--p1ai, xn--qxam, or with Unicode labels encoded as UTF-8 octets,
3800e.g. рф, ελ.
3801
3802=cut
3803
3804  push (@cmds, {
3805    setting => 'util_rb_tld',
3806    is_admin => 1,
3807    code => sub {
3808      my ($self, $key, $value, $line) = @_;
3809      unless (defined $value && $value !~ /^$/) {
3810	return $MISSING_REQUIRED_VALUE;
3811      }
3812      unless ($value =~ /^[^\s.]+(?:\s+[^\s.]+)*$/) {
3813	return $INVALID_VALUE;
3814      }
3815      foreach (split(/\s+/, $value)) {
3816        $self->{valid_tlds}{idn_to_ascii($_)} = 1;
3817      }
3818    }
3819  });
3820
3821=item util_rb_2tld 2tld-1.tld 2tld-2.tld ...
3822
This option maintains a list of valid 2nd-level TLDs in the RegistryBoundaries
code.  2TLDs include things like co.uk, fed.us, etc.  International domain
3825names may be specified in ASCII-compatible encoding (ACE), or with Unicode
3826labels encoded as UTF-8 octets.
3827
3828=cut
3829
3830  push (@cmds, {
3831    setting => 'util_rb_2tld',
3832    is_admin => 1,
3833    code => sub {
3834      my ($self, $key, $value, $line) = @_;
3835      unless (defined $value && $value !~ /^$/) {
3836	return $MISSING_REQUIRED_VALUE;
3837      }
3838      unless ($value =~ /^[^\s.]+\.[^\s.]+(?:\s+[^\s.]+\.[^\s.]+)*$/) {
3839	return $INVALID_VALUE;
3840      }
3841      foreach (split(/\s+/, $value)) {
3842        $self->{two_level_domains}{idn_to_ascii($_)} = 1;
3843      }
3844    }
3845  });
3846
3847=item util_rb_3tld 3tld1.some.tld 3tld2.other.tld ...
3848
This option maintains a list of valid 3rd-level TLDs in the RegistryBoundaries
code.  3TLDs include things like demon.co.uk, plc.co.im, etc.  International
3851domain names may be specified in ASCII-compatible encoding (ACE), or with
3852Unicode labels encoded as UTF-8 octets.
3853
3854=cut
3855
3856  push (@cmds, {
3857    setting => 'util_rb_3tld',
3858    is_admin => 1,
3859    code => sub {
3860      my ($self, $key, $value, $line) = @_;
3861      unless (defined $value && $value !~ /^$/) {
3862	return $MISSING_REQUIRED_VALUE;
3863      }
3864      unless ($value =~ /^[^\s.]+\.[^\s.]+\.[^\s.]+(?:\s+[^\s.]+\.[^\s.]+\.[^\s.]+)*$/) {
3865	return $INVALID_VALUE;
3866      }
3867      foreach (split(/\s+/, $value)) {
3868        $self->{three_level_domains}{idn_to_ascii($_)} = 1;
3869      }
3870    }
3871  });
3872
3873=item clear_util_rb
3874
3875Empty internal list of valid TLDs (including 2nd and 3rd level) which
3876RegistryBoundaries code uses.  Only useful if you want to override the
3877standard lists supplied by sa-update.
3878
3879=cut
3880
3881  push (@cmds, {
3882    setting => 'clear_util_rb',
3883    type => $CONF_TYPE_NOARGS,
3884    code => sub {
3885      my ($self, $key, $value, $line) = @_;
3886      unless (!defined $value || $value eq '') {
3887        return $INVALID_VALUE;
3888      }
3889      undef $self->{valid_tlds};
3890      undef $self->{two_level_domains};
3891      undef $self->{three_level_domains};
3892      dbg("config: cleared tld lists");
3893    }
3894  });
3895
3896=item bayes_path /path/filename	(default: ~/.spamassassin/bayes)
3897
3898This is the directory and filename for Bayes databases.  Several databases
3899will be created, with this as the base directory and filename, with C<_toks>,
3900C<_seen>, etc. appended to the base.  The default setting results in files
3901called C<~/.spamassassin/bayes_seen>, C<~/.spamassassin/bayes_toks>, etc.
3902
3903By default, each user has their own in their C<~/.spamassassin> directory with
3904mode 0700/0600.  For system-wide SpamAssassin use, you may want to reduce disk
3905space usage by sharing this across all users.  However, Bayes appears to be
3906more effective with individual user databases.
3907
3908=cut
3909
3910  push (@cmds, {
3911    setting => 'bayes_path',
3912    is_admin => 1,
3913    default => '__userstate__/bayes',
3914    type => $CONF_TYPE_STRING,
3915    code => sub {
3916      my ($self, $key, $value, $line) = @_;
3917      unless (defined $value && $value !~ /^$/) {
3918	return $MISSING_REQUIRED_VALUE;
3919      }
3920      if (-d $value) {
3921	return $INVALID_VALUE;
3922      }
3923     $self->{bayes_path} = $value;
3924    }
3925  });
3926
3927=item bayes_file_mode		(default: 0700)
3928
3929The file mode bits used for the Bayesian filtering database files.
3930
3931Make sure you specify this using the 'x' mode bits set, as it may also be used
3932to create directories.  However, if a file is created, the resulting file will
3933not have any execute bits set (the umask is set to 111). The argument is a
3934string of octal digits, it is converted to a numeric value internally.
3935
3936=cut
3937
3938  push (@cmds, {
3939    setting => 'bayes_file_mode',
3940    is_admin => 1,
3941    default => '0700',
3942    type => $CONF_TYPE_NUMERIC,
3943    code => sub {
3944      my ($self, $key, $value, $line) = @_;
3945      if ($value !~ /^0?[0-7]{3}$/) { return $INVALID_VALUE }
3946      $self->{bayes_file_mode} = untaint_var($value);
3947    }
3948  });
3949
3950=item bayes_store_module Name::Of::BayesStore::Module
3951
3952If this option is set, the module given will be used as an alternate
3953to the default bayes storage mechanism.  It must conform to the
3954published storage specification (see
3955Mail::SpamAssassin::BayesStore). For example, set this to
3956Mail::SpamAssassin::BayesStore::SQL to use the generic SQL storage
3957module.
3958
3959=cut
3960
3961  push (@cmds, {
3962    setting => 'bayes_store_module',
3963    is_admin => 1,
3964    default => '',
3965    type => $CONF_TYPE_STRING,
3966    code => sub {
3967      my ($self, $key, $value, $line) = @_;
3968      local ($1);
3969      if ($value !~ /^([_A-Za-z0-9:]+)$/) { return $INVALID_VALUE; }
3970      $self->{bayes_store_module} = $1;
3971    }
3972  });
3973
3974=item bayes_sql_dsn DBI::databasetype:databasename:hostname:port
3975
3976Used for BayesStore::SQL storage implementation.
3977
This option gives the connect string used to connect to the SQL-based Bayes storage.
3979
3980=cut
3981
3982  push (@cmds, {
3983    setting => 'bayes_sql_dsn',
3984    is_admin => 1,
3985    default => '',
3986    type => $CONF_TYPE_STRING,
3987  });
3988
3989=item bayes_sql_username
3990
3991Used by BayesStore::SQL storage implementation.
3992
3993This option gives the username used by the above DSN.
3994
3995=cut
3996
3997  push (@cmds, {
3998    setting => 'bayes_sql_username',
3999    is_admin => 1,
4000    default => '',
4001    type => $CONF_TYPE_STRING,
4002  });
4003
4004=item bayes_sql_password
4005
4006Used by BayesStore::SQL storage implementation.
4007
4008This option gives the password used by the above DSN.
4009
4010=cut
4011
4012  push (@cmds, {
4013    setting => 'bayes_sql_password',
4014    is_admin => 1,
4015    default => '',
4016    type => $CONF_TYPE_STRING,
4017  });
4018
4019=item bayes_sql_username_authorized ( 0 | 1 )  (default: 0)
4020
Whether to call the services_authorized_for_username plugin hook in BayesSQL.
If the hook does not determine that the user is allowed to use bayes, or the
user is invalid, then the database will not be initialized.
4024
4025NOTE: By default the user is considered invalid until a plugin returns
4026a true value.  If you enable this, but do not have a proper plugin
4027loaded, all users will turn up as invalid.
4028
4029The username passed into the plugin can be affected by the
4030bayes_sql_override_username config option.
4031
4032=cut
4033
4034  push (@cmds, {
4035    setting => 'bayes_sql_username_authorized',
4036    is_admin => 1,
4037    default => 0,
4038    type => $CONF_TYPE_BOOL,
4039  });
4040
4041=item user_scores_dsn DBI:databasetype:databasename:hostname:port
4042
4043If you load user scores from an SQL database, this will set the DSN
4044used to connect.  Example: C<DBI:mysql:spamassassin:localhost>
4045
4046If you load user scores from an LDAP directory, this will set the DSN used to
4047connect. You have to write the DSN as an LDAP URL, the components being the
4048host and port to connect to, the base DN for the search, the scope of the
4049search (base, one or sub), the single attribute being the multivalued attribute
4050used to hold the configuration data (space separated pairs of key and value,
4051just as in a file) and finally the filter being the expression used to filter
out the wanted username. Note that the filter expression is being used in a
sprintf statement with the username as the only parameter, thus it can hold a
single __USERNAME__ expression. This will be replaced with the username.
4055
4056Example: C<ldap://localhost:389/dc=koehntopp,dc=de?saconfig?uid=__USERNAME__>
4057
4058=cut
4059
4060  push (@cmds, {
4061    setting => 'user_scores_dsn',
4062    is_admin => 1,
4063    default => '',
4064    type => $CONF_TYPE_STRING,
4065  });
4066
4067=item user_scores_sql_username username
4068
4069The authorized username to connect to the above DSN.
4070
4071=cut
4072
4073  push (@cmds, {
4074    setting => 'user_scores_sql_username',
4075    is_admin => 1,
4076    default => '',
4077    type => $CONF_TYPE_STRING,
4078  });
4079
4080=item user_scores_sql_password password
4081
4082The password for the database username, for the above DSN.
4083
4084=cut
4085
4086  push (@cmds, {
4087    setting => 'user_scores_sql_password',
4088    is_admin => 1,
4089    default => '',
4090    type => $CONF_TYPE_STRING,
4091  });
4092
4093=item user_scores_sql_custom_query query
4094
4095This option gives you the ability to create a custom SQL query to
4096retrieve user scores and preferences.  In order to work correctly your
4097query should return two values, the preference name and value, in that
4098order.  In addition, there are several "variables" that you can use
4099as part of your query, these variables will be substituted for the
4100current values right before the query is run.  The current allowed
4101variables are:
4102
4103=over 4
4104
4105=item _TABLE_
4106
4107The name of the table where user scores and preferences are stored. Currently
4108hardcoded to userpref, to change this value you need to create a new custom
4109query with the new table name.
4110
4111=item _USERNAME_
4112
4113The current user's username.
4114
4115=item _MAILBOX_
4116
4117The portion before the @ as derived from the current user's username.
4118
4119=item _DOMAIN_
4120
4121The portion after the @ as derived from the current user's username, this
4122value may be null.
4123
4124=back
4125
4126The query must be one continuous line in order to parse correctly.
4127
4128Here are several example queries, please note that these are broken up
4129for easy reading, in your config it should be one continuous line.
4130
4131=over 4
4132
4133=item Current default query:
4134
4135C<SELECT preference, value FROM _TABLE_ WHERE username = _USERNAME_ OR username = '@GLOBAL' ORDER BY username ASC>
4136
4137=item Use global and then domain level defaults:
4138
4139C<SELECT preference, value FROM _TABLE_ WHERE username = _USERNAME_ OR username = '@GLOBAL' OR username = '@~'||_DOMAIN_ ORDER BY username ASC>
4140
4141=item Maybe global prefs should override user prefs:
4142
4143C<SELECT preference, value FROM _TABLE_ WHERE username = _USERNAME_ OR username = '@GLOBAL' ORDER BY username DESC>
4144
4145=back
4146
4147=cut
4148
4149  push (@cmds, {
4150    setting => 'user_scores_sql_custom_query',
4151    is_admin => 1,
4152    default => undef,
4153    type => $CONF_TYPE_STRING,
4154  });
4155
4156=item user_scores_ldap_username
4157
4158This is the Bind DN used to connect to the LDAP server.  It defaults
4159to the empty string (""), allowing anonymous binding to work.
4160
4161Example: C<cn=master,dc=koehntopp,dc=de>
4162
4163=cut
4164
4165  push (@cmds, {
4166    setting => 'user_scores_ldap_username',
4167    is_admin => 1,
4168    default => '',
4169    type => $CONF_TYPE_STRING,
4170  });
4171
4172=item user_scores_ldap_password
4173
4174This is the password used to connect to the LDAP server.  It defaults
4175to the empty string ("").
4176
4177=cut
4178
4179  push (@cmds, {
4180    setting => 'user_scores_ldap_password',
4181    is_admin => 1,
4182    default => '',
4183    type => $CONF_TYPE_STRING,
4184  });
4185
4186=item user_scores_fallback_to_global        (default: 1)
4187
4188Fall back to global scores and settings if userprefs can't be loaded
4189from SQL or LDAP, instead of passing the message through unprocessed.
4190
4191=cut
4192
4193  push (@cmds, {
4194    setting => 'user_scores_fallback_to_global',
4195    is_admin => 1,
4196    default => 1,
4197    type => $CONF_TYPE_BOOL,
4198  });
4199
4200=item loadplugin [Mail::SpamAssassin::Plugin::]ModuleName [/path/module.pm]
4201
4202Load a SpamAssassin plugin module.  The C<ModuleName> is the perl module
4203name, used to create the plugin object itself.
4204
4205Module naming is strict, name must only contain alphanumeric characters or
4206underscores.  File must have .pm extension.
4207
4208C</path/module.pm> is the file to load, containing the module's perl code;
4209if it's specified as a relative path, it's considered to be relative to the
4210current configuration file.  If it is omitted, the module will be loaded
4211using perl's search path (the C<@INC> array).
4212
4213See C<Mail::SpamAssassin::Plugin> for more details on writing plugins.
4214
4215=cut
4216
4217  push (@cmds, {
4218    setting => 'loadplugin',
4219    is_admin => 1,
4220    code => sub {
4221      my ($self, $key, $value, $line) = @_;
4222      if ($value eq '') {
4223        return $MISSING_REQUIRED_VALUE;
4224      }
4225      my ($package, $path);
4226      local ($1,$2);
4227      if ($value =~ /^((?:\w+::){0,10}\w+)(?:\s+(\S+\.pm))?$/i) {
4228        ($package, $path) = ($1, $2);
4229      } else {
4230	return $INVALID_VALUE;
4231      }
4232      $self->load_plugin ($package, $path);
4233    }
4234  });
4235
4236=item tryplugin ModuleName [/path/module.pm]
4237
4238Same as C<loadplugin>, but silently ignored if the .pm file cannot be found in
4239the filesystem.
4240
4241=cut
4242
# 'tryplugin' (admin only): same syntax as 'loadplugin', but load_plugin()
# is called with a true third argument.
  push (@cmds, {
    setting => 'tryplugin',
    is_admin => 1,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      return $MISSING_REQUIRED_VALUE  if $value eq '';

      # localized so that the match below leaves any outer $1/$2 alone
      local ($1,$2);

      # package: up to ten "word::" components plus a final word;
      # optional path: whitespace-separated, ending in ".pm" (any case)
      $value =~ /^((?:\w+::){0,10}\w+)(?:\s+(\S+\.pm))?$/i
        or return $INVALID_VALUE;

      my ($package, $path) = ($1, $2);
      $self->load_plugin ($package, $path, 1);
    }
  });
4261
4262=item ignore_always_matching_regexps         (Default: 0)
4263
4264Ignore any rule which contains a regexp which always matches.
4265Currently only catches regexps which contain '||', or which begin or
4266end with a '|'.  Also ignore rules with C<some> combinatorial explosions.
4267
4268=cut
4269
# 'ignore_always_matching_regexps': admin-only boolean, off by default.
  push (@cmds, {
    setting  => 'ignore_always_matching_regexps',
    type     => $CONF_TYPE_BOOL,
    default  => 0,
    is_admin => 1,
  });
4276
4277=item geodb_module STRING
4278
This option tells SpamAssassin which geolocation module to use.
Plugins can override this internally if required.

If not specified, all supported ones are tried in this order:
4283
4284 MaxMind::DB::Reader  (same as GeoIP2::Database::Reader)
4285 Geo::IP
4286 IP::Country::DB_File  (not used unless geodb_options path set)
4287 IP::Country::Fast
4288
4289=cut
4290
# 'geodb_module' (admin only): map the module name given by the user
# (case-insensitively) to the internal tag stored in {geodb}->{module}.
  push (@cmds, {
    setting => 'geodb_module',
    is_admin => 1,
    default => undef,
    type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
    code => sub {
      my ($self, $key, $value, $line) = @_;

      # every accepted spelling, lowercased, and the tag it selects
      my %module_for = (
        'maxmind::db::reader'      => 'geoip2',
        'geoip2::database::reader' => 'geoip2',
        'geoip2'                   => 'geoip2',
        'geo::ip'                  => 'geoip',
        'geoip'                    => 'geoip',
        'ip::country::db_file'     => 'dbfile',
        'db_file'                  => 'dbfile',
        'ip::country::fast'        => 'fast',
        'fast'                     => 'fast',
      );

      my $module = $module_for{lc $value};
      return $Mail::SpamAssassin::Conf::INVALID_VALUE  unless defined $module;
      $self->{geodb}->{module} = $module;
    }
  });
4313
# support deprecated RelayCountry setting: warn, then translate the old
# value into the {geodb}->{module} tag used by geodb_module
  push (@cmds, {
    setting => 'country_db_type',
    is_admin => 1,
    default => undef,
    type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      warn("config: deprecated setting used, change country_db_type to geodb_module\n");

      # substring matches, tried in this order (GeoIP2 must precede Geo)
      my $module = $value =~ /GeoIP2/i ? 'geoip2'
                 : $value =~ /Geo/i    ? 'geoip'
                 : $value =~ /Fast/i   ? 'fast'
                 :                       undef;

      return $Mail::SpamAssassin::Conf::INVALID_VALUE  unless defined $module;
      $self->{geodb}->{module} = $module;
    }
  });
4334
4335=item geodb_options dbtype:/path/to/db ...
4336
4337Supported dbtypes:
4338
4339I<city> - use City database
4340I<country> - use Country database
4341I<isp> - try loading ISP database
I<asn> - try loading ASN database
I<reset> - discard all geodb_options values set so far
4343
4344Append full database path with colon, for example:
4345I<isp:/opt/geoip/isp.mmdb>
4346
4347Plugins can internally request all types they require, geodb_options is only
4348needed if the default location search (described below) does not work.
4349
4350GeoIP/GeoIP2 searches these files/directories:
4351
4352 country:
4353   GeoIP2-Country.mmdb, GeoLite2-Country.mmdb
4354   GeoIP.dat (and v6 version)
4355 city:
4356   GeoIP2-City.mmdb, GeoLite2-City.mmdb
4357   GeoIPCity.dat, GeoLiteCity.dat (and v6 versions)
4358 isp:
4359   GeoIP2-ISP.mmdb
4360   GeoIPISP.dat, GeoLiteISP.dat (and v6 versions)
4361 directories:
4362   /usr/local/share/GeoIP
4363   /usr/share/GeoIP
4364   /var/lib/GeoIP
4365   /opt/share/GeoIP
4366
4367=cut
4368
# 'geodb_options' (admin only): whitespace-separated list of "dbtype" or
# "dbtype:/path/to/db" items, stored under {geodb}->{options}.  Accepted
# dbtypes are country, city, isp and asn; the keyword "reset" empties the
# options collected so far.  Any other item makes the line invalid.
  push (@cmds, {
    setting => 'geodb_options',
    is_admin => 1,
    type => $CONF_TYPE_HASH_KEY_VALUE,
    default => {},
    code => sub {
      my ($self, $key, $value, $line) = @_;
      foreach my $item (split (/\s+/, $value)) {
        # split at the first colon only, the rest is the database path
        my ($dbtype, $db) = split(/:/, $item, 2);
        $dbtype = lc($dbtype);
        if ($dbtype eq 'reset') {
          $self->{geodb}->{options} = {};
        } elsif ($dbtype eq 'country' || $dbtype eq 'city' ||
                 $dbtype eq 'isp' || $dbtype eq 'asn') {
          # a dbtype given without a path is recorded as undef
          $self->{geodb}->{options}->{$dbtype} = $db || undef;
        } else {
          return $INVALID_VALUE;
        }
      }
    }
  });
4393
4394=item geodb_search_path /path/to/GeoIP ...
4395
4396Alternative to geodb_options. Overrides the default list of directories to
4397search for default filenames.
4398
4399=cut
4400
# 'geodb_search_path' (admin only): "reset" empties the directory list kept
# in {geodb}->{geodb_search_path}; any other non-empty value is split on
# whitespace and appended to it.
  push (@cmds, {
    setting => 'geodb_search_path',
    is_admin => 1,
    default => [],
    type => $CONF_TYPE_STRINGLIST,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      return $MISSING_REQUIRED_VALUE  if $value eq '';

      if ($value eq 'reset') {
        return $self->{geodb}->{geodb_search_path} = [];
      }

      my @dirs = split(/\s+/, $value);
      push(@{$self->{geodb}->{geodb_search_path}}, @dirs);
    }
  });
4417
# support deprecated RelayCountry setting: warn, then store a non-empty
# value as the country database path
  push (@cmds, {
    setting => 'country_db_path',
    is_admin => 1,
    default => undef,
    type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      warn("config: deprecated setting used, change country_db_path to geodb_options\n");
      return $Mail::SpamAssassin::Conf::INVALID_VALUE  if $value eq '';
      $self->{geodb}->{options}->{country} = $value;
    }
  });
# support deprecated URILocalBL setting: warn, then store a non-empty
# value as the country database path
  push (@cmds, {
    setting => 'uri_country_db_path',
    is_admin => 1,
    default => undef,
    type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      warn("config: deprecated setting used, change uri_country_db_path to geodb_options\n");
      return $Mail::SpamAssassin::Conf::INVALID_VALUE  if $value eq '';
      $self->{geodb}->{options}->{country} = $value;
    }
  });
# support deprecated URILocalBL setting: warn, then store a non-empty
# value as the ISP database path
  push (@cmds, {
    setting => 'uri_country_db_isp_path',
    is_admin => 1,
    default => undef,
    type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
    code => sub {
      my ($self, $key, $value, $line) = @_;
      warn("config: deprecated setting used, change uri_country_db_isp_path to geodb_options\n");
      return $Mail::SpamAssassin::Conf::INVALID_VALUE  if $value eq '';
      $self->{geodb}->{options}->{isp} = $value;
    }
  });
4466
4467=back
4468
4469=head1 PREPROCESSING OPTIONS
4470
4471=over 4
4472
4473=item include filename
4474
4475Include configuration lines from C<filename>.   Relative paths are considered
4476relative to the current configuration file or user preferences file.
4477
4478=item if (boolean perl expression)
4479
4480Used to support conditional interpretation of the configuration
4481file. Lines between this and a corresponding C<else> or C<endif> line
4482will be ignored unless the expression evaluates as true
4483(in the perl sense; that is, defined and non-0 and non-empty string).
4484
4485The conditional accepts a limited subset of perl for security -- just enough to
4486perform basic arithmetic comparisons.  The following input is accepted:
4487
4488=over 4
4489
4490=item numbers, whitespace, arithmetic operations and grouping
4491
4492Namely these characters and ranges:
4493
4494  ( ) - + * / _ . , < = > ! ~ 0-9 whitespace
4495
4496=item version
4497
4498This will be replaced with the version number of the currently-running
4499SpamAssassin engine.  Note: The version used is in the internal SpamAssassin
4500version format which is C<x.yyyzzz>, where x is major version, y is minor
4501version, and z is maintenance version.  So 3.0.0 is C<3.000000>, and 3.4.80
4502is C<3.004080>.
4503
4504=item perl_version
4505
4506(Introduced in 3.4.1)  This will be replaced with the version number of the
4507currently-running perl engine.  Note: The version used is in the $] version
4508format which is C<x.yyyzzz>, where x is major version, y is minor version,
4509and z is maintenance version.  So 5.8.8 is C<5.008008>, and 5.10.0 is
4510C<5.010000>. Use to protect rules that incorporate RE syntax elements
4511introduced in later versions of perl, such as the C<++> non-backtracking
4512match introduced in perl 5.10. For example:
4513
4514  # Avoid lint error on older perl installs
4515  # Check SA version first to avoid warnings on checking perl_version on older SA
4516  if version > 3.004001 && perl_version >= 5.018000
4517    body  INVALID_RE_SYNTAX_IN_PERL_BEFORE_5_18  /(?[ \p{Thai} & \p{Digit} ])/
4518  endif
4519
4520Note that the above will still generate a warning on perl older than 5.10.0;
4521to avoid that warning do this instead:
4522
4523  # Avoid lint error on older perl installs
4524  if can(Mail::SpamAssassin::Conf::perl_min_version_5010000)
4525    body  INVALID_RE_SYNTAX_IN_PERL_5_8  /\w++/
4526  endif
4527
4528Warning: a can() test is only defined for perl 5.10.0!
4529
4530
4531=item plugin(Name::Of::Plugin)
4532
4533This is a function call that returns C<1> if the plugin named
4534C<Name::Of::Plugin> is loaded, or C<undef> otherwise.
4535
4536=item has(Name::Of::Package::function_name)
4537
4538This is a function call that returns C<1> if the perl package named
4539C<Name::Of::Package> includes a function called C<function_name>, or C<undef>
4540otherwise.  Note that packages can be SpamAssassin plugins or built-in classes,
4541there's no difference in this respect.  Internally this invokes UNIVERSAL::can.
4542
4543=item can(Name::Of::Package::function_name)
4544
4545This is a function call that returns C<1> if the perl package named
4546C<Name::Of::Package> includes a function called C<function_name>
4547B<and> that function returns a true value when called with no arguments,
4548otherwise C<undef> is returned.
4549
4550Is similar to C<has>, except that it also calls the named function,
4551testing its return value (unlike the perl function UNIVERSAL::can).
4552This makes it possible for a 'feature' function to determine its result
4553value at run time.
4554
4555=back
4556
4557If the end of a configuration file is reached while still inside a
4558C<if> scope, a warning will be issued, but parsing will restart on
4559the next file.
4560
4561For example:
4562
4563	if (version > 3.000000)
4564	  header MY_FOO	...
4565	endif
4566
4567	loadplugin MyPlugin plugintest.pm
4568
4569	if plugin (MyPlugin)
4570	  header MY_PLUGIN_FOO	eval:check_for_foo()
4571	  score  MY_PLUGIN_FOO	0.1
4572	endif
4573
4574=item ifplugin PluginModuleName
4575
4576An alias for C<if plugin(PluginModuleName)>.
4577
4578=item else
4579
4580Used to support conditional interpretation of the configuration
4581file. Lines between this and a corresponding C<endif> line,
4582will be ignored unless the conditional expression evaluates as false
(in the perl sense; that is, undefined, 0, or an empty string).
4584
4585=item require_version n.nnnnnn
4586
4587Indicates that the entire file, from this line on, requires a certain
4588version of SpamAssassin to run.  If a different (older or newer) version
4589of SpamAssassin tries to read the configuration from this file, it will
4590output a warning instead, and ignore it.
4591
4592Note: The version used is in the internal SpamAssassin version format which is
4593C<x.yyyzzz>, where x is major version, y is minor version, and z is maintenance
4594version.  So 3.0.0 is C<3.000000>, and 3.4.80 is C<3.004080>.
4595
4596=cut
4597
# 'require_version' is registered with a handler that does nothing.
# NOTE(review): the version comparison described in the POD is presumably
# performed by the parser before commands are dispatched -- confirm.
  push (@cmds, {
    setting => 'require_version',
    type => $CONF_TYPE_STRING,
    code => sub { },
  });
4604
4605=back
4606
4607=head1 TEMPLATE TAGS
4608
4609The following C<tags> can be used as placeholders in certain options.
4610They will be replaced by the corresponding value when they are used.
4611
4612Some tags can take an argument (in parentheses). The argument is
4613optional, and the default is shown below.
4614
4615 _YESNO_           "Yes" for spam, "No" for nonspam (=ham)
4616 _YESNO(spam_str,ham_str)_  returns the first argument ("Yes" if missing)
4617                   for spam, and the second argument ("No" if missing) for ham
4618 _YESNOCAPS_       "YES" for spam, "NO" for nonspam (=ham)
4619 _YESNOCAPS(spam_str,ham_str)_  same as _YESNO(...)_, but uppercased
4620 _SCORE(PAD)_      message score, if PAD is included and is either spaces or
4621                   zeroes, then pad scores with that many spaces or zeroes
4622		   (default, none)  ie: _SCORE(0)_ makes 2.4 become 02.4,
4623		   _SCORE(00)_ is 002.4.  12.3 would be 12.3 and 012.3
4624		   respectively.
4625 _REQD_            message threshold
4626 _VERSION_         version (eg. 3.0.0 or 3.1.0-r26142-foo1)
4627 _SUBVERSION_      sub-version/code revision date (eg. 2004-01-10)
4628 _RULESVERSION_    comma-separated list of rules versions, retrieved from
4629                   an '# UPDATE version' comment in rules files; if there is
4630                   more than one set of rules (update channels) the order
4631                   is unspecified (currently sorted by names of files);
4632 _HOSTNAME_        hostname of the machine the mail was processed on
4633 _REMOTEHOSTNAME_  hostname of the machine the mail was sent from, only
4634                   available with spamd
4635 _REMOTEHOSTADDR_  ip address of the machine the mail was sent from, only
4636                   available with spamd
4637 _BAYES_           bayes score
4638 _TOKENSUMMARY_    number of new, neutral, spammy, and hammy tokens found
4639 _BAYESTC_         number of new tokens found
4640 _BAYESTCLEARNED_  number of seen tokens found
4641 _BAYESTCSPAMMY_   number of spammy tokens found
4642 _BAYESTCHAMMY_    number of hammy tokens found
4643 _HAMMYTOKENS(N)_  the N most significant hammy tokens (default, 5)
4644 _SPAMMYTOKENS(N)_ the N most significant spammy tokens (default, 5)
4645 _DATE_            rfc-2822 date of scan
4646 _STARS(*)_        one "*" (use any character) for each full score point
4647                   (note: limited to 50 'stars')
4648 _SENDERDOMAIN_    a domain name of the envelope sender address, lowercased
4649 _AUTHORDOMAIN_    a domain name of the author address (the From header
4650                   field), lowercased;  note that RFC 5322 allows a mail
4651                   message to have multiple authors - currently only the
4652                   domain name of the first email address is returned
4653 _RELAYSTRUSTED_   relays used and deemed to be trusted (see the
4654                   'X-Spam-Relays-Trusted' pseudo-header)
4655 _RELAYSUNTRUSTED_ relays used that can not be trusted (see the
4656                   'X-Spam-Relays-Untrusted' pseudo-header)
4657 _RELAYSINTERNAL_  relays used and deemed to be internal (see the
4658                   'X-Spam-Relays-Internal' pseudo-header)
4659 _RELAYSEXTERNAL_  relays used and deemed to be external (see the
4660                   'X-Spam-Relays-External' pseudo-header)
4661 _FIRSTTRUSTEDIP_  IP address of first trusted client (see RELAYSTRUSTED)
4662 _FIRSTTRUSTEDREVIP_  IP address of first trusted client (in reversed
4663                   format suitable for RBL queries)
4664 _LASTEXTERNALIP_  IP address of client in the external-to-internal
4665                   SMTP handover
4666 _LASTEXTERNALREVIP_  IP address of client in the external-to-internal
4667                   SMTP handover (in reversed format suitable for RBL
4668                   queries)
4669 _LASTEXTERNALRDNS_ reverse-DNS of client in the external-to-internal
4670                   SMTP handover
4671 _LASTEXTERNALHELO_ HELO string used by client in the external-to-internal
4672                   SMTP handover
4673 _AUTOLEARN_       autolearn status ("ham", "no", "spam", "disabled",
4674                   "failed", "unavailable")
4675 _AUTOLEARNSCORE_  portion of message score used by autolearn
4676 _TESTS(,)_        tests hit separated by "," (or other separator)
4677 _TESTSSCORES(,)_  as above, except with scores appended (eg. AWL=-3.0,...)
4678 _SUBTESTS(,)_     subtests (start with "__") hit separated by ","
4679                   (or other separator)
4680 _SUBTESTSCOLLAPSED(,)_ subtests (start with "__") hit separated by ","
4681                   (or other separator) with duplicated rules collapsed
4682 _DCCB_            DCC's "Brand"
4683 _DCCR_            DCC's results
4684 _PYZOR_           Pyzor results
4685 _RBL_             full results for positive RBL queries in DNS URI format
4686 _LANGUAGES_       possible languages of mail
4687 _PREVIEW_         content preview
4688 _REPORT_          terse report of tests hit (for header reports)
4689 _SUBJPREFIX_      subject prefix based on rules, to be prepended to Subject
4690                   header by SpamAssassin caller
4691 _SUMMARY_         summary of tests hit for standard report (for body reports)
4692 _CONTACTADDRESS_  contents of the 'report_contact' setting
4693 _HEADER(NAME)_    includes the value of a message header.  value is the same
4694                   as is found for header rules (see elsewhere in this doc)
4695 _TIMING_          timing breakdown report
 _ADDEDHEADERHAM_  resulting header fields as requested by add_header for ham
 _ADDEDHEADERSPAM_ resulting header fields as requested by add_header for spam
4698 _ADDEDHEADER_     same as ADDEDHEADERHAM for ham or ADDEDHEADERSPAM for spam
4699
4700If a tag reference uses the name of a tag which is not in this list or defined
4701by a loaded plugin, the reference will be left intact and not replaced by any
4702value.
4703All template tag names should be restricted to the character set [A-Za-z0-9(,)].
4704
4705Additional, plugin specific, template tags can be found in the documentation for
4706the following plugins:
4707
4708 L<Mail::SpamAssassin::Plugin::ASN>
4709 L<Mail::SpamAssassin::Plugin::AWL>
4710 L<Mail::SpamAssassin::Plugin::TxRep>
4711
4712The C<HAMMYTOKENS> and C<SPAMMYTOKENS> tags have an optional second argument
4713which specifies a format.  See the B<HAMMYTOKENS/SPAMMYTOKENS TAG FORMAT>
4714section, below, for details.
4715
4716=head2 HAMMYTOKENS/SPAMMYTOKENS TAG FORMAT
4717
4718The C<HAMMYTOKENS> and C<SPAMMYTOKENS> tags have an optional second argument
4719which specifies a format: C<_SPAMMYTOKENS(N,FMT)_>, C<_HAMMYTOKENS(N,FMT)_>
4720The following formats are available:
4721
4722=over 4
4723
4724=item short
4725
4726Only the tokens themselves are listed.
4727I<For example, preference file entry:>
4728
4729C<add_header all Spammy _SPAMMYTOKENS(2,short)_>
4730
4731I<Results in message header:>
4732
4733C<X-Spam-Spammy: remove.php, UD:jpg>
4734
4735Indicating that the top two spammy tokens found are C<remove.php>
4736and C<UD:jpg>.  (The token itself follows the last colon, the
4737text before the colon indicates something about the token.
4738C<UD> means the token looks like it might be part of a domain name.)
4739
4740=item compact
4741
4742The token probability, an abbreviated declassification distance (see
4743example), and the token are listed.
4744I<For example, preference file entry:>
4745
4746C<add_header all Spammy _SPAMMYTOKENS(2,compact)_>
4747
4748I<Results in message header:>
4749
C<X-Spam-Spammy: 0.989-6--remove.php, 0.988-+--UD:jpg>
4751
4752Indicating that the probabilities of the top two tokens are 0.989 and
47530.988, respectively.  The first token has a declassification distance
4754of 6, meaning that if the token had appeared in at least 6 more ham
4755messages it would not be considered spammy.  The C<+> for the second
4756token indicates a declassification distance greater than 9.
4757
4758=item long
4759
4760Probability, declassification distance, number of times seen in a ham
4761message, number of times seen in a spam message, age and the token are
4762listed.
4763
4764I<For example, preference file entry:>
4765
4766C<add_header all Spammy _SPAMMYTOKENS(2,long)_>
4767
4768I<Results in message header:>
4769
4770C<X-Spam-Spammy: 0.989-6--0h-4s--4d--remove.php, 0.988-33--2h-25s--1d--UD:jpg>
4771
4772In addition to the information provided by the compact option,
4773the long option shows that the first token appeared in zero
4774ham messages and four spam messages, and that it was last
4775seen four days ago.  The second token appeared in two ham messages,
477625 spam messages and was last seen one day ago.
4777(Unlike the C<compact> option, the long option shows declassification
4778distances that are greater than 9.)
4779
4780=back
4781
4782=cut
4783
4784  return \@cmds;
4785}
4786
4787###########################################################################
4788
4789# settings that were once part of core, but are now in (possibly-optional)
4790# bundled plugins. These will be warned about, but do not generate a fatal
4791# error when "spamassassin --lint" is run like a normal syntax error would.
4792
4793our @MIGRATED_SETTINGS = qw{
4794  ok_languages
4795};
4796
4797###########################################################################
4798
# Constructor: new($main).  May be invoked on a class name or on an existing
# object.  Creates the configuration object, attaches a
# Mail::SpamAssassin::Conf::Parser to it, registers the default command
# table, and then initialises the rule, list and header containers.
sub new {
  my ($proto, $main) = @_;
  my $class = ref($proto) || $proto;
  my $self = bless ({
    main => $main,
    registered_commands => [],
  }, $class);

  # parser first, then hand it the default command table
  $self->{parser} = Mail::SpamAssassin::Conf::Parser->new($self);
  $self->{parser}->register_commands($self->set_default_commands());

  $self->{errors} = 0;
  foreach my $field (qw( plugins_loaded tests test_types )) {
    $self->{$field} = { };
  }

  # four separate score tables; score set 0 is selected to begin with
  $self->{scoreset} = [ map { +{} } 1 .. 4 ];
  $self->{scoreset_current} = 0;
  $self->set_score_set (0);

  foreach my $field (qw( tflags source_file )) {
    $self->{$field} = { };
  }

  # keep descriptions in a slow but space-efficient single-string
  # data structure
  # NOTE: Deprecated usage of TieOneStringHash as of 10/2018, it's an
  # absolute pig, doubling config parsing time, while benchmarks indicate
  # no difference in resident memory size!
  $self->{descriptions} = { };
  #tie %{$self->{descriptions}}, 'Mail::SpamAssassin::Util::TieOneStringHash'
  #  or warn "tie failed";
  $self->{subjprefix} = { };

  # after parsing, tests are refiled into these hashes for each test type.
  # this allows e.g. a full-text test to be rewritten as a body test in
  # the user's user_prefs file.
  # (uri_evals is not used/implemented yet)
  foreach my $field (qw(
      body_tests uri_tests uri_evals
      head_tests head_evals
      body_evals
      full_tests full_evals
      rawbody_tests rawbody_evals
      meta_tests
      eval_plugins eval_plugins_types
  )) {
    $self->{$field} = { };
  }

  # meta dependencies
  $self->{meta_dependencies} = {};

  # map eval function names to rulenames
  $self->{eval_to_rule} = {};

  # testing stuff
  $self->{regression_tests} = { };

  foreach my $field (qw( rewrite_header want_rebuild_for_type
                         user_defined_rules )) {
    $self->{$field} = { };
  }
  foreach my $field (qw( headers_spam headers_ham bayes_ignore_headers )) {
    $self->{$field} = [ ];
  }
  foreach my $field (qw( bayes_ignore_from bayes_ignore_to )) {
    $self->{$field} = { };
  }

  # address lists keyed by sender ...
  foreach my $field (qw(
      welcomelist_auth def_welcomelist_auth
      welcomelist_from whitelist_allows_relays
      blacklist_from
      welcomelist_from_rcvd def_welcomelist_from_rcvd
  )) {
    $self->{$field} = { };
  }
  # ... and by recipient
  foreach my $field (qw( blacklist_to welcomelist_to
                         more_spam_to all_spam_to )) {
    $self->{$field} = { };
  }

  # network sets; the second argument is 1 for the first two and 0 for
  # msa_networks (no loopback IP)
  $self->{trusted_networks} = $self->new_netset('trusted_networks',1);
  $self->{internal_networks} = $self->new_netset('internal_networks',1);
  $self->{msa_networks} = $self->new_netset('msa_networks',0);
  $self->{trusted_networks_configured} = 0;
  $self->{internal_networks_configured} = 0;

  # Make sure we add in X-Spam-Checker-Version; the same array ref is
  # stored in both header lists
  my $checker_version = [ "Checker-Version",
              "SpamAssassin _VERSION_ (_SUBVERSION_) on _HOSTNAME_" ];
  foreach my $list (qw( headers_spam headers_ham )) {
    push(@{$self->{$list}}, $checker_version);
  }

  # these should potentially be settable by end-users
  # perhaps via plugin?
  $self->{num_check_received} = 9;
  $self->{bayes_expiry_pct} = 0.75;
  $self->{bayes_expiry_period} = 43200;
  $self->{bayes_expiry_max_exponent} = 9;

  $self->{encapsulated_content_description} = 'original message before SpamAssassin';

  return $self;
}
4904
# Accessor for the 'mtime' field.  When called with an argument the field
# is set to it first; the current value is returned in either case.
sub mtime {
  my ($self, @new_value) = @_;
  $self->{mtime} = $new_value[0]  if @new_value;
  return $self->{mtime};
}
4912
4913###########################################################################
4914
# Runs the parser over the caller's text with 1 as the parser's second
# argument.  The text is handed on as $_[1], i.e. without copying it.
sub parse_scores_only {
  my $parser = $_[0]->{parser};
  return $parser->parse ($_[1], 1);
}
4919
# Runs the parser over the caller's text with 0 as the parser's second
# argument.  The text is handed on as $_[1], i.e. without copying it.
sub parse_rules {
  my $parser = $_[0]->{parser};
  return $parser->parse ($_[1], 0);
}
4924
4925###########################################################################
4926
# Selects score set number $set: records it as the current set and points
# {scores} at the corresponding table in {scoreset}.
sub set_score_set {
  my ($self, $set) = @_;
  $self->{scoreset_current} = $set;
  $self->{scores} = $self->{scoreset}->[$set];
  dbg("config: score set $set chosen.");
}
4933
# Returns the number of the currently selected score set.
sub get_score_set {
  my $self = shift;
  return $self->{scoreset_current};
}
4938
# Returns the contents of @rule_types.  The invocant is not consulted.
sub get_rule_types {
  return @rule_types;
}
4943
# Returns the rule names stored for $test_type.  With $priority given, only
# the rules filed under that priority are listed; without it, the rules
# under every priority named in {priorities} are collected.
sub get_rule_keys {
  my ($self, $test_type, $priority) = @_;

  # rbl_evals have no priority level: rule names are the top-level keys
  return keys(%{$self->{$test_type}})  if $test_type eq 'rbl_evals';

  return keys(%{$self->{$test_type}->{$priority}})  if defined($priority);

  my @names;
  push(@names, keys(%{$self->{$test_type}->{$_}}))
    for keys(%{$self->{priorities}});
  return @names;
}
4963
# Looks up the stored definition of rule $rulename of type $test_type.
#
# - rbl_evals are not filed by priority and hold an array ref; its elements
#   are returned as a list.
# - otherwise the value filed under $priority is returned, or, when no
#   priority is given, the value under the first priority (in hash order)
#   that has the rule.
#
# Returns nothing (undef in scalar context) when the rule is not found.
sub get_rule_value {
  my ($self, $test_type, $rulename, $priority) = @_;

  # special case rbl_evals since they do not have a priority
  if ($test_type eq 'rbl_evals') {
    my $entry = $self->{$test_type}->{$rulename};
    # an unknown rule must not be dereferenced: @{undef} is fatal under
    # "use strict"
    return  unless defined $entry;
    return @{$entry};
  }

  if (defined($priority)) {
    return $self->{$test_type}->{$priority}->{$rulename};
  }

  foreach my $pri (keys(%{$self->{priorities}})) {
    if (exists($self->{$test_type}->{$pri}->{$rulename})) {
      return $self->{$test_type}->{$pri}->{$rulename};
    }
  }
  return;  # if we get here we didn't find the rule
}
4984
# Removes rule $rulename of type $test_type and returns the deleted value.
# With $priority given only that priority is touched; without it, the rule
# is removed from the first priority (in hash order) that has it.  Returns
# nothing if the rule is not found there.
sub delete_rule {
  my ($self, $test_type, $rulename, $priority) = @_;

  # rbl_evals have no priority level
  return delete($self->{$test_type}->{$rulename})
    if $test_type eq 'rbl_evals';

  return delete($self->{$test_type}->{$priority}->{$rulename})
    if defined($priority);

  foreach my $pri (keys(%{$self->{priorities}})) {
    next  unless exists($self->{$test_type}->{$pri}->{$rulename});
    return delete($self->{$test_type}->{$pri}->{$rulename});
  }
  return;  # the rule was not filed under any priority
}
5005
# trim_rules ($regexp)
#
# Remove all rules whose names don't match the given regexp, keeping also
# the sub-rules of meta-tests whose names match.
#
# Dies if compile_regexp() rejects the regexp, or if no rule name matches.

sub trim_rules {
  my ($self, $regexp) = @_;

  my ($rec, $err) = compile_regexp($regexp, 0);
  if (!$rec) {
    die "config: trim_rules: invalid regexp '$regexp': $err";
  }

  my @types = $self->get_rule_types();
  my @all_rules = map { $self->get_rule_keys($_) } @types;

  # Deliberately no /o on these matches: /o would interpolate and compile
  # the pattern only the first time the statement runs, so a later call
  # with a different $regexp would silently reuse the earlier pattern.
  my @rules_to_keep = grep { /$rec/ } @all_rules;

  if (@rules_to_keep == 0) {
    die "config: trim_rules: all rules excluded, nothing to test\n";
  }

  # matching meta rules drag in everything they depend on
  foreach my $meta (grep { /$rec/ } $self->get_rule_keys('meta_tests')) {
    push(@rules_to_keep, $self->add_meta_depends($meta));
  }

  my %keep = map { $_ => 1 } @rules_to_keep;

  foreach my $rule_type (@types) {
    # the key list is computed in full before any rule is deleted
    foreach my $rulekey ($self->get_rule_keys($rule_type)) {
      $self->delete_rule($rule_type, $rulekey)  unless $keep{$rulekey};
    }
  }
} # trim_rules()
5049
# add_meta_depends ($meta)
#
# Returns the names of all rules that meta test $meta depends on, directly
# or through other meta tests.  A rule reached by several paths is listed
# once per path.  Every \w+ token in the meta expression that is not purely
# numeric counts as a sub-rule.
#
# Dies if $meta refers to itself, either directly or through a chain of
# other meta tests.  The optional third argument is for internal use by the
# recursion: a hash ref of the meta tests currently being expanded.
sub add_meta_depends {
  my ($self, $meta, $in_progress) = @_;
  $in_progress ||= {};

  my @rules;
  my $expr = $self->get_rule_value('meta_tests', $meta);
  return @rules  unless defined $expr;   # not a meta test: nothing to add

  my @tokens = grep { !/^\d+$/ } $expr =~ m/(\w+)/g;
  # @tokens now only consists of sub-rules

  $in_progress->{$meta} = 1;
  foreach my $token (@tokens) {
    die "config: meta test $meta depends on itself\n" if $token eq $meta;
    # a token that is still being expanded further up the call chain closes
    # a dependency loop; recursing into it would never terminate
    die "config: meta test $token depends on itself via $meta\n"
      if $in_progress->{$token};
    push(@rules, $token);

    # If the sub-rule is a meta-test, recurse
    if ($self->get_rule_value('meta_tests', $token)) {
      push(@rules, $self->add_meta_depends($token, $in_progress));
    }
  } # foreach my $token (@tokens)
  delete $in_progress->{$meta};

  return @rules;
} # add_meta_depends()
5071
# Returns the rule's score from the current score table if the rule has a
# true definition stored for $test_type, and 0 otherwise.  The definition
# is looked up under $priority when one is given, else under each priority
# in {priorities} until one is found.
sub is_rule_active {
  my ($self, $test_type, $rulename, $priority) = @_;

  my $has_definition;
  if ($test_type eq 'rbl_evals') {
    # rbl_evals are not filed by priority
    $has_definition = $self->{$test_type}->{$rulename};
  }
  elsif (defined($priority)) {
    $has_definition = $self->{$test_type}->{$priority}->{$rulename};
  }
  else {
    # stop at the first priority that has the rule
    foreach my $pri (keys %{$self->{priorities}}) {
      next  unless $self->{$test_type}->{$pri}->{$rulename};
      $has_definition = 1;
      last;
    }
  }

  return 0 unless ($has_definition);
  return ($self->{scores}->{$rulename});
}
5101
5102###########################################################################
5103
# treats the scalar behind $bitset_ref as a bit vector with one bit per
# port number (0..65535, i.e. 8 kB) and sets every bit in the inclusive
# range $port_range_lo..$port_range_hi to $value (any true value means 1);
# the range is clipped to 0..65535
#
# an undefined bitset is first initialised to "all ports on, except
# 0..1023"; an empty-string bitset is first initialised to "all ports off"
#
sub set_ports_range {
  my($bitset_ref, $port_range_lo, $port_range_hi, $value) = @_;

  # clip the requested range to valid port numbers
  $port_range_lo = 0      if $port_range_lo < 0;
  $port_range_hi = 65535  if $port_range_hi > 65535;

  if (defined $$bitset_ref) {
    # repopulate the bitset (late configuration): start from all bits off
    wipe_ports_range($bitset_ref, 0)  if $$bitset_ref eq '';
  } else {
    # provide a sensible default: everything on ...
    wipe_ports_range($bitset_ref, 1);
    # ... but avoid 0 and privileged ports
    foreach my $port (0 .. 1023) {
      vec($$bitset_ref,$port,1) = 0;
    }
  }

  my $bit = $value ? 1 : 0;
  my $port = $port_range_lo;
  while ($port <= $port_range_hi) {
    vec($$bitset_ref,$port,1) = $bit;
    $port++;
  }
}
5122
# Overwrites the scalar behind $bitset_ref with 8192 identical bytes, which
# turns all bits 0..65535 on (true $value) or off (false $value) at once.
sub wipe_ports_range {
  my($bitset_ref, $value) = @_;
  my $fill_byte = $value ? "\377" : "\000";
  $$bitset_ref = $fill_byte x 8192;
}
5128
5129###########################################################################
5130
# Delegates to the add_to_addrlist() method of the object stored in
# $self->{parser}, passing all remaining arguments through unchanged.
sub add_to_addrlist {
  my $self = shift;
  my $parser = $self->{parser};
  return $parser->add_to_addrlist(@_);
}
# Delegates to the add_to_addrlist_rcvd() method of the object stored in
# $self->{parser}, passing all remaining arguments through unchanged.
sub add_to_addrlist_rcvd {
  my $self = shift;
  my $parser = $self->{parser};
  return $parser->add_to_addrlist_rcvd(@_);
}
# Delegates to the remove_from_addrlist() method of the object stored in
# $self->{parser}, passing all remaining arguments through unchanged.
sub remove_from_addrlist {
  my $self = shift;
  my $parser = $self->{parser};
  return $parser->remove_from_addrlist(@_);
}
# Delegates to the remove_from_addrlist_rcvd() method of the object stored
# in $self->{parser}, passing all remaining arguments through unchanged.
sub remove_from_addrlist_rcvd {
  my $self = shift;
  my $parser = $self->{parser};
  return $parser->remove_from_addrlist_rcvd(@_);
}
5143
5144###########################################################################
5145
# Accessor for the regression tests kept in $self->{regression_tests}.
#
# Called with exactly one argument (a symbolic name), returns the list of
# test strings stored under that name; a name with nothing stored yields an
# empty list.  Called with any other number of arguments, returns the
# symbolic names that have tests stored.
#
sub regression_tests {
  my $self = shift;
  if (@_ == 1) {
    # we specified a symbolic name, return the strings
    my $name = shift;
    my $tests = $self->{regression_tests}->{$name};
    # nothing may be stored under this name; fall back to an empty list
    # rather than dereferencing undef (fatal under "use strict 'refs'")
    return @{ $tests || [] };
  }
  else {
    # no name asked for, just return the symbolic names we have tests for
    return keys %{$self->{regression_tests}};
  }
}
5159
5160###########################################################################
5161
# Delegates to the finish_parsing() method of the object stored in
# $self->{parser}, handing it the $user argument.
sub finish_parsing {
  my $self = shift;
  my ($user) = @_;
  return $self->{parser}->finish_parsing($user);
}
5166
5167###########################################################################
5168
# Returns true when $self->{tests} holds at least one entry.  The answer is
# computed on the first call and cached in $self->{found_any_rules}; later
# calls return the cached value.
sub found_any_rules {
  my ($self) = @_;

  # already decided on an earlier call
  return $self->{found_any_rules}  if defined $self->{found_any_rules};

  my $rule_count = scalar keys %{$self->{tests}};
  $self->{found_any_rules} = ($rule_count > 0);
  return $self->{found_any_rules};
}
5176
5177###########################################################################
5178
# Returns the entry stored for $rule in $self->{descriptions}, or undef when
# there is none.
sub get_description_for_rule {
  my $self = shift;
  my $rule = shift;
  # as silly as it looks, localizing $1 before the lookup keeps an outer $1
  # from getting tainted by the expression or assignment below, bug 6148
  local($1);
  my $description = $self->{descriptions}->{$rule};
  return $description;
}
5187
5188###########################################################################
5189
# Returns 1 when the rule's type, as recorded in $self->{test_types}, is a
# header test or header eval, or a meta test whose tflags do not contain
# the word "net"; returns 0 in every other case, including an unknown rule.
sub maybe_header_only {
  my($self,$rulename) = @_;
  my $type = $self->{test_types}->{$rulename};

  # extra debug output for rules whose name starts with AUTOLEARNTEST
  dbg("config: auto-learn: $rulename - Test type is $self->{test_types}->{$rulename}.")
    if index($rulename, 'AUTOLEARNTEST') == 0;

  return 0  unless defined $type;

  return 1  if $type == $TYPE_HEAD_TESTS || $type == $TYPE_HEAD_EVALS;

  # anything but a meta rule is out at this point
  return 0  if $type != $TYPE_META_TESTS;

  my $tflags = $self->{tflags}->{$rulename} || '';
  return $tflags =~ /\bnet\b/ ? 0 : 1;
}
5213
# Returns 1 when the rule's type, as recorded in $self->{test_types}, is a
# body test, body eval, URI test or URI eval, or a meta test whose tflags do
# not contain the word "net"; returns 0 in every other case, including an
# unknown rule.
sub maybe_body_only {
  my($self,$rulename) = @_;
  my $type = $self->{test_types}->{$rulename};

  # extra debug output for rules whose name starts with AUTOLEARNTEST
  dbg("config: auto-learn: $rulename - Test type is $self->{test_types}->{$rulename}.")
    if index($rulename, 'AUTOLEARNTEST') == 0;

  return 0  unless defined $type;

  # some rawbody go off of headers...
  foreach my $body_type ($TYPE_BODY_TESTS, $TYPE_BODY_EVALS,
                         $TYPE_URI_TESTS, $TYPE_URI_EVALS)
  {
    return 1  if $type == $body_type;
  }

  # anything but a meta rule is out at this point
  return 0  if $type != $TYPE_META_TESTS;

  my $tflags = $self->{tflags}->{$rulename} || '';
  return $tflags =~ /\bnet\b/ ? 0 : 1;
}
5240
5241###########################################################################
5242
# Delegates to the load_plugin() method of the object stored in
# $self->{main}->{plugins}, handing over $package, $path and $silent.
sub load_plugin {
  my $self = shift;
  my ($package, $path, $silent) = @_;
  my $plugin_handler = $self->{main}->{plugins};
  return $plugin_handler->load_plugin($package, $path, $silent);
}
5247
# Records $package as loaded by setting its entry in
# $self->{plugins_loaded} to 1.  The $plugin and $path arguments are
# accepted but not used here.
sub load_plugin_succeeded {
  my $self = shift;
  my (undef, $package) = @_;
  $self->{plugins_loaded}->{$package} = 1;
}
5252
# Registers $pluginobj as the provider of the eval function $nameofsub in
# $self->{eval_plugins}, warning when an entry of that name is already
# present (it gets overwritten).  When $ruletype is given and is a key with
# a defined value in %TYPE_AS_STRING, it is also recorded in
# $self->{eval_plugins_types}; any other $ruletype is reported through the
# parser's lint_warn().
sub register_eval_rule {
  my ($self, $pluginobj, $nameofsub, $ruletype) = @_;

  warn("config: eval function '$nameofsub' already exists, overwriting\n")
    if exists $self->{eval_plugins}->{$nameofsub};

  $self->{eval_plugins}->{$nameofsub} = $pluginobj;

  if (defined $ruletype) {
    if (!defined $TYPE_AS_STRING{$ruletype}) {
      $self->{parser}->lint_warn("config: invalid ruletype for eval $nameofsub");
    } else {
      $self->{eval_plugins_types}->{$nameofsub} = $ruletype;
    }
  }
}
5267
5268###########################################################################
5269
# Copies configuration data from one configuration hash into another.
#
#   $source   hash to copy from; $self is used when undefined
#   $dest     hash to copy into; $self is used when undefined
#
# A fixed set of keys is never copied.  The network sets are duplicated
# through their own clone() method, the two-level hashes and the rule-type
# structures are copied one level down, and every remaining key is copied
# according to the kind of reference it holds.  At the end the score sets
# are rebuilt and $dest->{scores} is pointed at the current one.
# Always returns 1.
#
sub clone {
  my ($self, $source, $dest) = @_;

  $source = $self  unless defined $source;
  $dest   = $self  unless defined $dest;

  # top-level keys that need no (further) handling; only key existence
  # matters, the values stay undef
  my %done;

  # keys that should not be copied in ->clone().
  # bug 4179: include want_rebuild_for_type, so that if a user rule
  # is defined, its method will be recompiled for future scans in
  # order to *remove* the generated method calls
  my @NON_COPIED_KEYS = qw(
    main eval_plugins eval_plugins_types plugins_loaded registered_commands
    sed_path_cache parser scoreset scores want_rebuild_for_type
  );

  # special cases.  first, skip anything that cannot be changed
  # by users, and the stuff we take care of here
  @done{@NON_COPIED_KEYS} = ();

  # keys that can be copied using their own ->clone() method
  my @CLONABLE_KEYS = qw(
    internal_networks trusted_networks msa_networks
  );

  foreach my $clonable (@CLONABLE_KEYS) {
    $dest->{$clonable} = $source->{$clonable}->clone();
    $done{$clonable} = undef;
  }

  # two-level hashes
  foreach my $key (qw(uri_host_lists askdns)) {
    my $src_outer = $source->{$key};
    my $dest_outer = {};
    $dest->{$key} = $dest_outer;  # must start from scratch!
    while (my($inner_name, $src_inner) = each %{$src_outer}) {
      %{$dest_outer->{$inner_name}} = %{$src_inner};
    }
    $done{$key} = undef;
  }

  # bug 4179: be smarter about cloning the rule-type structures;
  # some are like this: $self->{type}->{priority}->{name} = 'value';
  # which is an extra level that the generic code further down won't
  # deal with
  foreach my $type (@rule_types) {
    foreach my $entry (keys %{$source->{$type}}) {
      my $value = $source->{$type}->{$entry};
      my $kind = ref $value;
      if ($kind eq 'ARRAY') {
        @{$dest->{$type}->{$entry}} = @{$value};
      }
      elsif ($kind eq 'HASH') {
        %{$dest->{$type}->{$entry}} = %{$value};
      }
      else {
        $dest->{$type}->{$entry} = $value;
      }
    }
    $done{$type} = undef;
  }

  # and now, copy over all the rest -- the less complex cases.
  while(my($k,$v) = each %{$source}) {
    if (exists $done{$k}) {
      next;   # we handled it above
    }
    $done{$k} = undef;
    my $i = ref($v);

    if ($i eq '' || $i eq 'Regexp') {
      # not a reference, or a compiled regexp: copy the value itself
      $dest->{$k} = $v;
    }
    elsif ($i eq 'HASH') {
      %{$dest->{$k}} = %{$v};
    }
    elsif ($i eq 'ARRAY') {
      @{$dest->{$k}} = @{$v};
    }
    elsif ($i eq 'SCALAR') {
      # a scalar reference: copy the value it points to
      $dest->{$k} = $$v;
    }
    else {
      # throw a warning for debugging -- should never happen in normal usage
      warn "config: dup unknown type $k, $i\n";
    }
  }

  # settings of registered commands not handled so far are copied as they
  # are; note that the command list is taken from $self, not from $source
  foreach my $cmd (@{$self->{registered_commands}}) {
    my $setting = $cmd->{setting};
    if (exists $done{$setting}) {
      next;   # we handled it above
    }
    $done{$setting} = undef;
    $dest->{$setting} = $source->{$setting};
  }

  # scoresets
  delete $dest->{scoreset};
  foreach my $set_index (0 .. 3) {
    %{$dest->{scoreset}->[$set_index]} = %{$source->{scoreset}->[$set_index]};
  }

  # deal with $conf->{scores}, it needs to be a reference into the scoreset
  # hash array dealy.  Do it at the end since scoreset_current isn't set
  # otherwise.
  my $current_set = $dest->{scoreset_current};
  $dest->{scores} = $dest->{scoreset}->[$current_set];

  # ensure we don't copy the path cache from the master
  delete $dest->{sed_path_cache};

  return 1;
}
5388
5389###########################################################################
5390
# Hook for releasing parse-time data once it is no longer needed.  The
# condition below (neither $self->{main}->{keep_config_parsing_metadata}
# nor $self->{allow_user_rules} is set) is evaluated, but the deletions it
# guards are currently commented out, so nothing is removed.
sub free_uncompiled_rule_source {
  my $self = shift;

  my $keep_metadata = $self->{main}->{keep_config_parsing_metadata};
  if (!$keep_metadata && !$self->{allow_user_rules}) {
    #delete $self->{if_stack}; # it's Parser not Conf?
    #delete $self->{source_file};
  }
}
5401
# Creates and returns a new Mail::SpamAssassin::NetSet object named
# $netset_name.  When $add_loopback is true, the loopback ranges
# 127.0.0.0/8 and ::1 are added to the set first.
sub new_netset {
  my ($self, $netset_name, $add_loopback) = @_;

  my $netset = Mail::SpamAssassin::NetSet->new($netset_name);
  return $netset  unless $add_loopback;

  my @loopback_cidrs = ('127.0.0.0/8', '::1');
  foreach my $cidr (@loopback_cidrs) {
    $netset->add_cidr($cidr);
  }
  return $netset;
}
5411
5412###########################################################################
5413
# Empties the configuration object's hash in place, dropping everything it
# holds.
sub finish {
  my $self = shift;
  #untie %{$self->{descriptions}};
  %$self = ();
}
5419
5420###########################################################################
5421
# Forwards all arguments to Mail::SpamAssassin::sa_die().
sub sa_die {
  return Mail::SpamAssassin::sa_die(@_);
}
5423
5424###########################################################################
5425
# Capability markers.  Each subroutine below simply returns a true value so
# that configuration files can conditionalize rules on its existence, for
# example:
#   if (can(Mail::SpamAssassin::Conf::feature_originating_ip_headers))

sub feature_originating_ip_headers { return 1 }
sub feature_dns_local_ports_permit_avoid { return 1 }
sub feature_bayes_auto_learn_on_error { return 1 }
sub feature_uri_host_listed { return 1 }
sub feature_yesno_takes_args { return 1 }
sub feature_bug6558_free { return 1 }

# supports 'dns_options edns' config option
sub feature_edns { return 1 }

# supported config option
sub feature_dns_query_restriction { return 1 }

# replaces deprecated registrarboundaries
sub feature_registryboundaries { return 1 }

# if needed for some reason
sub feature_geodb { return 1 }

# supports 'dns_block_rule' config option
sub feature_dns_block_rule { return 1 }

# Util::compile_regexp
sub feature_compile_regexp { return 1 }

# meta rules_matching() expression
sub feature_meta_rules_matching { return 1 }

# add subject prefixes rule option
sub feature_subjprefix { return 1 }

# multi language stopwords in Bayes
sub feature_bayes_stopwords { return 1 }

# $pms->get() :host :domain :ip :revip
# was implemented together with AskDNS::has_tag_header, Bug 7734
sub feature_get_host { return 1 }

# bz 7826
sub feature_blocklist_welcomelist { return 1 }

# improved header address parsing using Email::Address::XS,
# $pms->get() list context
sub feature_header_address_parser { return 1 }

# Config parser supports "if (local_tests_only)"
sub feature_local_tests_only { return 1 }

# tflags nosubject
sub has_tflags_nosubject { return 1 }

# tflags nolog
sub has_tflags_nolog { return 1 }

# perl version check ("perl_version" not neatly backwards-compatible)
sub perl_min_version_5010000 { return $] >= 5.010000 }
5451
5452###########################################################################
5453
54541;
5455__END__
5456
5457=head1 LOCALISATION
5458
5459A line starting with the text C<lang xx> will only be interpreted if
5460SpamAssassin is running in that locale, allowing test descriptions and
5461templates to be set for that language.
5462
The current locale is determined from the LANGUAGE, LC_ALL, LC_MESSAGES or
LANG environment variables; the first one found is used.
5465
The locale string should specify either both the language and the country,
e.g. C<lang pt_BR>, or just the language, e.g. C<lang de>.
5468
5469Example:
5470
5471 lang de describe EXAMPLE_RULE Beispielregel
5472
5473=head1 SEE ALSO
5474
5475Mail::SpamAssassin(3)
5476spamassassin(1)
5477spamd(1)
5478
5479=cut
5480