1;# $Id$ 2;# 3;# Copyright (c) 1990-2006, Raphael Manfredi 4;# 5;# You may redistribute only under the terms of the Artistic License, 6;# as specified in the README file that comes with the distribution. 7;# You may reuse parts of this distribution only within the terms of 8;# that same Artistic License; a copy of which may be found at the root 9;# of the source tree for mailagent 3.0. 10;# 11;# $Log: matching.pl,v $ 12;# Revision 3.0.1.5 2001/03/17 18:12:50 ram 13;# patch72: fixed longstanding lie in man; "To: gue@eiffel.fr" now works 14;# 15;# Revision 3.0.1.4 1999/07/12 13:52:50 ram 16;# patch66: specialized <3> to mean <3,3> in mrange() 17;# 18;# Revision 3.0.1.3 1996/12/24 14:56:12 ram 19;# patch45: new Envelope and Relayed selectors 20;# patch45: protect all un-escaped @ in patterns, for perl5 21;# 22;# Revision 3.0.1.2 1994/07/01 15:02:33 ram 23;# patch8: allow macro substitution on patterns if rulemac is ON 24;# 25;# Revision 3.0.1.1 1994/04/25 15:17:49 ram 26;# patch7: fixed selector combination logic and added some debug logs 27;# 28;# Revision 3.0 1993/11/29 13:49:00 ram 29;# Baseline for mailagent 3.0 netwide release. 30;# 31;# 32# 33# Matching functions 34# 35 36# List of special header selector, for which a pattern without / is to be 37# taken as an equality with the login name of the address. If there are some 38# metacharacters, then a match will be attempted on that name. For each of 39# those special headers, we record the name of the subroutine to be called. 40# If a matching function is not specified, the default is 'match_var'. 41# The %Amatcher gives the name of the fields which contains an address. 42sub init_matcher { 43 %Matcher = ( 44 'Envelope', 'match_single', 45 'From', 'match_single', 46 'To', 'match_list', 47 'Cc', 'match_list', 48 'Apparently-To', 'match_list', 49 'Newsgroups', 'match_list', 50 'Sender', 'match_single', 51 'Resent-From', 'match_single', 52 'Resent-To', 'match_list', 53 'Resent-Cc', 'match_list', 54 'Resent-Sender', 'match_single', 55 'Reply-To', 'match_single', 56 'Relayed', 'match_list', 57 ); 58 %Amatcher = ( 59 'From', 1, 60 'Envelope', 1, 61 'To', 1, 62 'Cc', 1, 63 'Apparently-To', 1, 64 'Sender', 1, 65 'Resent-From', 1, 66 'Resent-To', 1, 67 'Resent-Cc', 1, 68 'Resent-Sender', 1, 69 'Reply-To', 1, 70 ); 71} 72 73# Transform a shell-style pattern into a perl pattern 74sub perl_pattern { 75 local($_) = @_; # The shell pattern 76 s/\./\\./g; # Escape . 77 s/\*/.*/g; # Transform * into .* 78 s/\?/./g; # Transform ? into . 79 $_; # Perl pattern 80} 81 82# Take a pattern as written in the rule file and make it suitable for 83# pattern matching as understood by perl. Unless the pattern starts with a 84# leading / or is of the form m||, it is enclosed within slashes. 85# We also enclose the whole pattern within (). 86sub make_pattern { 87 local($_) = shift(@_); 88 # The whole pattern is inserted within () to make at least one 89 # backreference. Otherwise, the following could happen: 90 # $_ = '1 for you'; 91 # @matched = /^\d/; 92 # @matched = /^(\d)/; 93 # In both cases, the @matched array is set to ('1'), with no way to 94 # determine whether it is due to a backreference (2nd case) or a sucessful 95 # match. Knowing we have at least one bracketed reference is enough to 96 # disambiguate. 97 if (/^m(\W)(.*)\1(\w*)$/) { 98 $_ = "m$1($2)$1$3"; 99 } elsif (m|^/(.*)/(\w*)$|) { 100 $_ = "/($1)/$2"; 101 } else { 102 # Pattern does not start with a / or is not of the form m|xxx| 103 $_ = &perl_pattern($_); # Simple words specified via shell patterns 104 $_ = "/^($_)\$/"; # Anchor pattern 105 } 106 $_; # Pattern suitable for eval'ed matching 107} 108 109# ### Main matching entry point ### 110# ### (called from &apply_rules in pl/analyze.pl) 111# Attempt a match of a set of pattern, for each possible selector. The selector 112# string given can contain multiple selectors separated by white spaces. 113sub match { 114 local($selector) = shift(@_); # The selector on which pattern applies 115 local($pattern) = shift(@_); # The pattern or script to apply 116 local($range) = shift(@_); # The range on which pattern applies 117 local($matched) = 0; # Matching status returned 118 # If the pattern is held within double quotes, it is assumed to be the name 119 # of a file from which patterns may be found (one per line, shell comments 120 # being ignored). 121 if ($pattern !~ /^"/) { 122 $matched = &apply_match($selector, $pattern, $range); 123 } else { 124 # Load patterns from file whose name is given between "quotes" 125 # All un-escaped @ in patterns are escaped for perl5. 126 local(@filepat) = &include_file($pattern, 'pattern'); 127 grep(s/([^\\](\\\\)*)@/$1\\@/g && undef, @filepat); 128 # Now do the match for all the patterns. Stop as soon as one matches. 129 foreach (@filepat) { 130 $matched = &apply_match($selector, $_, $range); 131 last if $matched; 132 } 133 } 134 $matched ? 1 : 0; # Return matching status (guaranteed numeric) 135} 136 137# Attempt a pattern match on a set of selectors, and set the special macro %& 138# to the name of the regexp-specified fields which matched. 139sub apply_match { 140 local($selector) = shift(@_); # The selector on which pattern applies 141 local($pattern) = shift(@_); # The pattern or script to apply 142 local($range) = shift(@_); # The range on which pattern applies 143 local($matched) = 0; # True when a matching occurred 144 local($inverted) = 0; # True whenever all '!' match succeeded 145 local($invert) = 1; # Set to false whenever a '!' match fails 146 local($match); # Matching status reported 147 local($not) = ''; # Shall we negate matching status? 148 if ($selector eq 'script') { # Pseudo header selector 149 $matched = &evaluate(*pattern); 150 } else { # True header selector 151 152 # There can be multiple selectors separated by white spaces. As soon as 153 # one of them matches, we stop and return true. A selector may contain 154 # metacharacters, in which case a regular pattern matching is attempted 155 # on the true *header* fields (i.e. we skip the pseudo keys like Body, 156 # Head, etc..). For instance, Return.* would attempt a match on the 157 # field Return-Receipt-To:, if present. The special macro %& is set 158 # to the list of all the fields on which the match succeeded 159 # (alphabetically sorted). 160 161 foreach $select (split(/ /, $selector)) { 162 $not = ''; 163 $select =~ s/^!// && ($not = '!'); 164 # Allowed metacharacters are listed here (no braces wanted) 165 if ($select =~ /\.|\*|\[|\]|\||\\|\^|\?|\+|\(|\)/) { 166 $match = &expr_selector_match($select, $pattern, $range); 167 } else { 168 $match = &selector_match($select, $pattern, $range); 169 } 170 if ($not) { # Negated test 171 $invert = !$match if $invert; # '!' tests AND'ed 172 $inverted = $invert; # Meaningful from now on 173 } else { 174 $matched = $match; # Normal tests OR'ed 175 } 176 last if $matched; # Stop when matching status known 177 } 178 } 179 $matched = $matched || $inverted; 180 if ($loglvl > 19) { 181 local($logmsg) = "applied '$pattern' on '$selector' ($range) was "; 182 $logmsg .= $matched ? "true" : "false"; 183 &add_log($logmsg); 184 } 185 $matched; # Return matching status 186} 187 188# Attempt a pattern match on a set of selectors, and set the special macro %& 189# to the name of the field which matched. If there is more than one such 190# selector, values are separated using comas. If selector is preceded by a '!', 191# then the matching status is negated and *all* the tested fields are recorded 192# within %& when the returned status is 'true'. 193sub expr_selector_match { 194 local($selector) = shift(@_); # The selector on which pattern applies 195 local($pattern) = shift(@_); # The pattern or script to apply 196 local($range) = shift(@_); # The range on which pattern applies 197 local($matched) = 0; # True when a matching occurred 198 local(@keys) = sort keys %Header; 199 local($match); # Local matching status 200 local($not) = ''; # Shall boolean value be negated? 201 local($orig_ampersand) = $macro_ampersand; # Save %& 202 $selector =~ s/^!// && ($not = '!'); 203 &add_log("field '$selector' has metacharacters") if $loglvl > 18; 204 field: foreach $key (@keys) { 205 next if $Pseudokey{$key}; # Skip Body, All... 206 &add_log("'$select' tried on '$key'") if $loglvl > 19; 207 next unless eval '$key =~ /' . $select . '/'; 208 $match = &selector_match($key, $pattern, $range); 209 $matched = 1 if $match; # Only one match needed 210 # Record matching field for futher reference if a match occurred and 211 # the selector does not start with a '!'. Record all the tested fields 212 # if's starting with a '!' (because that's what is interesting in that 213 # case). In that last case, the original macro will be restored if any 214 # match occurs. 215 if ($not || $match) { 216 $macro_ampersand .= ',' if $macro_ampersand; 217 $macro_ampersand =~ s/;,$/;/; 218 $macro_ampersand .= $key; 219 } 220 if ($match) { 221 &add_log("obtained match with '$key' field") 222 if $loglvl > 18; 223 next field; # Try all the matching selectors 224 } 225 &add_log("no match with '$key' field") if $loglvl > 18; 226 } 227 $macro_ampersand .= ';'; # Set terminated with a ';' 228 # No need to negate status if selector was preceded by a '!': this will 229 # be done by apply match. 230 $macro_ampersand = $orig_ampersand if $not && $matched; # Restore %& 231 &add_log("matching status for '$selector' ($range) is '$matched'") 232 if $loglvl > 18; 233 $matched; # Return matching status 234} 235 236# Attempt a match of a pattern against a selector, return boolean status. 237# If pattern is preceded by a '!', the boolean status is negated. 238# If the 'rulemac' configuration variable is set to ON, a macro substitution 239# is performed on the search pattern. 240sub selector_match { 241 local($selector) = shift(@_); # The selector on which pattern applies 242 local($pattern) = shift(@_); # The pattern to apply 243 local($range) = shift(@_); # The range on which pattern applies 244 local($matcher); # Subroutine used to do the match 245 local($matched); # Record matching status 246 local($not) = ''; # Shall we apply NOT on matching result? 247 $selector = &header'normalize($selector); # Normalize case 248 $matcher = $Matcher{$selector}; 249 $matcher = 'match_var' unless $matcher; 250 $pattern =~ s/^!// && ($not = '!'); 251 ¯os_subst(*pattern) if $cf'rulemac =~ /on/i; # Macro substitution 252 $matched = &$matcher($selector, $pattern, $range); 253 $matched = !$matched if $not; # Revert matching status if ! pattern 254 if ($loglvl > 19) { 255 local($logmsg) = "matching '$not$pattern' on '$selector' ($range) was "; 256 $logmsg .= $matched ? "true" : "false"; 257 &add_log($logmsg); 258 } 259 $matched; # Return matching status 260} 261 262# Pattern matching functions: 263# They are invoked as function($selector, $pattern, $range) and return true 264# if the pattern is found in the variable, according to some internal rules 265# which are different among the functions. For instance, match_single will 266# attempt a match with a login name or a regular pattern matching on the 267# whole variable if the pattern was not a single word. 268 269# Matching is done in a header which only contains an internet address. The 270# $range parameter is ignored (does not make any sense here). An optional 4th 271# parameter may be supplied to specify the matching buffer. If absent, the 272# corresponding header line is used -- this feature is used by &match_list. 273sub match_single { 274 local($selector, $pattern, $range, $buffer) = @_; 275 local($login) = 0; # Set to true when attempting login match 276 local(@matched); 277 unless (defined $buffer) { # No buffer for matching was supplied 278 $buffer = $Header{$selector}; 279 } 280 # 281 # If we attempt a match on a field holding e-mail addresses and the pattern 282 # is anchored at the beginning with a /^, then we only keep the address 283 # part and remove the comment if any. 284 # 285 # If the field holds a full e-mail address and only that, we automatically 286 # select the address part of the field for matching. -- RAM, 17/03/2001 287 # 288 # Otherwise, the field is left alone. 289 # 290 # If the pattern is only a single name, we extract the login name for 291 # matching purposes... 292 # 293 if ($Amatcher{$selector}) { # Field holds an e-mail address 294 if ( 295 $pattern =~ m|^/\^| || 296 $pattern =~ m|^[-\w.*?]+(\\\@[-\w.*?]+)?\s*$| 297 ) { 298 $buffer = (&parse_address($buffer))[0]; 299 &add_log("matching buffer reduced to '$buffer'") if $loglvl > 18; 300 } 301 if ($pattern =~ m|^[-\w.*?]+\s*$|) { # Single name may have - or . 302 $buffer = &login_name($buffer); # Match done only on login name 303 $pattern =~ tr/A-Z/a-z/; # Cannonicalize name to lower case 304 } 305 $login = 1 unless $pattern =~ m|^/|; # Ask for case-insensitive match 306 } 307 $buffer =~ s/^\s+//; # Remove leading spaces 308 $buffer =~ s/\s+$//; # And trailing ones 309 $pattern = &make_pattern($pattern); 310 $pattern .= "i" if $login; # Login matches are case-insensitive 311 @matched = eval '($buffer =~ ' . $pattern . ');'; 312 # If buffer is empty, we have to recheck the pattern in a non array context 313 # to see if there is a match. Otherwise, /(.*)/ does not seem to match an 314 # empty string as it returns an empty string in $matched[0]... 315 $matched[0] = eval '$buffer =~ ' . $pattern if $buffer eq ''; 316 &eval_error; # Make sure eval worked 317 &update_backref(*matched); # Record non-null backreferences 318 $matched[0]; # Return matching status 319} 320 321# Matching is done on a header field which may contains multiple addresses 322# This will not work if there is a ',' in the comment part of the addresses, 323# but I never saw that and I don't want to write complex code for that--RAM. 324# If a range is specified, then only the items specified by the range are 325# actually used. 326sub match_list { 327 local($selector, $pattern, $range) = @_; 328 local($_) = $Header{$selector}; # Work on a copy of the line 329 tr/\n/ /; # Make one big happy line 330 local(@list) = split(/,/); # List of addresses 331 local($min, $max) = &mrange($range, scalar(@list)); 332 return 0 unless $min; # No matching possible if null range 333 local($buffer); # Buffer on which pattern matching is done 334 local($matched) = 0; # Set to true when matching has occurred 335 @list = @list[$min - 1 .. ($max > $#list ? $#list : $max - 1)] 336 if $min != 1 || $max != 9_999_999; 337 foreach $buffer (@list) { 338 # Call match_single to perform the actual match and supply the matching 339 # buffer as the last argument. Note that since range does not make 340 # any sense for single matches, undef is passed on instead. 341 $matched = &match_single($selector, $pattern, undef, $buffer); 342 last if $matched; 343 } 344 $matched; 345} 346 347# Look for a pattern in a multi-line context 348sub match_var { 349 local($selector, $pattern, $range) = @_; 350 local($lines) = 0; # Number of lines in matching buffer 351 my $target = \$Header{$selector}; 352 # Need to special-case Body to use the *decoded* version 353 $target = $Header{'=Body='} if $selector eq 'Body'; 354 if ($range ne '<1,->') { # Optimize: count lines only if needed 355 $lines = $$target =~ tr/\n/\n/; 356 } 357 local($min, $max) = &mrange($range, $lines); 358 return 0 unless $min; # No matching possible if null range 359 my $buffer; # Buffer on which matching is attempted 360 local(@buffer); # Same, whith range line selected 361 local(@matched); 362 $pattern = &make_pattern($pattern); 363 # Optimize, since range selection is the exception and not the rule. 364 # Most likely, we use the default selection, i.e. we take everything... 365 if ($min != 1 || $max != 9_999_999) { 366 @buffer = split(/\n/, $$target); 367 @buffer = @buffer[$min - 1 .. ($max > $#buffer ? $#buffer : $max - 1)]; 368 $buffer = join("\n", @buffer); # Keep only selected lines 369 undef @buffer; # May be big, so free ASAP 370 $target = \$buffer; 371 } 372 # Ensure multi-line matching by adding trailing "m" option to pattern 373 @matched = eval '($$target =~ ' . $pattern . 'm);'; 374 # If buffer is empty, we have to recheck the pattern in a non array context 375 # to see if there is a match. Otherwise, /(.*)/ does not seem to match an 376 # empty string as it returns an empty string in $matched[0]... 377 $matched[0] = eval '$$target =~ ' . $pattern . 'm' unless length $$target; 378 &eval_error; # Make sure eval worked 379 &update_backref(*matched); # Record non-null backreferences 380 $matched[0]; # Return matching status 381} 382 383# 384# Backreference handling 385# 386 387# Reseet the backreferences at the beginning of each rule match attempt 388# The backreferences include %& and %1 .. %99. 389sub reset_backref { 390 $macro_ampersand = ''; # List of matched generic selector 391 @Backref = (); # Stores backreferences provided by perl 392} 393 394# Update the backward reference array. There is a maximum of 99 backreferences 395# per filter rule. The argument list is an array of all the backreferences 396# found in the pattern matching, but the first item has to be skipped: it is 397# the whole matching string -- see comment on make_pattern(). 398sub update_backref { 399 local(*array) = @_; # Array holding $1 .. $9, $10 .. 400 local($i, $val); 401 for ($i = 1; $i < @array; $i++) { 402 $val = $array[$i]; 403 push(@Backref, $val); # Stack backreference for later perusal 404 &add_log("stacked '$val' as backreference") if $loglvl > 18; 405 } 406} 407 408# 409# Range interpolation 410# 411 412# Return minimum and maximum for range value. A range is specified as <min,max> 413# but '-' may be used as min for 1 and max as a symbolic constant for the 414# maximum value. An arbitrarily large number is returned in that case. If a 415# negative value is used, it is added to the number of items and rounded towards 416# 1 if still negative. That way, it is possible to request the last 10 items. 417# As a special case, <3> stands for <3,3> and thus <-> means everything. 418sub mrange { 419 local($range, $items) = @_; 420 local($min, $max) = (1, 9_999_999); 421 local($rmin, $rmax); 422 $rmin = $rmax = $1 if $range =~ /<\s*([\d-]+)\s*>/; 423 ($rmin, $rmax) = $range =~ /<\s*([\d-]*)\s*,\s*([\d-]*)\s*>/ 424 unless defined $rmin; 425 $rmin = $min if $rmin eq '' || $rmin eq '-'; 426 $rmax = $max if $rmax eq '' || $rmax eq '-'; 427 $rmin = $rmin + $items + 1 if $rmin < 0; 428 $rmax = $rmax + $items + 1 if $rmax < 0; 429 $rmin = 1 if $rmin < 0; 430 $rmax = 1 if $rmax < 0; 431 ($rmin, $rmax) = (0, 0) if $rmin > $rmax; # Null range if min > max 432 return ($rmin, $rmax); 433} 434 435