1;# $Id$ 2;# 3;# Copyright (c) 1990-2006, Raphael Manfredi 4;# 5;# You may redistribute only under the terms of the Artistic License, 6;# as specified in the README file that comes with the distribution. 7;# You may reuse parts of this distribution only within the terms of 8;# that same Artistic License; a copy of which may be found at the root 9;# of the source tree for mailagent 3.0. 10;# 11;# $Log: analyze.pl,v $ 12;# Revision 3.0.1.9 1999/07/12 13:49:39 ram 13;# patch66: moved localization of the %Variable hash for APPLY 14;# 15;# Revision 3.0.1.8 1997/09/15 15:13:15 ram 16;# patch57: $lastcmd now global from analyze_mail() for BACK processing 17;# patch57: indication of relaying hosts now selectively emitted 18;# 19;# Revision 3.0.1.7 1997/01/31 18:07:47 ram 20;# patch54: esacape metacharacter '{' in regexps for perl5.003_20 21;# 22;# Revision 3.0.1.6 1996/12/24 14:47:17 ram 23;# patch45: forgot to return 0 at the end of special_user() 24;# 25;# Revision 3.0.1.5 1995/01/03 18:06:33 ram 26;# patch24: now makes use of rule environment vars from the env package 27;# patch24: removed old broken umask handling (now a part of rule env) 28;# 29;# Revision 3.0.1.4 1994/09/22 14:09:03 ram 30;# patch12: defines new folder_saved variable to store folder path 31;# 32;# Revision 3.0.1.3 1994/07/01 14:59:58 ram 33;# patch8: general umask is now reset before analyzing a message 34;# patch8: added support for the UMASK command for local rule scope 35;# patch8: now parses the new tome config variable for vacation messages 36;# patch8: disable vacation message if Illegal-Object or Illegal-Field header 37;# 38;# Revision 3.0.1.2 1994/04/25 15:17:24 ram 39;# patch7: fixed selector combination logic and added some debug logs 40;# 41;# Revision 3.0.1.1 1994/01/26 09:30:23 ram 42;# patch5: now understands new -F option to force processing 43;# 44;# Revision 3.0 1993/11/29 13:48:35 ram 45;# Baseline for mailagent 3.0 netwide release. 46;# 47;# 48# 49# Analyzing mail 50# 51 52# Special users. Note that as login name matches are done in a case-insensitive 53# manner, there is no need to upper-case any of the followings. 54sub init_special { 55 %Special = ( 56 'root', 1, # Super-user 57 'uucp', 1, # Unix to Unix copy 58 'daemon', 1, # Not a real user, hopefully 59 'news', 1, # News daemon 60 'postmaster', 1, # X-400 mailer-daemon name 61 'newsmaster', 1, # My convention for news administrator--RAM 62 'usenet', 1, # Aka newsmaster 63 'mailer-daemon', 1, # Sendmail 64 'mailer-agent', 1, # NeXT mailer 65 'nobody', 1 # Nobody we've heard of 66 ); 67} 68 69# Compute shorthand file name for logging based on the processed file 70sub mail_logname { 71 my ($file) = @_; 72 my ($mfile) = $file =~ m|.*/(.*)|; # Basename of mail file 73 $mfile = $file unless $mfile; # There was no / in name 74 $mfile = '<stdin>' unless $mfile; # No $file_name if from STDIN 75 return $mfile; 76} 77 78# Compute file size for logging, if possible (i.e. not reading from STDIN) 79sub mail_logsize { 80 my ($file) = @_; 81 return "" unless length $file; 82 my $msize = (stat($file))[7]; 83 my $size = ""; 84 my $s = $msize == 1 ? "" : "s"; 85 $size = " $msize byte$s" if defined $msize; 86 return $size; 87} 88 89# Parse mail message and apply the filtering rules on it 90sub analyze_mail { 91 local($file) = shift(@_); # Mail file to be parsed 92 local($mode) = 'INITIAL'; # Initial working mode 93 local($wmode) = $mode; # Needed for statistics routines 94 local(%Variable); # User-defined variables, visible through APPLY 95 96 # Set-up proper environment. Dynamic scoping is used on those variables 97 # for the APPLY command (see the &apply function). Note that the $wmode 98 # variable is passed to &apply_rules but is local to that function, 99 # meaning there is no feedback of the working mode when using APPLY. 100 # However, the variables listed below may be probed upon return since they 101 # are external to &apply_rules. 102 local($ever_matched) = 0; # Did we ever matched a single saving rule ? 103 local($ever_saved) = 0; # Did we ever saved a message ? 104 local($folder_saved) = ''; # Last folder we saved into (full path) 105 106 # Other local variables used only in this function 107 local($ever_seen) = 0; # Did we ever enter seen mode ? 108 local($header); # Header entry name to look for in Header table 109 110 # Reset environment and umask before each new mail processing 111 &env'setup; 112 umask($env'umask); 113 114 # Log start of processing 115 my $mfile = mail_logname($file); 116 my $msize = mail_logsize($file); 117 add_log("-- HANDLING [$mfile]$msize --") if $loglvl > 8; 118 119 # Parse the mail message in file 120 &parse_mail($file); # Parse the mail and fill-in H tables 121 return 1 unless defined $Header{'All'}; # Mail not parsed correctly 122 &reception if $loglvl > 8; # Log mail reception 123 &run_builtins; # Execute builtins, if any 124 125 # Now analyze the mail. If there is already a X-Filter header, then the 126 # mail has already been processed. In that case, the default action is 127 # performed: leave it in the incomming mailbox with no further action. 128 # This should prevent nasty loops. 129 130 &add_log ("analyzing mail") if $loglvl > 18; 131 $header = $Header{'X-Filter'}; # Mulitple occurences possible 132 if ($header ne '') { # Hmm... already filtered... 133 local(@filter) = split(/\n/, $header); # Look for each X-Filter 134 local($address) = &email_addr; # Our e-mail address 135 local($done) = 0; # Already processed ? 136 local($_); 137 foreach (@filter) { # Maybe we'll find ourselves 138 if (/mailagent.*for (\S+)/) { # Mark left by us ? 139 $done = 1 if $1 eq $address; # Yes, we did that 140 # Remove that X-Filter line, LEAVE will add one anyway 141 $Header{'Head'} =~ s/^X-Filter:\s*mailagent.*for $address\n//m; 142 last; 143 } 144 } 145 if ($done) { # We already processed that message 146 if ($force_seen) { # They used the -F option 147 &add_log("NOTICE already filtered, processing anyway") 148 if $loglvl > 5; 149 } else { 150 &add_log("NOTICE already filtered, entering seen mode") 151 if $loglvl > 5; 152 $mode = '_SEEN_'; # This is a special mode 153 } 154 $ever_seen = 1; # This will prevent vacation messages 155 &s_seen; # Update statistics 156 } 157 } 158 159 local($lastcmd) = 0; # Failure status from last command 160 &apply_rules($mode, 1); # Now apply the filtering rules on it. 161 162 # Deal with vacation mode. It applies only on mail not previously seen. 163 # The vacation mode must be turned on in the configuration file. The 164 # conditions for a vacation message to be sent are: 165 # - Message was directly sent to the user. 166 # - Message does not come from a special user like root. 167 # - Vacation message was not disabled via a VACATION command 168 # Note that we use the environment set-up by the last rule we processed. 169 170 if (!$ever_seen && $cf'vacation =~ /on/i && $env'vacation) { 171 unless (&special_user) { # Not from special user and sent to me 172 # Send vacation message only once per address per period 173 &xeqte("ONCE (%r,vacation,$env'vacperiod) MESSAGE $env'vacfile"); 174 &s_vacation; # Message received while in vacation 175 } 176 } 177 178 # Default action if no rule ever matched. Statistics routines will use 179 # our own local $wmode variable. 180 181 unless ($ever_matched) { 182 &add_log("NOTICE no match, leaving in mailbox") if $loglvl > 5; 183 &xeqte("LEAVE"); # Default action anyway 184 &s_default; # One more application of default rule 185 } else { 186 unless ($ever_saved) { 187 &add_log("NOTICE not saved, leaving in mailbox") if $loglvl > 5; 188 &xeqte("LEAVE"); # Leave if message not saved 189 &s_saved; # Message saved by default rule 190 } 191 } 192 &s_filtered($Header{'Length'}); # Update statistics 193 194 &env'cleanup; # Clean-up the environment 195 0; # Ok status 196} 197 198# This is the heart of the mail agent -- Apply the filtering rules 199sub apply_rules { 200 local($wmode, $stats)= @_; # Working mode (the mode we start in) 201 local($mode); # Mode (optional) 202 local($selector); # Selector (mandatory) 203 local($range); # Range for selection (optional) 204 local($rulentry); # Entry in rule H table 205 local($pattern); # Pattern for selection, as written in rules 206 local($action); # Related action 207 local($last_selector); # Last used selector 208 local($rules); # A copy of the rules 209 local($matched); # Flag set to true if a rule is matched 210 local(%Matched); # Records the selectors which have been matched 211 local($status); # Status returned by xeqte 212 local(@Executed); # Records already executed rules 213 local($selist); # Key used to detect identical selector lists 214 local(%Inverted); # Records inverted '!' selectors which matched 215 216 # The @Executed array records whether a specified action for a rule was 217 # executed. Loops are possible via the RESTART action, and as there is 218 # almost no way to exit from such a loop (there is one with FEED and RESYNC) 219 # I decided to prohibit them. Hence a given action is allowed to be executed 220 # only once during a mail analysis (modulo each possible working mode). 221 # For a rule number n, $Executed[n] is a collection of modes in which the 222 # rule was executed, comma separated. 223 224 $Executed[$#Rules] = ''; # Pre-extend array 225 226 # Order wrt the one in the rule file is guaranteed. I use a for construct 227 # with indexed access to be able to restart from the beginning upon 228 # execution of RESTART. This also helps filling in the @Executed array. 229 230 local($i, $j); # Indices within rule array 231 232 rule: for ($i = 0; $i <= $#Rules; $i++) { 233 $j = $i + 1; 234 $_ = $Rules[$i]; 235 236 # The %Matched array records the boolean value associated with each 237 # possible selector. If two identical selector are found, the values 238 # are OR'ed (and we stop evaluating as soon as one is true). Otherwise, 239 # the values are AND'ed (for different selectors, but all are evaluated 240 # in case we later find another identical selectors -- no sort is done). 241 # The %Inverted which records '!' selector matches has all the above 242 # rules inverted according to De Morgan's Law. 243 244 undef %Matched; # Reset matching patterns 245 undef %Inverted; # Reset negated patterns 246 $rules = $_; # Work on a copy 247 $rules =~ s/^([^{]*)\{// && ($mode = $1); # First word is the mode 248 $rules =~ s/\s*(.*)\}// && ($action = $1); # Followed by action } 249 $mode =~ s/\s*$//; # Remove trailing spaces 250 $rules =~ s/^\s+//; # Remove leading spaces 251 $last_selector = ""; # Last selector used 252 253 # Make sure we are in the correct mode. The $mode variable holds a 254 # list of comma-separated modes. If the working mode is found in it 255 # then the rules apply. Otherwise, skip them. 256 257 next rule unless &right_mode; # Skip rule if not in right mode 258 259 # Now loop over all the keys and apply the patterns in turn 260 261 &reset_backref; # Reset backreferences 262 foreach $key (split(/ /, $rules)) { 263 $rulentry = $Rule{$key}; 264 $rulentry =~ s/^\s*([^\/]*:)// && ($selector = $1); 265 $rulentry =~ s/^\s*//; 266 $pattern = $rulentry; 267 if ($last_selector ne $selector) { # Update last selector 268 $last_selector = $selector; 269 } 270 $selector =~ s/:$//; # Remove final ':' on selector 271 $range = '<1,->'; # Default range 272 $selector =~ s/\s*(<[\d\s,-]+>)$// && ($range = $1); 273 274 &add_log ("selector '$selector' on '$range', pattern '$pattern'") 275 if $loglvl > 19; 276 277 # Identical (lists of) selectors are logically OR'ed. To make sure 278 # 'To Cc:' and 'Cc To:' are correctly OR'ed, the selector list is 279 # alphabetically sorted. 280 281 $selist = join(',', sort split(' ', $selector)); 282 283 # Direct selectors and negated selectors (starting with a !) are 284 # kept separately, because the rules are dual: 285 # For normal selectors (kept in %Matched): 286 # - Identical are OR'ed 287 # - Different are AND'ed 288 # For inverted selectors (kept in %Inverted): 289 # - Identical are AND'ed 290 # - Different are OR'ed 291 # Multiple selectors like 'To Cc' are sorted according to the first 292 # selector on the list, i.e. 'To !Cc' is normal but '!To Cc' is 293 # inverted. 294 295 if ($selector =~ /^!/) { # Inverted selector 296 # In order to guarantee an optimized AND, we first check that 297 # no previous failure has been reported for the current set of 298 # selectors. 299 unless (defined $Inverted{$selist} && !$Inverted{$selist}) { 300 $Inverted{$selist} = &match($selector, $pattern, $range); 301 } 302 } else { # Normal selector 303 # Here it is the OR which is guaranteed to be optimized. Do 304 # not attempt the match if an identical selector already 305 # matched sucessfully. 306 unless (defined $Matched{$selist} && $Matched{$selist}) { 307 $Matched{$selist} = &match($selector, $pattern, $range); 308 } 309 } 310 } 311 312 # Both groups recorded in %Matched and %Inverted are globally AND'ed 313 # However, only one match is necessary within %Inverted whilst all 314 # must have matched within %Matched... 315 316 $matched = 1; # Assume everything matched 317 foreach $key (keys %Matched) { # All entries must have matched 318 $matched = $Matched{$key} ? 1 : 0; 319 &add_log("rule #$j: direct $key " . ($matched ? 'ok' : 'failed')) 320 if $loglvl > 19; 321 last unless $matched; 322 } 323 if ($matched) { # If %Matched failed, all failed! 324 foreach $key (keys %Inverted) { # Only one entry needs to match 325 $matched = $Inverted{$key} ? 1 : 0; 326 &add_log("rule #$j: neg $key " . ($matched ? 'ok' : 'failed')) 327 if $loglvl > 19; 328 last if $matched; 329 } 330 } 331 332 &add_log("matching summary rule #$j: " . ($matched ? 'ok' : 'failed')) 333 if $loglvl > 17; 334 335 if ($matched) { # Execute action if pattern matched 336 # Make sure the rule has not already been executed in that mode 337 if ($Executed[$i] =~ /,$wmode,/) { 338 &add_log("NOTICE loop detected, rule $j, state $wmode") 339 if $loglvl > 5; 340 last rule; # Processing ends here 341 } else { # Rule was never executed 342 $Executed[$i] = ',' unless $Executed[$i]; 343 $Executed[$i] .= "$wmode,"; 344 } 345 $ever_matched = 1; # At least one match 346 &add_log("MATCH on rule #$j in mode $wmode") if $loglvl > 8; 347 &track_rule($j, $wmode) if $track_all; 348 &s_match($j, $wmode) if $stats; # Record match for statistics 349 350 # By issuing an &env'restore, we make sure any local variable 351 # setting done in other rules is not seen by the actions we are 352 # about to execute. However, should the action be the last one 353 # to be performed, its settings will remain for later perusal 354 # by our caller (vacation messages come to mind). 355 356 &env'restore; # Restore vars set in previous rules 357 $status = &xeqte($action); # Execute actions 358 359 last rule if $status == $FT_CONT; 360 $ever_matched = 0; # No match if REJECT or RESTART 361 next rule if $status == $FT_REJECT; 362 $i = -1; # Restart analysis from the beginning ($FT_RESTART) 363 } 364 } 365 ($ever_saved, $ever_matched); 366} 367 368# Return true if the modes currently specified by the rule (held in $mode) 369# are selected by the current mode (in $wmode), meaning the rule has to 370# be applied. 371sub right_mode { 372 local($list) = "," . $mode . ","; 373 &add_log("in mode '$wmode' for $mode") if $loglvl > 19; 374 375 # If mode is negated, skip the rule, whatever other selectors may 376 # indicate. Thus <ALL, !INITIAL> will not be taken into account if 377 # mode is INITIAL, despite the leading ALL. They can be seen as further 378 # requirements or restrictions applied to the mode list (like in the 379 # sentence "all the listed modes *but* the one negated"). 380 381 return 0 if $list =~ /!ALL/; # !ALL cannot match, ever 382 return 0 if $list =~ /,!$wmode,/; # Negated modes logically and'ed 383 384 # Now strip out all negated modes, and if the resulting string is 385 # empty, force a match... 386 387 1 while $list =~ s/,![^,]*,/,/; # Strip out negated modes 388 $list = ',ALL,' if $list eq ','; # Emtpy list, force a match 389 390 # The special ALL mode matches anything but the other sepcial mode for 391 # already filtered messages. Otherwise, direct mode (i.e. non-negated) 392 # are logically or'ed. 393 394 if ($list =~ /,ALL,/) { 395 return 0 if $wmode eq '_SEEN_' && $list !~ /,_SEEN_,/; 396 } else { 397 return 0 unless $list =~ /,$wmode,/; 398 } 399 400 1; # Ok, rule can be applied 401} 402 403# Return true if the mail was from a special user (root, uucp...) or if the 404# mail was not directly mailed to the user (i.e. it comes from a distribution 405# list or has bounced somewhere). 406sub special_user { 407 # Before sending the vacation message, we have to make sure the mail 408 # was sent to the user directly, through a 'To:' or a 'Cc:'. Otherwise, 409 # it must be from a mailing list or a 'Bcc:' and we don't want to 410 # send something back in that case. 411 412 local($matched) = &match_list("To", $cf'user); 413 $matched = &match_list("Cc", $cf'user) unless $matched; 414 415 # Try alternate login names, in case they used a company-wide alias like 416 # First.Last or simply a plain sendmail alias. 417 418 if (!$matched && $cf'tome ne '') { 419 foreach $addr (split(/\s*,\s*/, $cf'tome)) { 420 $matched = &match_list('To', $addr); 421 $matched = &match_list('Cc', $addr) unless $matched; 422 if ($matched) { 423 &add_log("mail was sent to alternate $addr") if $loglvl > 8; 424 last; 425 } else { 426 &add_log("mail wasn't sent to alternate $addr") if $loglvl > 12; 427 } 428 } 429 } 430 431 unless ($matched) { 432 &add_log("mail was not directly sent to $cf'user") if $loglvl > 8; 433 return 1; 434 } 435 436 # If there is a Precedence: header set to either 'bulk', 'list' or 'junk', 437 # then we do not reply either. 438 local($prec) = $Header{'Precedence'}; 439 if ($prec =~ /^bulk|junk|list/i) { 440 &add_log("mail was tagged with a '$prec' precedence") if $loglvl > 8; 441 return 1; 442 } 443 # If there is an RFC-886 Illegal-Object or Illegal-Field header, do not 444 # trust the whole header integrity, and therefore do not reply. 445 if ($Header{'Illegal-Object'} ne '' || $Header{'Illegal-Field'} ne '') { 446 &add_log("mail was received with header errors") if $loglvl > 8; 447 return 1; 448 } 449 # Make sure the mail does not come from a "special" user, as listed in 450 # the %Special array (root, uucp...) 451 $matched = 0; 452 local($matched_login); 453 foreach $login (keys %Special) { 454 $matched = &match_single("From", $login); 455 $matched_login = $login if $matched; 456 last if $matched; 457 } 458 if ($matched) { 459 &add_log("mail was from special user $matched_login") 460 if $loglvl > 8; 461 return 1; 462 } 463 0; # Not from special user! 464} 465 466# Compare a machine and an e-mail address and return true if the domain 467# for that address matches the domain of the machine. We allow an extra 468# level of "domain indirection". 469sub fuzzy_domain { 470 local($first, $fhost) = @_; 471 $fhost =~ s/^\S+@([\w-.]+)/$1/; # Keep hostname part 472 $fhost =~ tr/A-Z/a-z/; # perl4 misses lc() 473 $first =~ tr/A-Z/a-z/; 474 local(@fhost) = split(/\./, $fhost); 475 local(@first) = split(/\./, $first); 476 if (@fhost > @first) { 477 shift(@fhost); # Allow extra machine name 478 } elsif (@first > @fhost) { 479 shift(@first); 480 } elsif (@fhost >= 3) { # Has at least machine.domain.top 481 shift(@first); # Allow server1.domain.top to match 482 shift(@fhost); # server2.domain.top 483 } 484 $fhost = join('.', @fhost); 485 $first = join('.', @first); 486 return $fhost eq $first; 487} 488 489# Log reception of mail (sender and subject fields). This is mainly intended 490# for people like me who parse the logfile once in a while to do more 491# statistics about mail reception. Hence the other distinction between 492# original mails and answers. 493sub reception { 494 local($subject) = $Header{'Subject'}; 495 local($sender) = $Header{'Sender'}; 496 local($from) = $Header{'From'}; 497 &add_log("FROM $from"); 498 local($faddr) = (&parse_address($from))[0]; # From address 499 local($saddr) = ''; 500 501 if ($sender ne '') { 502 $saddr = (&parse_address($sender))[0]; 503 &add_log("VIA $sender") if $saddr ne $faddr; 504 } 505 506 # Trace relaying hosts as well if the first host is unrelated to sender 507 local($relayed) = $Header{'Relayed'}; 508 local($first) = (split(/,\s+/, $relayed))[0]; # First relaying host 509 &add_log("RELAYED $relayed") if $relayed ne '' && 510 !(&fuzzy_domain($first, $saddr) || &fuzzy_domain($first, $faddr)); 511 512 if ($subject ne '') { 513 if ($subject =~ s/^Re:\s*//) { 514 &add_log("REPLY $subject"); 515 } else { 516 &add_log("ABOUT $subject"); 517 } 518 } 519 print "-------- From $from\n" if $track_all; 520} 521 522# Print match on STDOUT when -t option is used 523sub track_rule { 524 local($number, $mode) = @_; 525 print "*** Match on rule $number in mode $mode ***\n"; 526 &print_rule($number); 527} 528 529