1;# $Id$
2;#
3;#  Copyright (c) 1990-2006, Raphael Manfredi
4;#
5;#  You may redistribute only under the terms of the Artistic License,
6;#  as specified in the README file that comes with the distribution.
7;#  You may reuse parts of this distribution only within the terms of
8;#  that same Artistic License; a copy of which may be found at the root
9;#  of the source tree for mailagent 3.0.
10;#
11;# $Log: analyze.pl,v $
12;# Revision 3.0.1.9  1999/07/12  13:49:39  ram
13;# patch66: moved localization of the %Variable hash for APPLY
14;#
15;# Revision 3.0.1.8  1997/09/15  15:13:15  ram
16;# patch57: $lastcmd now global from analyze_mail() for BACK processing
17;# patch57: indication of relaying hosts now selectively emitted
18;#
19;# Revision 3.0.1.7  1997/01/31  18:07:47  ram
20;# patch54: esacape metacharacter '{' in regexps for perl5.003_20
21;#
22;# Revision 3.0.1.6  1996/12/24  14:47:17  ram
23;# patch45: forgot to return 0 at the end of special_user()
24;#
25;# Revision 3.0.1.5  1995/01/03  18:06:33  ram
26;# patch24: now makes use of rule environment vars from the env package
27;# patch24: removed old broken umask handling (now a part of rule env)
28;#
29;# Revision 3.0.1.4  1994/09/22  14:09:03  ram
30;# patch12: defines new folder_saved variable to store folder path
31;#
32;# Revision 3.0.1.3  1994/07/01  14:59:58  ram
33;# patch8: general umask is now reset before analyzing a message
34;# patch8: added support for the UMASK command for local rule scope
35;# patch8: now parses the new tome config variable for vacation messages
36;# patch8: disable vacation message if Illegal-Object or Illegal-Field header
37;#
38;# Revision 3.0.1.2  1994/04/25  15:17:24  ram
39;# patch7: fixed selector combination logic and added some debug logs
40;#
41;# Revision 3.0.1.1  1994/01/26  09:30:23  ram
42;# patch5: now understands new -F option to force processing
43;#
44;# Revision 3.0  1993/11/29  13:48:35  ram
45;# Baseline for mailagent 3.0 netwide release.
46;#
47;#
48#
49# Analyzing mail
50#
51
52# Special users. Note that as login name matches are done in a case-insensitive
53# manner, there is no need to upper-case any of the followings.
54sub init_special {
55	%Special = (
56		'root', 1,				# Super-user
57		'uucp', 1,				# Unix to Unix copy
58		'daemon', 1,			# Not a real user, hopefully
59		'news', 1,				# News daemon
60		'postmaster', 1,		# X-400 mailer-daemon name
61		'newsmaster', 1,		# My convention for news administrator--RAM
62		'usenet', 1,			# Aka newsmaster
63		'mailer-daemon', 1,		# Sendmail
64		'mailer-agent', 1,		# NeXT mailer
65		'nobody', 1				# Nobody we've heard of
66	);
67}
68
69# Compute shorthand file name for logging based on the processed file
70sub mail_logname {
71	my ($file) = @_;
72	my ($mfile) = $file =~ m|.*/(.*)|;	# Basename of mail file
73	$mfile = $file unless $mfile;		# There was no / in name
74	$mfile = '<stdin>' unless $mfile;	# No $file_name if from STDIN
75	return $mfile;
76}
77
78# Compute file size for logging, if possible (i.e. not reading from STDIN)
79sub mail_logsize {
80	my ($file) = @_;
81	return "" unless length $file;
82	my $msize = (stat($file))[7];
83	my $size = "";
84	my $s = $msize == 1 ? "" : "s";
85	$size = " $msize byte$s" if defined $msize;
86	return $size;
87}
88
89# Parse mail message and apply the filtering rules on it
90sub analyze_mail {
91	local($file) = shift(@_);	# Mail file to be parsed
92	local($mode) = 'INITIAL';	# Initial working mode
93	local($wmode) = $mode;		# Needed for statistics routines
94	local(%Variable);			# User-defined variables, visible through APPLY
95
96	# Set-up proper environment. Dynamic scoping is used on those variables
97	# for the APPLY command (see the &apply function). Note that the $wmode
98	# variable is passed to &apply_rules but is local to that function,
99	# meaning there is no feedback of the working mode when using APPLY.
100	# However, the variables listed below may be probed upon return since they
101	# are external to &apply_rules.
102	local($ever_matched) = 0;	# Did we ever matched a single saving rule ?
103	local($ever_saved) = 0;		# Did we ever saved a message ?
104	local($folder_saved) = '';	# Last folder we saved into (full path)
105
106	# Other local variables used only in this function
107	local($ever_seen) = 0;		# Did we ever enter seen mode ?
108	local($header);				# Header entry name to look for in Header table
109
110	# Reset environment and umask before each new mail processing
111	&env'setup;
112	umask($env'umask);
113
114	# Log start of processing
115	my $mfile = mail_logname($file);
116	my $msize = mail_logsize($file);
117	add_log("-- HANDLING [$mfile]$msize --") if $loglvl > 8;
118
119	# Parse the mail message in file
120	&parse_mail($file);			# Parse the mail and fill-in H tables
121	return 1 unless defined $Header{'All'};		# Mail not parsed correctly
122	&reception if $loglvl > 8;	# Log mail reception
123	&run_builtins;				# Execute builtins, if any
124
125	# Now analyze the mail. If there is already a X-Filter header, then the
126	# mail has already been processed. In that case, the default action is
127	# performed: leave it in the incomming mailbox with no further action.
128	# This should prevent nasty loops.
129
130	&add_log ("analyzing mail") if $loglvl > 18;
131	$header = $Header{'X-Filter'};				# Mulitple occurences possible
132	if ($header ne '') {						# Hmm... already filtered...
133		local(@filter) = split(/\n/, $header);	# Look for each X-Filter
134		local($address) = &email_addr;			# Our e-mail address
135		local($done) = 0;						# Already processed ?
136		local($_);
137		foreach (@filter) {						# Maybe we'll find ourselves
138			if (/mailagent.*for (\S+)/) {		# Mark left by us ?
139				$done = 1 if $1 eq $address;	# Yes, we did that
140				# Remove that X-Filter line, LEAVE will add one anyway
141				$Header{'Head'} =~ s/^X-Filter:\s*mailagent.*for $address\n//m;
142				last;
143			}
144		}
145		if ($done) {			# We already processed that message
146			if ($force_seen) {	# They used the -F option
147				&add_log("NOTICE already filtered, processing anyway")
148					if $loglvl > 5;
149			} else {
150				&add_log("NOTICE already filtered, entering seen mode")
151					if $loglvl > 5;
152				$mode = '_SEEN_';	# This is a special mode
153			}
154			$ever_seen = 1;		# This will prevent vacation messages
155			&s_seen;			# Update statistics
156		}
157	}
158
159	local($lastcmd) = 0;		# Failure status from last command
160	&apply_rules($mode, 1);		# Now apply the filtering rules on it.
161
162	# Deal with vacation mode. It applies only on mail not previously seen.
163	# The vacation mode must be turned on in the configuration file. The
164	# conditions for a vacation message to be sent are:
165	#   - Message was directly sent to the user.
166	#   - Message does not come from a special user like root.
167	#   - Vacation message was not disabled via a VACATION command
168	# Note that we use the environment set-up by the last rule we processed.
169
170	if (!$ever_seen && $cf'vacation =~ /on/i && $env'vacation) {
171		unless (&special_user) {	# Not from special user and sent to me
172			# Send vacation message only once per address per period
173			&xeqte("ONCE (%r,vacation,$env'vacperiod) MESSAGE $env'vacfile");
174			&s_vacation;		# Message received while in vacation
175		}
176	}
177
178	# Default action if no rule ever matched. Statistics routines will use
179	# our own local $wmode variable.
180
181	unless ($ever_matched) {
182		&add_log("NOTICE no match, leaving in mailbox") if $loglvl > 5;
183		&xeqte("LEAVE");			# Default action anyway
184		&s_default;					# One more application of default rule
185	} else {
186		unless ($ever_saved) {
187			&add_log("NOTICE not saved, leaving in mailbox") if $loglvl > 5;
188			&xeqte("LEAVE");		# Leave if message not saved
189			&s_saved;				# Message saved by default rule
190		}
191	}
192	&s_filtered($Header{'Length'});		# Update statistics
193
194	&env'cleanup;						# Clean-up the environment
195	0;									# Ok status
196}
197
198# This is the heart of the mail agent -- Apply the filtering rules
199sub apply_rules {
200	local($wmode, $stats)= @_;	# Working mode (the mode we start in)
201	local($mode);				# Mode (optional)
202	local($selector);			# Selector (mandatory)
203	local($range);				# Range for selection (optional)
204	local($rulentry);			# Entry in rule H table
205	local($pattern);			# Pattern for selection, as written in rules
206	local($action);				# Related action
207	local($last_selector);		# Last used selector
208	local($rules);				# A copy of the rules
209	local($matched);			# Flag set to true if a rule is matched
210	local(%Matched);			# Records the selectors which have been matched
211	local($status);				# Status returned by xeqte
212	local(@Executed);			# Records already executed rules
213	local($selist);				# Key used to detect identical selector lists
214	local(%Inverted);			# Records inverted '!' selectors which matched
215
216	# The @Executed array records whether a specified action for a rule was
217	# executed. Loops are possible via the RESTART action, and as there is
218	# almost no way to exit from such a loop (there is one with FEED and RESYNC)
219	# I decided to prohibit them. Hence a given action is allowed to be executed
220	# only once during a mail analysis (modulo each possible working mode).
221	# For a rule number n, $Executed[n] is a collection of modes in which the
222	# rule was executed, comma separated.
223
224	$Executed[$#Rules] = '';		# Pre-extend array
225
226	# Order wrt the one in the rule file is guaranteed. I use a for construct
227	# with indexed access to be able to restart from the beginning upon
228	# execution of RESTART. This also helps filling in the @Executed array.
229
230	local($i, $j);			# Indices within rule array
231
232	rule: for ($i = 0; $i <= $#Rules; $i++) {
233		$j = $i + 1;
234		$_ = $Rules[$i];
235
236		# The %Matched array records the boolean value associated with each
237		# possible selector. If two identical selector are found, the values
238		# are OR'ed (and we stop evaluating as soon as one is true). Otherwise,
239		# the values are AND'ed (for different selectors, but all are evaluated
240		# in case we later find another identical selectors -- no sort is done).
241		# The %Inverted which records '!' selector matches has all the above
242		# rules inverted according to De Morgan's Law.
243
244		undef %Matched;							# Reset matching patterns
245		undef %Inverted;						# Reset negated patterns
246		$rules = $_;							# Work on a copy
247		$rules =~ s/^([^{]*)\{// && ($mode = $1);	# First word is the mode
248		$rules =~ s/\s*(.*)\}// && ($action = $1);	# Followed by action }
249		$mode =~ s/\s*$//;							# Remove trailing spaces
250		$rules =~ s/^\s+//;						# Remove leading spaces
251		$last_selector = "";					# Last selector used
252
253		# Make sure we are in the correct mode. The $mode variable holds a
254		# list of comma-separated modes. If the working mode is found in it
255		# then the rules apply. Otherwise, skip them.
256
257		next rule unless &right_mode;		# Skip rule if not in right mode
258
259		# Now loop over all the keys and apply the patterns in turn
260
261		&reset_backref;						# Reset backreferences
262		foreach $key (split(/ /, $rules)) {
263			$rulentry = $Rule{$key};
264			$rulentry =~ s/^\s*([^\/]*:)// && ($selector = $1);
265			$rulentry =~ s/^\s*//;
266			$pattern = $rulentry;
267			if ($last_selector ne $selector) {	# Update last selector
268				$last_selector = $selector;
269			}
270			$selector =~ s/:$//;			# Remove final ':' on selector
271			$range = '<1,->';				# Default range
272			$selector =~ s/\s*(<[\d\s,-]+>)$// && ($range = $1);
273
274			&add_log ("selector '$selector' on '$range', pattern '$pattern'")
275				if $loglvl > 19;
276
277			# Identical (lists of) selectors are logically OR'ed. To make sure
278			# 'To Cc:' and 'Cc To:' are correctly OR'ed, the selector list is
279			# alphabetically sorted.
280
281			$selist = join(',', sort split(' ', $selector));
282
283			# Direct selectors and negated selectors (starting with a !) are
284			# kept separately, because the rules are dual:
285			# For normal selectors (kept in %Matched):
286			#  - Identical are OR'ed
287			#  - Different are AND'ed
288			# For inverted selectors (kept in %Inverted):
289			#  - Identical are AND'ed
290			#  - Different are OR'ed
291			# Multiple selectors like 'To Cc' are sorted according to the first
292			# selector on the list, i.e. 'To !Cc' is normal but '!To Cc' is
293			# inverted.
294
295			if ($selector =~ /^!/) {		# Inverted selector
296				# In order to guarantee an optimized AND, we first check that
297				# no previous failure has been reported for the current set of
298				# selectors.
299				unless (defined $Inverted{$selist} && !$Inverted{$selist}) {
300					$Inverted{$selist} = &match($selector, $pattern, $range);
301				}
302			} else {						# Normal selector
303				# Here it is the OR which is guaranteed to be optimized. Do
304				# not attempt the match if an identical selector already
305				# matched sucessfully.
306				unless (defined $Matched{$selist} && $Matched{$selist}) {
307					$Matched{$selist} = &match($selector, $pattern, $range);
308				}
309			}
310		}
311
312		# Both groups recorded in %Matched and %Inverted are globally AND'ed
313		# However, only one match is necessary within %Inverted whilst all
314		# must have matched within %Matched...
315
316		$matched = 1;						# Assume everything matched
317		foreach $key (keys %Matched) {		# All entries must have matched
318			$matched = $Matched{$key} ? 1 : 0;
319			&add_log("rule #$j: direct $key " . ($matched ? 'ok' : 'failed'))
320				if $loglvl > 19;
321			last unless $matched;
322		}
323		if ($matched) {						# If %Matched failed, all failed!
324			foreach $key (keys %Inverted) {	# Only one entry needs to match
325				$matched = $Inverted{$key} ? 1 : 0;
326				&add_log("rule #$j: neg $key " . ($matched ? 'ok' : 'failed'))
327					if $loglvl > 19;
328				last if $matched;
329			}
330		}
331
332		&add_log("matching summary rule #$j: " . ($matched ? 'ok' : 'failed'))
333			if $loglvl > 17;
334
335		if ($matched) {						# Execute action if pattern matched
336			# Make sure the rule has not already been executed in that mode
337			if ($Executed[$i] =~ /,$wmode,/) {
338				&add_log("NOTICE loop detected, rule $j, state $wmode")
339					if $loglvl > 5;
340				last rule;					# Processing ends here
341			} else {						# Rule was never executed
342				$Executed[$i] = ',' unless $Executed[$i];
343				$Executed[$i] .= "$wmode,";
344			}
345			$ever_matched = 1;				# At least one match
346			&add_log("MATCH on rule #$j in mode $wmode") if $loglvl > 8;
347			&track_rule($j, $wmode) if $track_all;
348			&s_match($j, $wmode) if $stats;	# Record match for statistics
349
350			# By issuing an &env'restore, we make sure any local variable
351			# setting done in other rules is not seen by the actions we are
352			# about to execute. However, should the action be the last one
353			# to be performed, its settings will remain for later perusal
354			# by our caller (vacation messages come to mind).
355
356			&env'restore;				# Restore vars set in previous rules
357			$status = &xeqte($action);	# Execute actions
358
359			last rule if $status == $FT_CONT;
360			$ever_matched = 0;				# No match if REJECT or RESTART
361			next rule if $status == $FT_REJECT;
362			$i = -1;		# Restart analysis from the beginning ($FT_RESTART)
363		}
364	}
365	($ever_saved, $ever_matched);
366}
367
368# Return true if the modes currently specified by the rule (held in $mode)
369# are selected by the current mode (in $wmode), meaning the rule has to
370# be applied.
371sub right_mode {
372	local($list) = "," . $mode . ",";
373	&add_log("in mode '$wmode' for $mode") if $loglvl > 19;
374
375	# If mode is negated, skip the rule, whatever other selectors may
376	# indicate. Thus <ALL, !INITIAL> will not be taken into account if
377	# mode is INITIAL, despite the leading ALL. They can be seen as further
378	# requirements or restrictions applied to the mode list (like in the
379	# sentence "all the listed modes *but* the one negated").
380
381	return 0 if $list =~ /!ALL/;		# !ALL cannot match, ever
382	return 0 if $list =~ /,!$wmode,/;	# Negated modes logically and'ed
383
384	# Now strip out all negated modes, and if the resulting string is
385	# empty, force a match...
386
387	1 while $list =~ s/,![^,]*,/,/;		# Strip out negated modes
388	$list = ',ALL,' if $list eq ',';	# Emtpy list, force a match
389
390	# The special ALL mode matches anything but the other sepcial mode for
391	# already filtered messages. Otherwise, direct mode (i.e. non-negated)
392	# are logically or'ed.
393
394	if ($list =~ /,ALL,/) {
395		return 0 if $wmode eq '_SEEN_' && $list !~ /,_SEEN_,/;
396	} else {
397		return 0 unless $list =~ /,$wmode,/;
398	}
399
400	1;	# Ok, rule can be applied
401}
402
403# Return true if the mail was from a special user (root, uucp...) or if the
404# mail was not directly mailed to the user (i.e. it comes from a distribution
405# list or has bounced somewhere).
406sub special_user {
407	# Before sending the vacation message, we have to make sure the mail
408	# was sent to the user directly, through a 'To:' or a 'Cc:'. Otherwise,
409	# it must be from a mailing list or a 'Bcc:' and we don't want to
410	# send something back in that case.
411
412	local($matched) = &match_list("To", $cf'user);
413	$matched = &match_list("Cc", $cf'user) unless $matched;
414
415	# Try alternate login names, in case they used a company-wide alias like
416	# First.Last or simply a plain sendmail alias.
417
418	if (!$matched && $cf'tome ne '') {
419		foreach $addr (split(/\s*,\s*/, $cf'tome)) {
420			$matched = &match_list('To', $addr);
421			$matched = &match_list('Cc', $addr) unless $matched;
422			if ($matched) {
423				&add_log("mail was sent to alternate $addr") if $loglvl > 8;
424				last;
425			} else {
426				&add_log("mail wasn't sent to alternate $addr") if $loglvl > 12;
427			}
428		}
429	}
430
431	unless ($matched) {
432		&add_log("mail was not directly sent to $cf'user") if $loglvl > 8;
433		return 1;
434	}
435
436	# If there is a Precedence: header set to either 'bulk', 'list' or 'junk',
437	# then we do not reply either.
438	local($prec) = $Header{'Precedence'};
439	if ($prec =~ /^bulk|junk|list/i) {
440		&add_log("mail was tagged with a '$prec' precedence") if $loglvl > 8;
441		return 1;
442	}
443	# If there is an RFC-886 Illegal-Object or Illegal-Field header, do not
444	# trust the whole header integrity, and therefore do not reply.
445	if ($Header{'Illegal-Object'} ne '' || $Header{'Illegal-Field'} ne '') {
446		&add_log("mail was received with header errors") if $loglvl > 8;
447		return 1;
448	}
449	# Make sure the mail does not come from a "special" user, as listed in
450	# the %Special array (root, uucp...)
451	$matched = 0;
452	local($matched_login);
453	foreach $login (keys %Special) {
454		$matched = &match_single("From", $login);
455		$matched_login = $login if $matched;
456		last if $matched;
457	}
458	if ($matched) {
459		&add_log("mail was from special user $matched_login")
460			if $loglvl > 8;
461		return 1;
462	}
463	0;	# Not from special user!
464}
465
466# Compare a machine and an e-mail address and return true if the domain
467# for that address matches the domain of the machine. We allow an extra
468# level of "domain indirection".
469sub fuzzy_domain {
470	local($first, $fhost) = @_;
471	$fhost =~ s/^\S+@([\w-.]+)/$1/;					# Keep hostname part
472	$fhost =~ tr/A-Z/a-z/;							# perl4 misses lc()
473	$first =~ tr/A-Z/a-z/;
474	local(@fhost) = split(/\./, $fhost);
475	local(@first) = split(/\./, $first);
476	if (@fhost > @first) {
477		shift(@fhost);					# Allow extra machine name
478	} elsif (@first > @fhost) {
479		shift(@first);
480	} elsif (@fhost >= 3) {				# Has at least machine.domain.top
481		shift(@first);					# Allow server1.domain.top to match
482		shift(@fhost);					# server2.domain.top
483	}
484	$fhost = join('.', @fhost);
485	$first = join('.', @first);
486	return $fhost eq $first;
487}
488
489# Log reception of mail (sender and subject fields). This is mainly intended
490# for people like me who parse the logfile once in a while to do more
491# statistics about mail reception. Hence the other distinction between
492# original mails and answers.
493sub reception {
494	local($subject) = $Header{'Subject'};
495	local($sender) = $Header{'Sender'};
496	local($from) = $Header{'From'};
497	&add_log("FROM $from");
498	local($faddr) = (&parse_address($from))[0];		# From address
499	local($saddr) = '';
500
501	if ($sender ne '') {
502		$saddr = (&parse_address($sender))[0];
503		&add_log("VIA $sender") if $saddr ne $faddr;
504	}
505
506	# Trace relaying hosts as well if the first host is unrelated to sender
507	local($relayed) = $Header{'Relayed'};
508	local($first) = (split(/,\s+/, $relayed))[0];	# First relaying host
509	&add_log("RELAYED $relayed") if $relayed ne '' &&
510		!(&fuzzy_domain($first, $saddr) || &fuzzy_domain($first, $faddr));
511
512	if ($subject ne '') {
513		if ($subject =~ s/^Re:\s*//) {
514			&add_log("REPLY $subject");
515		} else {
516			&add_log("ABOUT $subject");
517		}
518	}
519	print "-------- From $from\n" if $track_all;
520}
521
522# Print match on STDOUT when -t option is used
523sub track_rule {
524	local($number, $mode) = @_;
525	print "*** Match on rule $number in mode $mode ***\n";
526	&print_rule($number);
527}
528
529