1#!/usr/local/bin/perl -Tw
2require 5.006_001;
3use strict;
4
5# This is a fork of Tom Anderson's spamitarium.pl by Jonathan Kamens
6# <jik@kamens.us>. The changes here have been submitted back to Tom Anderson
7# for incorporation into his master version. In the meantime, you may wish to
8# use this version instead. Here are the changes in it:
9#
10# * Command-line parsing has been updated to use the standard Perl Getopt::Long
11#   syntax, while continuing to support the "bundled" options syntax supported
12#   by previous versions of the script.
13# * Options have been added to specify information that may be missing from the
14#   message header, specifically for when spamitarium is being called from a
15#   Milter and therefore the message does not yet have the Return-Path or local
16#   Received lines that will be inserted by the MTA before final delivery. See
17#   the documentation below for the options --return-path, --no-local-received,
18#   --remote-ip, --remote-name, --helo, --local-ip, --local-name, --rcpt, and
19#   --add-local-received.
20# * A --timeout option has been added for specifying how long the script should
21#   wait for input, or 0 to disable the timeout completely. This is primarily
22#   useful for debugging the script.
23# * The header parsing code has been refactored to be cleaner and more robust.
24# * In particular, empty header fields are now handled correctly (previously,
25#   they were appended to the previous header field!).
26# * Empty header fields are now included in the output, for more accurate
27#   bogofilter'ing.
28# * Typos and such have been cleaned up in the documentation.
29# * A date-parsing bug which was causing the time zone to be ignored, thus
30#   causing the X-Date-Check header to report an inaccurate delta, has been
31#   fixed.
32# * A bug which could cause spamitarium to crash upon encountering a far into
33#   the future date in the message header has been fixed.
34# * A bug which was causing some domain names to be truncated (e.g.,
35#   "omta01-mdp.westchester.pa.bo.comcast.net" became "omta01-mdp.west") has
36#   been fixed.
37# * A couple of Perl uninitialized variable warnings have been fixed.
38# * Mail::SPF is used now instead of Mail::SPF::Query, for IPv6 support.
39# * Code has been added to work around the fact that some emails generated by
40#   Constant Contact have CR rather than CRLF at the end of their Date: header
41#   lines (this is a very specific workaround for a very specific problem
42#   because technically the workaround is a violation of the SMTP RFC, though
43#   nobody else seems to care about that. :-/).
44
45=head1 NAME
46
47Spamitarium - evaluates and repairs the sanity of email headers...
48
49=cut
50
51my $version = "0.5.2";
52
53################################################
54############### Copyleft Notice ################
55################################################
56
57# Copyright © 2004 Order amid Chaos, Inc.
58# Author: Tom Anderson
59# neo+spamitarium@orderamidchaos.com
60#
61# This program is open-source software; you can redistribute it
62# and/or modify it under the terms of the GNU General Public
63# License, v2, as published by the Free Software Foundation.
64#
65# This program is distributed in the hope that it will be useful,
66# but WITHOUT ANY WARRANTY; without even the implied warranty of
67# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
68# GNU General Public License for more details.
69#
70# You should have received a copy of the GNU General Public
71# License along with this program; if not, write to:
72#
73# Free Software Foundation
74# 59 Temple Place, Suite 330
75# Boston, MA 02111-1307  USA
76#
77# http://www.gnu.org/
78
79#################################################
80################# Documentation #################
81#################################################
82
83# use "perldoc spamitarium" or "spamitarium -h" to read this
84
85=head1 SYNOPSIS
86
87=head2 Command line usage:
88
89B<spamitarium> [I<options>] < [I<rfc822_email>]
90
91=head2 Procmail usage (recommended):
92
93Add to your .procmailrc the following recipe:
94
95  :0
96  {
97        :0 fhw
98        | spamitarium -sreadxtp
99
100        # filter through bogofilter, tagging as spam
101	# or not and updating the word lists
102        :0 fw
103        | bogofilter -uep
104
105        # add back the "From" header for proper delivery
106        :0 fhw
107        | formail -I "From " -a "From "
108  }
109
110=head2 Command line options:
111
112=over 4
113
114=item B<-h>
115
116display this help file
117
118=item B<-s> [I<permitted_headers>]
119
120Allow standard fields only (RFC 822/2822/1049/1341/1521/2183/1864) ...
121all others are stripped from the email.  To exclude some headers from
122stripping, specify a comma-separated list as an argument to this
123option.
124
125=item B<-r>
126
127insert new received line containing verified received-line tokens
128
129=item B<-e>
130
131include helo string in received line
132
133=item B<-d>
134
135allow DNS lookups (forward and reverse) to help fill in all
136necessary received fields
137
138=item B<-f>
139
140force rDNS lookups even when provided already by the MTA
141
142=item B<-a>
143
144perform ASN lookups and include in received lines
145
146=item B<-p>
147
148perform SPF lookups and include in received lines
149
150=item B<-x>
151
152include custom x-headers for additional header validations:
153
154=item B<-t>
155
156validate that the date header is within close proximity to the
157received date (see $date_limit global variable to configure)
158
159=item B<-w>
160
161parse and display the body of the email in addition to the headers
162
163=item B<-b>
164
165display benchmarking info
166
167=item B<--return-path> I<addr>
168
169Specify the sender of the email for SPF lookups, overriding the
170"Return-Path" field.
171
172=item B<--no-local-received>
173
174Tell spamitarium that there is no Received line in the message from
175the local mailer, e.g., because spamitarium is being called from
176inside a Milter.
177
178The following options are only used when B<--no-local-received> is
179specified.
180
181=over 4
182
183=item B<--remote-ip> I<ip-address>
184
185Specify the IP address of the remote server sending the email.
186
187=item B<--remote-name> I<ip-address>
188
189Specify the host name of the remote server sending the email, as
190determined by a reverse DNS lookup of its IP address.
191
192=item B<--helo> I<host-name>
193
194Specify the host name sent by the remote server in the SMTP HELO (or
195EHLO) command.
196
197=item B<--local-ip> I<ip-address>
198
199Specify the IP address of the server receiving the email.
200
201=item B<--local-name> I<host-name>
202
203Specify the host name of the server receiving the email.
204
205=item B<--rcpt> I<email-address>
206
207Specify the envelope address of the recipient of the email.
208
209=item B<--add-local-received>
210
211Indicate that spamitarium should add an auto-generated local Received
212line.
213
214=back
215
216=item B<--timeout> I<seconds>
217
218How long to wait for input before giving up. Specify 0 to disable the timeout
219completely. Primarily for debugging purposes.
220
221=back
222
223
224=head2 User-defined header field list:
225
226If using the B<s> option, you may want to allow certain header fields
227other than those specified by RFCs.  These might include fields set
228by your mailing list or proxy or other custom application.  You may
229specify such a list of fields by appending them comma-delimited at
230the end of your command line.  For example, if you wanted the
231I<list-id> and I<encrypted> fields passed through, you would change
232your procmail recipe as follows:
233
234  | spamitarium -readxtp -s list-id,encrypted
235
236
237=head1 REQUIRES
238
239=over 4
240
241=item *
242
243Perl 5.6.1
244Net::DNS::Resolver
245Mail::SPF
246Net::CIDR
247DB_File
248POSIX
249Getopt::Long
250
251=back
252
253
254=head1 DESCRIPTION
255
256Spamitarium helps to remove unnecessary noise from email headers and
257to highlight just the portions which contribute positively to spam
258filtering using statistical methods.
259
260The only non-spoofable, -forgable, or -tweakable part of
261an email is the received line, as it is generated by the receiving
262mail server which ought to have no reason to munge it.  Every other
263part of an email can be influenced directly by the sender.  Received
264line tokens, when verified authentic, are therefore highly indicative
265of whether or not a given message is spam.
266
267Spamitarium reads the received headers, determines which ones are
268authentic, and then prints tokens into the header which may be keyed
269on by statistical filters.  This works much like a blacklist/whitelist,
270but when coupled with a statistical filter such as bogofilter, these
271lists are automatically generated and require no manual maintenance
272other than normal training.
273
274Moreover, headers which do not directly influence the email in any
275functional way, nor are visible to the end-user in a standard
276graphical MUA, are highly likely to contain information which
277spammers think will detract from normal statistical filtering. It
278is therefore desireable to remove these elements, specifically
279X-headers, prior to filtering.  Spamitarium removes all invisible,
280non-functional header lines.
281
282Spamitarium also looks up any IP addresses or rDNS addresses
283which are not provided in order to provide the maximum tokens on
284which to filter.  Moreover, it looks up the autonomous system number
285(ASN) associated with each "from" address in order to provide
286a small set of tokens representing the various major subnets of the
287internet.  And it checks the Sender Policy Framework (SPF) records of
288the sender to ensure that the given MX has permission to send on
289their behalf.
290
291Finally, Spamitarium assesses the headers for missing required
292header lines, inserting keyable tokens or supplying the missing
293information.  And it compares the date fields to determine if the
294email has been pre- or post-dated by a large margin in order to
295influence where it appears in your mail client and inserts an
296x-header with keyable range tokens to compensate for this.
297
298Together, all of these techniques help to remove the noise which
299accompanies, either incidentally or maliciously, most email messages.
300This results in a cleaner header consisting of more easily scored
301tokens.  This permits better accuracy with statistical filters as
302well as quicker processing and a smaller token database.
303
304
305=head1 FAQ
306
307=head2 Ask a question
308
309Ye may receive an answer here if it is asked frequently
310
311
312=head1 BUGS
313
314=over 4
315
316=item *
317
318timegm($sec,$min,$hour,$day,$mon,$year) aborts if Perl's time_t is 32
319bits large and the year is too high (>2038).
320
321=back
322
323=head1 TODO
324
325=over 4
326
327=item *
328
329Suggestions welcome.
330
331=back
332
333=head1 SEE ALSO
334
335=over 4
336
337=item *
338
339L<procmail>
340
341=item *
342
343L<bogofilter>
344
345=back
346
347
348=head1 AUTHOR
349
350 Tom Anderson <neo+spamitarium@orderamidchaos.com>
351 Jonathan Kamens <jik@kamens.us>
352
353=cut
354
355#################################################
356############### User Variables  #################
357#################################################
358
359# please edit according to your setup
360
361# default path
362our $path = "/bin:/usr/bin:/usr/local/bin";
363
364# default shell
365our $shell = "/bin/sh";
366
367# seconds before we bail waiting on input
368our $timeout = 3;
369
370# server to use for ASN lookups
371our $asn_server = "asn.routeviews.org";
372
373# Whitelist any IP addresses or ranges from SPF lookups
374our @whitelist = ("127.0.0.1","192.168.0.1-192.168.0.255");
375
376# If you want to whitelist any addresses which have authenticated
377# via poprelayd (i.e. remote workstations of users on your server)
378# set $dbfile to your popip.db location, else set it to undef
379#our $dbfile = "/etc/mail/popip.db";
380our $dbfile = undef;
381
382# distance in seconds from right now to consider a reasonable (non-spam) range to date an email
383our $date_limit = 60*60*24*2;  # 2 days
384
385# EMAIL HEADER FIELDS
386#
387# See RFC 2076 / "Common Internet Message Header Fields" for a synopsis of common mail headers
388
389	# SPECIFIED FIELDS -- all of the fields specified in RFC 822/2822, case-insensitive, in the suggested order
390	our $spec_fields = "return-path,received,resent-date,resent-from,resent-sender,resent-reply-to,".
391	 		   "resent-to,resent-cc,resent-bcc,resent-message-id,date,from,sender,reply-to,".
392			   "to,cc,bcc,message-id,in-reply-to,references,subject,comments,keywords,encrypted";
393
394	# MIME header fields (RFC 1049/1341/1521/2183)
395	$spec_fields .= ",mime-version,content-type,content-transfer-encoding,content-id,content-description,content-disposition";
396
397	# security/checksum (RFC 1864)
398	$spec_fields .= ",content-md5";
399
400	# mailing list headers (RFC 2369/2919) may be added if you like, but for now I'm choosing to leave them out
401	#$spec_fields .= ",list-id,list-help,list-unsubscribe,list-subscribe,list-post,list-owner,list-archive";
402
403	# MASKED FIELDS -- unnecessary fields often used for spam will be expunged from the spec fields list
404	# (if you know of a valid, necessary use for these, let me know)
405	our $masked_fields = "keywords,comments,encrypted,content-id,content-description";
406
407	# controversial and not strictly necessary:
408	#$masked_fields .=  ",reply-to";
409
410	# message-id fields are only machine-readable and not visible to nor readable by the recipient
411	# however, they can be useful if your client produces discussion threading
412	# uncomment this line if you don't care about threading:
413	#$masked_fields .=  ",message-id,resent-message-id,in-reply-to,references";
414
415	# resent fields are strictly informational (and not generally user-visible), therefore allowing them through is optional:
416	# MIME specifies a different way of resending messages with the "Message" content-type, so these may be considered deprecated:
417	$masked_fields .= ",resent-date,resent-from,resent-sender,resent-reply-to,resent-to,resent-cc,resent-bcc,resent-message-id";
418
419	# USER FIELDS -- User fields are those that are neither specified nor masked that you want permitted.
420	# These may include special fields for your particular mail server, filter, or mail user agent.
421	our $user_fields = "";
422
423	# NEW FIELDS -- New custom x-headers added by Spamitarium (it is recommend that you don't change these).
424	# These are disabled unless you pass the 'x' option.
425	our $new_fields = "x-date-check,x-spf";
426
427	# REQUIRED FIELDS -- Any fields that should show up in an email even if they are not sent -- i.e. if the lack of
428	# these fields may be useful for the filter, a no-req-field tag will be added.  The only *required* fields according to
429	# RFC 2822 are "from", "sender", "reply-to", and "date", others are just suggested.  However, "sender" and "reply-to" are
430	# commonly not supplied, and so should probably not be in this list.  On the other hand, "subject" and a few others may
431	# be desired in this list.
432	our $req_fields = "received,from,to,date,subject";
433
434# of course, modify the first line of this file,
435# the shebang, to point to your perl interpreter
436
437# do not edit below this line unless you really
438# know what you're doing
439
440#################################################
441############## Include Libraries ################
442#################################################
443
444use Benchmark;
445use Data::Dumper;
446use File::Basename;
447use Time::Local;
448use Net::DNS::Resolver;
449use Mail::SPF;
450use Net::CIDR;
451use DB_File;
452use POSIX;
453use Getopt::Long;
454use Sys::Syslog;
455
456#################################################
457############## Default Globals ##################
458#################################################
459
460$> = $<; # set effective user ID to real UID
461$) = $(; # set effective group ID to real GID
462
463# Make %ENV safer
464delete @ENV{qw(IFS CDPATH ENV BASH_ENV PATH SHELL)};
465
466# Set the environment explicitly
467$ENV{PATH} = $path;
468$ENV{SHELL} = $shell;
469
470# options flags
471our $options = "";
472our $return_path = "";
473our $opt_remote_ip = "";
474our $opt_remote_name = "";
475our $opt_local_ip = "";
476our $opt_local_name = "";
477our $opt_helo = "";
478our $opt_rcpt = "";
479our $no_local_received = undef;
480our $add_local_received = undef;
481
482# define the control-linefeed syntax for this system
483our $CRLF =
484"\n";
485#($^O=~/VMS/i)? 	"\n": 		# VMS
486#("\t" ne "\011")? 	"\r\n": 	# EBCDIC
487#			"\015\012"; 	# others
488
489# DNS query options
490our $res = Net::DNS::Resolver->new(
491	nameservers     => [qw(127.0.0.1)],
492	udp_timeout     => 2,
493	retry           => 1,
494	#debug          => 1
495);
496
497# convert whitelist into CIDR notation
498our @cidr_list = ();
499foreach my $IP (@whitelist) {
500  if (not eval {@cidr_list = Net::CIDR::cidradd ($IP, @cidr_list)}) {
501    error("warn","Error processing whitelist: \"$IP\" is not a valid IP address or range.");
502  }
503}
504
505################################################
506##################### Main #####################
507################################################
508
509openlog(basename $0, 'mail');
510
511# process options
512sub usage {
513    my $spamitarium = $1 if $0 =~ /^([\w\/.\-~]*)$/;
514    system("perldoc $spamitarium");
515}
516
517# Allow bundling of first set of options, as well as optional hyphen.
518if (@ARGV and $ARGV[0] =~ /^-?[hrdfasebwxtp]+$/) {
519    if ($ARGV[0] =~ s/s//) {
520	splice(@ARGV, 1, 0, "-s");
521    }
522    $ARGV[0] =~ s/^([^-])/-$1/;
523    Getopt::Long::Configure("bundling");
524}
525
526if (! GetOptions("help|h" => sub { &usage; exit(0); },
527		 "r" => sub { $options .= "r"; }, # process received headers
528		 "d" => sub { $options .= "d"; }, # perform domain lookups where needed
529		 "f" => sub { $options .= "f"; }, # force RDNS lookups even where MTA provided
530		 "a" => sub { $options .= "a"; }, # perform ASN lookups
531		 "s:s" => sub { $options .= "s";  # standard fields only (strip others)
532			 $user_fields = $_[1]; },
533		 "e" => sub { $options .= "e"; }, # include the helo received field in output
534		 "b" => sub { $options .= "b"; }, # output benchmarking info
535		 "w" => sub { $options .= "w"; }, # process whole email (including body)
536		 "x" => sub { $options .= "x"; }, # insert custom x-header fields
537		 "t" => sub { $options .= "t"; }, # perform date range checks
538		 "p" => sub { $options .= "p"; }, # perform SPF lookups
539		 "return-path|returnpath=s" => \$return_path,
540		 "remote-ip|remoteip=s" => \$opt_remote_ip,
541		 "remote-name|remotename=s" => \$opt_remote_name,
542		 "local-ip|localip=s" => \$opt_local_ip,
543		 "local-name|localname=s" => \$opt_local_name,
544		 "helo=s" => \$opt_helo,
545		 "rcpt=s" => \$opt_rcpt,
546		 "no-local-received|nolocalreceived" => \$no_local_received,
547		 "add-local-received|addlocalreceived" => \$add_local_received,
548                 "timeout=i" => \$timeout,  # 0 to disable timeout
549		 )) {
550    &usage;
551    exit(1);
552}
553
554# open popip database for reading
555our %db;
556&opendb_read if $dbfile;
557
558# start timing the process
559my $start_time = new Benchmark if $options =~ /b/;
560my ($start_parse, $end_parse, $start_rcvd, $end_rcvd, $start_set, $end_set);
561
562# get STDIN and process the email
563eval
564{
565	# set an alarm so that we don't hang on an empty STDIN
566	local $SIG{ALRM} = sub { die "timeout" };
567        if ($timeout > 0) {
568            alarm $timeout;
569        }
570
571	# parse the header
572	$start_parse = new Benchmark if $options =~ /b/;
573	my ($header,$parse_benchmark) = parse_header();
574	$end_parse = new Benchmark if $options =~ /b/;
575
576	# cancel timeout if we got this far
577	alarm 0;
578
579	# default date if none provided
580	unless (defined $header->{'date'})
581	{
582		$header->{'date'}->[0]->{'name'} = "Date";
583		$header->{'date'}->[0]->{'value'} = gmtime time;
584	}
585
586	# process the received lines
587	if ($options =~ /r/)
588	{
589		$start_rcvd = new Benchmark if $options =~ /b/;
590		$header->{'received'} = process_rcvd($header->{'received'},$return_path || ($header->{'return-path'} && $header->{'return-path'}->[0]->{'value'}));
591		$end_rcvd = new Benchmark if $options =~ /b/;
592	}
593
594	# add new custom header fields
595	if ($options =~ /x/)
596	{
597		if ($options =~ /t/)
598		{
599			$header->{'x-date-check'}->[0]->{'name'} = "X-Date-Check";
600			$header->{'x-date-check'}->[0]->{'value'} = date_check($header->{'date'}->[0]->{'value'},$header->{'received'} && $header->{'received'}->[0]->{'date'});
601		}
602
603		if ($options =~ /p/)
604		{
605	                for (my $x = 0; $x < scalar @{$header->{'received'}}; $x++)
606	                {
607                       	        if (defined $header->{'received'}->[$x]->{'spf'} && $header->{'received'}->[$x]->{'spf'} =~ /\w/)
608                               	{
609                                    push(@{$header->{'x-spf'}},
610                                         {'name' => "X-SPF", 'value' => $header->{'received'}->[$x]->{'spf'}});
611                                }
612	                }
613		}
614	}
615
616	# This needs to be done after the x-spf headers are generated
617	# above, so that an x-spf header is generated for the
618	# auto-generated local Received line.
619	if ($no_local_received and not $add_local_received) {
620	    splice(@{$header->{'received'}}, 0, 1);
621	}
622
623	# output the new header containing the changes
624	$start_set = new Benchmark if $options =~ /b/;
625	print set_header($header);
626	$end_set = new Benchmark if $options =~ /b/;
627
628	# add the body if desired
629	print parse_body() if $options =~ /w/;
630};
631
632# propagate errors
633die if $@ && $@ !~ /timeout/i;
634
635# print timeout message
636if ($@ =~ /timeout/i) { error("die","Timed out... make sure to supply an email for processing.  Try 'spamitarium -h' for details.\n"); }
637
638# calculate total running time
639if ($options =~ /b/)
640{
641	my $end_time = new Benchmark;
642	my $td = timediff($end_time, $start_time);
643	my $usr = $td->[1]+$td->[3]; my $sys = $td->[2]+$td->[4];
644	my $cpu = $usr+$sys; my $wall = $td->[0];
645	print "Total running time was $wall wallclock secs; $usr usr + $sys sys = $cpu CPU secs.$CRLF";
646
647	$td = timediff($end_parse, $start_parse);
648	$usr = $td->[1]+$td->[3]; $sys = $td->[2]+$td->[4];
649	$cpu = $usr+$sys; $wall = $td->[0];
650	print "Input parsing time was $wall wallclock secs; $usr usr + $sys sys = $cpu CPU secs.$CRLF";
651
652	if ($options =~ /r/)
653	{
654		$td = timediff($end_rcvd, $start_rcvd);
655		$usr = $td->[1]+$td->[3]; $sys = $td->[2]+$td->[4];
656		$cpu = $usr+$sys; $wall = $td->[0];
657		print "Received line processing time was $wall wallclock secs; $usr usr + $sys sys = $cpu CPU secs.$CRLF";
658	}
659
660	$td = timediff($end_set, $start_set);
661	$usr = $td->[1]+$td->[3]; $sys = $td->[2]+$td->[4];
662	$cpu = $usr+$sys; $wall = $td->[0];
663	print "Rebuilding email time was $wall wallclock secs; $usr usr + $sys sys = $cpu CPU secs.$CRLF";
664}
665
666# close popip database
667&closedb if $dbfile;
668
669exit(0);
670
671################################################
672################ Parse Header  #################
673################################################
674
675sub parse_header
676{
677    local($_);
678    my $header_text = "";
679
680    while (<STDIN>) {
681        last if (/^\r?\n$/);
682        $header_text .= $_;
683    }
684
685    # This is really gross. There is a certain prominent email marketing
686    # company whose software has a bug in it which causes Date: headers to
687    # sometimes be terminated with just CR rather than CRLF. If we interpret
688    # RFC 5321 section 2.3.8 strictly, then we're required to treat such a
689    # Date: header and the one following it as a single header field, but
690    # strict adherence to the RFC when that results in obviously broken
691    # behavior is not the best approach. On the other hand, when we're straying
692    # from the RFC, we want to do so as minimally as possible. Therefore, what
693    # we are doing here is checking specifically for this exact problem --
694    # Date: headers ending with CR rather than CRLF -- and correcting for just
695    # that one, limited case.
696    # This handles input with both CRLF and LF line terminators.
697    $header_text =~ s/^(Date:.*)\r([^\n].*(.)\n)/$1$3\n$2/m;
698
699    my(@headers) = split(/\n\b/, $header_text);
700
701    my $header = {};
702    my $last_header = undef;
703
704    for (@headers) {
705        s/\s+$//;
706        s/\s+/ /g;  # collapse whitespace
707        if (s/^(\S+):\s*//) {
708            my $name = $1;
709            my $tag = $name;
710            $tag =~ tr/A-Z/a-z/;  # header names are case-insensitive
711            if (! $_ and $header->{$tag} and
712                grep(! $_->{'value'}, @{$header->{$tag}})) {
713                # If an empty header field is repeated multiple times, we only
714                # need to preserve one of them.
715                next;
716            }
717            push(@{$header->{$tag}}, {'name' => $name, 'value' => $_});
718            $last_header = $header->{$tag}->[-1];
719        }
720        else {
721            # What's the right thing to do here? Either there's no colon or
722            # there's whitespace before the colon, both of which are RFC
723            # violations. Our best guess is to append this to the previous
724            # header.
725            if (! $last_header) {
726                error("warn", "Bad initial header line '$_' ignored\n");
727            }
728            else {
729                error("warn", "Bad header line '$_' appended to preceding '" .
730                      $last_header->{'name'} . "' header field");
731                $last_header->{'value'} .= " " . $_;
732            }
733        }
734    }
735
736    return $header;
737}
738
739sub date_check
740{
741	my ($date,$rcvd) = shift;
742	my ($dow, $day, $mon, $year, $hour, $min, $sec, $rmdr) = "?";
743
744	if ($date =~ /\s*?(\w{1,9}),?\s+?(\d+?)\s+?(\w{3})\s+?(\d{4})\s+?(\d{1,2}):(\d{2}):(\d{2})(.*)/i)
745	{
746		$dow=$1; $day=$2; $mon=$3; $year=$4; $hour=$5; $min=$6; $sec=$7; $rmdr=$8;
747		$mon = $mon=~/Dec/i?11:$mon=~/Nov/i?10:$mon=~/Oct/i?9:$mon=~/Sep/i?8:$mon=~/Aug/i?7:$mon=~/Jul/i?6:$mon=~/Jun/i?5:$mon=~/May/i?4:$mon=~/Apr/i?3:$mon=~/Mar/i?2:$mon=~/Feb/i?1:0;
748
749		eval {
750		    $date = timegm($sec,$min,$hour,$day,$mon,$year);
751		};
752		if ($@) {
753		    return "date-out-of-range (overflow)";
754		}
755
756		# adjust for local time
757		if ($rmdr =~ /\+\d(\d)\d\d/) { $date -= $1 * 60 * 60; }
758		if ($rmdr =~ /\-\d(\d)\d\d/) { $date += $1 * 60 * 60; }
759	}
760	else { return "date-format-unknown"; }
761
762        if ($rcvd && $rcvd =~ /\s*?(\w{1,9}),?\s+?(\d+?)\s+?(\w{3})\s+?(\d{4})\s+?(\d{1,2}):(\d{2}):(\d{2})(.*)/i)
763        {
764                $dow=$1; $day=$2; $mon=$3; $year=$4; $hour=$5; $min=$6; $sec=$7; $rmdr=$8;
765		$mon = $mon=~/Dec/i?11:$mon=~/Nov/i?10:$mon=~/Oct/i?9:$mon=~/Sep/i?8:$mon=~/Aug/i?7:$mon=~/Jul/i?6:$mon=~/Jun/i?5:$mon=~/May/i?4:$mon=~/Apr/i?3:$mon=~/Mar/i?2:$mon=~/Feb/i?1:0;
766
767		eval {
768		    $rcvd = timegm($sec,$min,$hour,$day,$mon,$year);
769		};
770		if ($@) {
771		    return "date-out-of-range (overflow)";
772		}
773
774                # adjust for local time
775                if ($rmdr =~ /\+\d(\d)\d\d/) { $rcvd -= $1 * 60 * 60; }
776                if ($rmdr =~ /\-\d(\d)\d\d/) { $rcvd += $1 * 60 * 60; }
777        }
778        else { $rcvd = time; }
779
780	# check for range +/-
781	my $diff = $rcvd - $date; my $diff_days = round($diff/(60*60*24));
782	if (($diff < $date_limit) and ($diff > $date_limit * -1)) { return "date-in-range ($diff_days days)"; }
783	else { return "date-out-of-range ($diff_days days)"; }
784}
785
786sub round
787{
788    my $num = shift;
789    return int(($num*100)+0.5)/100;
790}
791
792################################################
793################# Parse Body  ##################
794################################################
795
796sub parse_body
797{
798	# this function is really only used for
799	# email-to-email comparisons, where processing
800	# the entire email is required... usually
801	# we'll just process the header
802
803	my $body = "";
804        while (<STDIN>) { $body .= $_; }
805	return $body;
806}
807
808################################################
809########### Process Received Lines  ############
810################################################
811
812sub process_rcvd
813{
814	my $rcvd = shift;
815	my $rtrn = shift;
816
817	# heuristics
818	my $LUSER	= qr~(?:\w|-|\.)+?~;
819	my $DOMAIN	= qr~(?:\w|-|\.)+\.\w{2,4}~;
820	my $IP		= qr~(?:\d{1,3}\.){3}\d{1,3}~;
821	my $EMAIL 	= qr~$LUSER\@$DOMAIN~;
822	my $HELO	= qr~[^\s\0\/\\\#]+?~;
823	my $RDNS	= qr~(?:$DOMAIN|\[?$IP\]?|unknown|unverified)~;
824
825	my $edge_ip = "";
826	my $untrusted = 0;
827
828	if ($no_local_received) {
829	    my $local_rcvd;
830	    $local_rcvd->{'value'} = "auto-generated";
831	    $local_rcvd->{'name'} = "Received";
832	    $local_rcvd->{'sane'} = "trusted";
833	    $local_rcvd->{'rdns'} = $opt_remote_name || "";
834	    $local_rcvd->{'ipad'} = $opt_remote_ip || "";
835	    $local_rcvd->{'mtan'} = $opt_local_name || "";
836	    $local_rcvd->{'mtai'} = $opt_local_ip || "";
837	    $local_rcvd->{'helo'} = $opt_helo || "";
838	    $local_rcvd->{'fore'} = $opt_rcpt || "";
839	    $local_rcvd->{'from'} = $return_path || "";
840
841	    splice(@{$rcvd}, 0, 0, $local_rcvd);
842	}
843
844	# check if we were passed a valid array of received lines
845	unless ((defined $rcvd) && (ref($rcvd) eq "ARRAY") && $rcvd->[0]->{'value'})
846	{
847		no strict 'refs';
848		my %rcvd_hash = ('value' => "from localhost; " . gmtime time, 'name' => "Received");
849		my @rcvd_array; $rcvd_array[0] = \%rcvd_hash;  $rcvd = \@rcvd_array;
850	}
851	else {
852
853	# iterate through each received header, parsing and validating the info
854	for (my $x = 0; $x < scalar @$rcvd; $x++)
855	{
856		# skip processing if we already lost confidence in this trail of received lines
857		#if ($untrusted) { $rcvd->[$x]->{'sane'} = "untrusted"; next; }
858
859		my $helo = $rcvd->[$x]->{'helo'} || "";
860		my $ipad = $rcvd->[$x]->{'ipad'} || "";
861		my $rdns = $rcvd->[$x]->{'rdns'} || "";
862		my $from = $rcvd->[$x]->{'from'} || "";
863		my $mtan = $rcvd->[$x]->{'mtan'} || "";
864		my $mtai = $rcvd->[$x]->{'mtai'} || "";
865		my $fore = $rcvd->[$x]->{'fore'} || "";
866
867		my $idnt=""; my $mtav=""; my $with=""; my $date="";
868
869		# try to take into account all known MTA formats
870
871		if    ($rcvd->[$x]->{'value'} =~ s/\(envelope-(?:sender|from) <($EMAIL)>\)//gis) 			{ $from=$1; 					}# print "X-$x-matched-01: from=$from, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
872		if    ($rcvd->[$x]->{'value'} =~ s/;\s+?(\w{3}, \d{1,2} \w{3} \d{2,4}.*?)$//gis)			{ $date=$1; 					}# print "X-$x-matched-02: date=$date, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
873		if    ($rcvd->[$x]->{'value'} =~ s/for\s+?<?($EMAIL)>?(?: \(single-drop\))?//gis) 			{ $fore=$1; 					}# print "X-$x-matched-03: fore=$fore, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
874		if    ($rcvd->[$x]->{'value'} =~ s/by\s+?(\S+?) \(($IP)\) \((.*?)\)//gis) 				{ $mtan=$1; $mtai=$2; $mtav=$3; 		}# print "X-$x-matched-04: mtan=$mtan, mtai=$mtai, mtav=$mtav, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
875		elsif ($rcvd->[$x]->{'value'} =~ s/by\s+?(\S+?) \[($IP)\]//gis) 					{ $mtan=$1; $mtai=$2; 				}# print "X-$x-matched-05: mtan=$mtan, mtai=$mtai, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
876		elsif ($rcvd->[$x]->{'value'} =~ s/by\s+?(\S+?) \((.+?)\)//gis) 					{ $mtan=$1; $mtav=$2; 				}# print "X-$x-matched-06: mtan=$mtan, mtav=$mtav, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
877		elsif ($rcvd->[$x]->{'value'} =~ s/by\s+?($IP)(?=\W|;|$)//gis) 						{ $mtai=$1; 					}# print "X-$x-matched-07: mtai=$mtai, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
878		elsif ($rcvd->[$x]->{'value'} =~ s/by\s+?($DOMAIN)(?=\W|;|$)//gis) 					{ $mtan=$1; 					}# print "X-$x-matched-08: mtan=$mtan, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
879		elsif ($rcvd->[$x]->{'value'} =~ s/by\s+?(\S+?)(?=\W|;|$)//gis) 					{ $mtan=$1; 					}# print "X-$x-matched-09: mtan=$mtan, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
880		if    ($rcvd->[$x]->{'value'} =~ s/(?:with)\s+?(\S+?) \((.*?)\)//gis) 					{ $with=$1; $mtav=$2 if !$mtav; 		}# print "X-$x-matched-10: with=$with, mtav=$mtav, remaining=$rcvd->[$x]->{'value'} $CRLF";}
881		elsif ($rcvd->[$x]->{'value'} =~ s/(?:with)\s+?(\S+?)(?=\W|;|$)//gis) 					{ $with=$1; 					}# print "X-$x-matched-11: with=$with, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
882		if    ($rcvd->[$x]->{'value'} =~ s/^from\s+?($RDNS) \(HELO ($HELO)\) \(($LUSER)\@\[?($IP)\]?//gis) 	{ $rdns=$1; $helo=$2; $idnt=$3; $ipad=$4; 	}# print "X-$x-matched-12: rdns=$rdns, helo=$helo, idnt=$idnt, ipad=$ipad, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
883		elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?($RDNS) \(HELO ($HELO)\) \(\[?($IP)\]?//gis) 		{ $rdns=$1; $helo=$2; $ipad=$3; 		}# print "X-$x-matched-13: rdns=$rdns, helo=$helo, ipad=$ipad, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
884		elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?($RDNS) \(\[($IP)\] helo=($HELO)\)//gis) 			{ $rdns=$1; $ipad=$2; $helo=$3; 		}# print "X-$x-matched-14: rdns=$rdns, ipad=$ipad, helo=$helo, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
885		elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?($RDNS) \(($LUSER)\@\[?($IP)\]?\)//gis) 			{ $rdns=$1; $idnt=$2; $ipad=$3; 		}# print "X-$x-matched-15: rdns=$rdns, idnt=$idnt, ipad=$ipad, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
886		elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?($RDNS)\(($IP)\)//gis) 					{ $rdns=$1; $ipad=$2; 				}# print "X-$x-matched-16: rdns=$rdns, ipad=$ipad, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
887		elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?\[($IP)\] \(helo=($HELO) ident=($LUSER)\)//gis) 		{ $ipad=$1; $helo=$2; $idnt=$3; 		}# print "X-$x-matched-17: ipad=$ipad, helo=$helo, idnt=$idnt, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
888		elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?\[($IP)\] \(account ($LUSER) HELO ($HELO)\)//gis) 		{ $ipad=$1; $idnt=$2; $helo=$3; 		}# print "X-$x-matched-18: ipad=$ipad, idnt=$idnt, helo=$helo, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
889		elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?\[($IP)\] \(helo=($HELO)\)//gis) 				{ $ipad=$1; $helo=$2; 				}# print "X-$x-matched-19: ipad=$ipad, helo=$helo, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
890		elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?\[?($IP)\]?:?\d*? \(HELO ($HELO)\)//gis) 			{ $ipad=$1; $helo=$2; 				}# print "X-$x-matched-20: ipad=$ipad, helo=$helo, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
891		elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?($HELO) \(IDENT:($LUSER)\@($RDNS) \[($IP)\]//gis) 		{ $helo=$1; $idnt=$2; $rdns=$3; $ipad=$4; 	}# print "X-$x-matched-21: helo=$helo, idnt=$idnt, rdns=$rdns, ipad=$ipad, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
892		elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?($HELO) \(<?($RDNS)>?\s?\[($IP)\]//gis) 			{ $helo=$1; $rdns=$2; $ipad=$3; 		}# print "X-$x-matched-22: helo=$helo, rdns=$rdns, ipad=$ipad, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
893		elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?($HELO) \(\[($IP)\] ident=($LUSER)\)//gis) 			{ $helo=$1; $ipad=$2; $idnt=$3; 		}# print "X-$x-matched-23: helo=$helo, ipad=$ipad, idnt=$idnt, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
894		elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?($HELO) \(proxying for ($IP)\) \(.*? user ($LUSER)\)//gis)	{ $helo=$1; $ipad=$2; $idnt=$3; 		}# print "X-$x-matched-24: helo=$helo, ipad=$ipad, idnt=$idnt, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
895		elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?($HELO) \(account ($LUSER) \[($IP)\] verified\)//gis) 	{ $helo=$1; $idnt=$2; $ipad=$3; 		}# print "X-$x-matched-25: helo=$helo, idnt=$idnt, ipad=$ipad, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
896		elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?\(?($HELO) \(?\[?($IP)\]?\)?//gis) 				{ $helo=$1; $ipad=$2; 				}# print "X-$x-matched-26: helo=$helo, ipad=$ipad, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
897		elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?($HELO) \(localhost \[.*?:($IP)\]\)//gis) 			{ $helo=$1; $ipad=$2; 				}# print "X-$x-matched-27: helo=$helo, ipad=$ipad, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
898		elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?($HELO) \(($LUSER)\@($RDNS)\)//gis) 			{ $helo=$1; $idnt=$2; $rdns=$3; 		}# print "X-$x-matched-28: helo=$helo, idnt=$idnt, rdns=$rdns, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
899		elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?($HELO) \(($RDNS)\)//gis) 					{ $helo=$1; $rdns=$2; 				}# print "X-$x-matched-29: helo=$helo, rdns=$rdns, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
900		elsif ($rcvd->[$x]->{'value'} =~ s/\(from\s+?($LUSER)\@($RDNS)\)//gis)					{ $idnt=$1; $rdns=$2; 				}# print "X-$x-matched-30: idnt=$idnt, rdns=$rdns, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
901		elsif ($rcvd->[$x]->{'value'} =~ s/\(from\s+?($LUSER)\@($HELO)\)//gis)					{ $idnt=$1; $helo=$2; 				}# print "X-$x-matched-31: idnt=$idnt, helo=$helo, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
902		elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?\(?\[?($IP)\]?\)?//gis) 					{ $ipad=$1; 					}# print "X-$x-matched-32: ipad=$ipad, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
903		elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?($HELO)(?=\W|;|$)//gis) 					{ $helo=$1; 					}# print "X-$x-matched-33: helo=$helo, remaining=$rcvd->[$x]->{'value'} $CRLF"; }
904
905		# lookup IP if not provided
906		$ipad = host($rdns) if !$ipad && $rdns && $options =~ /d/;
907		$ipad = host($helo) if !$ipad && !$rdns && $helo && $helo =~ /$DOMAIN/ && $options =~ /d/;
908
909		# exclude lines with no IP
910		#next if !$ipad && ((scalar @$rcvd) > 1);
911
912		# ensure the local received line has a date stamp
913		$date = gmtime time unless $date || $x;
914
915		# save "from" info for comparison in next iteration
916		$rcvd->[$x]->{'rdns'}	= $rdns;
917		$rcvd->[$x]->{'ipad'}	= $ipad;
918		$rcvd->[$x]->{'date'}	= $date;
919
920		# exclude lines from local, private (RFC 1918), and invalid IP address ranges
921		my $reserved = qr~^((?:127\.)|(?:10\.)|(?:172\.(?:1[6-9]|2[0-9]|31)\.)|(?:192\.168\.)|(?:169\.254\.))~;
922		my $valid = qr~^((?:0?0?\d|[01]?\d\d|2[0-4]\d|25[0-5])\.(?:0?0?\d|[01]?\d\d|2[0-4]\d|25[0-5])\.(?:0?0?\d|[01]?\d\d|2[0-4]\d|25[0-5])\.(?:0?0?\d|[01]?\d\d|2[0-4]\d|25[0-5]))$~;
923		next if (($rdns and $rdns =~ /localhost/i) || ($ipad and ($ipad =~ /$reserved/ || $ipad !~ /$valid/))) && ((scalar @$rcvd) > 1);
924
925		# lookup MTA IP/rDNS if not provided
926		$mtai = host($mtan) if !$mtai && $mtan && $options =~ /d/;
927		$mtan = host($mtai) if !$mtan && $mtai && $options =~ /d/;
928
929		# exclude lines from within our class B (/16) network
930		next if (($edge_ip && $ipad && is_same_class_B($edge_ip,$ipad))||(!$edge_ip && $mtai && $ipad && is_same_class_B($mtai,$ipad))) && ((scalar @$rcvd) > 1);
931
932		# perform reverse DNS lookup if not provided by MTA
933		$rdns = host($ipad) if !$rdns && $ipad && $options =~ /d/;
934
935		# force a reverse DNS lookup on all IPs, even those with an RDNS set by the MTA
936		$rdns = host($ipad) if $ipad && $options =~ /f/;
937
938		# perform ASN lookup (RFC 1930/2270)
939		my $asn = "";
940		$asn = asn($ipad) if $ipad && $options =~ /a/;
941
942		# perform SPF lookup (RFC 4408)
943		my $result = ""; my $received_spf = "";
944		if ($options =~ /p/)
945		{
946			if (scalar @cidr_list && eval{Net::CIDR::cidrlookup($ipad, @cidr_list)})
947			{
948				$result = "pass";
949				$received_spf = "pass ($ipad is locally whitelisted)";
950			}
951
952			if ($dbfile && !$result)
953			{
954				&retie unless tied %db;
955				if ($db{$ipad})
956				{
957					$result = "pass";
958					$received_spf = "pass ($ipad is authenticated via POP3)";
959				}
960			}
961
962			if ($rtrn && $ipad && !$result)
963			{
964			    my $srvr = $rdns?$rdns:($helo?$helo:$ipad);
965			    eval {
966				my $spf_server = Mail::SPF::Server->new();
967				my $request = Mail::SPF::Request->new(
968				    identity => $rtrn,
969				    ip_address => $ipad,
970				    helo_identity => $srvr
971				    );
972				my $response = $spf_server->process($request);
973				$result = $response->code;
974				$received_spf = $response->received_spf_header;
975			    };
976			    if ($@) {
977				syslog('warning', 'SPF query error (ip=%s, sender=%s, helo=%s): %s',
978				       $ipad, $rtrn, $srvr, $@);
979				$@ = undef;
980				$result = undef;
981			    }
982			}
983
984			if (!$result)
985			{
986				$result = "error";
987				$received_spf = "unable to determine sender info";
988			}
989		}
990
991		# we implicitely trust the received line set "by" our own server as valid (first untrusted "from")
992		if (!$edge_ip)
993		{
994			$edge_ip = $mtai;
995			$rcvd->[$x]->{'sane'} = set_rcvd($helo,$ipad,$idnt,$rdns,$from,$mtan,$mtai,$mtav,$fore,$with,$date,$asn,$result);
996			$rcvd->[$x]->{'spf'} = $received_spf if $options =~ /p/;
997		}
998
999		# now we'll try to establish the validity of each received line by checking
1000		# for continuity and rejecting lines that don't fit the "from/by" chain
1001		else
1002		{
1003			#print " by " . $mtan . " / prev from " . $rcvd->[$x-1]->{'rdns'} . "$CRLF";
1004			#print " by " . $mtai . " / prev from " . $rcvd->[$x-1]->{'ipad'} . "$CRLF";
1005			if (
1006				(
1007					($mtan && $rcvd->[$x-1]->{'rdns'} && $mtan =~ /$rcvd->[$x-1]->{'rdns'}/) ||
1008			     		($mtai && $rcvd->[$x-1]->{'ipad'} && $mtai =~ /$rcvd->[$x-1]->{'ipad'}/)
1009				) && 	(!$untrusted)
1010			   )
1011			     {
1012				$rcvd->[$x]->{'sane'} = set_rcvd($helo,$ipad,$idnt,$rdns,$from,$mtan,$mtai,$mtav,$fore,$with,$date,$asn,$result);
1013				$rcvd->[$x]->{'spf'} = $received_spf if $options =~ /p/;
1014			     }
1015			else
1016			{
1017				$helo = "untrusted-".$helo if $helo; $ipad = "untrusted-".$ipad if $ipad;
1018				$idnt = "untrusted-".$idnt if $idnt; $rdns = "untrusted-".$rdns if $rdns;
1019				$from = "untrusted-".$from if $from; $mtan = "untrusted-".$mtan if $mtan;
1020				$mtai = "untrusted-".$mtai if $mtai; $mtav = "untrusted-".$mtav if $mtav;
1021				$fore = "untrusted-".$fore if $fore; $with = "untrusted-".$with if $with;
1022				$date = ""; $asn = ""; $result = "";
1023				$rcvd->[$x]->{'sane'} = set_rcvd($helo,$ipad,$idnt,$rdns,$from,$mtan,$mtai,$mtav,$fore,$with,$date,$asn,$result);
1024				$untrusted = 1;
1025			}
1026		}
1027	}}
1028
1029	return $rcvd;
1030}
1031
1032sub is_same_class_B
1033{
1034	my ($ip1,$ip2) = @_;
1035	$ip1 =~ s/^(\d{1,3}\.\d{1,3}\.).*?$/$1/gis;
1036	$ip2 =~ s/^(\d{1,3}\.\d{1,3}\.).*?$/$1/gis;
1037
1038	return ($ip1 eq $ip2)? 1:0;
1039}
1040
1041sub asn
1042{
1043	my $target = shift;
1044	my $output = "";
1045
1046        my $IP          = qr~(?:\d{1,3}\.){3}\d{1,3}~;
1047        my $DOMAIN      = qr~(?:\w|-|\.)+?\.\w{2,4}~;
1048
1049	if ( $target =~ s/(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})/$4.$3.$2.$1.$asn_server/ )
1050	{
1051		# uncomment this code if you do not want to use Net::DNS::Resolver and you have 'host' on your system
1052		#open (HOST, "host -t txt $target 2>/dev/null |") or error("warn","Host lookup failed: $!");
1053		#while (<HOST>) { $output = $1 if /\Q$target\E(?: descriptive)? text "(\d*?)".*/; }
1054		#close HOST;
1055
1056                # find ASN info via Net::DNS::Resolver
1057	        if (my $query = $res->send($target,"TXT")) { foreach ($query->answer) { $output = $1 if $_->string =~ /$DOMAIN\.\s+?\d+?\s+?IN\s+?TXT\s+?"(\d+?)"\s+?"$IP"\s+?"\d+?"/; }}
1058                #else { error("warn","ASN lookup failed: " . $res->errorstring); }
1059	}
1060
1061	return $output;
1062}
1063
1064sub host
1065{
1066	my $target = shift;
1067	my $output = "";
1068
1069	my $IP		= qr~(?:\d{1,3}\.){3}\d{1,3}~;
1070	my $DOMAIN	= qr~(?:\w|-|\.)+?\.\w{2,4}~;
1071
1072	if ($target =~ s/($IP|$DOMAIN)/$1/)
1073	{
1074		# uncomment this code if you do not want to use Net::DNS::Resolver and you have 'host' on your system
1075		#open (HOST, "host $target 2>/dev/null |") or error("warn","Host lookup failed: $!");
1076		#while (<HOST>) { $output = $1 if /$DOMAIN (?:domain name pointer|has address) ($IP|$DOMAIN)\.?/; }
1077		#close HOST;
1078
1079		# find DNS info via Net::DNS::Resolver
1080	        if (my $query = $res->send($target)) { foreach ($query->answer) { $output = $1 if $_->string =~ /$DOMAIN\.\s+?\d+?\s+?IN\s+?(?:PTR|A)\s+?($IP|$DOMAIN)\.?/; }}
1081        	#else { error("warn","DNS lookup failed: " . $res->errorstring); }
1082	}
1083
1084	return $output;
1085}
1086
1087sub set_rcvd
1088{
1089	my ($helo,$ipad,$idnt,$rdns,$from,$mtan,$mtai,$mtav,$fore,$with,$date,$asn,$spf) = @_;
1090
1091	my $output = "from";
1092	if ($options =~ /e/) {
1093	$output .= ($helo)? 		" helo-$helo" 	: " no-helo";}		# sender's salutation
1094	$output .= ($rdns)? 		" $rdns" 	: " no-rdns";		# sender's name
1095	$output .= ($ipad)? 		" $ipad" 	: " no-ipad";		# sender's IP
1096        if ($options =~ /p/) {
1097	$output .= ($spf)?		" spf-$spf"	: " no-spf";}		# sender's policy result
1098	$output .= ($asn)?  		" as$asn" 	: " no-asn";		# sender's ASN
1099	$output .= ($mtan||$mtai)? 	" $CRLF\t  by"	: "";
1100	$output .= ($mtan)? 		" $mtan" 	: " no-mta-name";	# receiving MTA's name
1101	$output .= ($mtai)? 		" $mtai" 	: " no-mta-ip";		# receiving MTA's IP
1102	$output .= ($fore)? 		" $CRLF\t  for" : "";
1103	$output .= ($fore)? 		" <$fore>" 	: " no-to-addr";	# envelope to address
1104	$output .= ($date)? 		"; $date" 	: " no-date";		# received date/time
1105
1106	#print "outputting received: $output" . $CRLF;
1107	return $output;
1108}
1109
1110sub opendb_read
1111{
1112    tie(%db, "DB_File", $dbfile, O_RDONLY, 0, $DB_HASH) or error("warn","Can't open $dbfile: $!");
1113}
1114
1115sub closedb
1116{
1117    untie %db;
1118    undef %db;
1119}
1120
1121sub retie
1122{
1123    &closedb;
1124    &opendb_read;
1125}
1126
1127################################################
1128################ Output Header #################
1129################################################
1130
1131sub set_header
1132{
1133	my $header = shift;
1134	my $output = "";
1135	my $name = "";
1136
1137	# exclude the "masked fields" from display
1138	foreach $name (split(/,/,$masked_fields)) { $spec_fields =~ s/(?<=,)$name,?//; }
1139
1140	# output the fields in the order specified by RFC 2822 - minus the masked fields
1141	foreach $name (split(/,/,$spec_fields)) { $output .= set_field($header,$name); delete $header->{$name}; }
1142
1143	# set any user-specified fields
1144	foreach $name (split(/,/,$user_fields)) { $output .= set_field($header,$name); delete $header->{$name}; }
1145
1146	# set new custom x-header fields
1147	if ($options =~ /x/) { foreach $name (split(/,/,$new_fields)) { $output .= set_field($header,$name); delete $header->{$name}; } }
1148
1149	# then set any remaining fields (if allowed to set non-standard fields)
1150	if ($options !~ /s/) { foreach $name (keys %{$header}) { $output .= set_field($header,$name); } }
1151
1152	$output .= $CRLF;
1153
1154	return $output;
1155}
1156
1157sub set_field {
1158    my($header, $name) = @_;
1159    my $output = "";
1160
1161    if ($header->{$name}) {
1162        foreach my $header (@{$header->{$name}}) {
1163            if ($name eq "received" and $options =~ /r/) {
1164                if ($header->{'sane'} and $header->{'sane'} =~ /\w/) {
1165                    $output .= $header->{'name'} . ": " . $header->{'sane'} .
1166                        $CRLF;
1167                }
1168                # else {
1169                #     $output .= $header->{'name'} . ": sanity check failed" .
1170                #         $CRLF;
1171                # }
1172            }
1173            elsif ($header->{'name'} and defined($header->{'value'})) {
1174                $output .= $header->{'name'} . ": " . $header->{'value'} .
1175                    $CRLF;
1176            }
1177            else {
1178                my $dumped = Data::Dumper->new([$header], [qw(header)])->
1179                    Indent(0)->Dump();
1180                error("warn", "Header for $name, $dumped, is missing name " .
1181                      "and/or value?");
1182            }
1183        }
1184    }
1185    elsif ($req_fields =~ /(?:^|,)$name(?:,|$)/) {
1186        $output .= ucfirst($name) . ": [no-$name] " . $CRLF;
1187    }
1188
1189    return $output;
1190}
1191
1192################################################
1193################ Error Handling ################
1194################################################
1195
1196sub error
1197{
1198	my ($action,$msg) = @_;
1199
1200	die $msg if $action eq "die";
1201	warn $msg unless $action eq "die";
1202	# add other actions if you like
1203}
1204
1205sub sig_trap
1206{
1207	my $sig = shift;
1208	my ($action,$more) = ("warn","");
1209
1210	sig:
1211	{
1212        	$action = "die",  last sig if $sig =~ /ALRM/;
1213	        $action = "warn", last sig if $sig =~ /PIPE/;
1214	        $action = "warn", last sig if $sig =~ /CHLD/;
1215        	$action = "die" , last sig if $sig =~ /INT/;
1216	        $action = "die" , last sig if $sig =~ /HUP/;
1217        	$action = "warn";
1218	}
1219
1220	my $waitedpid = wait;
1221	$more = "; Reaped pid $waitedpid, exited with status " . ($? >> 8) if $waitedpid;
1222
1223	$SIG{$sig} = \&sig_trap;
1224
1225	error ($action,"Trapped signal SIG$sig$more");
1226}
1227
1228################################################
1229################################################
1230################################################
1231