1#!/usr/local/bin/perl -Tw 2require 5.006_001; 3use strict; 4 5# This is a fork of Tom Anderson's spamitarium.pl by Jonathan Kamens 6# <jik@kamens.us>. The changes here have been submitted back to Tom Anderson 7# for incorporation into his master version. In the meantime, you may wish to 8# use this version instead. Here are the changes in it: 9# 10# * Command-line parsing has been updated to use the standard Perl Getopt::Long 11# syntax, while continuing to support the "bundled" options syntax supported 12# by previous versions of the script. 13# * Options have been added to specify information that may be missing from the 14# message header, specifically for when spamitarium is being called from a 15# Milter and therefore the message does not yet have the Return-Path or local 16# Received lines that will be inserted by the MTA before final delivery. See 17# the documentation below for the options --return-path, --no-local-received, 18# --remote-ip, --remote-name, --helo, --local-ip, --local-name, --rcpt, and 19# --add-local-received. 20# * A --timeout option has been added for specifying how long the script should 21# wait for input, or 0 to disable the timeout completely. This is primarily 22# useful for debugging the script. 23# * The header parsing code has been refactored to be cleaner and more robust. 24# * In particular, empty header fields are now handled correctly (previously, 25# they were appended to the previous header field!). 26# * Empty header fields are now included in the output, for more accurate 27# bogofilter'ing. 28# * Typos and such have been cleaned up in the documentation. 29# * A date-parsing bug which was causing the time zone to be ignored, thus 30# causing the X-Date-Check header to report an inaccurate delta, has been 31# fixed. 32# * A bug which could cause spamitarium to crash upon encountering a far into 33# the future date in the message header has been fixed. 34# * A bug which was causing some domain names to be truncated (e.g., 35# "omta01-mdp.westchester.pa.bo.comcast.net" became "omta01-mdp.west") has 36# been fixed. 37# * A couple of Perl uninitialized variable warnings have been fixed. 38# * Mail::SPF is used now instead of Mail::SPF::Query, for IPv6 support. 39# * Code has been added to work around the fact that some emails generated by 40# Constant Contact have CR rather than CRLF at the end of their Date: header 41# lines (this is a very specific workaround for a very specific problem 42# because technically the workaround is a violation of the SMTP RFC, though 43# nobody else seems to care about that. :-/). 44 45=head1 NAME 46 47Spamitarium - evaluates and repairs the sanity of email headers... 48 49=cut 50 51my $version = "0.5.2"; 52 53################################################ 54############### Copyleft Notice ################ 55################################################ 56 57# Copyright © 2004 Order amid Chaos, Inc. 58# Author: Tom Anderson 59# neo+spamitarium@orderamidchaos.com 60# 61# This program is open-source software; you can redistribute it 62# and/or modify it under the terms of the GNU General Public 63# License, v2, as published by the Free Software Foundation. 64# 65# This program is distributed in the hope that it will be useful, 66# but WITHOUT ANY WARRANTY; without even the implied warranty of 67# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 68# GNU General Public License for more details. 69# 70# You should have received a copy of the GNU General Public 71# License along with this program; if not, write to: 72# 73# Free Software Foundation 74# 59 Temple Place, Suite 330 75# Boston, MA 02111-1307 USA 76# 77# http://www.gnu.org/ 78 79################################################# 80################# Documentation ################# 81################################################# 82 83# use "perldoc spamitarium" or "spamitarium -h" to read this 84 85=head1 SYNOPSIS 86 87=head2 Command line usage: 88 89B<spamitarium> [I<options>] < [I<rfc822_email>] 90 91=head2 Procmail usage (recommended): 92 93Add to your .procmailrc the following recipe: 94 95 :0 96 { 97 :0 fhw 98 | spamitarium -sreadxtp 99 100 # filter through bogofilter, tagging as spam 101 # or not and updating the word lists 102 :0 fw 103 | bogofilter -uep 104 105 # add back the "From" header for proper delivery 106 :0 fhw 107 | formail -I "From " -a "From " 108 } 109 110=head2 Command line options: 111 112=over 4 113 114=item B<-h> 115 116display this help file 117 118=item B<-s> [I<permitted_headers>] 119 120Allow standard fields only (RFC 822/2822/1049/1341/1521/2183/1864) ... 121all others are stripped from the email. To exclude some headers from 122stripping, specify a comma-separated list as an argument to this 123option. 124 125=item B<-r> 126 127insert new received line containing verified received-line tokens 128 129=item B<-e> 130 131include helo string in received line 132 133=item B<-d> 134 135allow DNS lookups (forward and reverse) to help fill in all 136necessary received fields 137 138=item B<-f> 139 140force rDNS lookups even when provided already by the MTA 141 142=item B<-a> 143 144perform ASN lookups and include in received lines 145 146=item B<-p> 147 148perform SPF lookups and include in received lines 149 150=item B<-x> 151 152include custom x-headers for additional header validations: 153 154=item B<-t> 155 156validate that the date header is within close proximity to the 157received date (see $date_limit global variable to configure) 158 159=item B<-w> 160 161parse and display the body of the email in addition to the headers 162 163=item B<-b> 164 165display benchmarking info 166 167=item B<--return-path> I<addr> 168 169Specify the sender of the email for SPF lookups, overriding the 170"Return-Path" field. 171 172=item B<--no-local-received> 173 174Tell spamitarium that there is no Received line in the message from 175the local mailer, e.g., because spamitarium is being called from 176inside a Milter. 177 178The following options are only used when B<--no-local-received> is 179specified. 180 181=over 4 182 183=item B<--remote-ip> I<ip-address> 184 185Specify the IP address of the remote server sending the email. 186 187=item B<--remote-name> I<ip-address> 188 189Specify the host name of the remote server sending the email, as 190determined by a reverse DNS lookup of its IP address. 191 192=item B<--helo> I<host-name> 193 194Specify the host name sent by the remote server in the SMTP HELO (or 195EHLO) command. 196 197=item B<--local-ip> I<ip-address> 198 199Specify the IP address of the server receiving the email. 200 201=item B<--local-name> I<host-name> 202 203Specify the host name of the server receiving the email. 204 205=item B<--rcpt> I<email-address> 206 207Specify the envelope address of the recipient of the email. 208 209=item B<--add-local-received> 210 211Indicate that spamitarium should add an auto-generated local Received 212line. 213 214=back 215 216=item B<--timeout> I<seconds> 217 218How long to wait for input before giving up. Specify 0 to disable the timeout 219completely. Primarily for debugging purposes. 220 221=back 222 223 224=head2 User-defined header field list: 225 226If using the B<s> option, you may want to allow certain header fields 227other than those specified by RFCs. These might include fields set 228by your mailing list or proxy or other custom application. You may 229specify such a list of fields by appending them comma-delimited at 230the end of your command line. For example, if you wanted the 231I<list-id> and I<encrypted> fields passed through, you would change 232your procmail recipe as follows: 233 234 | spamitarium -readxtp -s list-id,encrypted 235 236 237=head1 REQUIRES 238 239=over 4 240 241=item * 242 243Perl 5.6.1 244Net::DNS::Resolver 245Mail::SPF 246Net::CIDR 247DB_File 248POSIX 249Getopt::Long 250 251=back 252 253 254=head1 DESCRIPTION 255 256Spamitarium helps to remove unnecessary noise from email headers and 257to highlight just the portions which contribute positively to spam 258filtering using statistical methods. 259 260The only non-spoofable, -forgable, or -tweakable part of 261an email is the received line, as it is generated by the receiving 262mail server which ought to have no reason to munge it. Every other 263part of an email can be influenced directly by the sender. Received 264line tokens, when verified authentic, are therefore highly indicative 265of whether or not a given message is spam. 266 267Spamitarium reads the received headers, determines which ones are 268authentic, and then prints tokens into the header which may be keyed 269on by statistical filters. This works much like a blacklist/whitelist, 270but when coupled with a statistical filter such as bogofilter, these 271lists are automatically generated and require no manual maintenance 272other than normal training. 273 274Moreover, headers which do not directly influence the email in any 275functional way, nor are visible to the end-user in a standard 276graphical MUA, are highly likely to contain information which 277spammers think will detract from normal statistical filtering. It 278is therefore desireable to remove these elements, specifically 279X-headers, prior to filtering. Spamitarium removes all invisible, 280non-functional header lines. 281 282Spamitarium also looks up any IP addresses or rDNS addresses 283which are not provided in order to provide the maximum tokens on 284which to filter. Moreover, it looks up the autonomous system number 285(ASN) associated with each "from" address in order to provide 286a small set of tokens representing the various major subnets of the 287internet. And it checks the Sender Policy Framework (SPF) records of 288the sender to ensure that the given MX has permission to send on 289their behalf. 290 291Finally, Spamitarium assesses the headers for missing required 292header lines, inserting keyable tokens or supplying the missing 293information. And it compares the date fields to determine if the 294email has been pre- or post-dated by a large margin in order to 295influence where it appears in your mail client and inserts an 296x-header with keyable range tokens to compensate for this. 297 298Together, all of these techniques help to remove the noise which 299accompanies, either incidentally or maliciously, most email messages. 300This results in a cleaner header consisting of more easily scored 301tokens. This permits better accuracy with statistical filters as 302well as quicker processing and a smaller token database. 303 304 305=head1 FAQ 306 307=head2 Ask a question 308 309Ye may receive an answer here if it is asked frequently 310 311 312=head1 BUGS 313 314=over 4 315 316=item * 317 318timegm($sec,$min,$hour,$day,$mon,$year) aborts if Perl's time_t is 32 319bits large and the year is too high (>2038). 320 321=back 322 323=head1 TODO 324 325=over 4 326 327=item * 328 329Suggestions welcome. 330 331=back 332 333=head1 SEE ALSO 334 335=over 4 336 337=item * 338 339L<procmail> 340 341=item * 342 343L<bogofilter> 344 345=back 346 347 348=head1 AUTHOR 349 350 Tom Anderson <neo+spamitarium@orderamidchaos.com> 351 Jonathan Kamens <jik@kamens.us> 352 353=cut 354 355################################################# 356############### User Variables ################# 357################################################# 358 359# please edit according to your setup 360 361# default path 362our $path = "/bin:/usr/bin:/usr/local/bin"; 363 364# default shell 365our $shell = "/bin/sh"; 366 367# seconds before we bail waiting on input 368our $timeout = 3; 369 370# server to use for ASN lookups 371our $asn_server = "asn.routeviews.org"; 372 373# Whitelist any IP addresses or ranges from SPF lookups 374our @whitelist = ("127.0.0.1","192.168.0.1-192.168.0.255"); 375 376# If you want to whitelist any addresses which have authenticated 377# via poprelayd (i.e. remote workstations of users on your server) 378# set $dbfile to your popip.db location, else set it to undef 379#our $dbfile = "/etc/mail/popip.db"; 380our $dbfile = undef; 381 382# distance in seconds from right now to consider a reasonable (non-spam) range to date an email 383our $date_limit = 60*60*24*2; # 2 days 384 385# EMAIL HEADER FIELDS 386# 387# See RFC 2076 / "Common Internet Message Header Fields" for a synopsis of common mail headers 388 389 # SPECIFIED FIELDS -- all of the fields specified in RFC 822/2822, case-insensitive, in the suggested order 390 our $spec_fields = "return-path,received,resent-date,resent-from,resent-sender,resent-reply-to,". 391 "resent-to,resent-cc,resent-bcc,resent-message-id,date,from,sender,reply-to,". 392 "to,cc,bcc,message-id,in-reply-to,references,subject,comments,keywords,encrypted"; 393 394 # MIME header fields (RFC 1049/1341/1521/2183) 395 $spec_fields .= ",mime-version,content-type,content-transfer-encoding,content-id,content-description,content-disposition"; 396 397 # security/checksum (RFC 1864) 398 $spec_fields .= ",content-md5"; 399 400 # mailing list headers (RFC 2369/2919) may be added if you like, but for now I'm choosing to leave them out 401 #$spec_fields .= ",list-id,list-help,list-unsubscribe,list-subscribe,list-post,list-owner,list-archive"; 402 403 # MASKED FIELDS -- unnecessary fields often used for spam will be expunged from the spec fields list 404 # (if you know of a valid, necessary use for these, let me know) 405 our $masked_fields = "keywords,comments,encrypted,content-id,content-description"; 406 407 # controversial and not strictly necessary: 408 #$masked_fields .= ",reply-to"; 409 410 # message-id fields are only machine-readable and not visible to nor readable by the recipient 411 # however, they can be useful if your client produces discussion threading 412 # uncomment this line if you don't care about threading: 413 #$masked_fields .= ",message-id,resent-message-id,in-reply-to,references"; 414 415 # resent fields are strictly informational (and not generally user-visible), therefore allowing them through is optional: 416 # MIME specifies a different way of resending messages with the "Message" content-type, so these may be considered deprecated: 417 $masked_fields .= ",resent-date,resent-from,resent-sender,resent-reply-to,resent-to,resent-cc,resent-bcc,resent-message-id"; 418 419 # USER FIELDS -- User fields are those that are neither specified nor masked that you want permitted. 420 # These may include special fields for your particular mail server, filter, or mail user agent. 421 our $user_fields = ""; 422 423 # NEW FIELDS -- New custom x-headers added by Spamitarium (it is recommend that you don't change these). 424 # These are disabled unless you pass the 'x' option. 425 our $new_fields = "x-date-check,x-spf"; 426 427 # REQUIRED FIELDS -- Any fields that should show up in an email even if they are not sent -- i.e. if the lack of 428 # these fields may be useful for the filter, a no-req-field tag will be added. The only *required* fields according to 429 # RFC 2822 are "from", "sender", "reply-to", and "date", others are just suggested. However, "sender" and "reply-to" are 430 # commonly not supplied, and so should probably not be in this list. On the other hand, "subject" and a few others may 431 # be desired in this list. 432 our $req_fields = "received,from,to,date,subject"; 433 434# of course, modify the first line of this file, 435# the shebang, to point to your perl interpreter 436 437# do not edit below this line unless you really 438# know what you're doing 439 440################################################# 441############## Include Libraries ################ 442################################################# 443 444use Benchmark; 445use Data::Dumper; 446use File::Basename; 447use Time::Local; 448use Net::DNS::Resolver; 449use Mail::SPF; 450use Net::CIDR; 451use DB_File; 452use POSIX; 453use Getopt::Long; 454use Sys::Syslog; 455 456################################################# 457############## Default Globals ################## 458################################################# 459 460$> = $<; # set effective user ID to real UID 461$) = $(; # set effective group ID to real GID 462 463# Make %ENV safer 464delete @ENV{qw(IFS CDPATH ENV BASH_ENV PATH SHELL)}; 465 466# Set the environment explicitly 467$ENV{PATH} = $path; 468$ENV{SHELL} = $shell; 469 470# options flags 471our $options = ""; 472our $return_path = ""; 473our $opt_remote_ip = ""; 474our $opt_remote_name = ""; 475our $opt_local_ip = ""; 476our $opt_local_name = ""; 477our $opt_helo = ""; 478our $opt_rcpt = ""; 479our $no_local_received = undef; 480our $add_local_received = undef; 481 482# define the control-linefeed syntax for this system 483our $CRLF = 484"\n"; 485#($^O=~/VMS/i)? "\n": # VMS 486#("\t" ne "\011")? "\r\n": # EBCDIC 487# "\015\012"; # others 488 489# DNS query options 490our $res = Net::DNS::Resolver->new( 491 nameservers => [qw(127.0.0.1)], 492 udp_timeout => 2, 493 retry => 1, 494 #debug => 1 495); 496 497# convert whitelist into CIDR notation 498our @cidr_list = (); 499foreach my $IP (@whitelist) { 500 if (not eval {@cidr_list = Net::CIDR::cidradd ($IP, @cidr_list)}) { 501 error("warn","Error processing whitelist: \"$IP\" is not a valid IP address or range."); 502 } 503} 504 505################################################ 506##################### Main ##################### 507################################################ 508 509openlog(basename $0, 'mail'); 510 511# process options 512sub usage { 513 my $spamitarium = $1 if $0 =~ /^([\w\/.\-~]*)$/; 514 system("perldoc $spamitarium"); 515} 516 517# Allow bundling of first set of options, as well as optional hyphen. 518if (@ARGV and $ARGV[0] =~ /^-?[hrdfasebwxtp]+$/) { 519 if ($ARGV[0] =~ s/s//) { 520 splice(@ARGV, 1, 0, "-s"); 521 } 522 $ARGV[0] =~ s/^([^-])/-$1/; 523 Getopt::Long::Configure("bundling"); 524} 525 526if (! GetOptions("help|h" => sub { &usage; exit(0); }, 527 "r" => sub { $options .= "r"; }, # process received headers 528 "d" => sub { $options .= "d"; }, # perform domain lookups where needed 529 "f" => sub { $options .= "f"; }, # force RDNS lookups even where MTA provided 530 "a" => sub { $options .= "a"; }, # perform ASN lookups 531 "s:s" => sub { $options .= "s"; # standard fields only (strip others) 532 $user_fields = $_[1]; }, 533 "e" => sub { $options .= "e"; }, # include the helo received field in output 534 "b" => sub { $options .= "b"; }, # output benchmarking info 535 "w" => sub { $options .= "w"; }, # process whole email (including body) 536 "x" => sub { $options .= "x"; }, # insert custom x-header fields 537 "t" => sub { $options .= "t"; }, # perform date range checks 538 "p" => sub { $options .= "p"; }, # perform SPF lookups 539 "return-path|returnpath=s" => \$return_path, 540 "remote-ip|remoteip=s" => \$opt_remote_ip, 541 "remote-name|remotename=s" => \$opt_remote_name, 542 "local-ip|localip=s" => \$opt_local_ip, 543 "local-name|localname=s" => \$opt_local_name, 544 "helo=s" => \$opt_helo, 545 "rcpt=s" => \$opt_rcpt, 546 "no-local-received|nolocalreceived" => \$no_local_received, 547 "add-local-received|addlocalreceived" => \$add_local_received, 548 "timeout=i" => \$timeout, # 0 to disable timeout 549 )) { 550 &usage; 551 exit(1); 552} 553 554# open popip database for reading 555our %db; 556&opendb_read if $dbfile; 557 558# start timing the process 559my $start_time = new Benchmark if $options =~ /b/; 560my ($start_parse, $end_parse, $start_rcvd, $end_rcvd, $start_set, $end_set); 561 562# get STDIN and process the email 563eval 564{ 565 # set an alarm so that we don't hang on an empty STDIN 566 local $SIG{ALRM} = sub { die "timeout" }; 567 if ($timeout > 0) { 568 alarm $timeout; 569 } 570 571 # parse the header 572 $start_parse = new Benchmark if $options =~ /b/; 573 my ($header,$parse_benchmark) = parse_header(); 574 $end_parse = new Benchmark if $options =~ /b/; 575 576 # cancel timeout if we got this far 577 alarm 0; 578 579 # default date if none provided 580 unless (defined $header->{'date'}) 581 { 582 $header->{'date'}->[0]->{'name'} = "Date"; 583 $header->{'date'}->[0]->{'value'} = gmtime time; 584 } 585 586 # process the received lines 587 if ($options =~ /r/) 588 { 589 $start_rcvd = new Benchmark if $options =~ /b/; 590 $header->{'received'} = process_rcvd($header->{'received'},$return_path || ($header->{'return-path'} && $header->{'return-path'}->[0]->{'value'})); 591 $end_rcvd = new Benchmark if $options =~ /b/; 592 } 593 594 # add new custom header fields 595 if ($options =~ /x/) 596 { 597 if ($options =~ /t/) 598 { 599 $header->{'x-date-check'}->[0]->{'name'} = "X-Date-Check"; 600 $header->{'x-date-check'}->[0]->{'value'} = date_check($header->{'date'}->[0]->{'value'},$header->{'received'} && $header->{'received'}->[0]->{'date'}); 601 } 602 603 if ($options =~ /p/) 604 { 605 for (my $x = 0; $x < scalar @{$header->{'received'}}; $x++) 606 { 607 if (defined $header->{'received'}->[$x]->{'spf'} && $header->{'received'}->[$x]->{'spf'} =~ /\w/) 608 { 609 push(@{$header->{'x-spf'}}, 610 {'name' => "X-SPF", 'value' => $header->{'received'}->[$x]->{'spf'}}); 611 } 612 } 613 } 614 } 615 616 # This needs to be done after the x-spf headers are generated 617 # above, so that an x-spf header is generated for the 618 # auto-generated local Received line. 619 if ($no_local_received and not $add_local_received) { 620 splice(@{$header->{'received'}}, 0, 1); 621 } 622 623 # output the new header containing the changes 624 $start_set = new Benchmark if $options =~ /b/; 625 print set_header($header); 626 $end_set = new Benchmark if $options =~ /b/; 627 628 # add the body if desired 629 print parse_body() if $options =~ /w/; 630}; 631 632# propagate errors 633die if $@ && $@ !~ /timeout/i; 634 635# print timeout message 636if ($@ =~ /timeout/i) { error("die","Timed out... make sure to supply an email for processing. Try 'spamitarium -h' for details.\n"); } 637 638# calculate total running time 639if ($options =~ /b/) 640{ 641 my $end_time = new Benchmark; 642 my $td = timediff($end_time, $start_time); 643 my $usr = $td->[1]+$td->[3]; my $sys = $td->[2]+$td->[4]; 644 my $cpu = $usr+$sys; my $wall = $td->[0]; 645 print "Total running time was $wall wallclock secs; $usr usr + $sys sys = $cpu CPU secs.$CRLF"; 646 647 $td = timediff($end_parse, $start_parse); 648 $usr = $td->[1]+$td->[3]; $sys = $td->[2]+$td->[4]; 649 $cpu = $usr+$sys; $wall = $td->[0]; 650 print "Input parsing time was $wall wallclock secs; $usr usr + $sys sys = $cpu CPU secs.$CRLF"; 651 652 if ($options =~ /r/) 653 { 654 $td = timediff($end_rcvd, $start_rcvd); 655 $usr = $td->[1]+$td->[3]; $sys = $td->[2]+$td->[4]; 656 $cpu = $usr+$sys; $wall = $td->[0]; 657 print "Received line processing time was $wall wallclock secs; $usr usr + $sys sys = $cpu CPU secs.$CRLF"; 658 } 659 660 $td = timediff($end_set, $start_set); 661 $usr = $td->[1]+$td->[3]; $sys = $td->[2]+$td->[4]; 662 $cpu = $usr+$sys; $wall = $td->[0]; 663 print "Rebuilding email time was $wall wallclock secs; $usr usr + $sys sys = $cpu CPU secs.$CRLF"; 664} 665 666# close popip database 667&closedb if $dbfile; 668 669exit(0); 670 671################################################ 672################ Parse Header ################# 673################################################ 674 675sub parse_header 676{ 677 local($_); 678 my $header_text = ""; 679 680 while (<STDIN>) { 681 last if (/^\r?\n$/); 682 $header_text .= $_; 683 } 684 685 # This is really gross. There is a certain prominent email marketing 686 # company whose software has a bug in it which causes Date: headers to 687 # sometimes be terminated with just CR rather than CRLF. If we interpret 688 # RFC 5321 section 2.3.8 strictly, then we're required to treat such a 689 # Date: header and the one following it as a single header field, but 690 # strict adherence to the RFC when that results in obviously broken 691 # behavior is not the best approach. On the other hand, when we're straying 692 # from the RFC, we want to do so as minimally as possible. Therefore, what 693 # we are doing here is checking specifically for this exact problem -- 694 # Date: headers ending with CR rather than CRLF -- and correcting for just 695 # that one, limited case. 696 # This handles input with both CRLF and LF line terminators. 697 $header_text =~ s/^(Date:.*)\r([^\n].*(.)\n)/$1$3\n$2/m; 698 699 my(@headers) = split(/\n\b/, $header_text); 700 701 my $header = {}; 702 my $last_header = undef; 703 704 for (@headers) { 705 s/\s+$//; 706 s/\s+/ /g; # collapse whitespace 707 if (s/^(\S+):\s*//) { 708 my $name = $1; 709 my $tag = $name; 710 $tag =~ tr/A-Z/a-z/; # header names are case-insensitive 711 if (! $_ and $header->{$tag} and 712 grep(! $_->{'value'}, @{$header->{$tag}})) { 713 # If an empty header field is repeated multiple times, we only 714 # need to preserve one of them. 715 next; 716 } 717 push(@{$header->{$tag}}, {'name' => $name, 'value' => $_}); 718 $last_header = $header->{$tag}->[-1]; 719 } 720 else { 721 # What's the right thing to do here? Either there's no colon or 722 # there's whitespace before the colon, both of which are RFC 723 # violations. Our best guess is to append this to the previous 724 # header. 725 if (! $last_header) { 726 error("warn", "Bad initial header line '$_' ignored\n"); 727 } 728 else { 729 error("warn", "Bad header line '$_' appended to preceding '" . 730 $last_header->{'name'} . "' header field"); 731 $last_header->{'value'} .= " " . $_; 732 } 733 } 734 } 735 736 return $header; 737} 738 739sub date_check 740{ 741 my ($date,$rcvd) = shift; 742 my ($dow, $day, $mon, $year, $hour, $min, $sec, $rmdr) = "?"; 743 744 if ($date =~ /\s*?(\w{1,9}),?\s+?(\d+?)\s+?(\w{3})\s+?(\d{4})\s+?(\d{1,2}):(\d{2}):(\d{2})(.*)/i) 745 { 746 $dow=$1; $day=$2; $mon=$3; $year=$4; $hour=$5; $min=$6; $sec=$7; $rmdr=$8; 747 $mon = $mon=~/Dec/i?11:$mon=~/Nov/i?10:$mon=~/Oct/i?9:$mon=~/Sep/i?8:$mon=~/Aug/i?7:$mon=~/Jul/i?6:$mon=~/Jun/i?5:$mon=~/May/i?4:$mon=~/Apr/i?3:$mon=~/Mar/i?2:$mon=~/Feb/i?1:0; 748 749 eval { 750 $date = timegm($sec,$min,$hour,$day,$mon,$year); 751 }; 752 if ($@) { 753 return "date-out-of-range (overflow)"; 754 } 755 756 # adjust for local time 757 if ($rmdr =~ /\+\d(\d)\d\d/) { $date -= $1 * 60 * 60; } 758 if ($rmdr =~ /\-\d(\d)\d\d/) { $date += $1 * 60 * 60; } 759 } 760 else { return "date-format-unknown"; } 761 762 if ($rcvd && $rcvd =~ /\s*?(\w{1,9}),?\s+?(\d+?)\s+?(\w{3})\s+?(\d{4})\s+?(\d{1,2}):(\d{2}):(\d{2})(.*)/i) 763 { 764 $dow=$1; $day=$2; $mon=$3; $year=$4; $hour=$5; $min=$6; $sec=$7; $rmdr=$8; 765 $mon = $mon=~/Dec/i?11:$mon=~/Nov/i?10:$mon=~/Oct/i?9:$mon=~/Sep/i?8:$mon=~/Aug/i?7:$mon=~/Jul/i?6:$mon=~/Jun/i?5:$mon=~/May/i?4:$mon=~/Apr/i?3:$mon=~/Mar/i?2:$mon=~/Feb/i?1:0; 766 767 eval { 768 $rcvd = timegm($sec,$min,$hour,$day,$mon,$year); 769 }; 770 if ($@) { 771 return "date-out-of-range (overflow)"; 772 } 773 774 # adjust for local time 775 if ($rmdr =~ /\+\d(\d)\d\d/) { $rcvd -= $1 * 60 * 60; } 776 if ($rmdr =~ /\-\d(\d)\d\d/) { $rcvd += $1 * 60 * 60; } 777 } 778 else { $rcvd = time; } 779 780 # check for range +/- 781 my $diff = $rcvd - $date; my $diff_days = round($diff/(60*60*24)); 782 if (($diff < $date_limit) and ($diff > $date_limit * -1)) { return "date-in-range ($diff_days days)"; } 783 else { return "date-out-of-range ($diff_days days)"; } 784} 785 786sub round 787{ 788 my $num = shift; 789 return int(($num*100)+0.5)/100; 790} 791 792################################################ 793################# Parse Body ################## 794################################################ 795 796sub parse_body 797{ 798 # this function is really only used for 799 # email-to-email comparisons, where processing 800 # the entire email is required... usually 801 # we'll just process the header 802 803 my $body = ""; 804 while (<STDIN>) { $body .= $_; } 805 return $body; 806} 807 808################################################ 809########### Process Received Lines ############ 810################################################ 811 812sub process_rcvd 813{ 814 my $rcvd = shift; 815 my $rtrn = shift; 816 817 # heuristics 818 my $LUSER = qr~(?:\w|-|\.)+?~; 819 my $DOMAIN = qr~(?:\w|-|\.)+\.\w{2,4}~; 820 my $IP = qr~(?:\d{1,3}\.){3}\d{1,3}~; 821 my $EMAIL = qr~$LUSER\@$DOMAIN~; 822 my $HELO = qr~[^\s\0\/\\\#]+?~; 823 my $RDNS = qr~(?:$DOMAIN|\[?$IP\]?|unknown|unverified)~; 824 825 my $edge_ip = ""; 826 my $untrusted = 0; 827 828 if ($no_local_received) { 829 my $local_rcvd; 830 $local_rcvd->{'value'} = "auto-generated"; 831 $local_rcvd->{'name'} = "Received"; 832 $local_rcvd->{'sane'} = "trusted"; 833 $local_rcvd->{'rdns'} = $opt_remote_name || ""; 834 $local_rcvd->{'ipad'} = $opt_remote_ip || ""; 835 $local_rcvd->{'mtan'} = $opt_local_name || ""; 836 $local_rcvd->{'mtai'} = $opt_local_ip || ""; 837 $local_rcvd->{'helo'} = $opt_helo || ""; 838 $local_rcvd->{'fore'} = $opt_rcpt || ""; 839 $local_rcvd->{'from'} = $return_path || ""; 840 841 splice(@{$rcvd}, 0, 0, $local_rcvd); 842 } 843 844 # check if we were passed a valid array of received lines 845 unless ((defined $rcvd) && (ref($rcvd) eq "ARRAY") && $rcvd->[0]->{'value'}) 846 { 847 no strict 'refs'; 848 my %rcvd_hash = ('value' => "from localhost; " . gmtime time, 'name' => "Received"); 849 my @rcvd_array; $rcvd_array[0] = \%rcvd_hash; $rcvd = \@rcvd_array; 850 } 851 else { 852 853 # iterate through each received header, parsing and validating the info 854 for (my $x = 0; $x < scalar @$rcvd; $x++) 855 { 856 # skip processing if we already lost confidence in this trail of received lines 857 #if ($untrusted) { $rcvd->[$x]->{'sane'} = "untrusted"; next; } 858 859 my $helo = $rcvd->[$x]->{'helo'} || ""; 860 my $ipad = $rcvd->[$x]->{'ipad'} || ""; 861 my $rdns = $rcvd->[$x]->{'rdns'} || ""; 862 my $from = $rcvd->[$x]->{'from'} || ""; 863 my $mtan = $rcvd->[$x]->{'mtan'} || ""; 864 my $mtai = $rcvd->[$x]->{'mtai'} || ""; 865 my $fore = $rcvd->[$x]->{'fore'} || ""; 866 867 my $idnt=""; my $mtav=""; my $with=""; my $date=""; 868 869 # try to take into account all known MTA formats 870 871 if ($rcvd->[$x]->{'value'} =~ s/\(envelope-(?:sender|from) <($EMAIL)>\)//gis) { $from=$1; }# print "X-$x-matched-01: from=$from, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 872 if ($rcvd->[$x]->{'value'} =~ s/;\s+?(\w{3}, \d{1,2} \w{3} \d{2,4}.*?)$//gis) { $date=$1; }# print "X-$x-matched-02: date=$date, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 873 if ($rcvd->[$x]->{'value'} =~ s/for\s+?<?($EMAIL)>?(?: \(single-drop\))?//gis) { $fore=$1; }# print "X-$x-matched-03: fore=$fore, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 874 if ($rcvd->[$x]->{'value'} =~ s/by\s+?(\S+?) \(($IP)\) \((.*?)\)//gis) { $mtan=$1; $mtai=$2; $mtav=$3; }# print "X-$x-matched-04: mtan=$mtan, mtai=$mtai, mtav=$mtav, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 875 elsif ($rcvd->[$x]->{'value'} =~ s/by\s+?(\S+?) \[($IP)\]//gis) { $mtan=$1; $mtai=$2; }# print "X-$x-matched-05: mtan=$mtan, mtai=$mtai, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 876 elsif ($rcvd->[$x]->{'value'} =~ s/by\s+?(\S+?) \((.+?)\)//gis) { $mtan=$1; $mtav=$2; }# print "X-$x-matched-06: mtan=$mtan, mtav=$mtav, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 877 elsif ($rcvd->[$x]->{'value'} =~ s/by\s+?($IP)(?=\W|;|$)//gis) { $mtai=$1; }# print "X-$x-matched-07: mtai=$mtai, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 878 elsif ($rcvd->[$x]->{'value'} =~ s/by\s+?($DOMAIN)(?=\W|;|$)//gis) { $mtan=$1; }# print "X-$x-matched-08: mtan=$mtan, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 879 elsif ($rcvd->[$x]->{'value'} =~ s/by\s+?(\S+?)(?=\W|;|$)//gis) { $mtan=$1; }# print "X-$x-matched-09: mtan=$mtan, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 880 if ($rcvd->[$x]->{'value'} =~ s/(?:with)\s+?(\S+?) \((.*?)\)//gis) { $with=$1; $mtav=$2 if !$mtav; }# print "X-$x-matched-10: with=$with, mtav=$mtav, remaining=$rcvd->[$x]->{'value'} $CRLF";} 881 elsif ($rcvd->[$x]->{'value'} =~ s/(?:with)\s+?(\S+?)(?=\W|;|$)//gis) { $with=$1; }# print "X-$x-matched-11: with=$with, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 882 if ($rcvd->[$x]->{'value'} =~ s/^from\s+?($RDNS) \(HELO ($HELO)\) \(($LUSER)\@\[?($IP)\]?//gis) { $rdns=$1; $helo=$2; $idnt=$3; $ipad=$4; }# print "X-$x-matched-12: rdns=$rdns, helo=$helo, idnt=$idnt, ipad=$ipad, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 883 elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?($RDNS) \(HELO ($HELO)\) \(\[?($IP)\]?//gis) { $rdns=$1; $helo=$2; $ipad=$3; }# print "X-$x-matched-13: rdns=$rdns, helo=$helo, ipad=$ipad, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 884 elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?($RDNS) \(\[($IP)\] helo=($HELO)\)//gis) { $rdns=$1; $ipad=$2; $helo=$3; }# print "X-$x-matched-14: rdns=$rdns, ipad=$ipad, helo=$helo, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 885 elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?($RDNS) \(($LUSER)\@\[?($IP)\]?\)//gis) { $rdns=$1; $idnt=$2; $ipad=$3; }# print "X-$x-matched-15: rdns=$rdns, idnt=$idnt, ipad=$ipad, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 886 elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?($RDNS)\(($IP)\)//gis) { $rdns=$1; $ipad=$2; }# print "X-$x-matched-16: rdns=$rdns, ipad=$ipad, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 887 elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?\[($IP)\] \(helo=($HELO) ident=($LUSER)\)//gis) { $ipad=$1; $helo=$2; $idnt=$3; }# print "X-$x-matched-17: ipad=$ipad, helo=$helo, idnt=$idnt, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 888 elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?\[($IP)\] \(account ($LUSER) HELO ($HELO)\)//gis) { $ipad=$1; $idnt=$2; $helo=$3; }# print "X-$x-matched-18: ipad=$ipad, idnt=$idnt, helo=$helo, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 889 elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?\[($IP)\] \(helo=($HELO)\)//gis) { $ipad=$1; $helo=$2; }# print "X-$x-matched-19: ipad=$ipad, helo=$helo, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 890 elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?\[?($IP)\]?:?\d*? \(HELO ($HELO)\)//gis) { $ipad=$1; $helo=$2; }# print "X-$x-matched-20: ipad=$ipad, helo=$helo, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 891 elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?($HELO) \(IDENT:($LUSER)\@($RDNS) \[($IP)\]//gis) { $helo=$1; $idnt=$2; $rdns=$3; $ipad=$4; }# print "X-$x-matched-21: helo=$helo, idnt=$idnt, rdns=$rdns, ipad=$ipad, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 892 elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?($HELO) \(<?($RDNS)>?\s?\[($IP)\]//gis) { $helo=$1; $rdns=$2; $ipad=$3; }# print "X-$x-matched-22: helo=$helo, rdns=$rdns, ipad=$ipad, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 893 elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?($HELO) \(\[($IP)\] ident=($LUSER)\)//gis) { $helo=$1; $ipad=$2; $idnt=$3; }# print "X-$x-matched-23: helo=$helo, ipad=$ipad, idnt=$idnt, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 894 elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?($HELO) \(proxying for ($IP)\) \(.*? user ($LUSER)\)//gis) { $helo=$1; $ipad=$2; $idnt=$3; }# print "X-$x-matched-24: helo=$helo, ipad=$ipad, idnt=$idnt, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 895 elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?($HELO) \(account ($LUSER) \[($IP)\] verified\)//gis) { $helo=$1; $idnt=$2; $ipad=$3; }# print "X-$x-matched-25: helo=$helo, idnt=$idnt, ipad=$ipad, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 896 elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?\(?($HELO) \(?\[?($IP)\]?\)?//gis) { $helo=$1; $ipad=$2; }# print "X-$x-matched-26: helo=$helo, ipad=$ipad, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 897 elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?($HELO) \(localhost \[.*?:($IP)\]\)//gis) { $helo=$1; $ipad=$2; }# print "X-$x-matched-27: helo=$helo, ipad=$ipad, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 898 elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?($HELO) \(($LUSER)\@($RDNS)\)//gis) { $helo=$1; $idnt=$2; $rdns=$3; }# print "X-$x-matched-28: helo=$helo, idnt=$idnt, rdns=$rdns, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 899 elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?($HELO) \(($RDNS)\)//gis) { $helo=$1; $rdns=$2; }# print "X-$x-matched-29: helo=$helo, rdns=$rdns, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 900 elsif ($rcvd->[$x]->{'value'} =~ s/\(from\s+?($LUSER)\@($RDNS)\)//gis) { $idnt=$1; $rdns=$2; }# print "X-$x-matched-30: idnt=$idnt, rdns=$rdns, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 901 elsif ($rcvd->[$x]->{'value'} =~ s/\(from\s+?($LUSER)\@($HELO)\)//gis) { $idnt=$1; $helo=$2; }# print "X-$x-matched-31: idnt=$idnt, helo=$helo, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 902 elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?\(?\[?($IP)\]?\)?//gis) { $ipad=$1; }# print "X-$x-matched-32: ipad=$ipad, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 903 elsif ($rcvd->[$x]->{'value'} =~ s/^from\s+?($HELO)(?=\W|;|$)//gis) { $helo=$1; }# print "X-$x-matched-33: helo=$helo, remaining=$rcvd->[$x]->{'value'} $CRLF"; } 904 905 # lookup IP if not provided 906 $ipad = host($rdns) if !$ipad && $rdns && $options =~ /d/; 907 $ipad = host($helo) if !$ipad && !$rdns && $helo && $helo =~ /$DOMAIN/ && $options =~ /d/; 908 909 # exclude lines with no IP 910 #next if !$ipad && ((scalar @$rcvd) > 1); 911 912 # ensure the local received line has a date stamp 913 $date = gmtime time unless $date || $x; 914 915 # save "from" info for comparison in next iteration 916 $rcvd->[$x]->{'rdns'} = $rdns; 917 $rcvd->[$x]->{'ipad'} = $ipad; 918 $rcvd->[$x]->{'date'} = $date; 919 920 # exclude lines from local, private (RFC 1918), and invalid IP address ranges 921 my $reserved = qr~^((?:127\.)|(?:10\.)|(?:172\.(?:1[6-9]|2[0-9]|31)\.)|(?:192\.168\.)|(?:169\.254\.))~; 922 my $valid = qr~^((?:0?0?\d|[01]?\d\d|2[0-4]\d|25[0-5])\.(?:0?0?\d|[01]?\d\d|2[0-4]\d|25[0-5])\.(?:0?0?\d|[01]?\d\d|2[0-4]\d|25[0-5])\.(?:0?0?\d|[01]?\d\d|2[0-4]\d|25[0-5]))$~; 923 next if (($rdns and $rdns =~ /localhost/i) || ($ipad and ($ipad =~ /$reserved/ || $ipad !~ /$valid/))) && ((scalar @$rcvd) > 1); 924 925 # lookup MTA IP/rDNS if not provided 926 $mtai = host($mtan) if !$mtai && $mtan && $options =~ /d/; 927 $mtan = host($mtai) if !$mtan && $mtai && $options =~ /d/; 928 929 # exclude lines from within our class B (/16) network 930 next if (($edge_ip && $ipad && is_same_class_B($edge_ip,$ipad))||(!$edge_ip && $mtai && $ipad && is_same_class_B($mtai,$ipad))) && ((scalar @$rcvd) > 1); 931 932 # perform reverse DNS lookup if not provided by MTA 933 $rdns = host($ipad) if !$rdns && $ipad && $options =~ /d/; 934 935 # force a reverse DNS lookup on all IPs, even those with an RDNS set by the MTA 936 $rdns = host($ipad) if $ipad && $options =~ /f/; 937 938 # perform ASN lookup (RFC 1930/2270) 939 my $asn = ""; 940 $asn = asn($ipad) if $ipad && $options =~ /a/; 941 942 # perform SPF lookup (RFC 4408) 943 my $result = ""; my $received_spf = ""; 944 if ($options =~ /p/) 945 { 946 if (scalar @cidr_list && eval{Net::CIDR::cidrlookup($ipad, @cidr_list)}) 947 { 948 $result = "pass"; 949 $received_spf = "pass ($ipad is locally whitelisted)"; 950 } 951 952 if ($dbfile && !$result) 953 { 954 &retie unless tied %db; 955 if ($db{$ipad}) 956 { 957 $result = "pass"; 958 $received_spf = "pass ($ipad is authenticated via POP3)"; 959 } 960 } 961 962 if ($rtrn && $ipad && !$result) 963 { 964 my $srvr = $rdns?$rdns:($helo?$helo:$ipad); 965 eval { 966 my $spf_server = Mail::SPF::Server->new(); 967 my $request = Mail::SPF::Request->new( 968 identity => $rtrn, 969 ip_address => $ipad, 970 helo_identity => $srvr 971 ); 972 my $response = $spf_server->process($request); 973 $result = $response->code; 974 $received_spf = $response->received_spf_header; 975 }; 976 if ($@) { 977 syslog('warning', 'SPF query error (ip=%s, sender=%s, helo=%s): %s', 978 $ipad, $rtrn, $srvr, $@); 979 $@ = undef; 980 $result = undef; 981 } 982 } 983 984 if (!$result) 985 { 986 $result = "error"; 987 $received_spf = "unable to determine sender info"; 988 } 989 } 990 991 # we implicitely trust the received line set "by" our own server as valid (first untrusted "from") 992 if (!$edge_ip) 993 { 994 $edge_ip = $mtai; 995 $rcvd->[$x]->{'sane'} = set_rcvd($helo,$ipad,$idnt,$rdns,$from,$mtan,$mtai,$mtav,$fore,$with,$date,$asn,$result); 996 $rcvd->[$x]->{'spf'} = $received_spf if $options =~ /p/; 997 } 998 999 # now we'll try to establish the validity of each received line by checking 1000 # for continuity and rejecting lines that don't fit the "from/by" chain 1001 else 1002 { 1003 #print " by " . $mtan . " / prev from " . $rcvd->[$x-1]->{'rdns'} . "$CRLF"; 1004 #print " by " . $mtai . " / prev from " . $rcvd->[$x-1]->{'ipad'} . "$CRLF"; 1005 if ( 1006 ( 1007 ($mtan && $rcvd->[$x-1]->{'rdns'} && $mtan =~ /$rcvd->[$x-1]->{'rdns'}/) || 1008 ($mtai && $rcvd->[$x-1]->{'ipad'} && $mtai =~ /$rcvd->[$x-1]->{'ipad'}/) 1009 ) && (!$untrusted) 1010 ) 1011 { 1012 $rcvd->[$x]->{'sane'} = set_rcvd($helo,$ipad,$idnt,$rdns,$from,$mtan,$mtai,$mtav,$fore,$with,$date,$asn,$result); 1013 $rcvd->[$x]->{'spf'} = $received_spf if $options =~ /p/; 1014 } 1015 else 1016 { 1017 $helo = "untrusted-".$helo if $helo; $ipad = "untrusted-".$ipad if $ipad; 1018 $idnt = "untrusted-".$idnt if $idnt; $rdns = "untrusted-".$rdns if $rdns; 1019 $from = "untrusted-".$from if $from; $mtan = "untrusted-".$mtan if $mtan; 1020 $mtai = "untrusted-".$mtai if $mtai; $mtav = "untrusted-".$mtav if $mtav; 1021 $fore = "untrusted-".$fore if $fore; $with = "untrusted-".$with if $with; 1022 $date = ""; $asn = ""; $result = ""; 1023 $rcvd->[$x]->{'sane'} = set_rcvd($helo,$ipad,$idnt,$rdns,$from,$mtan,$mtai,$mtav,$fore,$with,$date,$asn,$result); 1024 $untrusted = 1; 1025 } 1026 } 1027 }} 1028 1029 return $rcvd; 1030} 1031 1032sub is_same_class_B 1033{ 1034 my ($ip1,$ip2) = @_; 1035 $ip1 =~ s/^(\d{1,3}\.\d{1,3}\.).*?$/$1/gis; 1036 $ip2 =~ s/^(\d{1,3}\.\d{1,3}\.).*?$/$1/gis; 1037 1038 return ($ip1 eq $ip2)? 1:0; 1039} 1040 1041sub asn 1042{ 1043 my $target = shift; 1044 my $output = ""; 1045 1046 my $IP = qr~(?:\d{1,3}\.){3}\d{1,3}~; 1047 my $DOMAIN = qr~(?:\w|-|\.)+?\.\w{2,4}~; 1048 1049 if ( $target =~ s/(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})/$4.$3.$2.$1.$asn_server/ ) 1050 { 1051 # uncomment this code if you do not want to use Net::DNS::Resolver and you have 'host' on your system 1052 #open (HOST, "host -t txt $target 2>/dev/null |") or error("warn","Host lookup failed: $!"); 1053 #while (<HOST>) { $output = $1 if /\Q$target\E(?: descriptive)? text "(\d*?)".*/; } 1054 #close HOST; 1055 1056 # find ASN info via Net::DNS::Resolver 1057 if (my $query = $res->send($target,"TXT")) { foreach ($query->answer) { $output = $1 if $_->string =~ /$DOMAIN\.\s+?\d+?\s+?IN\s+?TXT\s+?"(\d+?)"\s+?"$IP"\s+?"\d+?"/; }} 1058 #else { error("warn","ASN lookup failed: " . $res->errorstring); } 1059 } 1060 1061 return $output; 1062} 1063 1064sub host 1065{ 1066 my $target = shift; 1067 my $output = ""; 1068 1069 my $IP = qr~(?:\d{1,3}\.){3}\d{1,3}~; 1070 my $DOMAIN = qr~(?:\w|-|\.)+?\.\w{2,4}~; 1071 1072 if ($target =~ s/($IP|$DOMAIN)/$1/) 1073 { 1074 # uncomment this code if you do not want to use Net::DNS::Resolver and you have 'host' on your system 1075 #open (HOST, "host $target 2>/dev/null |") or error("warn","Host lookup failed: $!"); 1076 #while (<HOST>) { $output = $1 if /$DOMAIN (?:domain name pointer|has address) ($IP|$DOMAIN)\.?/; } 1077 #close HOST; 1078 1079 # find DNS info via Net::DNS::Resolver 1080 if (my $query = $res->send($target)) { foreach ($query->answer) { $output = $1 if $_->string =~ /$DOMAIN\.\s+?\d+?\s+?IN\s+?(?:PTR|A)\s+?($IP|$DOMAIN)\.?/; }} 1081 #else { error("warn","DNS lookup failed: " . $res->errorstring); } 1082 } 1083 1084 return $output; 1085} 1086 1087sub set_rcvd 1088{ 1089 my ($helo,$ipad,$idnt,$rdns,$from,$mtan,$mtai,$mtav,$fore,$with,$date,$asn,$spf) = @_; 1090 1091 my $output = "from"; 1092 if ($options =~ /e/) { 1093 $output .= ($helo)? " helo-$helo" : " no-helo";} # sender's salutation 1094 $output .= ($rdns)? " $rdns" : " no-rdns"; # sender's name 1095 $output .= ($ipad)? " $ipad" : " no-ipad"; # sender's IP 1096 if ($options =~ /p/) { 1097 $output .= ($spf)? " spf-$spf" : " no-spf";} # sender's policy result 1098 $output .= ($asn)? " as$asn" : " no-asn"; # sender's ASN 1099 $output .= ($mtan||$mtai)? " $CRLF\t by" : ""; 1100 $output .= ($mtan)? " $mtan" : " no-mta-name"; # receiving MTA's name 1101 $output .= ($mtai)? " $mtai" : " no-mta-ip"; # receiving MTA's IP 1102 $output .= ($fore)? " $CRLF\t for" : ""; 1103 $output .= ($fore)? " <$fore>" : " no-to-addr"; # envelope to address 1104 $output .= ($date)? "; $date" : " no-date"; # received date/time 1105 1106 #print "outputting received: $output" . $CRLF; 1107 return $output; 1108} 1109 1110sub opendb_read 1111{ 1112 tie(%db, "DB_File", $dbfile, O_RDONLY, 0, $DB_HASH) or error("warn","Can't open $dbfile: $!"); 1113} 1114 1115sub closedb 1116{ 1117 untie %db; 1118 undef %db; 1119} 1120 1121sub retie 1122{ 1123 &closedb; 1124 &opendb_read; 1125} 1126 1127################################################ 1128################ Output Header ################# 1129################################################ 1130 1131sub set_header 1132{ 1133 my $header = shift; 1134 my $output = ""; 1135 my $name = ""; 1136 1137 # exclude the "masked fields" from display 1138 foreach $name (split(/,/,$masked_fields)) { $spec_fields =~ s/(?<=,)$name,?//; } 1139 1140 # output the fields in the order specified by RFC 2822 - minus the masked fields 1141 foreach $name (split(/,/,$spec_fields)) { $output .= set_field($header,$name); delete $header->{$name}; } 1142 1143 # set any user-specified fields 1144 foreach $name (split(/,/,$user_fields)) { $output .= set_field($header,$name); delete $header->{$name}; } 1145 1146 # set new custom x-header fields 1147 if ($options =~ /x/) { foreach $name (split(/,/,$new_fields)) { $output .= set_field($header,$name); delete $header->{$name}; } } 1148 1149 # then set any remaining fields (if allowed to set non-standard fields) 1150 if ($options !~ /s/) { foreach $name (keys %{$header}) { $output .= set_field($header,$name); } } 1151 1152 $output .= $CRLF; 1153 1154 return $output; 1155} 1156 1157sub set_field { 1158 my($header, $name) = @_; 1159 my $output = ""; 1160 1161 if ($header->{$name}) { 1162 foreach my $header (@{$header->{$name}}) { 1163 if ($name eq "received" and $options =~ /r/) { 1164 if ($header->{'sane'} and $header->{'sane'} =~ /\w/) { 1165 $output .= $header->{'name'} . ": " . $header->{'sane'} . 1166 $CRLF; 1167 } 1168 # else { 1169 # $output .= $header->{'name'} . ": sanity check failed" . 1170 # $CRLF; 1171 # } 1172 } 1173 elsif ($header->{'name'} and defined($header->{'value'})) { 1174 $output .= $header->{'name'} . ": " . $header->{'value'} . 1175 $CRLF; 1176 } 1177 else { 1178 my $dumped = Data::Dumper->new([$header], [qw(header)])-> 1179 Indent(0)->Dump(); 1180 error("warn", "Header for $name, $dumped, is missing name " . 1181 "and/or value?"); 1182 } 1183 } 1184 } 1185 elsif ($req_fields =~ /(?:^|,)$name(?:,|$)/) { 1186 $output .= ucfirst($name) . ": [no-$name] " . $CRLF; 1187 } 1188 1189 return $output; 1190} 1191 1192################################################ 1193################ Error Handling ################ 1194################################################ 1195 1196sub error 1197{ 1198 my ($action,$msg) = @_; 1199 1200 die $msg if $action eq "die"; 1201 warn $msg unless $action eq "die"; 1202 # add other actions if you like 1203} 1204 1205sub sig_trap 1206{ 1207 my $sig = shift; 1208 my ($action,$more) = ("warn",""); 1209 1210 sig: 1211 { 1212 $action = "die", last sig if $sig =~ /ALRM/; 1213 $action = "warn", last sig if $sig =~ /PIPE/; 1214 $action = "warn", last sig if $sig =~ /CHLD/; 1215 $action = "die" , last sig if $sig =~ /INT/; 1216 $action = "die" , last sig if $sig =~ /HUP/; 1217 $action = "warn"; 1218 } 1219 1220 my $waitedpid = wait; 1221 $more = "; Reaped pid $waitedpid, exited with status " . ($? >> 8) if $waitedpid; 1222 1223 $SIG{$sig} = \&sig_trap; 1224 1225 error ($action,"Trapped signal SIG$sig$more"); 1226} 1227 1228################################################ 1229################################################ 1230################################################ 1231