1#!/usr/local/bin/perl -w 2## FIXME : remove the -w above when you find the code is fixed ! 3# 4#This is spamstats.pl v0.6b 5# 6# 7#Changelog 8#0.6c 5 February 2006 9# Support for transparent bzip2 compression. Thanks to Yen-Ming Lee for the 10# patch. Requires Compress::Bzip2. 11# 12#0.6b 16 August 2005 13# Support the new spamd (v3.1.0rc1) logging format. 14 15#0.6a 20 April 2004 16# Here appears the -spamd option, to be used if you want to use only spamd logs, 17# and no correlation with mailer logs. Beware this option removes some useful 18# features : most spamed accounts, etc. 19# Thanks to Jean-Louis Bergamo (aka JLB) for this! 20# 21#0.6 8 April 2004 22# Adds cricket support for -minmax options. Plenty of new cool graphs available! 23# Fixes some minor bugs linked to these upgrades 24# 25#0.5b 5 March 2004 26# Fixes a typo which lead to confusion in the volume counting. 27# Thanks to Matthew McGehrin for the bugreport 28# 29#0.5a 25 February 2004 30# Two patches from Radko Keves, with these : 31# * Support for BSDs' sendmail daemons (sm-mta) 32# * Added the -firstdate option, useful for multiple files parsing 33# 34#0.5 30 January 2004 35# Many thanks to Cyril Chaboisseau (http://www.obs.coe.int) who did : 36# * -minmax option that display min and max values for each displayed value 37# * some cleanups I removed :-p (renaming "clean message" into "ham" would 38# break some existing configs :( ) 39# * a lot of cleanup in the displaying code, that was very awful 40# * tidied and reordered the code 41# Few other changes: 42# * Fixed the bug where the -number and the -html options were leading to a 43# poor output 44# 45#0.4b5 11 August 2003 46#Fixed the bug when a month starting with a zero is entered as start/enddate. 
47# 48#0.4b4 10 June 2003 49# Fixed the infile == 0 bug, thanks to Yen-Ming Lee 50# Fixes sendmail parsing when email is delivered through procmail, raised by Dirk Kuypers 51# 52# 53#0.4b3 2 June 2003 54#Applied patches from Bob Apthorpe for : 55# * more elegant fix of the two digits month intput problem 56# * better input handling, now files to process can be specified in @ARGV without the --file switch 57# * Added documentation and scripts to graph spamstats output with cricket. 58# 59#0.4b2 30 May 2003 60#Regexp bugfix in exim mailer_in handling 61#Regexp bugfix in spamd ("processing message" seems to have changed to "checking message") on some setups. 62#Updated README into a more english (and less french) syntax 63# 64#0.4b1 19 May 2003 65#This is a very tiny bugfix. 66#Fixes parsing mistakes on sendmail setups that relay emails as outputs. 67#Emails were undetected on those setups. 68# 69#0.4b 10 Mar 2003 70#WARNING : this release changes the default behaviour of spamstats calculations !! 71#From this version on spamstats counts spams and non-spams per recipient, not per mailer ID. 72#(Until this version, a multirecipient message sent to both "foo@yourdomain.com" and "bar@yourdomain.com" 73#counted only as one spam. From now on it counts as two. 74#New option : -agglo-recipients uses spamstats "old" mode : one count per mailer ID, not per recipient. 75#WARNING : FOR NOW EXIM USERS PROBABLY WANT TO USE THIS OPTION, ON SOME EXIM CONFIGS 76# THERE ARE RISKS LOG ANALYSIS BE BROKEN IF NOT USED! 77#Applied patch from Jim Breton <jimb@alongtheway.REMOVETHIS.com> for a better display. 78# 79#0.4 25 Feb 2003 80#[Probably very incomplete] sendmail support 81#Only sendmail regexp were added, no code modification ! 82#This is not a very important release in terms of work. 
Hopefully it is in terms 83#of capabilities :-) 84# 85#0.3b2 30 Jan 2003 86#Fix a problem where script will issue warnings when parsed log file is empty or 87#contains no reference to used mailer (only contains spamd messages). 88# 89#0.3b 04 Jan 2003 90#Added a (hopefully) useful time filter specification to be used : duration specification. 91# 92#0.3a 29 Dec 2002 93#Date/Time filter now works. 94#Some tiny code cleanup. 95#HTML output support. 96# 97#0.3alpha 17 Dec 2002 98#Exim support 99#Some work on date/time filtering support, far from complete. These options are useless for now. 100# 101#0.2f 26 Nov 2002 102#If one input file does not exist, mentions which! 103# 104#0.2e 26 Nov 2002 105#Option "-noabsolute" makes spamstats not complain if argument log file names are not absolute. 106#Now reports total Volume of Spam and Volume of ham in general statistics. 107# 108#0.2d 109#Local recipients were not counted, only relayed ones. 110#Regexp was modified to just match both. 111#Thanks to Jon Gabrielson for bug report 112# 113#0.2c 114#No more lower/upper case distinction in top recipients classification 115#Thanks to Kenneth Nerhood for bug report 116# 117#0.2b 118#Fixes stupid bug from 0.2 where spamd process had to run as user "spamd" 119#Thanks to Kenneth Nerhood for bug report 120# 121# 122#Parses Postfix a spamd log file (or several) and extract top Spam receivers. 123#Also displays spam statistics. 124# 125#Author : Vincent Deffontaines <vincent@gryzor.REMOVETHISIFNOSPAM.com> 126#Script Basis, Postfix support Copyright : Vincent Deffontaines 127# KDX (www.kdx.fr) 128# Council of Europe (www.coe.int) 129# 130#Exim support Copyright : Vincent Deffontaines. 131#Sendmail support Copyright : Vincent Deffontaines. 132# 133#Please send me contributions/ modifications/ comments that could be useful to this script! 134#Others mailers than Postfix/Exim support shouldn't be hard to implement. 
#Author will help and include modifications to this script as long as mailers are free software.
#
#This program is free software; you can redistribute it and/or
#modify it under the terms of the GNU General Public License
#as published by the Free Software Foundation; version 2
#of the License.
#
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License
#along with this program; if not, write to the Free Software
#Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#

#
#
use strict;
use Time::Local;
use Date::Manip qw (ParseDate UnixDate);

use Getopt::Long;
#Only useful for Debugging, useless if you don't hack through this code :-)
#use Data::Dumper;

use Compress::Zlib;
use Compress::Bzip2 qw(&bzopen $bzerrno);

# Log files to analyse, keyed by file name.
my %infile = ();

# Simple on/off switches and counters, all starting at zero.
my ( $number, $help, $nogeneral, $debug, $noabsolute, $error ) = (0) x 6;
my ( $html, $duration, $agglo_rcpt, $Spamd ) = (0) x 4;
my ( $skipstarttest, $skipendtest ) = (0) x 2;
my ( $spam_percent,  $clean_percent ) = (0) x 2;

# Time filter bounds; the string "none" means "not given".
my ( $starttime, $endtime, $startdate, $enddate ) = ("none") x 4;

# State used by the -firstdate report.
my $startdate_d = 1;
my $firstdate   = 0;
my $firstdate_d = "Dec 31 23:59:59";
my $lastdate_d  = "Jan 1 00:00:00";

# Left undefined on purpose: the mailer type and the -minmax switch.
my $mailerlogtype;
my $minmax;

# Markup fragments used when printing; every fragment is empty here.
my %html_tags =
  map { $_ => '' } qw(br b i html body endtag starttag vspace);

#Parse regexp definitions for each mailer and for spamd
my %Defs = (
    'mailer_in' => {
        'postfix' =>
'^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+postfix\/cleanup\[(\d*)\]:\s+([^:]+):\s*message-id=(.*)$',

#$Defs{'mailer_in'}{'exim'} = '^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+exim\[\d+\]:\s+\d{4}-\d{2}-\d{2}\s+\d+:\d+:\d+\s+<=\s+.*\@\S+\s+[^\[]+\[\d+\.\d+\.\d+\.\d+\]\s+P=\S+\s+S=\d+\s+id=(.*)$';
#$Defs{'mailer_in'}{'exim'} = '^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+exim\[(\d+)\]:\s+\d{4}-\d{2}-\d{2}\s+\d+:\d+:\d+\s+(\S+)\s+<=\s+.*\@\S+\s+(?:U=\S+|H=.*)\s+P=\S+\s+S=\S+\s+id=(.*)$';
        'exim' =>
'^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+exim\[(\d+)\]:\s+\d{4}-\d{2}-\d{2}\s+\d+:\d+:\d+\s+(\S+)\s+<=\s+.*\@\S+\s+H=.*\s+P=\S+.*\s+S=\S+(.*)$',
        'sm-mta' =>
'^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+sm-mta\[(\d+)\]:\s+(\S+):\s+from=<[^>]*>,\s+size=\d+,\s+class=\S+,\s+nrcpts=\d+,\s+msgid=<([^>]+)>.*,\s+proto=\S+,\s+daemon=\S+,\s+relay=.*$',
        'sendmail' =>
'^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+sendmail\[(\d+)\]:\s+(\S+):\s+from=<[^>]*>,\s+size=\d+,\s+class=\S+,\s+nrcpts=\d+,\s+msgid=<([^>]+)>.*,\s+proto=\S+,\s+daemon=\S+,\s+relay=.*$',
    },
    'spamd_in' => {
        'postfix' =>
'^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+(?:spamassassin|spamd)\[(\d+)\]:\s+(?:spamd:|)\s*(?:processing|checking)\s+message\s*(.*)\s+for\s+\S+',
        'exim' =>
'^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+(?:spamassassin|spamd)\[(\d+)\]:\s+(?:spamd:|)\s*(?:processing|checking)\s+message\s*<(.*)>\s+for\s+\S+',
        'sm-mta' =>
'^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+(?:spamassassin|spamd)\[(\d+)\]:\s+(?:spamd:|)\s*(?:processing|checking)\s+message\s*<(.*)>\s+for\s+',
        'sendmail' =>
'^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+(?:spamassassin|spamd)\[(\d+)\]:\s+(?:spamd:|)\s*(?:processing|checking)\s+message\s*<(.*)>\s+for\s+',
    },
);

$Defs{'spamd_clean'} =    # the pattern literal follows on the next source line
# (value of $Defs{'spamd_clean'}; the assignment starts on the previous source line)
'^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+(?:spamd|spamassassin)\[(\d+)\]:\s+(?:spamd:|)\s*clean\s+message\s*\(([^\/]+)\/[^\)]+\)\s+for\s+\S+\d+\s+in\s+(\S+)\s+seconds,\s+(\d+)\s+bytes\.';
$Defs{'spamd_spam'} =
'^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+(?:spamd|spamassassin)\[(\d+)\]:\s+(?:spamd:|)\s*identified\s+spam\s*\(([^\/]+)\/[^\)]+\)\s+for\s+\S+\d+\s+in\s+(\S+)\s+seconds,\s+(\d+)\s+bytes\.';
$Defs{'mailer_out'}{'postfix'} =
'^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+postfix\/(?:pipe|local)\[\d+\]:\s+([^:]+):\s+to=<([^>]+)>';

#'Mar 10 02:11:24 barrel postfix/smtp[20611]: 5A9BF22E04: to=<obfuscated>, relay=127.0.0.1[127.0.0.1], delay=2, status=sent (250 ok 1047280284 qp 20787)'
$Defs{'mailer_out'}{'exim'} =
'^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+exim\[\d+\]:\s+\d{4}-\d{2}-\d{2}\s+\d+:\d+:\d+\s+(\S+)\s+=>\s+(.*\@\S+)\s+';
$Defs{'mailer_out'}{'sm-mta'} =
'^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+sm-mta\[\d+\]:\s+(\S+):\s+to=(?:\|.*ctladdr=<|<)([^>]+)>.*,\s+delay=\S+,\s+xdelay=\S+,\s+mailer=\S+,\s+pri=\d+.*,\s+dsn=\S+,\s+stat=\S+';
$Defs{'mailer_out'}{'sendmail'} =
'^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+sendmail\[\d+\]:\s+(\S+):\s+to=(?:\|.*ctladdr=<|<)([^>]+)>.*,\s+delay=\S+,\s+xdelay=\S+,\s+mailer=\S+,\s+pri=\d+.*,\s+dsn=\S+,\s+stat=\S+';

#Prints the command line help on STDOUT.
sub Print_Usage() {
    print "{Exim/Postfix/Sendmail} & spamd logfile analyser. Extracts top N Spam receivers\n";
    print "$0 [-help] [-debug][-file=/path/to/filename] [-file=...] [-number=...] [-nogeneral]\n";
    print " [-startdate=dd-mm] [-starttime=hh:mm:ss] [-enddate=dd-mm] [-endtime=hh:mm:ss]\n";
    print " [-duration=number of seconds] /path/to/file1 [/path/to/file2] [/path/to/file3.gz]\n";
    print " [-firstdate]\n";
    print "GENERAL OPTIONS\n";
    print "\t-debug\t\t\t: Displays informations that _might_ indicate problems while parsing.\n";
    print "\t-help\t\t\t: Displays this help and exits.\n";
    print "\t-file /path/file\t: Analyses maillog file for spam results (as logged by spamd) :\n"
      . "\t\t\t\t Several files can be asked for parsing at a time, including .gz files\n"
      . "\t\t\t\t Default /var/log/maillog\n"
      . "\t\t\t\t This switch is DEPRECATED, simply specify filenames after all options,\n"
      . "\t\t\t\t without any switch.\n";
    print "\t-number number\t\t: specifies number of top spam receivers to display (default : 0).\n";
    print "\t-nogeneral\t\t: do not display general stats.\n";
    print "\t-noabsolute\t\t: lets non-absolute named logfiles be processed.\n";
    print "\t-html\t\t\t: HTML output\n";
    print "\t-minmax\t\t\t: Display minimum and maximum values\n";
    print "TIME FILTER OPTIONS (no time filter used if no option specified)\n";
    print "\t-startdate dd-mm\t: Process only data logged from that date\n";
    print "\t\t\t\t Default : today if starttime specified, else unused\n";
    print "\t-enddate dd-mm\t: Process only data logged until that date\n";
    print "\t\t\t\t Default : today if endtime specified, else unused\n";
    print "\t-starttime hh:mm:ss\t: Process only data logged from that time (default time : 0:00:00)\n";
    print "\t-endtime hh:mm:ss\t: Process only data logged until that time (default time : current time)\n";
    print "\t-firstdate\t\t: Displays only first and last date of log messages\n";
    print "\t\t\t\t (useful for multiple files parsing).\n";
    print "\t-duration seconds\t: Work only on specified duration.\n";
    print "\t\t\t\t To be used with start XOR end{time/date}, obviously not with both.\n";
    print "\t\t\t\t Default : unused\n";
    print "\t\t\t\t Default if no other time switch : process n seconds until current time.\n";
    print "\tWhy no year in dates input? Just because there is no year reported in postfix maillogs\n";
    print "\tThis will obviously cause time filter problems around new year!\n";
    print "\t-agglo-recipients\t: Old spamstats counting. One count by mail ID, not by actual recipient.\n";
    print "\t\t\t\t EXIM users WANT to set this for now!\n";
    print "\t-spamd\t: Log file contains ONLY spamd log. it will not show per user stats.\n";
}

sub unify($$);

#Converts (value, unit) from bytes, kilobytes, megabytes into a more human readable expression.
#Input  : a volume and its unit ("bytes", "kbytes" or "Mbytes").
#Output : the list (volume, unit). The volume is divided by 1024 and the unit
#         promoted one step for as long as the result would stay above 5.
#         "Mbytes" is the largest unit produced; other units are returned untouched.
sub unify($$) {
    my ( $volume, $unit ) = @_;
    while ( ( ( $unit eq "bytes" ) or ( $unit eq "kbytes" ) )
        and ( $volume / 1024 > 5 ) )
    {
        $volume = $volume / 1024;
        $unit = ( $unit eq "bytes" ) ? "kbytes" : "Mbytes";
    }
    return ( $volume, $unit );
}

#Tells whether a date/time option was given.
#Input  : an option value.
#Output : 0 if the value is the "none" placeholder (or undefined), 1 otherwise.
#The value is read from the argument list; the regexp capture $1 must not be
#used here, it has nothing to do with the argument.
sub c_d($) {
    my $value = shift @_;
    if ( ( not defined $value ) or ( $value eq "none" ) ) {
        return 0;
    }
    return 1;
}

#Checks given date is correct (expected format string: "d[d]-m[m]" or "none").
#Returns 0 if correct, 1 if not.
#Only the ranges are checked (day 1..31, month 1..12), not the length of the month.
sub check_date($) {
    my $date = shift @_;
    return 0 if $date eq 'none';
    return 1 unless $date =~ /^(\d{1,2})-(\d{1,2})$/;
    my ( $day, $month ) = ( int($1), int($2) );
    return 1 if ( $day < 1 )   or ( $day > 31 );
    return 1 if ( $month < 1 ) or ( $month > 12 );
    return 0;
}

#Checks given time is correct (expected format string: "h[h]:mm:ss" or "none").
#Returns 0 if correct, 1 if not.
#NOTE(review): hour 24 is accepted (the test is "< 25"); this is kept as is
#because "24:00:00" may be in use as an end-of-day bound - confirm before tightening.
sub check_time($) {
    my $time = shift @_;
    return 0 if $time eq 'none';
    return 1 unless $time =~ /^(\d{1,2}):(\d{2}):(\d{2})$/;
    my ( $hour, $minute, $second ) = ( $1, $2, $3 );
    return 1 unless ( $hour < 25 ) and ( $minute < 60 ) and ( $second < 60 );
    return 0;
}

#Converts a date string to epoch seconds with Date::Manip.
#A trailing " (...)" part is removed before parsing.
#Returns whatever UnixDate gives back for the parsed date.
sub convert_d_t_e($) {
    my $string = shift @_;
    $string =~ s/\s+\(.*\)\s*$//;
    my $date = ParseDate($string);
    return UnixDate( $date, "%s" );
}

#Input : date "d[d]-m[m]", time "h[h]:mm:ss"
#Output : Pseudo epoch (no year included in input): seconds counted from the
#         start of a 365 days year. The value grows with the date, so two
#         results can be compared or subtracted.
#Returns -1 in case of trouble (bad format, month out of 1..12).
#This function will ALWAYS be BUGGY around new year days.
#This function also presents a bug on leap years (when Feb 29 exists).
#This is due to year not being logged, which indeed confuses things on such days.
sub convert_date_time_to_epoch($$) {
    my ( $date, $time ) = @_;

    return -1 unless defined $date and $date =~ /^(\d{1,2})-(\d{1,2})$/;

    #int() removes a leading 0 if there is one
    my ( $day, $month ) = ( int($1), int($2) );

    return -1 unless defined $time and $time =~ /^(\d{1,2}):(\d{2}):(\d{2})$/;
    my ( $hour, $minute, $second ) = ( int($1), int($2), int($3) );

    return -1 if ( $month < 1 ) or ( $month > 12 );

    #Days elapsed before the first day of each month (index 0 is January).
    #Adding the length of the month itself, as was done before, gave values
    #that went backwards from one month to the next.
    my @days_before_month =
      ( 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 );

    my $result =
      ( $days_before_month[ $month - 1 ] + $day ) * 24 * 3600 +
      $hour * 3600 + $minute * 60 + $second;
    return ($result);
}

#Input : a reference to a list of numbers
#Output : same numbers list, each preceded by a zero if originally one digit long.
#The referenced list itself is modified.
sub fill_zeros($) {
    my $list = shift @_;
    foreach my $number (@$list) {
        $number = "0" . $number if $number =~ /^\d$/;
    }
    return @$list;
}

my @infiles = ();
GetOptions(
    "file=s"           => \@infiles,
    "number=i"         => \$number,
    "help"             => \$help,
    "debug"            => \$debug,
    "noabsolute"       => \$noabsolute,
    "nogeneral"        => \$nogeneral,
    "html"             => \$html,
    "minmax"           => \$minmax,
    "startdate=s"      => \$startdate,
    "enddate=s"        => \$enddate,
    "firstdate"        => \$firstdate,
    "starttime=s"      => \$starttime,
    "endtime=s"        => \$endtime,
    "duration=s"       => \$duration,
    "agglo-recipients" => \$agglo_rcpt,
    "spamd"            => \$Spamd
);

if ($help) {
    Print_Usage ();
    exit 0;
}

#Files can be given with -file or as plain arguments; the default log is used
#only when nothing was given and it exists.
push @infiles, @ARGV if (@ARGV);
my $defmaillog = '/var/log/maillog';
push @infiles, $defmaillog if ( $#infiles == -1 && -f $defmaillog );
foreach my $fn (@infiles) {
    if ( -f $fn ) {
        $infile{$fn} = 1;
    }
    else {

        #Such files are left out, but no longer silently.
        print STDERR "$fn : File does not exist, skipping it!\n";
    }
}

#Sanity checks
unless ( check_date($startdate) == 0 ) {
    print STDERR "Bad input format start date was entered\n";
    $error++;
}
unless ( check_date($enddate) == 0 ) {
    print STDERR "Bad input format end date was entered\n";
    $error++;
}
unless ( check_time($starttime) == 0 ) {
    print STDERR "Bad input format start time was entered\n";
    $error++;
}
unless ( check_time($endtime) == 0 ) {
    print STDERR "Bad input format end time was entered\n";
    $error++;
}
unless ( $duration =~ /^\d+$/ ) {
    print STDERR "Bad input : duration is supposed to be numeric\n";
    $error++;
}

if ($html) {
    %html_tags = (
        'br'       => 'br>',
        'b'        => 'b>',
        'i'        => 'i>',
        'html'     => 'html>',
        'body'     => 'body>',
        'endtag'   => '</',
        'starttag' => '<',
        'vspace'   => '<img width=50 height=0>'
    );
}

foreach my $file ( sort keys %infile ) {

    #NOTE(review): this pattern is not anchored, so one allowed character
    #anywhere in the name is enough to pass. Anchoring it as written would
    #refuse names containing "-" or "_"; left unchanged.
    unless ( $file =~ /[a-zA-Z\.\/ \\0-9]+/ ) {
        die "Illegal characters read in parameter file name!\n";
    }

    unless ( -f $file ) {
        print STDERR "$file : File does not exist!\n";
        $error++;
    }
    if ( ( $file !~ /^\// ) and ( $noabsolute == 0 ) ) {
        print STDERR
"$file : Path to file must be absolute, or you must specify the \"-noabsolute\" option\n";
        $error++;
    }
}

#A bound of the time filter is skipped when neither its date nor its time was given.
if ( ( $starttime eq "none" ) and ( $startdate eq "none" ) ) {
    $skipstarttest = 1;
}
if ( ( $enddate eq "none" ) and ( $endtime eq "none" ) ) {
    $skipendtest = 1;
}

if ( ( $duration > 0 ) and ( $skipstarttest == 0 ) and ( $skipendtest == 0 ) ) {
    print STDERR
"Input redundancy : You may not specify starttime, endtime and duration\n";
    $error++;
}
$error and exit 1;

#Missing dates default to today, missing times to 00:00:00 (start) and now (end).
if ( $startdate eq 'none' ) {
    my ( $day, $month ) = (localtime)[ 3, 4 ];
    $month++;
    $startdate   = $day . "-" . $month;
    $startdate_d = 0;
}

if ( $enddate eq 'none' ) {
    my ( $day, $month ) = (localtime)[ 3, 4 ];
    $month++;
    $enddate = $day . "-" . $month;
}

$starttime eq 'none' and $starttime = '00:00:00';
if ( $endtime eq 'none' ) {
    my @tab = (localtime)[ 0, 1, 2 ];
    @tab = fill_zeros( \@tab );
    $endtime = join( ':', reverse(@tab) );
}

my %found;    #Hash to track if we found some actual values.
my $min_clean_score   = 5.5;
my $max_clean_score   = 0;
my $min_clean_time    = 99;
my $max_clean_time    = 0;
my $min_clean_msgsize = 99999;
my $max_clean_msgsize = 0;
my $min_spam_score    = 99;
my $max_spam_score    = 5.5;
my $min_spam_time     = 99;
my $max_spam_time     = 0;
my $min_spam_msgsize  = 99999;
my $max_spam_msgsize  = 0;

# and $endtime = join(':',reverse(fill_zeros((localtime)[0,1,2])));

print $html_tags{'starttag'}
  . $html_tags{'html'}
  . $html_tags{'starttag'}
  .    # the last operand of this print is on the next source line
# (last operand of the print statement that starts on the previous source line)
$html_tags{'body'};

#print "Time filter used : From $startdate $starttime to $enddate $endtime\n";

#Bounds of the time filter, as pseudo epochs (see convert_date_time_to_epoch).
my $epoch_start = convert_date_time_to_epoch( $startdate, $starttime );
my $epoch_end   = convert_date_time_to_epoch( $enddate,   $endtime );

#A duration replaces the bound that was not given: the start when no start
#(or nothing at all) was given, else the end. Both tests are then enabled.
if ( $duration > 0 ) {
    if ( ( $skipstarttest == 1 ) or ( $skipendtest == 1 ) ) {
        if ( $skipstarttest == 1 ) {
            $epoch_start = $epoch_end - $duration;
        }
        else {
            $epoch_end = $epoch_start + $duration;
        }
        $duration      = 0;
        $skipstarttest = 0;
        $skipendtest   = 0;
    }
}

#print "DEBUG : $epoch_start to $epoch_end\n";

$epoch_start > $epoch_end
  and print STDERR "WARNING : time filter seems incorrect : it starts after it ends! $epoch_start > $epoch_end\n";
my %mounths = (
    "Jan" => 1,
    "Feb" => 2,
    "Mar" => 3,
    "Apr" => 4,
    "May" => 5,
    "Jun" => 6,
    "Jul" => 7,
    "Aug" => 8,
    "Sep" => 9,
    "Oct" => 10,
    "Nov" => 11,
    "Dec" => 12
);

my $is_gz = 0;    #0 : plain file, 1 : gzip, 2 : bzip2
my $gz;

#$gzerrno and $bzerrno are NOT declared here: they are the error variables
#imported from Compress::Zlib and Compress::Bzip2. A "my" declaration would
#hide them and leave the error messages below empty.

my %spam  = ();    #spam count per recipient
my %clean = ();    #clean message count per recipient

my %mailer_table = ();    #mailer queue id => message id
my %spamd_table  = ();    #message id => "spam" or "clean"
my %spamd_pid    = ();    #spamd pid => message id being checked

my $spam_score     = 0;
my $clean_score    = 0;
my $spam_time      = 0;
my $clean_time     = 0;
my $basic_spam_nb  = 0;
my $basic_clean_nb = 0;

my $spam_volume  = 0;
my $clean_volume = 0;

my $incorrect_lines = 0;    #Count unparsable lines
my $correct_lines   = 0;    #Count parsable lines

#Marker stored in %mailer_table for exim messages logged without an id.
my $exim_no_id = "I_have_no_id_��:-(";    #Hope this will never be a real id ...

#Files are read in the order given on the command line, each one once.
#(Walking "keys %infile" gave an arbitrary order.)
my %already_listed = ();
my @files_in_order =
  grep { $infile{$_} and not $already_listed{$_}++ } @infiles;

#Processing
FILELOOP: foreach my $file (@files_in_order) {
    undef $mailerlogtype;
    my $first_date = "";
    my $last_date  = "";
    my $line;
    my $linetime = 0;
    my $fh;
    $is_gz = 0;

    if ( $file =~ /\.gz$/ )    #We have a gz file
    {
        $gz = gzopen( $file, "r" ) or die "Cannot open $file : $gzerrno\n";
        $is_gz = 1;
    }
    elsif ( $file =~ /\.bz2$/ )    #We have a bz2 file
    {
        $gz = bzopen( $file, "r" ) or die "Cannot open $file : $bzerrno\n";
        $is_gz = 2;
    }
    else {

        #Three arguments open: the name is never taken as a mode or a command.
        open( $fh, '<', $file ) or die "Unable to open file $file : $!\n";
    }

    while (1) {

        #Read next line from whichever kind of handle is open
        my $have_line;
        if ($is_gz) {
            die "File not open!\n" if not defined $gz;
            $have_line = ( $gz->gzreadline($line) > 0 );
        }
        else {
            $have_line = defined( $line = <$fh> );
        }

        unless ($have_line) {

            #End of file (or read error): close, report the dates, next file
            if ($is_gz) {
                $gz->gzclose();
            }
            else {
                close $fh;
            }
            if ( $firstdate == 0 ) {
                print $html_tags{'starttag'} . $html_tags{'br'};
                print "File $file : from $first_date to $last_date\n";
            }
            else {

                #A file without any parsable line has no dates to compare
                if ( $first_date ne "" ) {

                    #find firstdate
                    if ( convert_d_t_e($first_date) <
                        convert_d_t_e($firstdate_d) )
                    {
                        $firstdate_d = $first_date;
                    }

                    #find lastdate (numeric comparison of epoch seconds)
                    if ( convert_d_t_e($last_date) >=
                        convert_d_t_e($lastdate_d) )
                    {
                        $lastdate_d = $last_date;
                    }
                }
                if ($startdate_d) {
                    $firstdate_d = $startdate;
                    $lastdate_d  = $enddate;
                }
            }
            next FILELOOP;
        }

        unless ( $line =~ /^\s*([a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+)\s+\S+\s+/ ) {
            $incorrect_lines++;
            next;
        }
        my $stamp = $1;    #"Mon dd hh:mm:ss" part of the line

        unless ( defined $mailerlogtype ) {
            if ( $line =~
/^\s*([a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+)\s+\S+\s+(exim|postfix|sendmail|sm-mta)/
              )
            {
                $mailerlogtype = $2;

                #print $mailerlogtype." style log file detected\n";
            }
        }

        $correct_lines++;
        if ( $first_date eq "" ) { $first_date = $stamp; }
        $last_date = $stamp;

        #Time filter
        if ( ( $skipstarttest * $skipendtest ) == 0 ) {
            if ( $stamp =~ /^([a-zA-Z]{3})\s+(\d+)\s+(\d+:\d+:\d+)$/ ) {
                $linetime =
                  convert_date_time_to_epoch( $2 . "-" . $mounths{$1}, $3 );
            }
        }
        unless ( $skipstarttest == 1 ) {
            $linetime < $epoch_start and next;
        }

        unless ( $skipendtest == 1 ) {
            $linetime > $epoch_end and next;
        }

      #Here is Mailer analysis section. Spamd analysis is below.
      #We are not running this code unless we know which mailer we are having:
        if ( defined $mailerlogtype ) {

            #Email IN
            if ( $line =~ /$Defs{'mailer_in'}{$mailerlogtype}/ ) {

                #Captures are copied at once, other matches follow
                my ( $queue_id, $message_id ) = ( $2, $3 );
                if ( defined $mailer_table{$queue_id} ) {
                    delete $mailer_table{$queue_id};
                    if ($debug) {
                        print $html_tags{'starttag'} . $html_tags{'br'};
                        print "INFO: A message \"id\" already existed as $queue_id. Deleted it from mailer_table before renew.\n";
                    }
                }

                #Exim specific :-(
                if ( $mailerlogtype eq 'exim' ) {
                    if ( $message_id =~ /^\s*id=(.*)$/ ) {
                        $mailer_table{$queue_id} = $1;
                    }
                    else {
                        $mailer_table{$queue_id} = $exim_no_id;
                    }
                }
                else {
                    $mailer_table{$queue_id} = $message_id;
                }

                next;
            }

            #EMAIL SENT
            if ( $line =~ /$Defs{'mailer_out'}{$mailerlogtype}/ ) {
                my ( $queue_id, $recipient ) = ( $1, $2 );
                if ( defined $mailer_table{$queue_id} ) {

                    #Exim specific code :-(
                    #A message logged without id is looked up in the spamd
                    #results by its exim id.
                    if (    ( $mailerlogtype eq "exim" )
                        and ( $mailer_table{$queue_id} eq $exim_no_id ) )
                    {
                        foreach my $key ( keys %spamd_table ) {
                            next unless $key =~ /\Q$queue_id\E/;
                            if ( $spamd_table{$key} eq "spam" ) {
                                $spam{ lc($recipient) }++;
                            }
                            elsif ( $spamd_table{$key} eq "clean" ) {
                                $clean{ lc($recipient) }++;
                            }
                        }
                    }

                    #End exim specific code
                    my $message_id = $mailer_table{$queue_id};
                    if ( defined $spamd_table{$message_id} ) {
                        if ( $spamd_table{$message_id} eq "spam" ) {
                            $spam{ lc($recipient) }++;
                        }
                        elsif ( $spamd_table{$message_id} eq "clean" ) {
                            $clean{ lc($recipient) }++;
                        }
                        if ($agglo_rcpt) {
                            delete $spamd_table{$message_id};
                            delete $mailer_table{$queue_id};
                        }
                    }
                }
                else {
                    if ($debug) {
                        print $html_tags{'starttag'}
                          . $html_tags{'br'}
                          . $html_tags{'starttag'}
                          . $html_tags{'b'};
                        print
"CRITICAL : Warning : Mailer delivered a message it never received? id $queue_id";
                        print $html_tags{'endtag'} . $html_tags{'b'} . "\n";
                    }
                }
                next;
            }
        }

        #spamd starts checking a message: remember which pid handles it
        if ( defined $mailerlogtype ) {
            if ( $line =~ /$Defs{'spamd_in'}{$mailerlogtype}/ ) {
                $spamd_pid{$1} = $2;
                next;
            }
        }

     #Detected as NON spam - Lets delete all its references from the buffer
        if ( $line =~ /$Defs{'spamd_clean'}/ ) {
            my ( $pid, $score, $seconds, $bytes ) = ( $1, $2, $3, $4 );
            if ( defined $spamd_pid{$pid} ) {
                $spamd_table{ $spamd_pid{$pid} } = "clean";
                delete( $spamd_pid{$pid} );
            }
            else {
                if ($debug) {
                    print $html_tags{'starttag'}
                      . $html_tags{'br'}
                      . $html_tags{'starttag'}
                      . $html_tags{'b'};
                    print "CRITICAL : spamd sent an answer for a message it did not receive? pid $pid";
                    print $html_tags{'endtag'} . $html_tags{'b'} . "\n";
                }
                if ($Spamd) {
                    $clean{$pid}++;
                }
            }
            $basic_clean_nb++;
            $clean_score  += $score;
            $clean_time   += $seconds;
            $clean_volume += $bytes;
            if ( defined $minmax ) {
                if ( $score < $min_clean_score ) {
                    $found{'min_clean_score'} = 1;
                    $min_clean_score = $score;
                }
                if ( $score > $max_clean_score ) {
                    $found{'max_clean_score'} = 1;
                    $max_clean_score = $score;
                }
                if ( $seconds < $min_clean_time ) {
                    $found{'min_clean_time'} = 1;
                    $min_clean_time = $seconds;
                }
                if ( $seconds > $max_clean_time ) {
                    $found{'max_clean_time'} = 1;
                    $max_clean_time = $seconds;
                }
                if ( $bytes < $min_clean_msgsize ) {
                    $found{'min_clean_msgsize'} = 1;
                    $min_clean_msgsize = $bytes;
                }
                if ( $bytes > $max_clean_msgsize ) {
                    $found{'max_clean_msgsize'} = 1;
                    $max_clean_msgsize = $bytes;
                }
            }
            next;
        }

        #SPAM FOUND
        if ( $line =~ /$Defs{'spamd_spam'}/ ) {
            my ( $pid, $score, $seconds, $bytes ) = ( $1, $2, $3, $4 );
            if ( defined $spamd_pid{$pid} ) {
                $spamd_table{ $spamd_pid{$pid} } = "spam";
                delete( $spamd_pid{$pid} );
            }
            else {
                if ($debug) {
                    print $html_tags{'starttag'}
                      . $html_tags{'br'}
                      . $html_tags{'starttag'}
                      . $html_tags{'b'};
                    print "CRITICAL : spamd sent an answer for a message it did not receive? pid $pid";
                    print $html_tags{'endtag'} . $html_tags{'b'} . "\n";
                }
                if ($Spamd) {
                    $spam{$pid}++;
                }
            }
            $basic_spam_nb++;
            $spam_score  += $score;
            $spam_time   += $seconds;
            $spam_volume += $bytes;
            if ( defined $minmax ) {
                if ( $score < $min_spam_score ) {
                    $found{'min_spam_score'} = 1;
                    $min_spam_score = $score;
                }
                if ( $score > $max_spam_score ) {
                    $found{'max_spam_score'} = 1;
                    $max_spam_score = $score;
                }
                if ( $seconds < $min_spam_time ) {
                    $found{'min_spam_time'} = 1;
                    $min_spam_time = $seconds;
                }
                if ( $seconds > $max_spam_time ) {
                    $found{'max_spam_time'} = 1;
                    $max_spam_time = $seconds;
                }
                if ( $bytes < $min_spam_msgsize ) {
                    $found{'min_spam_msgsize'} = 1;
                    $min_spam_msgsize = $bytes;
                }
                if ( $bytes > $max_spam_msgsize ) {
                    $found{'max_spam_msgsize'} = 1;
                    $max_spam_msgsize = $bytes;
                }
            }
            next;
        }
    }

    #We are in a non-existent case!
    #(the loop above is only left through "next FILELOOP")
    print STDERR
"WARNING, a piece of the program that shouldnt be run was reached!\nInvestigate!\n";
}

#Recipients grouped by their number of spams
my %stats = ();
foreach my $key ( keys %spam ) {
    push @{ $stats{ $spam{$key} } }, $key;
}

unless ($nogeneral) {
    my $nb_spam  = 0;
    my $nb_clean = 0;
    foreach my $key ( keys %spam ) {
        $nb_spam += $spam{$key};
    }
    foreach my $key ( keys %clean ) {
        $nb_clean += $clean{$key};
    }

    #output if firstdate is called

    if ( $firstdate == 1 ) {
        print $html_tags{'starttag'} . $html_tags{'br'};
        print "Statistic from $firstdate_d to $lastdate_d\n";
    }

    #General stats
    #Ok, not beautiful code. But its only run once...
    print $html_tags{'starttag'}
      . $html_tags{'br'}
      . $html_tags{'starttag'}
      . $html_tags{'br'};
    print "Total number of emails processed by the spam filter : "
      . $html_tags{'starttag'}
      . $html_tags{'b'}
      . ( $nb_spam + $nb_clean )
      . $html_tags{'endtag'}
      . $html_tags{'b'} . "\n";
    print $html_tags{'starttag'} . $html_tags{'br'};
    print $html_tags{'starttag'}
      .    # the last operand of this print is on the next source line
$html_tags{'b'};

    # Markup fragments repeated on almost every report line below.
    my $new_line = $html_tags{'starttag'} . $html_tags{'br'};
    my $bold_on  = $html_tags{'starttag'} . $html_tags{'b'};
    my $bold_off = $html_tags{'endtag'} . $html_tags{'b'};

    # --- Message counts and percentages -----------------------------------
    # All averages and percentages below are plain arithmetic; every divisor
    # is checked first and "n/a" is printed when there is nothing to divide.
    if ( $nb_spam + $nb_clean > 0 ) {
        $spam_percent = 100 * $nb_spam / ( $nb_spam + $nb_clean );
        printf( "%-40s:%10d (%6.2f%%)\n",
            "Number of spams", $nb_spam, $spam_percent );
    }
    else {
        printf( "%-40s:%10s\n", "Number of spams", "n/a" );
    }
    print $bold_off;

    print $new_line;
    print $bold_on;
    if ( $nb_spam + $nb_clean > 0 ) {
        $clean_percent = 100 * $nb_clean / ( $nb_spam + $nb_clean );
        printf( "%-40s:%10d (%6.2f%%)\n",
            "Number of clean messages", $nb_clean, $clean_percent );
    }
    else {
        printf( "%-40s:%10s\n", "Number of clean messages", "n/a" );
    }
    print $bold_off;

    # --- Analysis times ---------------------------------------------------
    print $new_line;
    printf( "%-40s:", "Average message analysis time" );
    print $bold_on;
    if ( $basic_spam_nb + $basic_clean_nb > 0 ) {
        printf "%10.2f",
          ( $spam_time + $clean_time ) / ( $basic_spam_nb + $basic_clean_nb );
    }
    else {
        print "n/a";
    }
    print $bold_off;
    print " seconds\n";

    print $new_line;
    printf( "%-40s:", "Average spam analysis time" );
    print $bold_on;
    if ( $basic_spam_nb > 0 ) {
        printf "%10.2f", $spam_time / $basic_spam_nb;
    }
    else {
        print "n/a";
    }
    print $bold_off;
    print " seconds";

    # Blank out extremes that no message ever updated.
    defined $found{'min_spam_time'} or $min_spam_time = "";
    defined $found{'max_spam_time'} or $max_spam_time = "";
    print "\n" . $new_line . $html_tags{'vspace'}
      . "\t(min spam time = $min_spam_time, max spam time = $max_spam_time)"
      if ( defined $minmax );
    print "\n";

    print $new_line;
    printf( "%-40s:", "Average clean message analysis time" );
    print $bold_on;
    if ( $basic_clean_nb > 0 ) {
        printf "%10.2f", $clean_time / $basic_clean_nb;
    }
    else {
        print "n/a";
    }
    print $bold_off;
    print " seconds";
    defined $found{'min_clean_time'} or $min_clean_time = "";
    defined $found{'max_clean_time'} or $max_clean_time = "";
    print "\n" . $new_line . $html_tags{'vspace'}
      . "\t(min clean time = $min_clean_time, max clean time = $max_clean_time)"
      if ( defined $minmax );
    print "\n";

    # --- Scores -----------------------------------------------------------
    #Spam with multiple recipients count only as one in the average...
    print $new_line;
    printf( "%-40s:", "Average message score" );
    print $bold_on;
    if ( $basic_clean_nb + $basic_spam_nb > 0 ) {
        printf "%10.2f",
          ( $spam_score + $clean_score ) /
          ( $basic_clean_nb + $basic_spam_nb );
    }
    else {
        print "n/a";
    }
    print $bold_off . "\n";

    #Spam with multiple recipients count only as one in the average...
    print $new_line;
    printf( "%-40s:", "Average spam score" );
    print $bold_on;
    if ( $basic_spam_nb > 0 ) {
        printf "%10.2f", $spam_score / $basic_spam_nb;
    }
    else {
        print "n/a";
    }
    defined $found{'min_spam_score'} or $min_spam_score = "";
    defined $found{'max_spam_score'} or $max_spam_score = "";
    print $bold_off;
    print "\n" . $new_line . $html_tags{'vspace'}
      . "\t\t(min spam score = $min_spam_score, max spam score = $max_spam_score)"
      if ( defined $minmax );
    print "\n";

    #Spam with multiple recipients count only as one in the average...
    print $new_line;
    printf( "%-40s:", "Average clean message score" );
    print $bold_on;
    if ($basic_clean_nb) {
        printf "%10.2f", $clean_score / $basic_clean_nb;
    }
    else {
        print "n/a";
    }
    print $bold_off;
    defined $found{'min_clean_score'} or $min_clean_score = "";
    defined $found{'max_clean_score'} or $max_clean_score = "";
    print "\n" . $new_line . $html_tags{'vspace'}
      . "\t\t(min clean score = $min_clean_score, max clean score = $max_clean_score)"
      if ( defined $minmax );
    print "\n";

    # --- Volumes ----------------------------------------------------------
    # unify() is defined elsewhere in the file; it takes a value and a unit
    # name and returns a (value, unit) pair -- presumably rescaled to a more
    # readable unit, to be confirmed against its definition.
    my $unit = "bytes";
    ( $spam_volume, $unit ) = unify( $spam_volume, $unit );

    print $new_line;
    printf( "%-40s:", "Total spam volume" );
    print $bold_on;
    printf "%10d ", $spam_volume;
    print $bold_off . $unit;
    if ( defined $minmax ) {
        my $unitmin = "bytes";
        my $unitmax = "bytes";
        ( $min_spam_msgsize, $unitmin ) = unify( $min_spam_msgsize, $unitmin );
        ( $max_spam_msgsize, $unitmax ) = unify( $max_spam_msgsize, $unitmax );

        # Extremes that were never updated are shown as empty, unit included.
        defined $found{'min_spam_msgsize'} or $min_spam_msgsize = "";
        defined $found{'max_spam_msgsize'} or $max_spam_msgsize = "";
        defined $found{'min_spam_msgsize'} or $unitmin          = "";
        defined $found{'max_spam_msgsize'} or $unitmax          = "";

        # NOTE(review): unlike the time/score lines, the closing ')' is never
        # printed here. Left untouched because external output parsers may
        # rely on the current text -- confirm before changing.
        print "\n" . $new_line . $html_tags{'vspace'} . "\t(min spam size = ";
        defined $found{'min_spam_msgsize'}
          and printf( "%2d", $min_spam_msgsize );
        print " $unitmin, max spam size = ";
        defined $found{'max_spam_msgsize'}
          and printf( "%2d", $max_spam_msgsize );
        print " $unitmax";
    }
    print "\n";

    $unit = "bytes";
    ( $clean_volume, $unit ) = unify( $clean_volume, $unit );
    print $new_line;
    printf( "%-40s:", "Total clean volume" );
    print $bold_on;
    printf "%10d ", $clean_volume;
    print $bold_off . $unit;

    if ( defined $minmax ) {
        my $unitmin = "bytes";
        my $unitmax = "bytes";
        ( $min_clean_msgsize, $unitmin ) =
          unify( $min_clean_msgsize, $unitmin );
        ( $max_clean_msgsize, $unitmax ) =
          unify( $max_clean_msgsize, $unitmax );
        defined $found{'min_clean_msgsize'} or $min_clean_msgsize = "";
        defined $found{'max_clean_msgsize'} or $max_clean_msgsize = "";
        defined $found{'min_clean_msgsize'} or $unitmin           = "";
        defined $found{'max_clean_msgsize'} or $unitmax           = "";

        # NOTE(review): same missing ')' as the spam size line above.
        print "\n" . $new_line . $html_tags{'vspace'} . "\t(min clean size = ";
        defined $found{'min_clean_msgsize'}
          and printf( "%2d", $min_clean_msgsize );
        print " $unitmin, max clean size = ";
        defined $found{'max_clean_msgsize'}
          and printf( "%2d", $max_clean_msgsize );
        print " $unitmax";
    }
    print "\n";
}

#Top spammed addresses
# Lists at most $number entries of %stats, highest spam count first. The
# limit is enforced inside each count bucket as well, so a large group of
# tied recipients cannot push the list past the requested size. Entries
# within a bucket are sorted so the output is stable from run to run.
if ($number) {
    print $html_tags{'starttag'} . $html_tags{'br'};
    print $html_tags{'starttag'} . $html_tags{'br'};
    print "Recipients with highest number of spams : (top $number)\n";

    # Work on a copy so the option value itself is left untouched.
    my $remaining = $number;

  COUNT:
    foreach my $key ( sort { $b <=> $a } keys %stats ) {
        last COUNT if $remaining <= 0;
        print $html_tags{'starttag'} . $html_tags{'br'};
        print $key. " spams : \n";
        foreach my $email ( sort @{ $stats{$key} } ) {
            print $html_tags{'starttag'}
              . $html_tags{'br'}
              . $html_tags{'vspace'};
            print "\t" . $email . "\n";
            last COUNT if --$remaining <= 0;
        }
    }
}

# Sanity notice: no recognised line at all, or more than one unrecognised
# line for every ten recognised ones.
if (   ( $correct_lines == 0 )
    or ( ( $incorrect_lines / $correct_lines ) > 0.1 ) )
{
    print $html_tags{'starttag'} . $html_tags{'br'};
    print $html_tags{'starttag'} . $html_tags{'br'};
    print
"INFO: It seems at least one input file contains other things that {exim/postfix} or spamd lines!\n";
}

# Close the document.
print $html_tags{'endtag'} . $html_tags{'body'};
print $html_tags{'endtag'} . $html_tags{'html'};