1#!/usr/local/bin/perl -w
2## FIXME : remove the -w above when you find the code is fixed !
3#
#This is spamstats.pl v0.6c
5#
6#
7#Changelog
8#0.6c 5 February 2006
9# Support for transparent bzip2 compression. Thanks to Yen-Ming Lee for the
10# patch. Requires Compress::Bzip2.
11#
12#0.6b 16 August 2005
13# Support the new spamd (v3.1.0rc1) logging format.
14
15#0.6a 20 April 2004
16# Here appears the -spamd option, to be used if you want to use only spamd logs,
17# and no correlation with mailer logs. Beware this option removes some useful
18# features : most spamed accounts, etc.
19# Thanks to Jean-Louis Bergamo (aka JLB) for this!
20#
21#0.6  8 April 2004
22# Adds cricket support for -minmax options. Plenty of new cool graphs available!
23# Fixes some minor bugs linked to these upgrades
24#
25#0.5b 5 March 2004
26# Fixes a typo which lead to confusion in the volume counting.
27# Thanks to Matthew McGehrin for the bugreport
28#
29#0.5a 25 February 2004
30# Two patches from Radko Keves, with these :
31# * Support for BSDs' sendmail daemons (sm-mta)
32# * Added the -firstdate option, useful for multiple files parsing
33#
34#0.5 30 January 2004
35# Many thanks to Cyril Chaboisseau (http://www.obs.coe.int) who did :
36#  * -minmax option that display min and max values for each displayed value
37#  * some cleanups I removed :-p (renaming "clean message" into "ham" would
38#     break some existing configs :( )
39#  * a lot of cleanup in the displaying code, that was very awful
40#  * tidied and reordered the code
41# Few other changes:
42#  * Fixed the bug where the -number and the -html options were leading to a
43#     poor output
44#
45#0.4b5 11 August 2003
46#Fixed the bug when a month starting with a zero is entered as start/enddate.
47#
48#0.4b4 10 June 2003
49# Fixed the infile == 0 bug, thanks to Yen-Ming Lee
50# Fixes sendmail parsing when email is delivered through procmail, raised by Dirk Kuypers
51#
52#
53#0.4b3 2 June 2003
54#Applied patches from Bob Apthorpe for :
#  * more elegant fix of the two digits month input problem
56#  * better input handling, now files to process can be specified in @ARGV without the --file switch
57#  * Added documentation and scripts to graph spamstats output with cricket.
58#
59#0.4b2 30 May 2003
60#Regexp bugfix in exim mailer_in handling
61#Regexp bugfix in spamd ("processing message" seems to have changed to "checking message") on some setups.
62#Updated README into a more english (and less french) syntax
63#
64#0.4b1 19 May 2003
65#This is a very tiny bugfix.
66#Fixes parsing mistakes on sendmail setups that relay emails as outputs.
67#Emails were undetected on those setups.
68#
69#0.4b 10 Mar 2003
70#WARNING : this release changes the default behaviour of spamstats calculations !!
71#From this version on spamstats counts spams and non-spams per recipient, not per mailer ID.
72#(Until this version, a multirecipient message sent to both "foo@yourdomain.com" and "bar@yourdomain.com"
73#counted only as one spam. From now on it counts as two.
74#New option : -agglo-recipients  uses spamstats "old" mode : one count per mailer ID, not per recipient.
75#WARNING : FOR NOW EXIM USERS PROBABLY WANT TO USE THIS OPTION, ON SOME EXIM CONFIGS
76#          THERE ARE RISKS LOG ANALYSIS BE BROKEN IF NOT USED!
77#Applied patch from Jim Breton <jimb@alongtheway.REMOVETHIS.com> for a better display.
78#
79#0.4 25 Feb 2003
80#[Probably very incomplete] sendmail support
81#Only sendmail regexp were added, no code modification !
82#This is not a very important release in terms of work. Hopefully it is in terms
83#of capabilities :-)
84#
85#0.3b2 30 Jan 2003
86#Fix a problem where script will issue warnings when parsed log file is empty or
87#contains no reference to used mailer (only contains spamd messages).
88#
89#0.3b 04 Jan 2003
90#Added a (hopefully) useful time filter specification to be used : duration specification.
91#
92#0.3a 29 Dec 2002
93#Date/Time filter now works.
94#Some tiny code cleanup.
95#HTML output support.
96#
97#0.3alpha 17 Dec 2002
98#Exim support
99#Some work on date/time filtering support, far from complete. These options are useless for now.
100#
101#0.2f 26 Nov 2002
102#If one input file does not exist, mentions which!
103#
104#0.2e 26 Nov 2002
105#Option "-noabsolute" makes spamstats not complain if argument log file names are not absolute.
106#Now reports total Volume of Spam and Volume of ham in general statistics.
107#
108#0.2d
109#Local recipients were not counted, only relayed ones.
110#Regexp was modified to just match both.
111#Thanks to Jon Gabrielson for bug report
112#
113#0.2c
114#No more lower/upper case distinction in top recipients classification
115#Thanks to Kenneth Nerhood for bug report
116#
117#0.2b
118#Fixes stupid bug from 0.2 where spamd process had to run as user "spamd"
119#Thanks to Kenneth Nerhood for bug report
120#
121#
#Parses a mailer (Postfix, Exim or Sendmail) and spamd log file (or several) and extracts the top Spam receivers.
123#Also displays spam statistics.
124#
125#Author : Vincent Deffontaines <vincent@gryzor.REMOVETHISIFNOSPAM.com>
126#Script Basis, Postfix support Copyright : Vincent Deffontaines
127#                                          KDX (www.kdx.fr)
128#                                          Council of Europe (www.coe.int)
129#
130#Exim support                  Copyright : Vincent Deffontaines.
131#Sendmail support                  Copyright : Vincent Deffontaines.
132#
133#Please send me contributions/ modifications/ comments that could be useful to this script!
#Support for mailers other than Postfix/Exim/Sendmail shouldn't be hard to implement.
135#Author will help and include modifications to this script as long as mailers are free software.
136#
137#This program is free software; you can redistribute it and/or
138#modify it under the terms of the GNU General Public License
139#as published by the Free Software Foundation; version 2
140#of the License.
141#
142#This program is distributed in the hope that it will be useful,
143#but WITHOUT ANY WARRANTY; without even the implied warranty of
144#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
145#GNU General Public License for more details.
146#
147#You should have received a copy of the GNU General Public License
148#along with this program; if not, write to the Free Software
149#Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
150#
151
152#
153#
154use strict;
155use Time::Local;
156use Date::Manip qw (ParseDate UnixDate);
157
158use Getopt::Long;
159#Only useful for Debugging, useless if you don't hack through this code :-)
160#use Data::Dumper;
161
162use Compress::Zlib;
163use Compress::Bzip2 qw(&bzopen $bzerrno);
164
#Set of log files to analyse : file name => 1.
my %infile = ();

#Command line switches and their default values.
my ( $number, $help, $nogeneral, $debug, $noabsolute ) = (0) x 5;

#Number of problems found while validating the command line.
my $error = 0;

#Time filter as typed by the user; "none" stands for "not specified".
my ( $starttime, $endtime, $startdate, $enddate ) = ("none") x 4;

#Cleared further down when no -startdate was given on the command line.
my $startdate_d = 1;

#-firstdate switch, plus the first/last log dates it reports. They are
#initialised to the opposite ends of the year.
my $firstdate   = 0;
my $firstdate_d = "Dec 31 23:59:59";
my $lastdate_d  = "Jan  1 00:00:00";

#Set to 1 when the corresponding bound of the time filter is unused.
my ( $skipstarttest, $skipendtest ) = ( 0, 0 );

#Mailer whose log format was detected (exim, postfix, sendmail or sm-mta);
#undefined until a line from one of them has been seen.
my $mailerlogtype;

my ( $html, $duration, $agglo_rcpt, $Spamd ) = (0) x 4;

#Markup fragments used while printing; all empty for plain text output.
my %html_tags =
  map { $_ => '' } qw(br b i html body endtag starttag vspace);

my ( $spam_percent, $clean_percent ) = ( 0, 0 );
my $minmax;
201
#Parse regexp definitions for each mailer and for spamd.
#The patterns are kept as strings and interpolated when a line is matched.
#  mailer_in / spamd_in / mailer_out : one pattern per mailer name
#  spamd_clean / spamd_spam          : independent of the mailer
my %Defs = ();
{

    #Every pattern starts with the syslog header "Mon DD HH:MM:SS host ".
    my $syslog = '^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+';

    #spamd starting to scan a message; captures the spamd pid first, the
    #mailer specific tail below captures the message id.
    my $scan_start = $syslog
      . '(?:spamassassin|spamd)\[(\d+)\]:\s+(?:spamd:|)\s*(?:processing|checking)\s+message\s*';

    #spamd verdict lines; capture pid, score, scan time and size in bytes.
    my $verdict_head =
      $syslog . '(?:spamd|spamassassin)\[(\d+)\]:\s+(?:spamd:|)\s*';
    my $verdict_tail =
'\s*\(([^\/]+)\/[^\)]+\)\s+for\s+\S+\d+\s+in\s+(\S+)\s+seconds,\s+(\d+)\s+bytes\.';

    %Defs = (
        'mailer_in' => {
            'postfix' => $syslog
              . 'postfix\/cleanup\[(\d*)\]:\s+([^:]+):\s*message-id=(.*)$',
            'exim' => $syslog
              . 'exim\[(\d+)\]:\s+\d{4}-\d{2}-\d{2}\s+\d+:\d+:\d+\s+(\S+)\s+<=\s+.*\@\S+\s+H=.*\s+P=\S+.*\s+S=\S+(.*)$',
        },
        'spamd_in' => {
            'postfix' => $scan_start . '(.*)\s+for\s+\S+',
            'exim'    => $scan_start . '<(.*)>\s+for\s+\S+',
        },
        'spamd_clean' => $verdict_head . 'clean\s+message' . $verdict_tail,
        'spamd_spam'  => $verdict_head . 'identified\s+spam' . $verdict_tail,
        'mailer_out'  => {

#'Mar 10 02:11:24 barrel postfix/smtp[20611]: 5A9BF22E04: to=<obfuscated>, relay=127.0.0.1[127.0.0.1], delay=2, status=sent (250 ok 1047280284 qp 20787)'
            'postfix' => $syslog
              . 'postfix\/(?:pipe|local)\[\d+\]:\s+([^:]+):\s+to=<([^>]+)>',
            'exim' => $syslog
              . 'exim\[\d+\]:\s+\d{4}-\d{2}-\d{2}\s+\d+:\d+:\d+\s+(\S+)\s+=>\s+(.*\@\S+)\s+',
        },
    );

    #sendmail and BSD's sm-mta log the same lines under another process name.
    foreach my $daemon ( 'sm-mta', 'sendmail' ) {
        $Defs{'mailer_in'}{$daemon} = $syslog . $daemon
          . '\[(\d+)\]:\s+(\S+):\s+from=<[^>]*>,\s+size=\d+,\s+class=\S+,\s+nrcpts=\d+,\s+msgid=<([^>]+)>.*,\s+proto=\S+,\s+daemon=\S+,\s+relay=.*$';
        $Defs{'spamd_in'}{$daemon} = $scan_start . '<(.*)>\s+for\s+';
        $Defs{'mailer_out'}{$daemon} = $syslog . $daemon
          . '\[\d+\]:\s+(\S+):\s+to=(?:\|.*ctladdr=<|<)([^>]+)>.*,\s+delay=\S+,\s+xdelay=\S+,\s+mailer=\S+,\s+pri=\d+.*,\s+dsn=\S+,\s+stat=\S+';
    }
}
237
sub Print_Usage() {

    #Prints the command line help on standard output : synopsis first,
    #then the general switches, then the time filter switches.
    my @synopsis = (
        "{Exim/Postfix/Sendmail} & spamd logfile analyser. Extracts top N Spam receivers\n",
        "$0 [-help] [-debug][-file=/path/to/filename] [-file=...] [-number=...] [-nogeneral]\n",
        "   [-startdate=dd-mm] [-starttime=hh:mm:ss] [-enddate=dd-mm] [-endtime=hh:mm:ss]\n",
        "   [-duration=number of seconds] /path/to/file1 [/path/to/file2] [/path/to/file3.gz]\n",
        "   [-firstdate]\n",
    );

    my @general = (
        "GENERAL OPTIONS\n",
        "\t-debug\t\t\t: Displays informations that _might_ indicate problems while parsing.\n",
        "\t-help\t\t\t: Displays this help and exits.\n",
        "\t-file /path/file\t: Analyses maillog file for spam results (as logged by spamd) :\n",
        "\t\t\t\t  Several files can be asked for parsing at a time, including .gz files\n",
        "\t\t\t\t  Default /var/log/maillog\n",
        "\t\t\t\t  This switch is DEPRECATED, simply specify filenames after all options,\n",
        "\t\t\t\t   without any switch.\n",
        "\t-number number\t\t: specifies number of top spam receivers to display (default : 0).\n",
        "\t-nogeneral\t\t: do not display general stats.\n",
        "\t-noabsolute\t\t: lets non-absolute named logfiles be processed.\n",
        "\t-html\t\t\t: HTML output\n",
        "\t-minmax\t\t\t: Display minimum and maximum values\n",
    );

    my @time_filter = (
        "TIME FILTER OPTIONS (no time filter used if no option specified)\n",
        "\t-startdate dd-mm\t: Process only data logged from that date\n",
        "\t\t\t\t  Default : today if starttime specified, else unused\n",
        "\t-enddate   dd-mm\t: Process only data logged until that date\n",
        "\t\t\t\t  Default : today if endtime specified, else unused\n",
        "\t-starttime hh:mm:ss\t: Process only data logged from that time (default time : 0:00:00)\n",
        "\t-endtime   hh:mm:ss\t: Process only data logged until that time (default time : current time)\n",
        "\t-firstdate\t\t: Displays only first and last date of log messages\n",
        "\t\t\t\t  (useful for multiple files parsing).\n",
        "\t-duration  seconds\t: Work only on specified duration.\n",
        "\t\t\t\t  To be used with start XOR end{time/date}, obviously not with both.\n",
        "\t\t\t\t  Default : unused\n",
        "\t\t\t\t  Default if no other time switch : process n seconds until current time.\n",
        "\tWhy no year in dates input? Just because there is no year reported in postfix maillogs\n",
        "\tThis will obviously cause time filter problems around new year!\n",
        "\t-agglo-recipients\t: Old spamstats counting. One count by mail ID, not by actual recipient.\n",
        "\t\t\t\t  EXIM users WANT to set this for now!\n",
        "\t-spamd\t: Log file contains ONLY spamd log. it will not show per user stats.\n",
    );

    print join( '', @synopsis, @general, @time_filter );
}
276
sub unify($$)

#Converts (value, unit) from bytes, kilobytes, megabytes into a more human readable expression
#Input  : a volume and its unit ("bytes", "kbytes", "Mbytes" or "Gbytes")
#Output : list (volume, unit), the volume being divided by 1024 and the unit
#         promoted for as long as the promoted value stays above 5.
#         An unknown unit, or "Gbytes", is returned untouched.
{
    my ( $volume, $unit ) = @_;

    #Next bigger unit for each unit that can still be promoted. "Mbytes" is
    #part of the chain so that very large volumes really reach "Gbytes".
    my %bigger = (
        'bytes'  => 'kbytes',
        'kbytes' => 'Mbytes',
        'Mbytes' => 'Gbytes',
    );

    while ( exists $bigger{$unit} and ( $volume / 1024 ) > 5 ) {
        $volume = $volume / 1024;
        $unit   = $bigger{$unit};
    }
    return ( $volume, $unit );
}
298
sub c_d($)

  #Tells whether a value was actually given.
  #Input  : a string
  #Output : 0 if the string is the placeholder "none", 1 otherwise.
{
    #The argument has to be read from @_ : $1 only holds the first capture
    #of the last successful regexp match, which is unrelated to the call.
    my ($value) = @_;
    return ( $value eq "none" ) ? 0 : 1;
}
310
311
sub check_date($)

  #Checks given date is correct (expected format string: "d[d]-m[m]" or "none").
  #The day is checked against the length of the month; as no year is
  #given, February 29 is accepted.
  #Returns 0 if correct, 1 if not.
{
    my $date = shift @_;
    return 0 if $date eq 'none';
    return 1 unless $date =~ /^(\d{1,2})-(\d{1,2})$/;

    #int() gets rid of a leading zero ("03" becomes 3).
    my $day   = int($1);
    my $month = int($2);

    #Longest possible length of each month, January first.
    my @month_length = ( 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 );

    return 1 if $month < 1 or $month > 12;
    return 1 if $day < 1   or $day > $month_length[ $month - 1 ];
    return 0;
}
329
sub check_time($)

  #Checks given time is correct (expected format string: "h[h]:mm:ss" or "none").
  #Hours run from 0 to 23, minutes and seconds from 0 to 59.
  #Returns 0 if correct, 1 if not.
{
    my $time = shift @_;
    return 0 if $time eq 'none';
    return 1 unless $time =~ /^(\d{1,2}):(\d{2}):(\d{2})$/;

    my ( $hour, $minute, $second ) = ( $1, $2, $3 );

    #The pattern only lets digits through, so the lower bounds hold already.
    return 1 unless $hour < 24 and $minute < 60 and $second < 60;
    return 0;
}
348
sub convert_d_t_e($)

  #Input : a date string
  #Output : the value UnixDate() gives for the "%s" format once the
  #         string has been parsed by ParseDate() (both from Date::Manip).
{
    my ($text) = @_;

    #Drop a trailing parenthesised part, together with the blanks around it.
    $text =~ s/\s+\(.*\)\s*$//;

    my $parsed = ParseDate($text);
    return scalar UnixDate( $parsed, "%s" );
}
358
359
360
sub convert_date_time_to_epoch($$)

  #Input : date "d[d]-m[m]", time "h[h]:mm:ss"
  #Output : Pseudo epoch (no year included in input) : number of seconds
  #         counted from the beginning of the year, so that a later moment
  #         of the same year always gives a bigger value.
  #Returns -1 in case of trouble (unparsable date or time, month not in 1..12).
  #This function will ALWAYS be BUGGY around new year days
  #This function also presents a bug in case of a leap year (when Feb 29 exists) :
  #February always counts for 28 days.
  #This is due to year not being logged, which indeed confuses things on such days.
{
    my ( $date, $time ) = @_;

    return -1 unless $date =~ /^(\d{1,2})-(\d{1,2})$/;

    #int() removes the leading 0 if there is one
    my $day   = int($1);
    my $month = int($2);
    return -1 if $month < 1 or $month > 12;

    return -1 unless $time =~ /^(\d{1,2}):(\d{2}):(\d{2})$/;
    my $hour   = int($1);
    my $minute = int($2);
    my $second = int($3);

    #Length of each month, January first.
    my @month_length = ( 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 );

    #Days elapsed in the months that come BEFORE the given one. Adding the
    #length of the given month itself would make Feb 1 come before Jan 31.
    my $days_before = 0;
    foreach my $index ( 0 .. $month - 2 ) {
        $days_before += $month_length[$index];
    }

    my $result =
      ( $days_before + $day ) * 24 * 3600 + $hour * 3600 + $minute * 60 +
      $second;
    return ($result);
}
403
sub fill_zeros($)

  #Input : a reference to a list of numbers
  #Output : same numbers list, each preceded by a zero if originally one digit long
  #The referenced list itself is modified too.
{
    my ($numbers) = @_;
    s/^(\d)$/0$1/ for @{$numbers};
    return @{$numbers};
}
418
#Files named with the deprecated -file switch (bare arguments are added later).
my @infiles = ();

#Parse the command line. GetOptions() reports every bad switch on STDERR
#by itself and returns false in that case : count it as an input error, so
#that the script stops with the other sanity checks instead of running with
#a silently ignored switch.
GetOptions(
    "file=s"           => \@infiles,
    "number=i"         => \$number,
    "help"             => \$help,
    "debug"            => \$debug,
    "noabsolute"       => \$noabsolute,
    "nogeneral"        => \$nogeneral,
    "html"             => \$html,
    "minmax"           => \$minmax,
    "startdate=s"      => \$startdate,
    "enddate=s"        => \$enddate,
    "firstdate"        => \$firstdate,
    "starttime=s"      => \$starttime,
    "endtime=s"        => \$endtime,
    "duration=s"       => \$duration,
    "agglo-recipients" => \$agglo_rcpt,
    "spamd"            => \$Spamd
) or $error++;
438
#Bare arguments left on the command line are log files as well.
push @infiles, @ARGV;

#Fall back on the usual maillog when nothing was asked for.
my $defmaillog = '/var/log/maillog';
push @infiles, $defmaillog if ( $#infiles == -1 && -f $defmaillog );

#Register every requested file, including those that cannot be found :
#the per file validation done before processing names each missing file
#and aborts, instead of the file being silently left out of the statistics.
foreach my $fn (@infiles) {
    $infile{$fn} = 1;
}

if ($help) {
    Print_Usage();
    exit 0;
}
452
#Sanity checks
#Each entry pairs the result of a format check (0 means correct) with the
#message to print when the check failed. Every failure is counted in $error.
foreach my $check (
    [ check_date($startdate), "Bad input format start date was entered\n" ],
    [ check_date($enddate),   "Bad input format end date was entered\n" ],
    [ check_time($starttime), "Bad input format start time was entered\n" ],

    #This message used to say "end date", which pointed to the wrong switch.
    [ check_time($endtime), "Bad input format end time was entered\n" ],
  )
{
    my ( $status, $message ) = @{$check};
    next if $status == 0;
    print STDERR $message;
    $error++;
}

unless ( $duration =~ /^\d+$/ ) {
    print STDERR "Bad input : duration is supposed to be numeric\n";
    $error++;
}
474
#With -html the (so far empty) markup table gets its real content. Tag
#names carry their closing ">" and are printed after 'starttag' or 'endtag'.
if ($html) {
    %html_tags = (
        br       => 'br>',
        b        => 'b>',
        i        => 'i>',
        html     => 'html>',
        body     => 'body>',
        endtag   => '</',
        starttag => '<',
        vspace   => '<img width=50 height=0>',
    );
}
485
#Validates every requested log file; each problem is reported on STDERR
#and counted in $error.
foreach my $file ( sort keys %infile ) {

    #File names are later given to a two-argument open(), where "<", ">"
    #and "|" select a mode or even run a command. Refuse these, as well as
    #control characters and empty names. (The former test only asked for
    #one harmless character somewhere in the name, so it rejected nothing
    #dangerous.)
    if ( $file eq '' or $file =~ /[<>|\x00-\x1f\x7f]/ ) {
        die "Illegal characters read in parameter file name!\n";
    }

    unless ( -f $file ) {
        print STDERR "$file : File does not exist!\n";
        $error++;
    }
    if ( ( $file !~ /^\// ) and ( $noabsolute == 0 ) ) {
        print STDERR
"$file : Path to file must be absolute, or you must specify the \"-noabsolute\" option\n";
        $error++;
    }
}
501
#A bound of the time filter is skipped when neither its date nor its time
#was given.
$skipstarttest = 1 if $starttime eq "none" and $startdate eq "none";
$skipendtest   = 1 if $enddate eq "none"   and $endtime eq "none";

#A duration only makes sense when at most one bound is given.
if ( $duration > 0 and $skipstarttest == 0 and $skipendtest == 0 ) {
    print STDERR
"Input redundancy : You may not specify starttime, endtime and duration\n";
    $error++;
}

#Give up as soon as any of the checks made so far has failed.
exit 1 if $error;
515
516
#Missing dates default to today (as "day-month", month counted from 1).
if ( $startdate eq 'none' ) {
    my @now = localtime;
    $startdate = $now[3] . "-" . ( $now[4] + 1 );

    #Remember that the start date did not come from the command line.
    $startdate_d = 0;
}

if ( $enddate eq 'none' ) {
    my @now = localtime;
    $enddate = $now[3] . "-" . ( $now[4] + 1 );
}

#A missing start time means the beginning of the day, a missing end time
#means right now (each field padded to two digits).
$starttime = '00:00:00' if $starttime eq 'none';
if ( $endtime eq 'none' ) {
    my ( $sec, $min, $hour ) = localtime;
    $endtime = sprintf( '%02d:%02d:%02d', $hour, $min, $sec );
}
536
#Hash to track if we found some actual values.
my %found;

#Starting points of the minimum/maximum search, by message class and by
#measure (score, scan time, message size) : ( minimum, maximum ).
my ( $min_clean_score,   $max_clean_score )   = ( 5.5,   0 );
my ( $min_clean_time,    $max_clean_time )    = ( 99,    0 );
my ( $min_clean_msgsize, $max_clean_msgsize ) = ( 99999, 0 );
my ( $min_spam_score,    $max_spam_score )    = ( 99,    5.5 );
my ( $min_spam_time,     $max_spam_time )     = ( 99,    0 );
my ( $min_spam_msgsize,  $max_spam_msgsize )  = ( 99999, 0 );
550
551#   and $endtime = join(':',reverse(fill_zeros((localtime)[0,1,2])));
552
#Opening <html><body> of the report (prints nothing in plain text mode,
#where all the tags are empty strings).
print join( '', @html_tags{ 'starttag', 'html', 'starttag', 'body' } );

#print "Time filter used : From $startdate $starttime to $enddate $endtime\n";

#Bounds of the time filter, as pseudo epochs.
my $epoch_start = convert_date_time_to_epoch( $startdate, $starttime );
my $epoch_end   = convert_date_time_to_epoch( $enddate,   $endtime );

#A duration replaces the bound that was not given : the start when no start
#was given (even if no end was given either), the end otherwise. From then
#on both bounds are tested and the duration itself is not needed anymore.
if ( $duration > 0 and ( $skipstarttest == 1 or $skipendtest == 1 ) ) {
    if ( $skipstarttest == 1 ) {
        $epoch_start = $epoch_end - $duration;
    }
    else {
        $epoch_end = $epoch_start + $duration;
    }
    $duration      = 0;
    $skipstarttest = 0;
    $skipendtest   = 0;
}

#print "DEBUG : $epoch_start to $epoch_end\n";

if ( $epoch_start > $epoch_end ) {
    print STDERR "WARNING : time filter seems incorrect : it starts after it ends! $epoch_start > $epoch_end\n";
}
#Month number (1 to 12) of each month abbreviation found in the log lines.
my %mounths;
@mounths{qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)} = ( 1 .. 12 );
602
#State of the compressed file reader.
my $is_gz = 0;    #0 : plain file, 1 : gzip file, 2 : bzip2 file
my $gz;           #Handle given back by gzopen() or bzopen()

#$gzerrno and $bzerrno are NOT declared here on purpose : they are the
#variables imported from Compress::Zlib and Compress::Bzip2. A "my" variable
#of the same name would hide them, and the "Cannot open ..." messages would
#then always show an empty error.
607
#Number of spams / of clean messages, by lowercased recipient address.
my ( %spam, %clean );

#Correlation tables filled while reading the logs :
my %mailer_table = ();    #mailer id of a message => its message id
my %spamd_table  = ();    #message id => "spam" or "clean"
my %spamd_pid    = ();    #spamd pid => message id being checked

#Running totals.
my ( $spam_score,    $clean_score )    = ( 0, 0 );
my ( $spam_time,     $clean_time )     = ( 0, 0 );
my ( $basic_spam_nb, $basic_clean_nb ) = ( 0, 0 );
my ( $spam_volume,   $clean_volume )   = ( 0, 0 );

my $incorrect_lines = 0;    #Count unparsable lines
my $correct_lines   = 0;    #Count parsable lines
627
628#Processing
629FILELOOP: foreach my $file ( keys %infile ) {
630    undef $mailerlogtype;
631    my $first_date = "";
632    my $last_date  = "";
633    my $line;
634    my $linetime = 0;
635    $is_gz = 0;
636
637    #$eof = 0;
638    if ( $file =~ /\.gz$/ )    #We have a gz file
639    {
640        #print BLUE "Opening $file\n".$Stag.$NewLinetag;
641        $gz = gzopen( $file, "r" ) or die "Cannot open $file : $gzerrno\n";
642        $is_gz = 1;
643    }
644    elsif ( $file =~ /\.bz2$/ )    #We have a bz2 file
645    {
646        #print BLUE "Opening $file\n".$Stag.$NewLinetag;
647        $gz = bzopen( $file, "r" ) or die "Cannot open $file : $bzerrno\n";
648        $is_gz = 2;
649    }
650    else {
651
652        #print BLUE "Opening $file\n".$Stag.$NewLinetag;
653        open( FILE, $file ) or die "Unable to open file!\n";
654    }
655
656    while (1)
657      #while (not $eof)
658    {
659        if ($is_gz) {
660            die "File not open!\n" if not defined $gz;
661            unless ( $gz->gzreadline($line) > 0 ) {
662
663                #$eof = 1;
664                $gz->gzclose();
665#                printf "$startdate\n";
666               if ( $firstdate == 0 ){
667                print $html_tags{'starttag'} . $html_tags{'br'};
668                print "File $file : from $first_date to $last_date\n";
669               }
670                else {
671                #find firstdate
672                if  (convert_d_t_e($first_date) <
673                     convert_d_t_e($firstdate_d)) {
674                                $firstdate_d = $first_date;
675                               }
676                if ($startdate_d) {
677                        $firstdate_d = $startdate;
678                        }
679               #find lastdate
680                if ( convert_d_t_e($last_date) ge
681                      convert_d_t_e($lastdate_d) ) {
682                                $lastdate_d = $last_date;
683                               }
684                if ($startdate_d) {
685                        $lastdate_d = $enddate;
686                        }
687
688                }
689
690                next FILELOOP;
691            }
692        }
693        else {
694            {
695                unless ( defined( $line = <FILE> ) ) {
696
697                    #$eof = 1;
698                    close FILE;
699                    if ($firstdate == 0 ){
700                    print $html_tags{'starttag'} . $html_tags{'br'};
701                    print "File $file : from $first_date to $last_date\n";
702                            }
703                else {
704                #find firstdate
705                if  (convert_d_t_e($first_date) <
706                     convert_d_t_e($firstdate_d)) {
707                                $firstdate_d = $first_date;
708                               }
709                if ($startdate_d) {
710                        $firstdate_d = $startdate;
711                        }
712               #find lastdate
713                if ( convert_d_t_e($last_date) ge
714                      convert_d_t_e($lastdate_d) ) {
715                                $lastdate_d = $last_date;
716                               }
717                if ($startdate_d) {
718                        $lastdate_d = $enddate;
719                        }
720
721                }
722                    next FILELOOP;
723                }
724            }
725        }
726
727        unless ( $line =~ /^\s*([a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+)\s+\S+\s+/ ) {
728            $incorrect_lines++;
729            next;
730        }
731        unless ( defined $mailerlogtype ) {
732            if ( $line =~
733/^\s*([a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+)\s+\S+\s+(exim|postfix|sendmail|sm-mta)/
734              )
735            {
736                $mailerlogtype = $2;
737
738                #print $mailerlogtype." style log file detected\n";
739            }
740        }
741
742        $correct_lines++;
743        if ( $first_date eq "" ) { $first_date = $1; }
744        $last_date = $1;
745        if ( ( $skipstarttest * $skipendtest ) == 0 ) {
746            $last_date =~ /^([a-zA-Z]{3})\s+(\d+)\s+(\d+:\d+:\d+)$/
747              and $linetime =
748              convert_date_time_to_epoch( $2 . "-" . $mounths{$1}, $3 );
749        }
750        unless ( $skipstarttest == 1 ) {
751            $linetime < $epoch_start and next;
752        }
753
754        unless ( $skipendtest == 1 ) {
755            $linetime > $epoch_end and next;
756        }
757
758        #Here is Mailer analysis section. Spamd analysis is below.
759        #We are not running this code unless we know which mailer we are having:
760        if ( defined $mailerlogtype ) {
761
762            #Email IN
763#if ($line =~ /^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+postfix\/cleanup\[(\d*)\]:\s+([^:]+):\s*message-id=(.*)$/)
764            if ( $line =~ /$Defs{'mailer_in'}{$mailerlogtype}/ ) {
765                if ( defined $mailer_table{$2} ) {
766                    delete $mailer_table{$2};
767                    if ($debug) {
768                        print $html_tags{'starttag'} . $html_tags{'br'};
769                        print "INFO: A message \"id\" already existed as $2. Deleted it from mailer_table before renew.\n";
770                    }
771                }
772
773                #Exim specific :-(
774                if ( $mailerlogtype eq 'exim' ) {
775                    my $dollar2 = $2;
776                    my $id      = undef;
777                    if ( $3 =~ /^\s*id=(.*)$/ ) {
778                        $id = $1;
779                    }
780                    else {
781
782                        #print "TWO\n";
783                        $id =
784                          "I_have_no_id_��:-("
785                          ;    #Hope this will never be a real id ...
786                    }
787                    $mailer_table{$dollar2} = $id;
788                }
789                else {
790                    $mailer_table{$2} = $3;
791                }
792
793#print "DEBUG : postfix received message on ID $3, message code $2\n";
794                next;
795            }
796
797            #EMAIL SENT
798#if ($line =~ /^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+postfix\/(?:pipe|local)\[\d+\]:\s+([^:]+):\s+to=<([^>]+)>/)
799            if ( $line =~ /$Defs{'mailer_out'}{$mailerlogtype}/ ) {
800                if ( defined $mailer_table{$1} ) {
801
802                    #Exim specific code :-(
803                    if ( $mailerlogtype eq "exim" ) {
804                        my $blah      = $1;
805                        my $tmp_email = $2;
806                        if ( $mailer_table{$1} =~ /^I_have_no_id_��:-\($/ ) {
807                            foreach my $key ( keys %spamd_table ) {
808                                if ( $key =~ /$blah/ ) {
809
810#print "I think I maybe resolved a floating Exim ID\n";
811                                    if ( $spamd_table{$key} eq "spam" ) {
812                                        $spam{ lc($tmp_email) }++;
813
814                                        #   print "SPAM for $tmp_email\n";
815                                    }
816                                    elsif ( $spamd_table{$key} eq "clean" ) {
817                                        $clean{ lc($tmp_email) }++;
818
819                                        #   print "CLEAN for $tmp_email\n";
820                                    }
821                                }
822                            }
823                        }
824                    }
825
826                    #End exim specific code
827                    if ( defined $spamd_table{ $mailer_table{$1} } ) {
828                        if ( $spamd_table{ $mailer_table{$1} } eq "spam" ) {
829                            $spam{ lc($2) }++;
830                        }
831                        elsif ( $spamd_table{ $mailer_table{$1} } eq "clean" ) {
832                            $clean{ lc($2) }++;
833                        }
834                        if ($agglo_rcpt) {
835                            delete $spamd_table{ $mailer_table{$1} };
836                            delete $mailer_table{$1};
837                        }
838                    }
839                }
840                else {
841                    if ($debug) {
842                        print $html_tags{'starttag'}
843                          . $html_tags{'br'}
844                          . $html_tags{'starttag'}
845                          . $html_tags{'b'};
846                        print
847"CRITICAL : Warning : Mailer delivered a message it never received? id $1";
848                        print $html_tags{'endtag'} . $html_tags{'b'} . "\n";
849                    }
850                }
851                next;
852            }
853        }
854
855#if ($line =~ /^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+spamd\[(\d+)\]:\s+processing\s+message\s*(.*)\s+for\s+\S+/)
856        if ( defined $mailerlogtype ) {
857            if ( $line =~ /$Defs{'spamd_in'}{$mailerlogtype}/ ) {
858
859                #foreach my $key(keys %spamd_pid)
860                #{
861                #   if ($spamd_pid{$key} eq $2)
862                #   {
863                #      delete $spamd_pid{$key};
864#      print "INFO: A message \"id\" already existed as $2. Deleted it from spamd_pid before renew.\n";
865                #   }
866                #}
867                $spamd_pid{$1} = $2;
868                next;
869            }
870        }
871
872        #Detected as NON spam - Lets delete all its references from the buffer
873#if ($line =~ /^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+spamd\[(\d+)\]:\s+clean\s+message\s*\(([^\/]+)\/[^\)]+\)\s+for\s+\S+\d+\s+in\s+(\S+)\s+seconds,\s+(\d+)\s+bytes\./)
874        if ( $line =~ /$Defs{'spamd_clean'}/ ) {
875            if ( defined $spamd_pid{$1} ) {
876                $spamd_table{ $spamd_pid{$1} } = "clean";
877                delete( $spamd_pid{$1} );
878            }
879            else {
880                if ($debug) {
881                    print $html_tags{'starttag'}
882                      . $html_tags{'br'}
883                      . $html_tags{'starttag'}
884                      . $html_tags{'b'};
885                    print "CRITICAL : spamd sent an answer for a message it did not receive? pid $1";
886                    print $html_tags{'endtag'} . $html_tags{'b'} . "\n";
887                }
888                if ($Spamd){
889                  $clean{$1}++;
890                }
891            }
892            $basic_clean_nb++;
893            $clean_score  += $2;
894            $clean_time   += $3;
895            $clean_volume += $4;
896            if ( defined $minmax ) {
897                if ( $2 < $min_clean_score )
898		  {$found{'min_clean_score'}=1;
899		  $min_clean_score   = $2;
900		}
901                if ( $2 > $max_clean_score )
902		 {$found{'max_clean_score'}=1;
903		 $max_clean_score   = $2;
904		}
905                if ( $3 < $min_clean_time )
906		  {$found{'min_clean_time'}=1;
907		  $min_clean_time    = $3;
908		}
909                if ( $3 > $max_clean_time )
910		  {$found{'max_clean_time'}=1;
911		  $max_clean_time    = $3;
912		}
913                if ( $4 < $min_clean_msgsize )
914		  {$found{'min_clean_msgsize'}=1;
915		  $min_clean_msgsize =$4;
916		}
917                if ( $4 > $max_clean_msgsize )
918		  {$found{'max_clean_msgsize'}=1;
919		  $max_clean_msgsize =$4;
920		}
921            }
922            next;
923        }
924
925        #SPAM FOUND
926#if ($line =~ /^\s*[a-zA-Z]{3}\s+\d+\s+\d+:\d+:\d+\s+\S+\s+spamd\[(\d+)\]:\s+identified\s+spam\s*\(([^\/]+)\/[^\)]+\)\s+for\s+\S+\d+\s+in\s+(\S+)\s+seconds,\s+(\d+)\s+bytes\./)
927        if ( $line =~ /$Defs{'spamd_spam'}/ ) {
928            if ( defined $spamd_pid{$1} ) {
929                $spamd_table{ $spamd_pid{$1} } = "spam";
930
931                #print "spamd_table {".$spamd_pid{$1}."} is spam\n";
932                delete( $spamd_pid{$1} );
933            }
934            else {
935                if ($debug) {
936                    print $html_tags{'starttag'}
937                      . $html_tags{'br'}
938                      . $html_tags{'starttag'}
939                      . $html_tags{'b'};
940                    print "CRITICAL : spamd sent an answer for a message it did not receive? pid $1";
941                    print $html_tags{'endtag'} . $html_tags{'b'} . "\n";
942                }
943                if ($Spamd){
944                  $spam{$1}++;
945                }
946            }
947            $basic_spam_nb++;
948            $spam_score  += $2;
949            $spam_time   += $3;
950            $spam_volume += $4;
951            if ( defined $minmax ) {
952                if ( $2 < $min_spam_score )
953		  {$found{'min_spam_score'}=1;
954		   $min_spam_score   = $2;
955		}
956                if ( $2 > $max_spam_score )
957		  {$found{'max_spam_score'}=1;
958		  $max_spam_score   = $2;
959		}
960                if ( $3 < $min_spam_time )
961		  {$found{'min_spam_time'}=1;
962		   $min_spam_time    = $3;
963		}
964                if ( $3 > $max_spam_time )
965		  {$found{'max_spam_time'}=1;
966		   $max_spam_time    = $3;
967		}
968                if ( $4 < $min_spam_msgsize )
969		  {$found{'min_spam_msgsize'}=1;
970		   $min_spam_msgsize = $4;
971		}
972                if ( $4 > $max_spam_msgsize )
973		  {$found{'max_spam_msgsize'}=1;
974		  $max_spam_msgsize = $4;
975		}
976            }
977            next;
978        }
979    }
980
981    #We are in a non-existent case!
982    print STDERR
983"WARNING, a piece of the program that shouldnt be run was reached!\nInvestigate!\n";
984}
985
# Invert %spam (key => number of spams counted for that key) into %stats
# (number of spams => list of keys having exactly that count), so the
# "top recipients" report below can walk the counts in numeric order.
my %stats;
push @{ $stats{ $spam{$_} } }, $_ for keys %spam;
990
# General statistics report (skipped entirely when $nogeneral is set).
#
# Prints, in this order: the optional date range, the number of processed
# messages with the spam/clean split, the average analysis times, the average
# scores and the total volumes.  When $minmax is defined, the min/max values
# recorded by the parsing loop are appended; a value whose $found{...} flag
# was never set is printed as an empty string.
#
# Side effects kept from the historical code: $spam_percent and
# $clean_percent are assigned, $spam_volume/$clean_volume and the
# min/max message sizes are overwritten with what unify() returns, and
# min/max globals without a $found flag are blanked.
unless ($nogeneral) {

    # Tag sequences wrapped around every line of the report.
    my $br       = $html_tags{'starttag'} . $html_tags{'br'};
    my $bold_on  = $html_tags{'starttag'} . $html_tags{'b'};
    my $bold_off = $html_tags{'endtag'} . $html_tags{'b'};

    # Starts a report line: line break, left-aligned 40-column label and the
    # opening bold tag for the value that the caller prints next.
    my $start_line = sub {
        my ($label) = @_;
        print $br;
        printf( "%-40s:", $label );
        print $bold_on;
    };

    # %spam and %clean hold per-key message counters; add them up.
    my $nb_spam  = 0;
    my $nb_clean = 0;
    $nb_spam  += $_ for values %spam;
    $nb_clean += $_ for values %clean;
    my $nb_total = $nb_spam + $nb_clean;

    # Date range header, printed only when $firstdate is 1.
    if ( $firstdate == 1 ) {
        print $br;
        print "Statistic from $firstdate_d to $lastdate_d\n";
    }

    # Message counts.  Plain arithmetic is used throughout this block: the
    # former eval(EXPR) calls stringified each result and recompiled it as
    # Perl source, which bought nothing and could only lose precision.
    print $br . $br;
    print "Total number of emails processed by the spam filter : "
      . $bold_on
      . $nb_total
      . $bold_off . "\n";

    print $br;
    print $bold_on;
    if ( $nb_total > 0 ) {
        $spam_percent = 100 * $nb_spam / $nb_total;
        printf( "%-40s:%10d (%6.2f%%)\n",
            "Number of spams", $nb_spam, $spam_percent );
    }
    else {
        printf( "%-40s:%10s\n", "Number of spams", "n/a" );
    }
    print $bold_off;

    print $br;
    print $bold_on;
    if ( $nb_total > 0 ) {
        $clean_percent = 100 * $nb_clean / $nb_total;
        printf( "%-40s:%10d (%6.2f%%)\n",
            "Number of clean messages", $nb_clean, $clean_percent );
    }
    else {
        printf( "%-40s:%10s\n", "Number of clean messages", "n/a" );
    }
    print $bold_off;

    # Average analysis times.  The $basic_*_nb counters count spamd verdict
    # lines, so they are the divisors here rather than the per-key totals.
    $start_line->("Average message analysis time");
    if ( $basic_spam_nb + $basic_clean_nb > 0 ) {
        printf "%10.2f",
          ( $spam_time + $clean_time ) / ( $basic_spam_nb + $basic_clean_nb );
    }
    else {
        print "n/a";
    }
    print $bold_off;
    print " seconds\n";

    $start_line->("Average spam analysis time");
    if ( $basic_spam_nb > 0 ) {
        printf "%10.2f", $spam_time / $basic_spam_nb;
    }
    else {
        print "n/a";
    }
    print $bold_off;
    print " seconds";
    $min_spam_time = "" unless defined $found{'min_spam_time'};
    $max_spam_time = "" unless defined $found{'max_spam_time'};
    print "\n" . $br . $html_tags{'vspace'}
      . "\t(min spam time = $min_spam_time, max spam time = $max_spam_time)"
      if defined $minmax;
    print "\n";

    $start_line->("Average clean message analysis time");
    if ( $basic_clean_nb > 0 ) {
        printf "%10.2f", $clean_time / $basic_clean_nb;
    }
    else {
        print "n/a";
    }
    print $bold_off;
    print " seconds";
    $min_clean_time = "" unless defined $found{'min_clean_time'};
    $max_clean_time = "" unless defined $found{'max_clean_time'};
    print "\n" . $br . $html_tags{'vspace'}
      . "\t(min clean time = $min_clean_time, max clean time = $max_clean_time)"
      if defined $minmax;
    print "\n";

    # Average scores.  A message with several recipients counts only once
    # in these averages.
    $start_line->("Average message score");
    if ( $basic_clean_nb + $basic_spam_nb > 0 ) {
        printf "%10.2f",
          ( $spam_score + $clean_score ) / ( $basic_clean_nb + $basic_spam_nb );
    }
    else {
        print "n/a";
    }
    print $bold_off . "\n";

    $start_line->("Average spam score");
    if ( $basic_spam_nb > 0 ) {
        printf "%10.2f", $spam_score / $basic_spam_nb;
    }
    else {
        print "n/a";
    }
    $min_spam_score = "" unless defined $found{'min_spam_score'};
    $max_spam_score = "" unless defined $found{'max_spam_score'};
    print $bold_off;
    print "\n" . $br . $html_tags{'vspace'}
      . "\t\t(min spam score = $min_spam_score, max spam score = $max_spam_score)"
      if defined $minmax;
    print "\n";

    $start_line->("Average clean message score");
    if ($basic_clean_nb) {
        printf "%10.2f", $clean_score / $basic_clean_nb;
    }
    else {
        print "n/a";
    }
    print $bold_off;
    $min_clean_score = "" unless defined $found{'min_clean_score'};
    $max_clean_score = "" unless defined $found{'max_clean_score'};
    print "\n" . $br . $html_tags{'vspace'}
      . "\t\t(min clean score = $min_clean_score, max clean score = $max_clean_score)"
      if defined $minmax;
    print "\n";

    # Volumes.  unify() is defined elsewhere in the file and returns a
    # (value, unit) pair; presumably it rescales a byte count to a larger
    # unit -- TODO confirm against its definition.
    # NOTE(review): unlike the time/score lines above, the two "size" lines
    # never print a closing ")".  The text is left byte-identical in case a
    # downstream parser depends on it; confirm and fix if not.
    my $unit = "bytes";
    ( $spam_volume, $unit ) = unify( $spam_volume, $unit );

    $start_line->("Total spam volume");
    printf "%10d ", $spam_volume;
    print $bold_off . $unit;
    if ( defined $minmax ) {
        my $unitmin = "bytes";
        my $unitmax = "bytes";
        ( $min_spam_msgsize, $unitmin ) = unify( $min_spam_msgsize, $unitmin );
        ( $max_spam_msgsize, $unitmax ) = unify( $max_spam_msgsize, $unitmax );

        # No spam size was ever recorded: show neither a value nor a unit.
        unless ( defined $found{'min_spam_msgsize'} ) {
            $min_spam_msgsize = "";
            $unitmin          = "";
        }
        unless ( defined $found{'max_spam_msgsize'} ) {
            $max_spam_msgsize = "";
            $unitmax          = "";
        }
        print "\n" . $br . $html_tags{'vspace'} . "\t(min spam size = ";
        printf( "%2d", $min_spam_msgsize )
          if defined $found{'min_spam_msgsize'};
        print " $unitmin, max spam size = ";
        printf( "%2d", $max_spam_msgsize )
          if defined $found{'max_spam_msgsize'};
        print " $unitmax";
    }
    print "\n";

    $unit = "bytes";
    ( $clean_volume, $unit ) = unify( $clean_volume, $unit );

    $start_line->("Total clean volume");
    printf "%10d ", $clean_volume;
    print $bold_off . $unit;
    if ( defined $minmax ) {
        my $unitmin = "bytes";
        my $unitmax = "bytes";
        ( $min_clean_msgsize, $unitmin ) =
          unify( $min_clean_msgsize, $unitmin );
        ( $max_clean_msgsize, $unitmax ) =
          unify( $max_clean_msgsize, $unitmax );

        # No clean size was ever recorded: show neither a value nor a unit.
        unless ( defined $found{'min_clean_msgsize'} ) {
            $min_clean_msgsize = "";
            $unitmin           = "";
        }
        unless ( defined $found{'max_clean_msgsize'} ) {
            $max_clean_msgsize = "";
            $unitmax           = "";
        }
        print "\n" . $br . $html_tags{'vspace'} . "\t(min clean size = ";
        printf( "%2d", $min_clean_msgsize )
          if defined $found{'min_clean_msgsize'};
        print " $unitmin, max clean size = ";
        printf( "%2d", $max_clean_msgsize )
          if defined $found{'max_clean_msgsize'};
        print " $unitmax";
    }
    print "\n";
}
1192
# Most-spammed recipients, printed only when $number is true.
# Walks the spam counts in descending numeric order and lists every key that
# reached each count.  $number is used as a countdown and is only checked
# between counts, so all keys tied on the last printed count are still
# listed even if that exceeds the requested total.
if ($number) {
    my $line_break = $html_tags{'starttag'} . $html_tags{'br'};

    print $line_break;
    print $line_break;
    print "Recipients with highest number of spams : (top $number)\n";

    my @counts_desc = sort { $b <=> $a } keys %stats;
  COUNT:
    for my $count (@counts_desc) {
        last COUNT if $number <= 0;

        print $line_break;
        print "$count spams : \n";
        for my $recipient ( @{ $stats{$count} } ) {
            print $line_break . $html_tags{'vspace'};
            print "\t$recipient\n";
            --$number;
        }
    }
}
1209
# Sanity notice: printed when no line at all was counted as correct, or when
# the incorrect/correct line ratio is above 0.1.  The zero test comes first
# and short-circuits, so the division never sees a zero divisor.
my $input_looks_noisy =
  $correct_lines == 0 || $incorrect_lines / $correct_lines > 0.1;

if ($input_looks_noisy) {
    my $line_break = $html_tags{'starttag'} . $html_tags{'br'};
    print $line_break;
    print $line_break;
    print "INFO: It seems at least one input file contains other things that {exim/postfix} or spamd lines!\n";
}
1218
# Emit the closing "body" and "html" tag sequences from %html_tags, in that
# order, one print per tag.
for my $closing_tag (qw(body html)) {
    print $html_tags{'endtag'} . $html_tags{$closing_tag};
}
1221