1#!/usr/local/bin/perl -w -w
2#
3# amavis-stats -- generate rrds from amavis log output
4#
5# Copyright (C) 2003, Mark Lawrence (nomad@null.net)
6#
7# This program is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation; either version 2 of the License, or
10# (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License with
18# the Debian GNU/Linux distribution in file /usr/share/common-licenses/GPL;
19# if not, write to the Free Software Foundation, Inc., 59 Temple Place,
20# Suite 330, Boston, MA  02111-1307  USA
21#
22# On Debian systems, the complete text of the GNU General Public
23# License, version 2, can be found in /usr/share/common-licenses/GPL-2.
24
25
26# ########################################################################
27# Dependencies
28# ########################################################################
29use strict;
30use warnings;
31use diagnostics;
32use Getopt::Std;
33use Time::localtime;
34use Time::Local;
35use RRDs;
36use Fcntl ':flock';
37use POSIX qw(strftime);
38use POSIX qw(locale_h);
39
40
41# ########################################################################
42# Globals
43# ########################################################################
44our (
45    $me,         # this program name
46    $version,    # this program version
47    $debug,      # result of the -d flag
48    $verbose,    # result of the -v flag
49    $locale,     # use setlocale to determine language of dates?
50    $pkg,        # name of this package (usually amavis-stats)
51    $statedir,   # location of rrd files
52    $lockfile,   # lock file to prevent more than one invocation at a time
53    $logfile,    # input log file on command line
54    $statefile,  # between invocation status file
55    $namesfile,  # mappings of IDs to virus names
56    $countfile,  # mappings of IDs to virus occurences
57    $seenfile,   # mappings of IDs to virus first/last seen times
58    $rrdstep,    # rrd step size
59    $spos,       # start position of input file this run
60    $pos,        # current position in input file
61    $eof,        # position of end of input file
62    $line,       # string containing current line
63    $epoch,      # seconds since 1970
64    $lastepoch,  # seconds since 1970, previous time around
65    $numv,       # number of virus seen, continually incrementing
66    $lastupdate, # epoch of last global rrd update
67    $year,       # current year
68    %months,     # locale-dependent hash of months-to-index
69    %rvid,       # in-memory mapping of virus names to IDs
70    %vnames,     # in-memory mapping of IDs to virus names
71    %occurence,  # in-memory mapping of IDs to counts/occurences
72    %firstseen,  # in-memory mapping of virus first seen times
73    %lastseen,   # in-memory mapping of virus last seen times
74    %spamsess,   # in-memory store of session ids (amavis process-count field)
75                 # used for SPAM-TAG
76    %opt         # command line options
77);
78
79
80# ########################################################################
81# Initial values & Constants
82# ########################################################################
83($me = $0) =~ s%.*/%%;    # get rid of the leading directory
84$version   = "0.1.12"; # this value is auto-updated by packing system
85$pkg       = "amavis-stats";
86$locale    = "C";
87$lockfile  = "/var/tmp/$pkg";
88$statedir  = "/usr/local/www/$pkg";
89$statefile = "$statedir/$pkg.state"; # last read position of the logfile
90$namesfile = "$statedir/$pkg.names"; # stores the virus name to id mappings
91$countfile = "$statedir/$pkg.count"; # per virus totals
92$seenfile  = "$statedir/$pkg.seen";  # first and last time() seen
93$rrdstep   = 300;
94
95
96# ########################################################################
97# Subroutines
98# ########################################################################
99
100#
101# Message about this program and how to use it
102#
103sub usage() {
104    print STDERR "usage: $0 [-hVqdrl] file\n";
105}
106
107sub help() {
108    usage();
109    print STDERR << "EOF";
110
111    Version: $version
112
113    This program generates virus infection statistics from amavis/syslog
114    log files. It is typically called from cron(8), but can also be used
115    from the command line when populating the databases with historical
116    data.
117
118    -h        : this (help) message
119    -v        : does nothing (legacy verbose option)
120    -q        : quiet mode - no output
121    -d        : print debugging messages to stderr
122    -r        : reset file pointer to 0, instead of starting at last position
123    -l        : take locale from the environment (instead of English)
124    -V        : display version and exit
125
126    examples:
127
128    Initial import of existing data:
129    amavis# $me /var/log/mail.info.2
130    amavis# $me -r /var/log/mail.info.1
131    amavis# $me -r /var/log/mail.info.0
132    amavis# $me -r /var/log/mail.info
133
134    Normal usage:
135    amavis# $me /var/log/amavis.log
136
137EOF
138
139}
140
141
142#
143# Command line options processing
144#
145sub init()
146{
147    dbg("$me version $version");
148
149    my $opt_string = 'hvqdf:lrV';
150    if (!getopts( "$opt_string", \%opt )) {
151        usage();
152        do_exit(1);
153    }
154
155    if ($opt{h}) {
156        help();
157        do_exit(1);
158    }
159
160    if ($opt{V}) {
161        print "$version\n";
162        do_exit(1);
163    }
164
165    $verbose = 1;
166    $verbose = 0 if ($opt{q});
167    $debug   = 1 if $opt{d};
168
169
170    if ($opt{f}) { # legacy way to specify input file
171        $logfile = $opt{f};
172        if ( ! -f $logfile ) {
173            do_exit(1, "file \"$logfile\" does not exist");
174        }
175
176    } elsif ($ARGV[0]) { # now expect file on command line
177        $logfile = $ARGV[0];
178        if ( ! -f $logfile ) {
179            do_exit(1, "file \"$logfile\" does not exist");
180        }
181
182    } else {
183        usage();
184        do_exit(1);
185    }
186
187    $year    = localtime->year() + 1900;
188
189    # build default (English?) hash of Month-to-Numbers
190    %months = (
191        "Jan" => "0", "Feb" => "1",  "Mar" => "2",  "Apr" => "3",
192        "May" => "4", "Jun" => "5",  "Jul" => "6",  "Aug" => "7",
193        "Sep" => "8", "Oct" => "9", "Nov" => "10", "Dec" => "11"
194    );
195
196    # Set up a locale-depenedent hash of Month-to-Numbers
197    if ($opt{l}) {
198        $locale = setlocale(LC_TIME);
199        dbg("locale is set to \"$locale\"");
200        for (0..11) {
201            my $tmp = strftime("%b", 0, 0, 0, 1, $_, 96);
202            dbg("$tmp");
203            $months{$tmp} = $_;
204        }
205    }
206
207}
208
209
210#
211# Make sure that only one copy is running at a time
212#
213sub semlock {
214    open (LOCKF, ">$lockfile") or do_exit(1, "Could not open $lockfile: $!");
215    unless (flock(LOCKF, LOCK_EX | LOCK_NB)) {
216        print_err("warning: Could not lock $lockfile: $!");
217        sleep 2;
218        unless (flock(LOCKF, LOCK_EX | LOCK_NB)) {
219            do_exit(1, "Could not lock $lockfile: $!");
220        }
221    }
222    print LOCKF "$$\n";
223    dbg("Have lock on $lockfile");
224}
225
226
227#
228# Undo our lock. This is only for the sake of completeness - all file
229# handles are closed (and locks lost) on program exit anyway.
230#
231sub semunlock {
232    close LOCKF;
233    if (unlink("$lockfile")) {
234        dbg("lock $lockfile removed");
235    }
236}
237
238
239#
240# For completeness, remove the lock before exiting, otherwise the lock
241# file remains untidily behind...
242#
243sub do_exit {
244    my ($code, $msg) = @_;
245    semunlock();
246
247    if ($code == 0) {
248        exit 0;
249    } else {
250        if (defined $msg) {
251            print STDERR "$me: $msg\n";
252        }
253        exit $code;
254    }
255}
256
257
258#
259# Load the values of the previous run into variables
260#
261sub loadState {
262    dbg("loadState()");
263    $spos = undef;
264
265    #
266    # Check that we have somewhere to save our status - Not much point
267    # in continuing otherwise.
268    #
269    if ((! -d "$statedir") or (! -w "$statedir")) {
270        do_exit(1,"$statedir does not exist or cannot be written to.");
271    }
272
273
274    #
275    # Grab the previous position reached in the log file, plus
276    # the total number of different viruses we have seen
277    #
278    if (-f "$statefile") {
279
280        dbg("opening file $statefile");
281
282        open (IN, "$statefile") or die "Could not open $statefile";
283        while (my $line = <IN>) {
284            if ($line =~ /^pos:\s*(\d+)/) {
285                $spos = $1;
286            }
287            elsif ($line =~ /^numv:\s*(\d+)/) {
288                $numv = $1;
289            }
290            elsif ($line =~ /^lastupdate:\s*(\d+)/) {
291                $lastupdate = $1;
292            }
293            elsif ($line =~ /^spamsess:\s*(.*)/) {
294                my @arr = split(/\s+/, $1);
295                foreach (@arr) {
296                    $spamsess{$_} = 1;
297                }
298            }
299        }
300        close IN;
301
302        dbg("opening file $namesfile");
303        open (IN, "$namesfile") or die "Could not open $namesfile";
304        while (my $line = <IN>) {
305            if ($line =~ /^(\d+)\s+(.*)/) {
306                my $id = $1;
307                my $name = $2;
308                if ($name =~ /^spam$/) { # from version 0.1.12 names changed
309                    $name = "Not-Delivered(SPAM)";
310                } elsif ($name =~ /^passed$/) {
311                    $name = "Passed";
312                } elsif ($name =~ /^banned$/) {
313                    $name = "Banned";
314                } elsif ($name =~ /^infected$/) {
315                    $name = "Infected";
316                }
317                $rvid{$name} = $id;
318                $vnames{$id} = $name;
319            }
320        }
321        close IN;
322
323        dbg("opening file $countfile");
324        open (IN, "$countfile") or die "Could not open $countfile";
325        while (my $line = <IN>) {
326            if ($line =~ /^(\d+)\s+(\d+)/) {
327                $occurence{$1} = $2;
328            }
329        }
330        close IN;
331
332        dbg("opening file $seenfile");
333        open (IN, "$seenfile") or die "Could not open $seenfile";
334        while (my $line = <IN>) {
335            if ($line =~ /^(\d+)\s+(\d+)\s+(\d+)/) {
336                $firstseen{$1} = $2;
337                $lastseen{$1}  = $3;
338            }
339        }
340        close IN;
341    }
342
343    #
344    # If we have not run before reset...
345    #
346    if (!defined $spos) {
347        msg("First Time Run");
348        $spos      = 0; # position into the log file
349        $numv     = 0; # number of virus types seen
350        $lastupdate = 0; # number of virus types seen
351    }
352
353    #
354    # If -r <file> on command line start at beginning of file
355    #
356    if ($opt{r}) {
357        $spos = 0;
358    }
359
360    if ($debug) {
361        dbg("start position: $spos numv: $numv lastupdate: $lastupdate");
362        my $tmp = "left over spam session ids: ";
363        foreach my $sid (keys %spamsess) {
364            $tmp = "$tmp $sid";
365        }
366        dbg($tmp);
367        while ( my ($id,$count) = each (%occurence)) {
368            my $name = $vnames{$id};
369            dbg("#$id: $name, seen $count times");
370        }
371    }
372}
373
374
375sub saveState {
376
377    #
378    # Reset the value of spos and save it for the next time we are called
379    #
380    dbg("saveState(): eof: $eof numv: $numv lastupdate: $lastupdate");
381
382    open (OUT, ">$statefile") or die "Could not write to $statefile";
383    print OUT "pos: $pos\n";
384    print OUT "numv: $numv\n";
385    print OUT "lastupdate: $lastupdate\n";
386    print OUT "LC_TIME: $locale\n";
387    print OUT "spamsess: ";
388    foreach my $sid (keys %spamsess) {
389        print OUT "$sid ";
390    }
391    print OUT "\n";
392    close OUT;
393
394    open (NAMES, ">$namesfile") or die "Could not write to $namesfile";
395    open (COUNT, ">$countfile") or die "Could not write to $countfile";
396    open (SEEN, ">$seenfile") or die "Could not write to $seenfile";
397
398    while (my ($id, $virus) = each (%vnames)) {
399        print NAMES "$id $virus\n";
400        print COUNT "$id $occurence{$id}\n";
401        print SEEN "$id $firstseen{$id} $lastseen{$id}\n";
402    }
403    close NAMES;
404    close COUNT;
405    close SEEN;
406
407}
408
409
410#
411# Take a virus/name, and return an ID. Create the ID if the virus doesn't
412# already exist
413#
414sub getVid {
415    my ($virus, $epoch) = @_;
416    my $id = 0;
417
418    if (!exists $rvid{$virus}) {
419        $numv++;
420        $id = $numv;
421
422        msg("New id (#$id, $virus) seen at $epoch");
423
424        $vnames{$id}    = $virus;
425        $rvid{$virus}   = $id;
426        $firstseen{$id} = $epoch;
427
428    } else {
429        $id = $rvid{$virus};
430    }
431
432    return $id;
433}
434
435
436#
437# Increment by one the number of times we have seen this virus. Also
438# record the time we last saw it.
439#
440sub upCount {
441    my ($id, $epoch) = @_;
442
443    if (!exists $occurence{$id}) {
444        $occurence{$id} = 1;
445
446    } else {
447        $occurence{$id}++;
448    }
449
450    $lastseen{$id} = $epoch;
451}
452
453
454
455#
456#
457#
458sub classify {
459    my $id;
460    my ($mon, $day, $time, $host, $prog, $sid) = split(/\s+/, $line);
461
462    #
463    # Check that the environment locale matches what is being written
464    # by syslog
465    #
466    my $tmp = $months{"$mon"};
467    if (!defined($tmp)) {
468        do_exit(1, "Unknown month \"$mon\" (current locale is \"$locale\")");
469    }
470
471    $mon = $tmp;
472
473    #
474    # Generate a seconds-since-1970 epoch and formated date string
475    #
476    my ($hour,$min,$sec) = split (/:/, $time);
477    $epoch = timelocal($sec, $min, $hour, $day, $mon, $year-1900);
478
479    if ($epoch > time()) {
480        # date is last actually last year
481        $epoch = timelocal($sec, $min, $hour, $day, $mon, $year-1901);
482    }
483    if (!defined $lastepoch) {
484        $lastepoch = $epoch - 1;
485    }
486
487    my $isodate = sprintf("%4u-%02u-%02u", $year, $mon+1, $day) .
488                  " $hour:$min:$sec";
489    dbg("line at $isodate epoch: $epoch");
490
491    #
492    # Update all rrds if we are more than $rrdstep seconds since the last
493    # update
494    #
495    if ($lastupdate == 0) {
496        $lastupdate = int($epoch / $rrdstep) * $rrdstep;
497        dbg("First update: $lastupdate");
498    }
499
500    my $count = int(($epoch - $lastupdate) / $rrdstep);
501    for (my $i = 1; $i <= $count; $i++) {
502        $lastupdate = $lastupdate + $rrdstep;
503        foreach $id (keys %vnames) {
504            updateRRD($id, $lastupdate);
505        }
506    }
507
508    #
509    # If this line contains a SPAM tag of some sort don't classify it
510    # now, but record the fact and move on to the next line
511    #
512    if ($line =~ /\sSPAM(-TAG)*,\s/) {
513        dbg("SPAM: $epoch: $isodate $sid");
514        $spamsess{$sid} = 1;
515        return;
516    }
517
518    #
519    # Save the stats according to the classification of the email
520    #
521    if ($line =~ /Passed/) {
522
523        #
524        # Update the overall passed emails statistics
525        #
526        if (defined $spamsess{$sid}) {
527            dbg("Passed(SPAM) $epoch: $isodate");
528            $id = getVid("Passed(SPAM)", $epoch);
529        } else {
530            dbg("Passed: $epoch: $isodate");
531            $id = getVid("Passed", $epoch);
532        }
533        upCount($id, $epoch);
534
535    } elsif ($line =~ /\sNot-Delivered,\s/) {
536
537        #
538        # Update the overall not delivered statistics
539        #
540        if (defined $spamsess{$sid}) {
541            dbg("Not-Delivered(SPAM): $epoch: $isodate");
542            $id = getVid("Not-Delivered(SPAM)", $epoch);
543        } else {
544            dbg("Not-Delivered: $epoch: $isodate");
545            $id = getVid("Not-Delivered", $epoch);
546        }
547        upCount($id, $epoch);
548
549    } elsif ($line =~ /\sBANNED\sname\/type\s/) {
550        dbg("Banned $epoch: $isodate");
551
552        #
553        # Update the overall banned emails statistics
554        #
555        $id = getVid("Banned", $epoch);
556        upCount($id, $epoch);
557
558    } elsif ($line =~ /\sINFECTED\s+\((.*?[\(.*?\)]*)\)/   or # amavisd-new
559             $line =~ /\sPossible virus.*->\s+'(.*?)'/     or # amavis-ng
560             $line =~ /.*parts\/\d+:\s+(.*?)\s+FOUND/      or # amavis-ng
561             $line =~ /\squarantine[:|d;].*?virus='(.*?)'/ or # amavisd
562             $line =~ /.*part-\d+:\s+(.*?)\s+FOUND/ ) {         # clamav
563        my $viruses = $1;
564        dbg("viruses: \"$viruses\" at $epoch: $isodate");
565
566        #
567        # Update the overall infected emails statistics
568        #
569        $id = getVid("Infected", $epoch);
570        upCount($id, $epoch);
571
572        #
573        # What is this specific nasty little bugger(s) called?
574        # Update his statistics as well.
575        #
576        my @list = split(/,+\s+/, $viruses);
577        my %seen;
578        foreach my $virus (@list) {
579            if (!$seen{$virus}) {
580                $id = getVid($virus, $epoch);
581                upCount($id, $epoch);
582                $seen{$virus} = 1;
583            }
584        }
585    }
586
587    #
588    # Since we have classified this session-id we need to remove it
589    # from the %spamsess hash
590    delete $spamsess{$sid};
591}
592
593
594
595
596#
597#
598#
599sub parseFile {
600
601    my ($fname, $start, $stop) = @_;
602    dbg("parseFile ($fname, $start, $stop)");
603
604    #
605    # Open up the file we need to parse
606    #
607    unless (open (LOGFILE, $fname)) {
608        do_exit(1, "Could not open file $fname: $!");
609    }
610    unless (seek (LOGFILE, $start, 0)) {
611        do_exit(1, "Could not seek to $start in file $fname: $!");
612    }
613
614    #
615    # Loop each line until the current end of file
616    #
617    $pos = $start;
618    my $lineid = 0;
619    while ($pos < $stop and $line = <LOGFILE>)
620    {
621        $lineid++;
622        $lastepoch = $epoch;
623
624        if ($line =~ /amavis.*?\[\d+\]:/) {
625            classify();
626        }
627        #
628        # Where did we get to in the file?
629        #
630        $pos = tell(LOGFILE);
631
632        #
633        # Save the current statistics every 1000 lines. This way
634        # if the program dies we don't have to start again from the
635        # beginning each time. Also good for monitoring the graphs
636        # to see where we are up to.
637        #
638        if (!($lineid % 1000)) {
639            saveState();;
640        }
641
642    }
643    close(LOGFILE);
644
645}
646
647
648#
649# Find the previous (rotated) log file and parse that according to
650# our last position
651#
652sub parseRotFile {
653    my ($logfile, $spos) = @_;
654
655    my $now = time();
656    my $today     = localtime($now);
657    my $yesterday = localtime($now - 60*60*24);
658
659    $today = sprintf("%4u%02u%02u", $today->year + 1900,
660                                    $today->mon + 1,
661                                    $today->mday);
662
663    $yesterday = sprintf("%4u%02u%02u", $yesterday->year + 1900,
664                                        $yesterday->mon + 1,
665                                        $yesterday->mday);
666
667    my $rotlogfile = undef;
668
669    if (-f "$logfile.0") {
670        $rotlogfile = $logfile . ".0";
671    } elsif (-f "$logfile.1") {
672        $rotlogfile = $logfile . ".1";
673    } elsif (-f "$logfile.01") {
674        $rotlogfile = $logfile . ".01";
675    } elsif (-f "$logfile-$today") {
676        $rotlogfile = $logfile . "-$today";
677    } elsif (-f "$logfile-$yesterday") {
678        $rotlogfile = $logfile . "-$yesterday";
679    }
680
681    if (defined($rotlogfile)) {
682        parseFile ($rotlogfile, $spos, (stat $rotlogfile)[7]);
683    } else {
684        print_err("Could not open rotated logfile.");
685        print_err("  Tried extentions .0, .1, .01, -$today, -$yesterday");
686        do_exit(1);
687    }
688}
689
690
691sub createRRD {
692    my ($file, $epoch) = @_;
693    dbg("createRRD: $file, $epoch");
694
695    RRDs::create($file,
696                "--start", ($epoch - 1),
697                "--step", $rrdstep,
698                "DS:hits:COUNTER:".$rrdstep.":0:U",
699                "RRA:AVERAGE:0.5:1:300",
700                "RRA:AVERAGE:0.5:6:700",
701                "RRA:AVERAGE:0.5:24:775",
702                "RRA:AVERAGE:0.5:288:797",
703                "RRA:MAX:0.5:1:300",
704                "RRA:MAX:0.5:6:700",
705                "RRA:MAX:0.5:24:775",
706                "RRA:MAX:0.5:288:797"
707                );
708
709    my $err = RRDs::error;
710    if ($err) {
711        print_err("createRRD: $err");
712        return -1;
713    }
714
715    return 1;
716}
717
718
719sub updateRRD () {
720    my ($id, $epoch) = @_;
721    my $count        = $occurence{$id};
722    my $rrdfile      = "$statedir/$id.rrd";
723    my $err;
724    my $last;
725
726    if (! -f $rrdfile) {
727        if (! createRRD($rrdfile, $epoch - $rrdstep)) {
728            do_exit(1, "updateRRD: Can't create file $rrdfile: $!");
729        }
730        my $upd = ($epoch - $rrdstep) . ":0";
731        dbg("Update: $rrdfile at ", $epoch - $rrdstep, " count 0");
732        RRDs::update($rrdfile, $upd);
733    }
734
735    dbg("Update: $rrdfile at $epoch count $count");
736
737    $last = RRDs::last($rrdfile);
738    $err = RRDs::error;
739    if ($err) {
740        print_err("updateRRD: $err");
741        return -1;
742    }
743
744    #
745    # We sometimes get two hits in the same second. Check for that here
746    # and basically ignore it.
747    #
748    if ($epoch > $last) {
749        my $upd = $epoch . ":" . $count;
750        RRDs::update($rrdfile, $upd);
751
752        $err = RRDs::error;
753        if ($err) {
754            print_err("updateRRD: $err");
755            print_err("Attempted to update $rrdfile at $epoch count $count");
756            return -1;
757        }
758    }
759
760    return 1;
761}
762
763
764
765sub dbg {
766    print "$me: @_\n" if ($debug);
767}
768
769sub msg {
770    print "$me: @_\n" if ($verbose);
771}
772
773sub print_err
774{
775    print STDERR "$me: error: @_\n";
776}
777
778
779# ########################################################################
780# main() program
781# ########################################################################
782
783init();
784semlock();
785loadState();
786
787$eof = (stat $logfile)[7];
788
789if ($eof < $spos) {
790    #
791    # The log file has rotated under us, so do the rotated logfile first.
792    #
793    msg("Logfile \"$logfile\" appears to have rotated");
794    parseRotFile($logfile, $spos);
795    $spos = 0; # reset to the start of the file
796}
797
798parseFile ($logfile, $spos, $eof);
799saveState();
800semunlock();
801
802