1#!/usr/local/bin/perl -w -w 2# 3# amavis-stats -- generate rrds from amavis log output 4# 5# Copyright (C) 2003, Mark Lawrence (nomad@null.net) 6# 7# This program is free software; you can redistribute it and/or modify 8# it under the terms of the GNU General Public License as published by 9# the Free Software Foundation; either version 2 of the License, or 10# (at your option) any later version. 11# 12# This program is distributed in the hope that it will be useful, 13# but WITHOUT ANY WARRANTY; without even the implied warranty of 14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15# GNU General Public License for more details. 16# 17# You should have received a copy of the GNU General Public License with 18# the Debian GNU/Linux distribution in file /usr/share/common-licenses/GPL; 19# if not, write to the Free Software Foundation, Inc., 59 Temple Place, 20# Suite 330, Boston, MA 02111-1307 USA 21# 22# On Debian systems, the complete text of the GNU General Public 23# License, version 2, can be found in /usr/share/common-licenses/GPL-2. 24 25 26# ######################################################################## 27# Dependencies 28# ######################################################################## 29use strict; 30use warnings; 31use diagnostics; 32use Getopt::Std; 33use Time::localtime; 34use Time::Local; 35use RRDs; 36use Fcntl ':flock'; 37use POSIX qw(strftime); 38use POSIX qw(locale_h); 39 40 41# ######################################################################## 42# Globals 43# ######################################################################## 44our ( 45 $me, # this program name 46 $version, # this program version 47 $debug, # result of the -d flag 48 $verbose, # result of the -v flag 49 $locale, # use setlocale to determine language of dates? 50 $pkg, # name of this package (usually amavis-stats) 51 $statedir, # location of rrd files 52 $lockfile, # lock file to prevent more than one invocation at a time 53 $logfile, # input log file on command line 54 $statefile, # between invocation status file 55 $namesfile, # mappings of IDs to virus names 56 $countfile, # mappings of IDs to virus occurences 57 $seenfile, # mappings of IDs to virus first/last seen times 58 $rrdstep, # rrd step size 59 $spos, # start position of input file this run 60 $pos, # current position in input file 61 $eof, # position of end of input file 62 $line, # string containing current line 63 $epoch, # seconds since 1970 64 $lastepoch, # seconds since 1970, previous time around 65 $numv, # number of virus seen, continually incrementing 66 $lastupdate, # epoch of last global rrd update 67 $year, # current year 68 %months, # locale-dependent hash of months-to-index 69 %rvid, # in-memory mapping of virus names to IDs 70 %vnames, # in-memory mapping of IDs to virus names 71 %occurence, # in-memory mapping of IDs to counts/occurences 72 %firstseen, # in-memory mapping of virus first seen times 73 %lastseen, # in-memory mapping of virus last seen times 74 %spamsess, # in-memory store of session ids (amavis process-count field) 75 # used for SPAM-TAG 76 %opt # command line options 77); 78 79 80# ######################################################################## 81# Initial values & Constants 82# ######################################################################## 83($me = $0) =~ s%.*/%%; # get rid of the leading directory 84$version = "0.1.12"; # this value is auto-updated by packing system 85$pkg = "amavis-stats"; 86$locale = "C"; 87$lockfile = "/var/tmp/$pkg"; 88$statedir = "/usr/local/www/$pkg"; 89$statefile = "$statedir/$pkg.state"; # last read position of the logfile 90$namesfile = "$statedir/$pkg.names"; # stores the virus name to id mappings 91$countfile = "$statedir/$pkg.count"; # per virus totals 92$seenfile = "$statedir/$pkg.seen"; # first and last time() seen 93$rrdstep = 300; 94 95 96# ######################################################################## 97# Subroutines 98# ######################################################################## 99 100# 101# Message about this program and how to use it 102# 103sub usage() { 104 print STDERR "usage: $0 [-hVqdrl] file\n"; 105} 106 107sub help() { 108 usage(); 109 print STDERR << "EOF"; 110 111 Version: $version 112 113 This program generates virus infection statistics from amavis/syslog 114 log files. It is typically called from cron(8), but can also be used 115 from the command line when populating the databases with historical 116 data. 117 118 -h : this (help) message 119 -v : does nothing (legacy verbose option) 120 -q : quiet mode - no output 121 -d : print debugging messages to stderr 122 -r : reset file pointer to 0, instead of starting at last position 123 -l : take locale from the environment (instead of English) 124 -V : display version and exit 125 126 examples: 127 128 Initial import of existing data: 129 amavis# $me /var/log/mail.info.2 130 amavis# $me -r /var/log/mail.info.1 131 amavis# $me -r /var/log/mail.info.0 132 amavis# $me -r /var/log/mail.info 133 134 Normal usage: 135 amavis# $me /var/log/amavis.log 136 137EOF 138 139} 140 141 142# 143# Command line options processing 144# 145sub init() 146{ 147 dbg("$me version $version"); 148 149 my $opt_string = 'hvqdf:lrV'; 150 if (!getopts( "$opt_string", \%opt )) { 151 usage(); 152 do_exit(1); 153 } 154 155 if ($opt{h}) { 156 help(); 157 do_exit(1); 158 } 159 160 if ($opt{V}) { 161 print "$version\n"; 162 do_exit(1); 163 } 164 165 $verbose = 1; 166 $verbose = 0 if ($opt{q}); 167 $debug = 1 if $opt{d}; 168 169 170 if ($opt{f}) { # legacy way to specify input file 171 $logfile = $opt{f}; 172 if ( ! -f $logfile ) { 173 do_exit(1, "file \"$logfile\" does not exist"); 174 } 175 176 } elsif ($ARGV[0]) { # now expect file on command line 177 $logfile = $ARGV[0]; 178 if ( ! -f $logfile ) { 179 do_exit(1, "file \"$logfile\" does not exist"); 180 } 181 182 } else { 183 usage(); 184 do_exit(1); 185 } 186 187 $year = localtime->year() + 1900; 188 189 # build default (English?) hash of Month-to-Numbers 190 %months = ( 191 "Jan" => "0", "Feb" => "1", "Mar" => "2", "Apr" => "3", 192 "May" => "4", "Jun" => "5", "Jul" => "6", "Aug" => "7", 193 "Sep" => "8", "Oct" => "9", "Nov" => "10", "Dec" => "11" 194 ); 195 196 # Set up a locale-depenedent hash of Month-to-Numbers 197 if ($opt{l}) { 198 $locale = setlocale(LC_TIME); 199 dbg("locale is set to \"$locale\""); 200 for (0..11) { 201 my $tmp = strftime("%b", 0, 0, 0, 1, $_, 96); 202 dbg("$tmp"); 203 $months{$tmp} = $_; 204 } 205 } 206 207} 208 209 210# 211# Make sure that only one copy is running at a time 212# 213sub semlock { 214 open (LOCKF, ">$lockfile") or do_exit(1, "Could not open $lockfile: $!"); 215 unless (flock(LOCKF, LOCK_EX | LOCK_NB)) { 216 print_err("warning: Could not lock $lockfile: $!"); 217 sleep 2; 218 unless (flock(LOCKF, LOCK_EX | LOCK_NB)) { 219 do_exit(1, "Could not lock $lockfile: $!"); 220 } 221 } 222 print LOCKF "$$\n"; 223 dbg("Have lock on $lockfile"); 224} 225 226 227# 228# Undo our lock. This is only for the sake of completeness - all file 229# handles are closed (and locks lost) on program exit anyway. 230# 231sub semunlock { 232 close LOCKF; 233 if (unlink("$lockfile")) { 234 dbg("lock $lockfile removed"); 235 } 236} 237 238 239# 240# For completeness, remove the lock before exiting, otherwise the lock 241# file remains untidily behind... 242# 243sub do_exit { 244 my ($code, $msg) = @_; 245 semunlock(); 246 247 if ($code == 0) { 248 exit 0; 249 } else { 250 if (defined $msg) { 251 print STDERR "$me: $msg\n"; 252 } 253 exit $code; 254 } 255} 256 257 258# 259# Load the values of the previous run into variables 260# 261sub loadState { 262 dbg("loadState()"); 263 $spos = undef; 264 265 # 266 # Check that we have somewhere to save our status - Not much point 267 # in continuing otherwise. 268 # 269 if ((! -d "$statedir") or (! -w "$statedir")) { 270 do_exit(1,"$statedir does not exist or cannot be written to."); 271 } 272 273 274 # 275 # Grab the previous position reached in the log file, plus 276 # the total number of different viruses we have seen 277 # 278 if (-f "$statefile") { 279 280 dbg("opening file $statefile"); 281 282 open (IN, "$statefile") or die "Could not open $statefile"; 283 while (my $line = <IN>) { 284 if ($line =~ /^pos:\s*(\d+)/) { 285 $spos = $1; 286 } 287 elsif ($line =~ /^numv:\s*(\d+)/) { 288 $numv = $1; 289 } 290 elsif ($line =~ /^lastupdate:\s*(\d+)/) { 291 $lastupdate = $1; 292 } 293 elsif ($line =~ /^spamsess:\s*(.*)/) { 294 my @arr = split(/\s+/, $1); 295 foreach (@arr) { 296 $spamsess{$_} = 1; 297 } 298 } 299 } 300 close IN; 301 302 dbg("opening file $namesfile"); 303 open (IN, "$namesfile") or die "Could not open $namesfile"; 304 while (my $line = <IN>) { 305 if ($line =~ /^(\d+)\s+(.*)/) { 306 my $id = $1; 307 my $name = $2; 308 if ($name =~ /^spam$/) { # from version 0.1.12 names changed 309 $name = "Not-Delivered(SPAM)"; 310 } elsif ($name =~ /^passed$/) { 311 $name = "Passed"; 312 } elsif ($name =~ /^banned$/) { 313 $name = "Banned"; 314 } elsif ($name =~ /^infected$/) { 315 $name = "Infected"; 316 } 317 $rvid{$name} = $id; 318 $vnames{$id} = $name; 319 } 320 } 321 close IN; 322 323 dbg("opening file $countfile"); 324 open (IN, "$countfile") or die "Could not open $countfile"; 325 while (my $line = <IN>) { 326 if ($line =~ /^(\d+)\s+(\d+)/) { 327 $occurence{$1} = $2; 328 } 329 } 330 close IN; 331 332 dbg("opening file $seenfile"); 333 open (IN, "$seenfile") or die "Could not open $seenfile"; 334 while (my $line = <IN>) { 335 if ($line =~ /^(\d+)\s+(\d+)\s+(\d+)/) { 336 $firstseen{$1} = $2; 337 $lastseen{$1} = $3; 338 } 339 } 340 close IN; 341 } 342 343 # 344 # If we have not run before reset... 345 # 346 if (!defined $spos) { 347 msg("First Time Run"); 348 $spos = 0; # position into the log file 349 $numv = 0; # number of virus types seen 350 $lastupdate = 0; # number of virus types seen 351 } 352 353 # 354 # If -r <file> on command line start at beginning of file 355 # 356 if ($opt{r}) { 357 $spos = 0; 358 } 359 360 if ($debug) { 361 dbg("start position: $spos numv: $numv lastupdate: $lastupdate"); 362 my $tmp = "left over spam session ids: "; 363 foreach my $sid (keys %spamsess) { 364 $tmp = "$tmp $sid"; 365 } 366 dbg($tmp); 367 while ( my ($id,$count) = each (%occurence)) { 368 my $name = $vnames{$id}; 369 dbg("#$id: $name, seen $count times"); 370 } 371 } 372} 373 374 375sub saveState { 376 377 # 378 # Reset the value of spos and save it for the next time we are called 379 # 380 dbg("saveState(): eof: $eof numv: $numv lastupdate: $lastupdate"); 381 382 open (OUT, ">$statefile") or die "Could not write to $statefile"; 383 print OUT "pos: $pos\n"; 384 print OUT "numv: $numv\n"; 385 print OUT "lastupdate: $lastupdate\n"; 386 print OUT "LC_TIME: $locale\n"; 387 print OUT "spamsess: "; 388 foreach my $sid (keys %spamsess) { 389 print OUT "$sid "; 390 } 391 print OUT "\n"; 392 close OUT; 393 394 open (NAMES, ">$namesfile") or die "Could not write to $namesfile"; 395 open (COUNT, ">$countfile") or die "Could not write to $countfile"; 396 open (SEEN, ">$seenfile") or die "Could not write to $seenfile"; 397 398 while (my ($id, $virus) = each (%vnames)) { 399 print NAMES "$id $virus\n"; 400 print COUNT "$id $occurence{$id}\n"; 401 print SEEN "$id $firstseen{$id} $lastseen{$id}\n"; 402 } 403 close NAMES; 404 close COUNT; 405 close SEEN; 406 407} 408 409 410# 411# Take a virus/name, and return an ID. Create the ID if the virus doesn't 412# already exist 413# 414sub getVid { 415 my ($virus, $epoch) = @_; 416 my $id = 0; 417 418 if (!exists $rvid{$virus}) { 419 $numv++; 420 $id = $numv; 421 422 msg("New id (#$id, $virus) seen at $epoch"); 423 424 $vnames{$id} = $virus; 425 $rvid{$virus} = $id; 426 $firstseen{$id} = $epoch; 427 428 } else { 429 $id = $rvid{$virus}; 430 } 431 432 return $id; 433} 434 435 436# 437# Increment by one the number of times we have seen this virus. Also 438# record the time we last saw it. 439# 440sub upCount { 441 my ($id, $epoch) = @_; 442 443 if (!exists $occurence{$id}) { 444 $occurence{$id} = 1; 445 446 } else { 447 $occurence{$id}++; 448 } 449 450 $lastseen{$id} = $epoch; 451} 452 453 454 455# 456# 457# 458sub classify { 459 my $id; 460 my ($mon, $day, $time, $host, $prog, $sid) = split(/\s+/, $line); 461 462 # 463 # Check that the environment locale matches what is being written 464 # by syslog 465 # 466 my $tmp = $months{"$mon"}; 467 if (!defined($tmp)) { 468 do_exit(1, "Unknown month \"$mon\" (current locale is \"$locale\")"); 469 } 470 471 $mon = $tmp; 472 473 # 474 # Generate a seconds-since-1970 epoch and formated date string 475 # 476 my ($hour,$min,$sec) = split (/:/, $time); 477 $epoch = timelocal($sec, $min, $hour, $day, $mon, $year-1900); 478 479 if ($epoch > time()) { 480 # date is last actually last year 481 $epoch = timelocal($sec, $min, $hour, $day, $mon, $year-1901); 482 } 483 if (!defined $lastepoch) { 484 $lastepoch = $epoch - 1; 485 } 486 487 my $isodate = sprintf("%4u-%02u-%02u", $year, $mon+1, $day) . 488 " $hour:$min:$sec"; 489 dbg("line at $isodate epoch: $epoch"); 490 491 # 492 # Update all rrds if we are more than $rrdstep seconds since the last 493 # update 494 # 495 if ($lastupdate == 0) { 496 $lastupdate = int($epoch / $rrdstep) * $rrdstep; 497 dbg("First update: $lastupdate"); 498 } 499 500 my $count = int(($epoch - $lastupdate) / $rrdstep); 501 for (my $i = 1; $i <= $count; $i++) { 502 $lastupdate = $lastupdate + $rrdstep; 503 foreach $id (keys %vnames) { 504 updateRRD($id, $lastupdate); 505 } 506 } 507 508 # 509 # If this line contains a SPAM tag of some sort don't classify it 510 # now, but record the fact and move on to the next line 511 # 512 if ($line =~ /\sSPAM(-TAG)*,\s/) { 513 dbg("SPAM: $epoch: $isodate $sid"); 514 $spamsess{$sid} = 1; 515 return; 516 } 517 518 # 519 # Save the stats according to the classification of the email 520 # 521 if ($line =~ /Passed/) { 522 523 # 524 # Update the overall passed emails statistics 525 # 526 if (defined $spamsess{$sid}) { 527 dbg("Passed(SPAM) $epoch: $isodate"); 528 $id = getVid("Passed(SPAM)", $epoch); 529 } else { 530 dbg("Passed: $epoch: $isodate"); 531 $id = getVid("Passed", $epoch); 532 } 533 upCount($id, $epoch); 534 535 } elsif ($line =~ /\sNot-Delivered,\s/) { 536 537 # 538 # Update the overall not delivered statistics 539 # 540 if (defined $spamsess{$sid}) { 541 dbg("Not-Delivered(SPAM): $epoch: $isodate"); 542 $id = getVid("Not-Delivered(SPAM)", $epoch); 543 } else { 544 dbg("Not-Delivered: $epoch: $isodate"); 545 $id = getVid("Not-Delivered", $epoch); 546 } 547 upCount($id, $epoch); 548 549 } elsif ($line =~ /\sBANNED\sname\/type\s/) { 550 dbg("Banned $epoch: $isodate"); 551 552 # 553 # Update the overall banned emails statistics 554 # 555 $id = getVid("Banned", $epoch); 556 upCount($id, $epoch); 557 558 } elsif ($line =~ /\sINFECTED\s+\((.*?[\(.*?\)]*)\)/ or # amavisd-new 559 $line =~ /\sPossible virus.*->\s+'(.*?)'/ or # amavis-ng 560 $line =~ /.*parts\/\d+:\s+(.*?)\s+FOUND/ or # amavis-ng 561 $line =~ /\squarantine[:|d;].*?virus='(.*?)'/ or # amavisd 562 $line =~ /.*part-\d+:\s+(.*?)\s+FOUND/ ) { # clamav 563 my $viruses = $1; 564 dbg("viruses: \"$viruses\" at $epoch: $isodate"); 565 566 # 567 # Update the overall infected emails statistics 568 # 569 $id = getVid("Infected", $epoch); 570 upCount($id, $epoch); 571 572 # 573 # What is this specific nasty little bugger(s) called? 574 # Update his statistics as well. 575 # 576 my @list = split(/,+\s+/, $viruses); 577 my %seen; 578 foreach my $virus (@list) { 579 if (!$seen{$virus}) { 580 $id = getVid($virus, $epoch); 581 upCount($id, $epoch); 582 $seen{$virus} = 1; 583 } 584 } 585 } 586 587 # 588 # Since we have classified this session-id we need to remove it 589 # from the %spamsess hash 590 delete $spamsess{$sid}; 591} 592 593 594 595 596# 597# 598# 599sub parseFile { 600 601 my ($fname, $start, $stop) = @_; 602 dbg("parseFile ($fname, $start, $stop)"); 603 604 # 605 # Open up the file we need to parse 606 # 607 unless (open (LOGFILE, $fname)) { 608 do_exit(1, "Could not open file $fname: $!"); 609 } 610 unless (seek (LOGFILE, $start, 0)) { 611 do_exit(1, "Could not seek to $start in file $fname: $!"); 612 } 613 614 # 615 # Loop each line until the current end of file 616 # 617 $pos = $start; 618 my $lineid = 0; 619 while ($pos < $stop and $line = <LOGFILE>) 620 { 621 $lineid++; 622 $lastepoch = $epoch; 623 624 if ($line =~ /amavis.*?\[\d+\]:/) { 625 classify(); 626 } 627 # 628 # Where did we get to in the file? 629 # 630 $pos = tell(LOGFILE); 631 632 # 633 # Save the current statistics every 1000 lines. This way 634 # if the program dies we don't have to start again from the 635 # beginning each time. Also good for monitoring the graphs 636 # to see where we are up to. 637 # 638 if (!($lineid % 1000)) { 639 saveState();; 640 } 641 642 } 643 close(LOGFILE); 644 645} 646 647 648# 649# Find the previous (rotated) log file and parse that according to 650# our last position 651# 652sub parseRotFile { 653 my ($logfile, $spos) = @_; 654 655 my $now = time(); 656 my $today = localtime($now); 657 my $yesterday = localtime($now - 60*60*24); 658 659 $today = sprintf("%4u%02u%02u", $today->year + 1900, 660 $today->mon + 1, 661 $today->mday); 662 663 $yesterday = sprintf("%4u%02u%02u", $yesterday->year + 1900, 664 $yesterday->mon + 1, 665 $yesterday->mday); 666 667 my $rotlogfile = undef; 668 669 if (-f "$logfile.0") { 670 $rotlogfile = $logfile . ".0"; 671 } elsif (-f "$logfile.1") { 672 $rotlogfile = $logfile . ".1"; 673 } elsif (-f "$logfile.01") { 674 $rotlogfile = $logfile . ".01"; 675 } elsif (-f "$logfile-$today") { 676 $rotlogfile = $logfile . "-$today"; 677 } elsif (-f "$logfile-$yesterday") { 678 $rotlogfile = $logfile . "-$yesterday"; 679 } 680 681 if (defined($rotlogfile)) { 682 parseFile ($rotlogfile, $spos, (stat $rotlogfile)[7]); 683 } else { 684 print_err("Could not open rotated logfile."); 685 print_err(" Tried extentions .0, .1, .01, -$today, -$yesterday"); 686 do_exit(1); 687 } 688} 689 690 691sub createRRD { 692 my ($file, $epoch) = @_; 693 dbg("createRRD: $file, $epoch"); 694 695 RRDs::create($file, 696 "--start", ($epoch - 1), 697 "--step", $rrdstep, 698 "DS:hits:COUNTER:".$rrdstep.":0:U", 699 "RRA:AVERAGE:0.5:1:300", 700 "RRA:AVERAGE:0.5:6:700", 701 "RRA:AVERAGE:0.5:24:775", 702 "RRA:AVERAGE:0.5:288:797", 703 "RRA:MAX:0.5:1:300", 704 "RRA:MAX:0.5:6:700", 705 "RRA:MAX:0.5:24:775", 706 "RRA:MAX:0.5:288:797" 707 ); 708 709 my $err = RRDs::error; 710 if ($err) { 711 print_err("createRRD: $err"); 712 return -1; 713 } 714 715 return 1; 716} 717 718 719sub updateRRD () { 720 my ($id, $epoch) = @_; 721 my $count = $occurence{$id}; 722 my $rrdfile = "$statedir/$id.rrd"; 723 my $err; 724 my $last; 725 726 if (! -f $rrdfile) { 727 if (! createRRD($rrdfile, $epoch - $rrdstep)) { 728 do_exit(1, "updateRRD: Can't create file $rrdfile: $!"); 729 } 730 my $upd = ($epoch - $rrdstep) . ":0"; 731 dbg("Update: $rrdfile at ", $epoch - $rrdstep, " count 0"); 732 RRDs::update($rrdfile, $upd); 733 } 734 735 dbg("Update: $rrdfile at $epoch count $count"); 736 737 $last = RRDs::last($rrdfile); 738 $err = RRDs::error; 739 if ($err) { 740 print_err("updateRRD: $err"); 741 return -1; 742 } 743 744 # 745 # We sometimes get two hits in the same second. Check for that here 746 # and basically ignore it. 747 # 748 if ($epoch > $last) { 749 my $upd = $epoch . ":" . $count; 750 RRDs::update($rrdfile, $upd); 751 752 $err = RRDs::error; 753 if ($err) { 754 print_err("updateRRD: $err"); 755 print_err("Attempted to update $rrdfile at $epoch count $count"); 756 return -1; 757 } 758 } 759 760 return 1; 761} 762 763 764 765sub dbg { 766 print "$me: @_\n" if ($debug); 767} 768 769sub msg { 770 print "$me: @_\n" if ($verbose); 771} 772 773sub print_err 774{ 775 print STDERR "$me: error: @_\n"; 776} 777 778 779# ######################################################################## 780# main() program 781# ######################################################################## 782 783init(); 784semlock(); 785loadState(); 786 787$eof = (stat $logfile)[7]; 788 789if ($eof < $spos) { 790 # 791 # The log file has rotated under us, so do the rotated logfile first. 792 # 793 msg("Logfile \"$logfile\" appears to have rotated"); 794 parseRotFile($logfile, $spos); 795 $spos = 0; # reset to the start of the file 796} 797 798parseFile ($logfile, $spos, $eof); 799saveState(); 800semunlock(); 801 802