1#!/usr/local/bin/perl -w 2 3# sqlgrey: a postfix greylisting policy server using an SQL backend 4# based on postgrey 5# Copyright 2004 (c) ETH Zurich 6# Copyright 2004 (c) Lionel Bouton 7 8# 9# This program is free software; you can redistribute it and/or modify 10# it under the terms of the GNU General Public License as published by 11# the Free Software Foundation; either version 2 of the License, or 12# (at your option) any later version. 13# 14# This program is distributed in the hope that it will be useful, 15# but WITHOUT ANY WARRANTY; without even the implied warranty of 16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17# GNU General Public License for more details. 18# 19# You should have received a copy of the GNU General Public License 20# along with this program; if not, write to the Free Software 21# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 22# 23 24package sqlgrey_logstats; 25use strict; 26use Pod::Usage; 27use Getopt::Long qw(:config posix_default no_ignore_case); 28use Time::Local; 29use Date::Calc; 30 31my $VERSION = "1.8.0"; 32 33# supports IPv4 and IPv6 34my $ipregexp = '[\dabcdef\.:]+'; 35 36###################### 37# Time-related methods 38my %months = ( "Jan" => 0, "Feb" => 1, "Mar" => 2, "Apr" => 3, "May" => 4, "Jun" => 5, 39 "Jul" => 6, "Aug" => 7, "Sep" => 8, "Oct" => 9, "Nov" => 10, "Dec" => 11 ); 40 41sub validate_tstamp { 42 my $self = shift; 43 my $value = shift; 44 my ($monthname, $mday, $hour, $min, $sec); 45 if ($value =~ /^(\w{3}) ([\d ]\d) (\d\d):(\d\d):(\d\d)$/) { 46 ($monthname, $mday, $hour, $min, $sec) = ($1, $2, $3, $4, $5); 47 } else { 48 $self->debug("invalid date format: $value\n"); 49 return undef; 50 } 51 my $month = $months{$monthname}; 52 my $year = $self->{year}; 53 if ($month > $self->{month}) { 54 # yes we can compute stats across years... 55 $year--; 56 } 57 my $epoch_seconds = Time::Local::timelocal($sec, $min, $hour, $mday, $month, $year); 58 if (! $epoch_seconds) { 59 $self->debug("can't compute timestamp from: $value\n"); 60 return undef; 61 } 62 if ($epoch_seconds < $self->{begin} or $epoch_seconds > $self->{end}) { 63 $self->debug("date out of range: $value\n"); 64 return undef; 65 } 66 return $epoch_seconds; 67} 68 69# What was the tstamp yesterday at 00:00 ? 70sub yesterday_tstamp { 71 # Get today 00:00:00 and deduce one day 72 my ($day, $month, $year) = reverse Date::Calc::Add_Delta_Days(Date::Calc::Today(), -1 ); 73 # Adjust Date::Calc 1-12 month to 0-11 74 $month--; 75 return Time::Local::timelocal(0,0,0,$day,$month,$year); 76} 77 78# What was the tstamp today at 00:00 ? 79sub today_tstamp { 80 # Get today 00:00:00 81 return Time::Local::timelocal(0, 0, 0, ((localtime())[3,4,5])); 82} 83 84# set time period 85sub yesterday { 86 my $self = shift; 87 $self->{begin} = $self->yesterday_tstamp(); 88 $self->{end} = $self->{begin} + (60 * 60 * 24); 89} 90 91sub today { 92 my $self = shift; 93 $self->{begin} = $self->today_tstamp(); 94 $self->{end} = time(); 95} 96 97sub lasthour { 98 my $self = shift; 99 my $now = time(); 100 $self->{begin} = $now - (60 * 60); 101 $self->{end} = $now; 102} 103 104sub last24h { 105 my $self = shift; 106 my $now = time(); 107 $self->{begin} = $now - (60 * 60 * 24); 108 $self->{end} = $now; 109} 110 111sub lastweek { 112 my $self = shift; 113 $self->{end} = $self->today_tstamp(); 114 $self->{begin} = $self->{end} - (60 * 60 * 24 * 7); 115} 116 117################## 118# Argument parsing 119sub parse_args { 120 my $self = shift; 121 my %opt = (); 122 123 GetOptions(\%opt, 'help|h', 'man', 'version', 'yesterday|y', 'today|t', 124 'lasthour', 'last24h|d', 'lastweek|w', 'programname', 'debug', 125 'top-domain=i', 'top-from=i', 'top-spam=i', 'top-throttled=i', 126 'print-delayed') 127 or pod2usage(1); 128 129 if ($opt{debug}) { 130 $self->{debug} = 1; 131 } 132 133 if ($opt{help}) { pod2usage(1) } 134 if ($opt{man}) { pod2usage(-exitstatus => 0, -verbose => 2) } 135 if ($opt{version}) { print "sqlgrey-logstats.pl $VERSION\n"; exit(0) } 136 137 my $setperiod_count = 0; 138 if ($opt{yesterday}) { 139 $self->yesterday(); 140 $setperiod_count++; 141 } 142 if ($opt{today}) { 143 $self->today(); 144 $setperiod_count++; 145 } 146 if ($opt{lasthour}) { 147 $self->lasthour(); 148 $setperiod_count++; 149 } 150 if ($opt{last24h}) { 151 $self->last24h(); 152 $setperiod_count++; 153 } 154 if ($opt{lastweek}) { 155 $self->lastweek(); 156 $setperiod_count++; 157 } 158 if ($setperiod_count > 1) { 159 pod2usage(1); 160 } 161 162 if ($opt{'top-domain'}) { 163 $self->{top_domain} = $opt{'top-domain'}; 164 } 165 if ($opt{'top-from'}) { 166 $self->{top_from} = $opt{'top-from'}; 167 } 168 if ($opt{'top-spam'}) { 169 $self->{top_spam} = $opt{'top-spam'}; 170 } 171 172 if ($opt{'top-throttled'}) { 173 $self->{top_throttled} = $opt{'top-throttled'}; 174 } 175 176 if ($opt{'print-delayed'}) { 177 $self->{print_delayed} = 1; 178 } 179 180 # compute current year and month 181 ($self->{month}, $self->{year}) = (localtime)[4,5]; 182 183 if ($opt{programname}) { 184 $self->{programname} = $opt{programname}; 185 } 186} 187 188################ 189# percent string 190sub percent { 191 my $portion = shift; 192 my $total = shift; 193 if ($total == 0) { 194 return "N/A%"; 195 } 196 return sprintf ("%.2f%%", ($portion / $total) * 100); 197} 198 199# quick debug function 200sub debug { 201 my $self = shift; 202 if (defined $self->{debug}) { 203 print shift; 204 } 205} 206 207sub split_date_event { 208 my ($self, $line) = @_; 209 210 if ($line =~ 211 m/^(\w{3} [\d ]\d \d\d:\d\d:\d\d)\s\S+\s$self->{programname}: (\w+): (.*)$/o 212 ) { 213 my $time = $self->validate_tstamp($1); 214 if (! defined $time) { 215 return (undef,undef,undef); 216 } else { 217 #$self->debug("match: $time, $2, $3\n"); 218 return ($time, $2, $3); 219 } 220 } else { 221 $self->debug("not matched: $line\n"); 222 return (undef,undef,undef); 223 } 224} 225 226sub parse_grey { 227 my ($self, $time, $event) = @_; 228 ## old format 229 if ($event =~ /^domain awl match: updating ($ipregexp), (.*)$/i) { 230 $self->{events}++; 231 $self->{passed}++; 232 $self->{domain_awl_match}{$1}{$2}++; 233 $self->{domain_awl_match_count}++; 234 } elsif ($event =~ /^from awl match: updating ($ipregexp), (.*)$/i) { 235 $self->{events}++; 236 $self->{passed}++; 237 $self->{from_awl_match}{$1}{$2}++; 238 $self->{from_awl_match_count}++; 239 } elsif ($event =~ /^new: ($ipregexp), (.*) -> (.*)$/i) { 240 $self->{events}++; 241 $self->{new}{$1}++; 242 $self->{new_count}++; 243 } elsif ($event =~ /^throttling: ($ipregexp), (.*) -> (.*)$/i) { 244 $self->{events}++; 245 $self->{throttled}{$1}{$2}++; 246 $self->{throttled_count}++; 247 } elsif ($event =~ /^early reconnect: ($ipregexp), (.*) -> (.*)$/i) { 248 $self->{events}++; 249 $self->{early}{$1}++; 250 $self->{early_count}++; 251 } elsif ($event =~ /^reconnect ok: ($ipregexp), (.*) -> (.*) \((.*)\)/i) { 252 $self->{events}++; 253 $self->{passed}++; 254 $self->{reconnect}{$1}{$2}++; 255 $self->{reconnect_count}++; 256 ## new format 257 } elsif ($event =~ /^domain awl match: updating ($ipregexp)\($ipregexp\), (.*)$/i) { 258 $self->{events}++; 259 $self->{passed}++; 260 $self->{domain_awl_match}{$1}{$2}++; 261 $self->{domain_awl_match_count}++; 262 ## new format for from_awl match (deverp log) 263 } elsif ($event =~ /^from awl match: updating ($ipregexp)\($ipregexp\), (.*)\(.*\)$/i) { 264 $self->{events}++; 265 $self->{passed}++; 266 $self->{from_awl_match}{$1}{$2}++; 267 $self->{from_awl_match_count}++; 268 } elsif ($event =~ /^from awl match: updating ($ipregexp)\($ipregexp\), (.*)$/i) { 269 $self->{events}++; 270 $self->{passed}++; 271 $self->{from_awl_match}{$1}{$2}++; 272 $self->{from_awl_match_count}++; 273 } elsif ($event =~ /^new: ($ipregexp)\($ipregexp\), (.*) -> (.*)$/i) { 274 $self->{events}++; 275 $self->{new}{$1}++; 276 $self->{new_count}++; 277 } elsif ($event =~ /^throttling: ($ipregexp)\($ipregexp\), (.*) -> (.*)$/i) { 278 $self->{events}++; 279 $self->{throttled}{$1}{$2}++; 280 $self->{throttled_count}++; 281 } elsif ($event =~ /^early reconnect: ($ipregexp)\($ipregexp\), (.*) -> (.*)$/i) { 282 $self->{events}++; 283 $self->{early}{$1}++; 284 $self->{early_count}++; 285 } elsif ($event =~ /^reconnect ok: ($ipregexp)\($ipregexp\), (.*) -> (.*) \((.*)\)/i) { 286 $self->{events}++; 287 $self->{passed}++; 288 $self->{reconnect}{$1}{$2}++; 289 $self->{reconnect_count}++; 290 } elsif ($event =~ /^domain awl: $ipregexp, .* added$/i) { 291 ## what? 292 } elsif ($event =~ /^from awl: $ipregexp, .* added$/i) { 293 ## what? 294 } elsif ($event =~ /^from awl: $ipregexp, .* added/i) { 295 ## what? 296 } elsif ($event =~ /^domain awl: $ipregexp, .* added/i) { 297 ## what? 298 } else { 299 $self->debug("unknown grey event at $time: $event\n"); 300 } 301} 302 303sub parse_whitelist { 304 my ($self, $time, $event) = @_; 305 if ($event =~ /^.*, $ipregexp\(.*\) -> .*$/i) { 306 $self->{events}++; 307 $self->{passed}++; 308 $self->{whitelisted}++; 309 } else { 310 $self->debug("unknown whitelist event at $time: $event\n"); 311 } 312} 313 314sub parse_spam { 315 my ($self, $time, $event) = @_; 316 if ($event =~ /^([\d\.]+): (.*) -> (.*) at (.*)$/) { 317 $self->{rejected_count}++; 318 $self->{rejected}{$1}{$2}++; 319 } else { 320 $self->debug("unknown spam event at $time: $event\n"); 321 } 322} 323 324# TODO 325sub parse_perf { 326} 327 328# distribute processing to appropriate parser 329sub parse_line { 330 my ($self, $line) = @_; 331 332 my ($time, $type, $event) = $self->split_date_event($line); 333 if (! defined $time) { 334 return; 335 } 336 # else parse event 337 if ($type eq 'grey') { 338 $self->parse_grey($time, $event); 339 } elsif ($type eq 'whitelist') { 340 $self->parse_whitelist($time, $event); 341 } elsif ($type eq 'spam') { 342 $self->parse_spam($time, $event); 343 } elsif ($type eq 'perf') { 344 $self->parse_perf($time, $event); 345 } # don't care for other types 346} 347 348# format a title 349sub print_title { 350 my $self = shift; 351 my $title = shift; 352 my $ln = length($title); 353 my $line = ' ' . '-' x ($ln + 2) . ' '; 354 print $line . "\n"; 355 print "| $title |\n"; 356 print $line . "\n\n"; 357} 358 359# breaks down and print an hash 360sub print_distribution { 361 my $self = shift; 362 my $hash_to_print = shift; 363 my $max_to_print = shift; 364 my $title = shift; 365 366 my @top; 367 my $idx; 368 my $count = 0; 369 foreach my $id (keys(%{$hash_to_print})) { 370 $count++; 371 my $hash; 372 $hash->{count} = 0; 373 $hash->{id} = $id; 374 foreach my $subval (keys(%{$hash_to_print->{$id}})) { 375 $hash->{count} += $hash_to_print->{$id}{$subval}; 376 } 377 $top[$#top+1] = $hash; 378 @top = reverse sort { $a->{count} <=> $b->{count} } @top; 379 pop @top if (($max_to_print != -1) && ($#top >= $max_to_print)); 380 } 381 if ($max_to_print != -1) { 382 $self->print_title("$title (top " . ($#top + 1) . ", " . ($#top + 1 - $count) . " hidden)"); 383 } else { 384 $self->print_title($title); 385 } 386 for ($idx = 0; $idx <= $#top; $idx++) { 387 my @dtop; 388 foreach my $subval (keys(%{$hash_to_print->{$top[$idx]->{id}}})) { 389 my $hash; 390 $hash->{count} = $hash_to_print->{$top[$idx]->{id}}{$subval}; 391 $hash->{domain} = $subval; 392 $dtop[$#dtop+1] = $hash; 393 @dtop = sort { $a->{count} <=> $b->{count} } @dtop; 394 } 395 @dtop = reverse @dtop; 396 print "$top[$idx]->{id}: $top[$idx]->{count}\n"; 397 for (my $didx = 0; $didx <= $#dtop; $didx++) { 398 print " $dtop[$didx]->{domain}: $dtop[$didx]->{count}\n"; 399 } 400 } 401 print "\n"; 402} 403sub print_domain_awl { 404 my $self = shift; 405 $self->print_distribution($self->{domain_awl_match}, $self->{top_domain}, 406 "Domain AWL"); 407} 408 409sub print_from_awl { 410 my $self = shift; 411 412 $self->print_distribution($self->{from_awl_match}, $self->{top_from}, 413 "From AWL"); 414} 415 416sub print_spam { 417 my $self = shift; 418 419 $self->print_distribution($self->{rejected}, $self->{top_spam}, 420 "Spam"); 421} 422 423sub print_delayed { 424 my $self = shift; 425 426 if (! defined $self->{print_delayed}) { 427 return; 428 } 429 $self->print_distribution($self->{reconnect}, -1, 430 "Delayed"); 431} 432 433sub print_throttled { 434 my $self = shift; 435 436 $self->print_distribution($self->{throttled}, $self->{top_throttled}, 437 "Throttled"); 438} 439 440sub print_stats { 441 my $self = shift; 442 print "##################\n" . 443 "## Global stats ##\n" . 444 "##################\n\n"; 445 print "Events : " . $self->{events} . "\n"; 446 print "Passed : " . $self->{passed} . "\n"; 447 print "Early : " . $self->{early_count} . "\n"; 448 print "Delayed : " . $self->{new_count} . "\n\n"; 449 450 print "Probable SPAM : " . $self->{rejected_count} . "\n"; 451 print "Throttled : " . $self->{throttled_count} . "\n\n"; 452 453 print "###############################\n" . 454 "## Whitelist/AWL performance ##\n" . 455 "###############################\n\n"; 456 print "Breakdown for $self->{passed} accepted messages:\n\n"; 457 458 print "Whitelists : " . 459 percent($self->{whitelisted}, $self->{passed}) . 460 "\t($self->{whitelisted})\n"; 461 print "Domain AWL : " . 462 percent($self->{domain_awl_match_count}, $self->{passed}) . 463 "\t($self->{domain_awl_match_count})\n"; 464 print "From AWL : " . 465 percent($self->{from_awl_match_count}, $self->{passed}) . 466 "\t($self->{from_awl_match_count})\n"; 467 print "Delayed : " . 468 percent($self->{reconnect_count},$self->{passed}) . 469 "\t($self->{reconnect_count})\n\n"; 470 471 $self->print_domain_awl(); 472 $self->print_from_awl(); 473 $self->print_spam(); 474 $self->print_throttled(); 475 $self->print_delayed(); 476} 477 478# create parser with no period limits 479# and counters set to 0 480my $parser = bless { 481 begin => 0, 482 end => (1 << 31) - 1, 483 programname => 'sqlgrey', 484 events => 0, 485 passed => 0, 486 whitelisted => 0, 487 rejected_count => 0, 488 new_count => 0, 489 throttled_count => 0, 490 early_count => 0, 491 domain_awl_match_count => 0, 492 from_awl_match_count => 0, 493 domain_awl_match => {}, 494 from_awl_match => {}, 495 rejected => {}, 496 reconnect => {}, 497 reconnect_count => 0, 498 top_domain => -1, 499 top_from => -1, 500 top_spam => -1, 501 top_throttled => -1, 502}, 'sqlgrey_logstats'; 503 504$parser->parse_args(); 505 506while (<STDIN>) { 507 chomp; 508 $parser->parse_line($_); 509} 510 511$parser->print_stats(); 512 513__END__ 514 515=head1 NAME 516 517sqlgrey-logstats.pl - SQLgrey log parser 518 519=head1 SYNOPSIS 520 521B<sqlgrey-logstats.pl> [I<options>...] < syslogfile 522 523 -h, --help display this help and exit 524 --man display man page 525 --version output version information and exit 526 --debug output detailed log parsing steps 527 528 -y, --yesterday compute stats for yesterday 529 -t, --today compute stats for today 530 --lasthour compute stats for last hour 531 -d, --lastday compute stats for last 24 hours 532 -w, --lastweek compute stats for last 7 days 533 534 --programname program name looked into log file 535 536 --top-from how many from AWL entries to print (default: all) 537 --top-domain how many domain AWL entries to print (default: all) 538 --top-spam how many SPAM sources to print (default: all) 539 --top-throttled how many throttled sources to print (default: all) 540 --print-delayed print delayed sources (default: don't) 541 542=head1 DESCRIPTION 543 544sqlgrey-logstats.pl ... 545 546=head1 SEE ALSO 547 548See L<http://www.greylisting.org/> for a description of what greylisting 549is and L<http://www.postfix.org/SMTPD_POLICY_README.html> for a 550description of how Postfix policy servers work. 551 552=head1 COPYRIGHT 553 554Copyright (c) 2004 by Lionel Bouton. 555 556=head1 LICENSE 557 558This program is free software; you can redistribute it and/or modify 559it under the terms of the GNU General Public License as published by 560the Free Software Foundation; either version 2 of the License, or 561(at your option) any later version. 562 563This program is distributed in the hope that it will be useful, 564but WITHOUT ANY WARRANTY; without even the implied warranty of 565MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 566GNU General Public License for more details. 567 568You should have received a copy of the GNU General Public License along 569with this program; if not, write to the Free Software Foundation, Inc., 57059 Temple Place, Suite 330, Boston, MA 02111-1307 USA 571 572=head1 AUTHOR 573 574S<Lionel Bouton E<lt>lionel-dev@bouton.nameE<gt>> 575 576=cut 577