1#!/usr/local/bin/perl -w
2
3# sqlgrey: a postfix greylisting policy server using an SQL backend
4# based on postgrey
5# Copyright 2004 (c) ETH Zurich
6# Copyright 2004 (c) Lionel Bouton
7
8#
9#    This program is free software; you can redistribute it and/or modify
10#    it under the terms of the GNU General Public License as published by
11#    the Free Software Foundation; either version 2 of the License, or
12#    (at your option) any later version.
13#
14#    This program is distributed in the hope that it will be useful,
15#    but WITHOUT ANY WARRANTY; without even the implied warranty of
16#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17#    GNU General Public License for more details.
18#
19#    You should have received a copy of the GNU General Public License
20#    along with this program; if not, write to the Free Software
21#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22#
23
24package sqlgrey_logstats;
25use strict;
26use Pod::Usage;
27use Getopt::Long qw(:config posix_default no_ignore_case);
28use Time::Local;
29use Date::Calc;
30
31my $VERSION = "1.8.0";
32
33# supports IPv4 and IPv6
34my $ipregexp = '[\dabcdef\.:]+';
35
36######################
37# Time-related methods
38my %months = ( "Jan" => 0, "Feb" => 1, "Mar" => 2, "Apr" => 3, "May" => 4, "Jun" => 5,
39	       "Jul" => 6, "Aug" => 7, "Sep" => 8, "Oct" => 9, "Nov" => 10, "Dec" => 11 );
40
41sub validate_tstamp {
42    my $self = shift;
43    my $value = shift;
44    my ($monthname, $mday, $hour, $min, $sec);
45    if ($value =~ /^(\w{3}) ([\d ]\d) (\d\d):(\d\d):(\d\d)$/) {
46        ($monthname, $mday, $hour, $min, $sec) = ($1, $2, $3, $4, $5);
47    } else {
48	$self->debug("invalid date format: $value\n");
49        return undef;
50    }
51    my $month = $months{$monthname};
52    my $year = $self->{year};
53    if ($month > $self->{month}) {
54	# yes we can compute stats across years...
55	$year--;
56    }
57    my $epoch_seconds = Time::Local::timelocal($sec, $min, $hour, $mday, $month, $year);
58    if (! $epoch_seconds) {
59	$self->debug("can't compute timestamp from: $value\n");
60        return undef;
61    }
62    if ($epoch_seconds < $self->{begin} or $epoch_seconds > $self->{end}) {
63	$self->debug("date out of range: $value\n");
64        return undef;
65    }
66    return $epoch_seconds;
67}
68
69# What was the tstamp yesterday at 00:00 ?
70sub yesterday_tstamp {
71    # Get today 00:00:00 and deduce one day
72    my ($day, $month, $year) = reverse Date::Calc::Add_Delta_Days(Date::Calc::Today(), -1 );
73    # Adjust Date::Calc 1-12 month to 0-11
74    $month--;
75    return Time::Local::timelocal(0,0,0,$day,$month,$year);
76}
77
78# What was the tstamp today at 00:00 ?
79sub today_tstamp {
80    # Get today 00:00:00
81    return Time::Local::timelocal(0, 0, 0, ((localtime())[3,4,5]));
82}
83
84# set time period
85sub yesterday {
86    my $self = shift;
87    $self->{begin} = $self->yesterday_tstamp();
88    $self->{end} = $self->{begin} + (60 * 60 * 24);
89}
90
91sub today {
92    my $self = shift;
93    $self->{begin} = $self->today_tstamp();
94    $self->{end} = time();
95}
96
97sub lasthour {
98    my $self = shift;
99    my $now = time();
100    $self->{begin} = $now - (60 * 60);
101    $self->{end} = $now;
102}
103
104sub last24h {
105    my $self = shift;
106    my $now = time();
107    $self->{begin} = $now - (60 * 60 * 24);
108    $self->{end} = $now;
109}
110
111sub lastweek {
112    my $self = shift;
113    $self->{end} = $self->today_tstamp();
114    $self->{begin} = $self->{end} - (60 * 60 * 24 * 7);
115}
116
117##################
118# Argument parsing
119sub parse_args {
120    my $self = shift;
121    my %opt = ();
122
123    GetOptions(\%opt, 'help|h', 'man', 'version', 'yesterday|y', 'today|t',
124	       'lasthour', 'last24h|d', 'lastweek|w', 'programname', 'debug',
125	       'top-domain=i', 'top-from=i', 'top-spam=i', 'top-throttled=i',
126	       'print-delayed')
127	or pod2usage(1);
128
129    if ($opt{debug}) {
130	$self->{debug} = 1;
131    }
132
133    if ($opt{help})    { pod2usage(1) }
134    if ($opt{man})     { pod2usage(-exitstatus => 0, -verbose => 2) }
135    if ($opt{version}) { print "sqlgrey-logstats.pl $VERSION\n"; exit(0) }
136
137    my $setperiod_count = 0;
138    if ($opt{yesterday}) {
139	$self->yesterday();
140	$setperiod_count++;
141    }
142    if ($opt{today}) {
143	$self->today();
144	$setperiod_count++;
145    }
146    if ($opt{lasthour}) {
147	$self->lasthour();
148	$setperiod_count++;
149    }
150    if ($opt{last24h}) {
151	$self->last24h();
152	$setperiod_count++;
153    }
154    if ($opt{lastweek}) {
155	$self->lastweek();
156	$setperiod_count++;
157    }
158    if ($setperiod_count > 1) {
159	pod2usage(1);
160    }
161
162    if ($opt{'top-domain'}) {
163	$self->{top_domain} = $opt{'top-domain'};
164    }
165    if ($opt{'top-from'}) {
166	$self->{top_from} = $opt{'top-from'};
167    }
168    if ($opt{'top-spam'}) {
169	$self->{top_spam} = $opt{'top-spam'};
170    }
171
172    if ($opt{'top-throttled'}) {
173	$self->{top_throttled} = $opt{'top-throttled'};
174    }
175
176    if ($opt{'print-delayed'}) {
177	$self->{print_delayed} = 1;
178    }
179
180    # compute current year and month
181    ($self->{month}, $self->{year}) = (localtime)[4,5];
182
183    if ($opt{programname}) {
184	$self->{programname} = $opt{programname};
185    }
186}
187
188################
189# percent string
190sub percent {
191    my $portion = shift;
192    my $total = shift;
193    if ($total == 0) {
194	return "N/A%";
195    }
196    return sprintf ("%.2f%%", ($portion / $total) * 100);
197}
198
199# quick debug function
200sub debug {
201    my $self = shift;
202    if (defined $self->{debug}) {
203	print shift;
204    }
205}
206
207sub split_date_event {
208    my ($self, $line) = @_;
209
210    if ($line =~
211	m/^(\w{3} [\d ]\d \d\d:\d\d:\d\d)\s\S+\s$self->{programname}: (\w+): (.*)$/o
212	) {
213	my $time = $self->validate_tstamp($1);
214	if (! defined $time) {
215	    return (undef,undef,undef);
216	} else {
217	    #$self->debug("match: $time, $2, $3\n");
218	    return ($time, $2, $3);
219	}
220    } else {
221	$self->debug("not matched: $line\n");
222	return (undef,undef,undef);
223    }
224}
225
226sub parse_grey {
227    my ($self, $time, $event) = @_;
228    ## old format
229    if ($event =~ /^domain awl match: updating ($ipregexp), (.*)$/i) {
230	$self->{events}++;
231	$self->{passed}++;
232	$self->{domain_awl_match}{$1}{$2}++;
233	$self->{domain_awl_match_count}++;
234    } elsif ($event =~ /^from awl match: updating ($ipregexp), (.*)$/i) {
235	$self->{events}++;
236	$self->{passed}++;
237	$self->{from_awl_match}{$1}{$2}++;
238	$self->{from_awl_match_count}++;
239    } elsif ($event =~ /^new: ($ipregexp), (.*) -> (.*)$/i) {
240	$self->{events}++;
241	$self->{new}{$1}++;
242	$self->{new_count}++;
243    } elsif ($event =~ /^throttling: ($ipregexp), (.*) -> (.*)$/i) {
244	$self->{events}++;
245	$self->{throttled}{$1}{$2}++;
246	$self->{throttled_count}++;
247    } elsif ($event =~ /^early reconnect: ($ipregexp), (.*) -> (.*)$/i) {
248	$self->{events}++;
249	$self->{early}{$1}++;
250	$self->{early_count}++;
251    } elsif ($event =~ /^reconnect ok: ($ipregexp), (.*) -> (.*) \((.*)\)/i) {
252	$self->{events}++;
253	$self->{passed}++;
254	$self->{reconnect}{$1}{$2}++;
255	$self->{reconnect_count}++;
256    ## new format
257    } elsif ($event =~ /^domain awl match: updating ($ipregexp)\($ipregexp\), (.*)$/i) {
258	$self->{events}++;
259	$self->{passed}++;
260	$self->{domain_awl_match}{$1}{$2}++;
261	$self->{domain_awl_match_count}++;
262    ## new format for from_awl match (deverp log)
263    } elsif ($event =~ /^from awl match: updating ($ipregexp)\($ipregexp\), (.*)\(.*\)$/i) {
264	$self->{events}++;
265	$self->{passed}++;
266	$self->{from_awl_match}{$1}{$2}++;
267	$self->{from_awl_match_count}++;
268    } elsif ($event =~ /^from awl match: updating ($ipregexp)\($ipregexp\), (.*)$/i) {
269	$self->{events}++;
270	$self->{passed}++;
271	$self->{from_awl_match}{$1}{$2}++;
272	$self->{from_awl_match_count}++;
273    } elsif ($event =~ /^new: ($ipregexp)\($ipregexp\), (.*) -> (.*)$/i) {
274	$self->{events}++;
275	$self->{new}{$1}++;
276	$self->{new_count}++;
277    } elsif ($event =~ /^throttling: ($ipregexp)\($ipregexp\), (.*) -> (.*)$/i) {
278	$self->{events}++;
279	$self->{throttled}{$1}{$2}++;
280	$self->{throttled_count}++;
281    } elsif ($event =~ /^early reconnect: ($ipregexp)\($ipregexp\), (.*) -> (.*)$/i) {
282	$self->{events}++;
283	$self->{early}{$1}++;
284	$self->{early_count}++;
285    } elsif ($event =~ /^reconnect ok: ($ipregexp)\($ipregexp\), (.*) -> (.*) \((.*)\)/i) {
286	$self->{events}++;
287	$self->{passed}++;
288	$self->{reconnect}{$1}{$2}++;
289	$self->{reconnect_count}++;
290    } elsif ($event =~ /^domain awl: $ipregexp, .* added$/i) {
291	## what?
292    } elsif ($event =~ /^from awl: $ipregexp, .* added$/i) {
293	## what?
294    } elsif ($event =~ /^from awl: $ipregexp, .* added/i) {
295	## what?
296    } elsif ($event =~ /^domain awl: $ipregexp, .* added/i) {
297	## what?
298    } else {
299	$self->debug("unknown grey event at $time: $event\n");
300    }
301}
302
303sub parse_whitelist {
304    my ($self, $time, $event) = @_;
305    if ($event =~ /^.*, $ipregexp\(.*\) -> .*$/i) {
306	$self->{events}++;
307	$self->{passed}++;
308	$self->{whitelisted}++;
309    } else {
310	$self->debug("unknown whitelist event at $time: $event\n");
311    }
312}
313
314sub parse_spam {
315    my ($self, $time, $event) = @_;
316    if ($event =~ /^([\d\.]+): (.*) -> (.*) at (.*)$/) {
317	$self->{rejected_count}++;
318	$self->{rejected}{$1}{$2}++;
319    } else {
320	$self->debug("unknown spam event at $time: $event\n");
321    }
322}
323
324# TODO
325sub parse_perf {
326}
327
328# distribute processing to appropriate parser
329sub parse_line {
330    my ($self, $line) = @_;
331
332    my ($time, $type, $event) = $self->split_date_event($line);
333    if (! defined $time) {
334	return;
335    }
336    # else parse event
337    if ($type eq 'grey') {
338	$self->parse_grey($time, $event);
339    } elsif ($type eq 'whitelist') {
340	$self->parse_whitelist($time, $event);
341    } elsif ($type eq 'spam') {
342	$self->parse_spam($time, $event);
343    } elsif ($type eq 'perf') {
344	$self->parse_perf($time, $event);
345    } # don't care for other types
346}
347
348# format a title
349sub print_title {
350    my $self = shift;
351    my $title = shift;
352    my $ln = length($title);
353    my $line = ' ' . '-' x ($ln + 2) . ' ';
354    print $line . "\n";
355    print "| $title |\n";
356    print $line . "\n\n";
357}
358
359# breaks down and print an hash
360sub print_distribution {
361    my $self = shift;
362    my $hash_to_print = shift;
363    my $max_to_print = shift;
364    my $title = shift;
365
366    my @top;
367    my $idx;
368    my $count = 0;
369    foreach my $id (keys(%{$hash_to_print})) {
370	$count++;
371	my $hash;
372	$hash->{count} = 0;
373	$hash->{id} = $id;
374	foreach my $subval (keys(%{$hash_to_print->{$id}})) {
375	    $hash->{count} += $hash_to_print->{$id}{$subval};
376	}
377	$top[$#top+1] = $hash;
378	@top = reverse sort { $a->{count} <=> $b->{count} } @top;
379	pop @top if (($max_to_print != -1) && ($#top >= $max_to_print));
380    }
381    if ($max_to_print != -1) {
382	$self->print_title("$title (top " . ($#top + 1) . ", " . ($#top + 1 - $count) . " hidden)");
383    } else {
384	$self->print_title($title);
385    }
386    for ($idx = 0; $idx <= $#top; $idx++) {
387	my @dtop;
388	foreach my $subval (keys(%{$hash_to_print->{$top[$idx]->{id}}})) {
389	    my $hash;
390	    $hash->{count} = $hash_to_print->{$top[$idx]->{id}}{$subval};
391	    $hash->{domain} = $subval;
392	    $dtop[$#dtop+1] = $hash;
393	    @dtop = sort { $a->{count} <=> $b->{count} } @dtop;
394	}
395	@dtop = reverse @dtop;
396	print "$top[$idx]->{id}: $top[$idx]->{count}\n";
397	for (my $didx = 0; $didx <= $#dtop; $didx++) {
398	    print "            $dtop[$didx]->{domain}: $dtop[$didx]->{count}\n";
399	}
400    }
401    print "\n";
402}
403sub print_domain_awl {
404    my $self = shift;
405    $self->print_distribution($self->{domain_awl_match}, $self->{top_domain},
406			      "Domain AWL");
407}
408
409sub print_from_awl {
410    my $self = shift;
411
412    $self->print_distribution($self->{from_awl_match}, $self->{top_from},
413			      "From AWL");
414}
415
416sub print_spam {
417    my $self = shift;
418
419    $self->print_distribution($self->{rejected}, $self->{top_spam},
420			      "Spam");
421}
422
423sub print_delayed {
424    my $self = shift;
425
426    if (! defined $self->{print_delayed}) {
427	return;
428    }
429    $self->print_distribution($self->{reconnect}, -1,
430			      "Delayed");
431}
432
433sub print_throttled {
434    my $self = shift;
435
436    $self->print_distribution($self->{throttled}, $self->{top_throttled},
437			      "Throttled");
438}
439
440sub print_stats {
441    my $self = shift;
442    print "##################\n" .
443	"## Global stats ##\n" .
444	"##################\n\n";
445    print "Events        : " . $self->{events} . "\n";
446    print "Passed        : " . $self->{passed} . "\n";
447    print "Early         : " . $self->{early_count} . "\n";
448    print "Delayed       : " . $self->{new_count} . "\n\n";
449
450    print "Probable SPAM : " . $self->{rejected_count} . "\n";
451    print "Throttled     : " . $self->{throttled_count} . "\n\n";
452
453    print "###############################\n" .
454	"## Whitelist/AWL performance ##\n" .
455	"###############################\n\n";
456    print "Breakdown for $self->{passed} accepted messages:\n\n";
457
458    print "Whitelists  : " .
459        percent($self->{whitelisted}, $self->{passed}) .
460	"\t($self->{whitelisted})\n";
461    print "Domain AWL  : " .
462        percent($self->{domain_awl_match_count}, $self->{passed}) .
463        "\t($self->{domain_awl_match_count})\n";
464    print "From AWL    : " .
465	percent($self->{from_awl_match_count}, $self->{passed}) .
466	"\t($self->{from_awl_match_count})\n";
467    print "Delayed     : " .
468	percent($self->{reconnect_count},$self->{passed}) .
469	"\t($self->{reconnect_count})\n\n";
470
471    $self->print_domain_awl();
472    $self->print_from_awl();
473    $self->print_spam();
474    $self->print_throttled();
475    $self->print_delayed();
476}
477
478# create parser with no period limits
479# and counters set to 0
480my $parser = bless {
481    begin => 0,
482    end => (1 << 31) - 1,
483    programname => 'sqlgrey',
484    events => 0,
485    passed => 0,
486    whitelisted => 0,
487    rejected_count => 0,
488    new_count => 0,
489    throttled_count => 0,
490    early_count => 0,
491    domain_awl_match_count => 0,
492    from_awl_match_count => 0,
493    domain_awl_match => {},
494    from_awl_match => {},
495    rejected => {},
496    reconnect => {},
497    reconnect_count => 0,
498    top_domain => -1,
499    top_from => -1,
500    top_spam => -1,
501    top_throttled => -1,
502}, 'sqlgrey_logstats';
503
504$parser->parse_args();
505
506while (<STDIN>) {
507    chomp;
508    $parser->parse_line($_);
509}
510
511$parser->print_stats();
512
513__END__
514
515=head1 NAME
516
517sqlgrey-logstats.pl - SQLgrey log parser
518
519=head1 SYNOPSIS
520
521B<sqlgrey-logstats.pl> [I<options>...] < syslogfile
522
523 -h, --help             display this help and exit
524     --man              display man page
525     --version          output version information and exit
526     --debug            output detailed log parsing steps
527
528 -y, --yesterday        compute stats for yesterday
529 -t, --today            compute stats for today
530     --lasthour         compute stats for last hour
531 -d, --lastday          compute stats for last 24 hours
532 -w, --lastweek         compute stats for last 7 days
533
534     --programname      program name looked into log file
535
536     --top-from         how many from AWL entries to print (default: all)
537     --top-domain       how many domain AWL entries to print (default: all)
538     --top-spam         how many SPAM sources to print (default: all)
539     --top-throttled    how many throttled sources to print (default: all)
540     --print-delayed    print delayed sources (default: don't)
541
542=head1 DESCRIPTION
543
544sqlgrey-logstats.pl ...
545
546=head1 SEE ALSO
547
548See L<http://www.greylisting.org/> for a description of what greylisting
549is and L<http://www.postfix.org/SMTPD_POLICY_README.html> for a
550description of how Postfix policy servers work.
551
552=head1 COPYRIGHT
553
554Copyright (c) 2004 by Lionel Bouton.
555
556=head1 LICENSE
557
558This program is free software; you can redistribute it and/or modify
559it under the terms of the GNU General Public License as published by
560the Free Software Foundation; either version 2 of the License, or
561(at your option) any later version.
562
563This program is distributed in the hope that it will be useful,
564but WITHOUT ANY WARRANTY; without even the implied warranty of
565MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
566GNU General Public License for more details.
567
568You should have received a copy of the GNU General Public License along
569with this program; if not, write to the Free Software Foundation, Inc.,
57059 Temple Place, Suite 330, Boston, MA  02111-1307  USA
571
572=head1 AUTHOR
573
574S<Lionel Bouton E<lt>lionel-dev@bouton.nameE<gt>>
575
576=cut
577