1#!@PERL@ -w
2#
3# ufdb_analyse_users.pl  -  analyse users from ufdbguardd.log
4#
5# WARNING: analysis of users is slow since large log files are processed.
6#          It may take 50 seconds to process 400 MB on a common server.
7#
8# $Id: ufdb_analyse_users.pl.in,v 1.2 2016/10/12 18:34:57 root Exp root $
9
10use Getopt::Long;
11
# command-line state: result of GetOptions, -help flag, current log file name
my ($dummy, $need_help, $logfilename);
my $debug = 0;

# report tuning values ($report_size can be set with -report-size)
my ($min_count, $report_size) = (2, 20);

# counters for the URLs that were reported
my ($n_blocked, $n_passed) = (0, 0);

# which log actions are analysed: PASS lines by default, BLOCK lines on request
my ($process_passed, $process_blocked) = (1, 0);

# users to report on: the list from the command line and the same names as hash keys
my @users;
my %userhash;
28
29
# parse_logfile( $fn )
#
# Scan one ufdbguardd log file and print a line for every logged request
# of a user that is a key of %userhash.  PASS lines are only considered when
# $process_passed is set, BLOCK lines only when $process_blocked is set.
# Every printed request is also counted in $n_passed or $n_blocked.
#
# Fields of a log line (separated by whitespace) that are used:
#    0 = date, 1 = time, 3 = action, 4 = user, 7 = category, 8 = URL.
#
# $fn is the name of the log file.  The name '-' reads standard input,
# which is what the former two-argument open did for that name.
# Dies when the file cannot be opened.
sub parse_logfile ($)
{
   my $fn = shift;
   my $in;

   print "logfile $fn\n"  if ($debug);

   if ($fn eq '-')
   {
      $in = \*STDIN;
   }
   else
   {
      # three-argument open: the file name is never interpreted as a mode
      open( $in, '<', $fn )  or die "cannot open file \"$fn\": $!";
   }

   # a lexical line variable keeps the caller's $_ intact
   while (my $line = <$in>)
   {
      chomp $line;
      my @terms = split ' ', $line;

      # a line without an action and a user field cannot be matched
      next  if (@terms < 5);

      my $laction = $terms[3];
      next  unless (($laction eq 'PASS'  && $process_passed)  ||
                    ($laction eq 'BLOCK' && $process_blocked));

      my $luser = $terms[4];
      $luser = 'anonymous'  if ($luser eq '-');
      next  unless ($userhash{$luser});

      $n_blocked++  if ($laction eq 'BLOCK');
      $n_passed++   if ($laction eq 'PASS');

      # a truncated line may lack the category and the URL: print them as empty
      my $lcategory = defined( $terms[7] ) ? $terms[7] : '';
      my $lurl      = defined( $terms[8] ) ? $terms[8] : '';
      $lurl =~ s,^http://,,;
      # replace everything from the first '&' or '?' by '....'
      $lurl =~ s,[&\?].*,....,;
      printf "%s %s  %-14s  %-14s  %-5s  %s\n", $terms[0], $terms[1], $luser, $lcategory, $laction, $lurl;
   }

   close $in  unless ($fn eq '-');
}
72
73
# print_analysis()
#
# Print the closing summary: a separator line, the number of reported URLs
# (total, blocked and passed) and a remark that states whether passed URLs,
# blocked URLs or both were processed.
sub print_analysis ()
{
   my $total = $n_blocked + $n_passed;

   # select the remark first; it stays undefined when neither flag is set
   my $basis;
   if ($process_passed)
   {
      $basis = $process_blocked ? "The list is based on blocked and passed URLs.\n"
                                : "The list is based on only passed URLs.\n";
   }
   elsif ($process_blocked)
   {
      $basis = "The list is based on only blocked URLs.\n";
   }

   printf "------------------------------------------------------------------------------\n";
   printf "%d URLs: %d blocked, %d passed.\n", $total, $n_blocked, $n_passed;
   printf "%s", $basis  if (defined( $basis ));
}
93
94
# Main program: parse the command line, validate it, process every log file
# that is named on the command line and print the summary.
#
# Exit status: 0 = success, 1 = usage was printed (help requested, invalid
# option or no log file given), 2 = inconsistent or missing options.

# $dummy is false when GetOptions found an invalid option.
$dummy = GetOptions(
                'help|?'         => \$need_help,
                "debug"          => \$debug,
                "user=s"         => \@users,
                "report-size=i"  => \$report_size,
                "count-passed!"  => \$process_passed,
                "count-blocked!" => \$process_blocked );
# allow both comma-separated list of users and multiple occurrences of -user option
@users = split( /,/, join( ',', @users ) );

if ($process_passed == 0  &&  $process_blocked == 0)
{
   print "error: no-count-passed and no-count-blocked options imply that not one URL will be counted.\n";
   exit 2;
}

# GetOptions has already reported an invalid option itself; do not continue
# with a half-parsed command line but show the usage instead.
if (!$dummy  ||  $need_help  ||  !defined( $ARGV[0] ))
{
   print "usage: ufdb_analyse_users.pl -user <userid> [-[no-]count-passed] [-[no-]count-blocked] [-report-size=N] [-debug] <logfiles>\n";
   print "defaults: count-passed, no-count-blocked, report-size=20\n";
   exit 1;
}

if (!@users)
{
   print "error: must use at least one -user option.\n";
   exit 2;
}

# put all users in a hash for fast lookup.
foreach my $u (@users)
{
   $userhash{$u} = 1;
}

foreach $logfilename (@ARGV)
{
   parse_logfile( $logfilename );
}
print_analysis();

exit 0;
137