1#!/usr/local/bin/perl
2#****************************************************************************
3#****************************************************************************
4#
5#   AWFFull - A Webalizer Fork, Full o' features
6#
7#   awffull_history_regen_german.pl
8#       Pre-processing an old webalizer install prior to an upgrade
9#       to AWFFull.
10#
11#   Copyright (C) 2005, 2008 by Stephen McInerney (spm@stedee.id.au)
12#   Copyright (C) 2007 by Andreas Schoenberg (asg@ftpproxy.org)
13#
14#   This file is part of AWFFull.
15#
16#   AWFFull is free software: you can redistribute it and/or modify
17#   it under the terms of the GNU General Public License as published by
18#   the Free Software Foundation, either version 3 of the License, or
19#   (at your option) any later version.
20#
21#   AWFFull is distributed in the hope that it will be useful,
22#   but WITHOUT ANY WARRANTY; without even the implied warranty of
23#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24#   GNU General Public License for more details.
25#
26#   You should have received a copy of the GNU General Public License
27#   along with AWFFull.  If not, see <http://www.gnu.org/licenses/>.
28#
29#****************************************************************************
30#****************************************************************************
31#
32#                     awffull_history_regen_german.pl
33#
34# DESCRIPTION
35# --------------
# Given a directory, this script will parse all old webalizer html (per month)
37# files and spit out a complete history file (via STDOUT).
38# This new history file will contain all years/months from all the
39# webalizer html files.
40#
41# Designed for pre-processing an old webalizer install prior to an
42# upgrade to AWFFull.
43#
44#****************************************************************************
45#****************************************************************************
46#  Modification History
47# 11-Sep-2005 steve     Initial Creation
48# 17-Sep-2005 steve     major tidy and functionalise
49# 10-May-2007 asg	Patched to work with german version of webalizer
50#****************************************************************************
51#****************************************************************************
52#
53###  *** Sample text to parse for
54#
55# <TR><TH COLSPAN=3 ALIGN=center BGCOLOR="#C0C0C0">Monats-Statistik f&uuml;r July 2005</TH></TR>
56# <TR><TH HEIGHT=4></TH></TR>
57# <TR><TD WIDTH=380><FONT SIZE="-1">Summe Anfragen</FONT></TD>
58# <TD ALIGN=right COLSPAN=2><FONT SIZE="-1"><B>12217843</B></FONT></TD></TR>
59# <TR><TD WIDTH=380><FONT SIZE="-1">Summe Dateien</FONT></TD>
60# <TD ALIGN=right COLSPAN=2><FONT SIZE="-1"><B>5384438</B></FONT></TD></TR>
61# <TR><TD WIDTH=380><FONT SIZE="-1">Summe Seiten</FONT></TD>
62# <TD ALIGN=right COLSPAN=2><FONT SIZE="-1"><B>1031846</B></FONT></TD></TR>
63# <TR><TD WIDTH=380><FONT SIZE="-1">Summe Besuche</FONT></TD>
64# <TD ALIGN=right COLSPAN=2><FONT SIZE="-1"><B>226836</B></FONT></TD></TR>
65# <TR><TD WIDTH=380><FONT SIZE="-1">Summe kb</FONT></TD>
66# <TD ALIGN=right COLSPAN=2><FONT SIZE="-1"><B>39965939</B></FONT></TD></TR>
67# <TR><TH HEIGHT=4></TH></TR>
68# <TR><TD WIDTH=380><FONT SIZE="-1">Summe unterschiedlicher Rechner (IP-Adressen)</FONT></TD>
69# <TD ALIGN=right COLSPAN=2><FONT SIZE="-1"><B>120135</B></FONT></TD></TR>
70# <TR><TD WIDTH=380><FONT SIZE="-1">Summe unterschiedlicher URLs</FONT></TD>
71#****************************************************************************
72
73use strict;               # die on all bad programming
74use Getopt::Long 2.33;    # Command Line Option Processing
75use Pod::Usage;           # For inline documentation
76
77###########################
78## Global Variables
79###########################
my $DATE        = '/bin/date';   # Default location of the GNU date command
my $exit_status = 0;             # Value handed to exit(); 0 means success

## Options (both can be changed on the command line by ProcessCommandLine)
my $opt_UsageDir    = ".";       # Directory searched for webalizer usage files
my $opt_DateCommand = $DATE;     # Location of the GNU date command (--date)

###########################
###########################
##         MAIN
###########################
###########################

# Read the options first, then build the history and print it to STDOUT.
ProcessCommandLine();

# RegenerateHistory() hands back 2 when no usable usage file was seen.
$exit_status = RegenerateHistory();
print STDERR "Failed to find any Webalizer usage_YYYYMM.html files.\n"
    if $exit_status == 2;

exit($exit_status);
101
102##########################################################################
103##########################################################################
104####                          END OF MAIN
105##########################################################################
106##########################################################################
107
108
109####             SUBROUTINES
110
111##########################################################################
112##########################################################################
113## ProcessCommandLine
114##       Parse the Commandline Arguments
115##########################################################################
sub ProcessCommandLine {
    # Parse the command line into the file-level option variables
    # $opt_UsageDir (--dir / -d) and $opt_DateCommand (--date).
    #
    # Takes no arguments and returns nothing useful.  It does not return at
    # all when:
    #   - the options cannot be parsed, or --help / --man was given
    #     (pod2usage prints the requested level of documentation and exits);
    #   - the configured date command is not an executable file (exit 1).
    my $opt_Help;  # Local options
    my $opt_Man;   #  use for man page, or help screen

    Getopt::Long::Configure("gnu_getopt");    # Configure to use GNU style Options

    GetOptions("dir|d:s" => \$opt_UsageDir,
               "help|\?" => \$opt_Help,
               "man"     => \$opt_Man,
               "date:s"  => \$opt_DateCommand,
              )
        or pod2usage(-verbose => 0);
    if ($opt_Help) { pod2usage(-verbose => 1); }
    if ($opt_Man)  { pod2usage(-verbose => 2); }

    # "--dir" is declared with an optional value (":s"), so a bare "--dir"
    # leaves an empty string behind.  Fall back to the current directory, as
    # when the option is omitted, instead of failing later in opendir.
    if (!defined($opt_UsageDir) || $opt_UsageDir eq "") {
        $opt_UsageDir = ".";
    }

    if (!-x $opt_DateCommand) {
        # STDOUT carries the generated history file, so diagnostics must go
        # to STDERR or they end up inside a redirected history file.
        printf(STDERR "Invalid Date command: %s\n", $opt_DateCommand);
        exit(1);
    }
} ## end sub ProcessCommandLine
138
139
140##########################################################################
141##########################################################################
142## RegenerateHistory
143##      Do the hard work - process the data, generate the output
144##########################################################################
sub RegenerateHistory {
    # Scan $opt_UsageDir for webalizer monthly reports (usage_YYYYMM.html),
    # collect the monthly totals of every report and print one history line
    # per month to STDOUT, ordered by year and then by month.
    #
    # Output line layout (taken from the printf below):
    #   month year hits files sites kbytes 1 days-in-month pages visits
    # The constant 1 is presumably the first day of the month - confirm
    # against the history file format AWFFull expects.
    #
    # Returns 0 when at least one report contained a monthly statistics
    # heading, 2 otherwise.  Dies if the directory or a report cannot be
    # opened.  Reports whose heading cannot be understood are skipped with a
    # warning on STDERR.
    my %History;        # $History{year}{month 1..12}{HITS|FILES|...} = value
    my $nofiles = 2;    # Return the value 2 if no files are found

    my @DinM = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);

    opendir(my $dir_handle, $opt_UsageDir)
        or die "Cannot open directory $opt_UsageDir: $!";
    my @usage_files = grep { /^usage_[0-9]{6}\.html$/ } readdir($dir_handle);
    closedir($dir_handle);

    # Sorted so that the run is deterministic regardless of directory order.
    foreach my $usagefile (sort @usage_files) {
        if (_ParseUsageFile("$opt_UsageDir/$usagefile", \%History)) {
            $nofiles = 0;
        }
    }

    foreach my $key_year (sort numerically (keys %History)) {
        foreach my $key_month (sort numerically (keys %{$History{$key_year}})) {
            my %stat = %{$History{$key_year}{$key_month}};

            # Leap years are computed arithmetically; no external date
            # command is run, so nothing scraped from the HTML reaches a shell.
            my $DaysInMonth = $DinM[$key_month - 1];
            if ($key_month == 2 && _IsLeapYear($key_year)) {
                $DaysInMonth = 29;
            }

            # A statistic missing from the report is written as 0.
            printf("%d %d %d %d %d %d 1 %d %d %d\n",
                   $key_month,        $key_year,
                   $stat{HITS}  || 0, $stat{FILES}  || 0,
                   $stat{SITES} || 0, $stat{KBYTES} || 0,
                   $DaysInMonth,      $stat{PAGES}  || 0,
                   $stat{VISITS} || 0
                  );
        }    ## foreach $key_month
    }    ## foreach $key_year

    return ($nofiles);
} ## end sub RegenerateHistory

# _ParseUsageFile(PATH, HISTORY_REF)
#   Read one webalizer monthly report and store its totals in
#   $HISTORY_REF->{year}{month}{KEY}.  All parser state is local to this call,
#   so nothing carries over from one report to the next.
#   Returns 1 if the monthly statistics heading was seen, 0 otherwise.
#   Dies if PATH cannot be opened.
sub _ParseUsageFile {
    my ($path, $history) = @_;

    # Month names that may appear in the heading.  "Maerz" is the spelling
    # this script has always accepted; the entity-encoded form is accepted in
    # case the report writes the umlaut the way it writes "f&uuml;r".  English
    # names are accepted too, because the sample heading documented at the top
    # of this file reads "July 2005".
    my @month_names = (
        [qw(Januar January)],   [qw(Februar February)], [qw(Maerz M&auml;rz March)],
        [qw(April)],            [qw(Mai May)],          [qw(Juni June)],
        [qw(Juli July)],        [qw(August)],           [qw(September)],
        [qw(Oktober October)],  [qw(November)],         [qw(Dezember December)],
    );
    my %month_number;
    foreach my $index (0 .. $#month_names) {
        foreach my $name (@{$month_names[$index]}) {
            $month_number{$name} = $index + 1;
        }
    }

    # Row label => history key.  The value sits on the line after its label.
    my @stat_labels = (
        [qr/>Summe Anfragen</,                 'HITS'],
        [qr/>Summe Dateien</,                  'FILES'],
        [qr/>Summe Seiten</,                   'PAGES'],
        [qr/>Summe Besuche</,                  'VISITS'],
        [qr/>Summe kb</,                       'KBYTES'],
        [qr/>Summe unterschiedlicher Rechner/, 'SITES'],
    );

    open(my $report, '<', $path) or die "Cannot open file $path: $!";

    my $heading_seen = 0;    # Set once the monthly statistics heading is found
    my $year;                # Year taken from the heading
    my $month;               # Month number (1..12) taken from the heading
    my $pending;             # History key whose value is due on the next line

FILELINE:
    while (my $line = <$report>) {
        if ($line =~ m{>Monats-Statistik f&uuml;r }) {
            $heading_seen = 1;
            my ($month_name, $year_text) =
                $line =~ m{>Monats-Statistik f&uuml;r ([^\s<]+) ([^\s<]+)};
            if (   !defined($month_name)
                || !exists($month_number{$month_name})
                || $year_text !~ /^[0-9]+$/)
            {
                # Guessing a month here would file the data under the wrong
                # entry, so the report is skipped instead.
                warn "$path: cannot understand the month/year heading, file skipped\n";
                last FILELINE;
            }
            $month = $month_number{$month_name};
            $year  = $year_text;
            next FILELINE;
        }

        # Nothing of interest precedes the heading.
        next FILELINE if (!defined($month));

        # Exit this file, end of useful info
        last FILELINE if ($line =~ />Summe unterschiedlicher URLs</);

        if (defined($pending)) {
            # The value is the bold cell: ...<B>12217843</B>...
            if ($line =~ m{<B>\s*([^<]*?)\s*</B>}i) {
                $history->{$year}{$month}{$pending} = $1;
            } else {
                warn "$path: no value found for $pending\n";
            }
            undef $pending;
            next FILELINE;
        }

        foreach my $label (@stat_labels) {
            my ($pattern, $key) = @{$label};
            if ($line =~ $pattern) {
                $pending = $key;
                last;
            }
        }
    }    ## while ($line)
    close($report);

    return $heading_seen;
} ## end sub _ParseUsageFile

# _IsLeapYear(YEAR)
#   Returns 1 if YEAR is a leap year in the Gregorian calendar, 0 otherwise.
sub _IsLeapYear {
    my ($year) = @_;

    return 0 if ($year % 4);      # not divisible by 4
    return 1 if ($year % 100);    # divisible by 4 but not a century year
    return ($year % 400) ? 0 : 1; # century years only when divisible by 400
} ## end sub _IsLeapYear
278
279
280##########################################################################
281##########################################################################
282## numerically
283##      Do a numerical sort
284##########################################################################
sub numerically {
    # Comparison routine for sort(): orders the pair sort() supplies in
    # $a and $b by ascending numeric value.
    return $a <=> $b;
}
286
287
288##########################################################################
289##########################################################################
290##########################################################################
291
292__END__
293
294=pod
295
296=head1 NAME
297
awffull_history_regen_german.pl - Generate a history file from old German-language Webalizer usage files
299
300=head1 SYNOPSIS
301
awffull_history_regen_german.pl [options]
303
304NB! Must have the GNU Date command!
305
306=head1 OPTIONS
307
308=over 8
309
310=item B<--help>
311
312Print a brief help message and exit.
313
314=item B<--man>
315
316Print the manual page and exit.
317
318=item B<--dir directory>
319
320The directory to use, looking for old webalizer usage_YYYYMM.html files. If
321not present will use the current directory.
322
323=item B<--date gnu-date-location>
324
This program requires the GNU date command. Use this option if it is installed in a non-standard place.

=back

=head1 DESCRIPTION
328
329Generate a history file from old Webalizer usage files.
330
331The resulting history file is sent only to STDOUT.
332
333=cut
334
335