#!/usr/local/bin/perl
#****************************************************************************
#****************************************************************************
#
#   AWFFull - A Webalizer Fork, Full o' features
#
#   awffull_history_regen_german.pl
#       Pre-processing an old webalizer install prior to an upgrade
#       to AWFFull.
#
#   Copyright (C) 2005, 2008 by Stephen McInerney (spm@stedee.id.au)
#   Copyright (C) 2007 by Andreas Schoenberg (asg@ftpproxy.org)
#
#   This file is part of AWFFull.
#
#   AWFFull is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   AWFFull is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with AWFFull.  If not, see <http://www.gnu.org/licenses/>.
#
#****************************************************************************
#****************************************************************************
#
#  awffull_history_regen_german.pl
#
#  DESCRIPTION
#  --------------
#  Given a directory, this script will parse all old webalizer html (per month)
#  files and spit out a complete history file (via STDOUT).
#  This new history file will contain all years/months from all the
#  webalizer html files.
#
#  Designed for pre-processing an old webalizer install prior to an
#  upgrade to AWFFull.
#
#****************************************************************************
#****************************************************************************
#  Modification History
#  11-Sep-2005 steve     Initial Creation
#  17-Sep-2005 steve     major tidy and functionalise
#  10-May-2007 asg       Patched to work with german version of webalizer
#****************************************************************************
#****************************************************************************
#
### *** Sample text to parse for
#
# <TR><TH COLSPAN=3 ALIGN=center BGCOLOR="#C0C0C0">Monats-Statistik für July 2005</TH></TR>
# <TR><TH HEIGHT=4></TH></TR>
# <TR><TD WIDTH=380><FONT SIZE="-1">Summe Anfragen</FONT></TD>
# <TD ALIGN=right COLSPAN=2><FONT SIZE="-1"><B>12217843</B></FONT></TD></TR>
# <TR><TD WIDTH=380><FONT SIZE="-1">Summe Dateien</FONT></TD>
# <TD ALIGN=right COLSPAN=2><FONT SIZE="-1"><B>5384438</B></FONT></TD></TR>
# <TR><TD WIDTH=380><FONT SIZE="-1">Summe Seiten</FONT></TD>
# <TD ALIGN=right COLSPAN=2><FONT SIZE="-1"><B>1031846</B></FONT></TD></TR>
# <TR><TD WIDTH=380><FONT SIZE="-1">Summe Besuche</FONT></TD>
# <TD ALIGN=right COLSPAN=2><FONT SIZE="-1"><B>226836</B></FONT></TD></TR>
# <TR><TD WIDTH=380><FONT SIZE="-1">Summe kb</FONT></TD>
# <TD ALIGN=right COLSPAN=2><FONT SIZE="-1"><B>39965939</B></FONT></TD></TR>
# <TR><TH HEIGHT=4></TH></TR>
# <TR><TD WIDTH=380><FONT SIZE="-1">Summe unterschiedlicher Rechner (IP-Adressen)</FONT></TD>
# <TD ALIGN=right COLSPAN=2><FONT SIZE="-1"><B>120135</B></FONT></TD></TR>
# <TR><TD WIDTH=380><FONT SIZE="-1">Summe unterschiedlicher URLs</FONT></TD>
#****************************************************************************

use strict;               # die on all bad programming
use warnings;             # ... and complain about the merely suspicious
use Getopt::Long 2.33;    # Command Line Option Processing
use Pod::Usage;           # For inline documentation

###########################
## Global Variables
###########################
my $DATE = '/bin/date';    # Historic default of --date; the command is no longer executed

## Options
my $opt_UsageDir    = ".";      # Directory to look for webalizer usage files
my $opt_DateCommand = $DATE;    # Accepted for backwards compatibility only, ignored

## Month name (lower case) => month number (1 .. 12).
## The German names are what the German webalizer emits; the English names are
## accepted as well because the sample heading above carries "July".
my %MonthNumber;
{
    my @german  = qw(Januar Februar Maerz April Mai Juni Juli August September Oktober November Dezember);
    my @english = qw(January February March April May June July August September October November December);
    for my $idx (0 .. 11) {
        $MonthNumber{lc $german[$idx]}  = $idx + 1;
        $MonthNumber{lc $english[$idx]} = $idx + 1;
    }

    # "März" spelt with a real umlaut: ISO-8859-1 byte, UTF-8 bytes, HTML entity.
    for my $maerz ("m\xE4rz", "m\xC3\xA4rz", "m&auml;rz") {
        $MonthNumber{$maerz} = 3;
    }
}

## Row labels of the monthly summary table and the statistic each one announces.
## The figure itself follows the label inside <B>...</B>.
my @StatLabels = (
                  [qr/>Summe Anfragen</,                 'HITS'],
                  [qr/>Summe Dateien</,                  'FILES'],
                  [qr/>Summe Seiten</,                   'PAGES'],
                  [qr/>Summe Besuche</,                  'VISITS'],
                  [qr/>Summe kb</,                       'KBYTES'],
                  [qr/>Summe unterschiedlicher Rechner/, 'SITES'],
                 );

###########################
###########################
## MAIN
###########################
###########################

# Only run when executed as a script; loading the file with require/do merely
# defines the subroutines, which keeps them callable from a test harness.
exit(Main()) unless caller;

##########################################################################
##########################################################################
#### END OF MAIN
##########################################################################
##########################################################################


#### SUBROUTINES

##########################################################################
##########################################################################
## Main
##   Parse the command line, regenerate the history, report failure.
##   Returns the exit status of the script: 0 = success, 2 = no usable files.
##########################################################################
sub Main {
    ProcessCommandLine();
    my $exit_status = RegenerateHistory();

    if ($exit_status == 2) {
        printf(STDERR "Failed to find any Webalizer usage_YYYYMM.html files.\n");
    }

    return $exit_status;
} ## end sub Main


##########################################################################
##########################################################################
## ProcessCommandLine
##   Parse the Commandline Arguments into the $opt_* globals.
##   Exits via pod2usage on --help, --man or an invalid option.
##########################################################################
sub ProcessCommandLine {
    my $opt_Help;    # Local options
    my $opt_Man;     #   use for man page, or help screen

    Getopt::Long::Configure("gnu_getopt");    # Configure to use GNU style Options

    GetOptions("dir|d:s" => \$opt_UsageDir,
               "help|\?" => \$opt_Help,
               "man"     => \$opt_Man,
               "date:s"  => \$opt_DateCommand,
              )
      || pod2usage(-verbose => 0);
    if ($opt_Help) { pod2usage(-verbose => 1); }
    if ($opt_Man)  { pod2usage(-verbose => 2); }

    # "dir" takes an optional value, so a bare --dir leaves an empty string behind.
    if (!defined $opt_UsageDir || $opt_UsageDir eq "") {
        $opt_UsageDir = ".";
    }

    # $opt_DateCommand is deliberately neither validated nor used any more:
    # leap years are worked out in DaysInMonth without an external command.
    return;
} ## end sub ProcessCommandLine


##########################################################################
##########################################################################
## DaysInMonth
##   Number of days of a month, honouring Gregorian leap years.
##     $month  1 .. 12
##     $year   four digit year
##########################################################################
sub DaysInMonth {
    my ($month, $year) = @_;
    my @DinM = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31);

    if ($month == 2) {
        my $is_leap = ($year % 4 == 0 && $year % 100 != 0) || $year % 400 == 0;
        return 29 if $is_leap;
    }
    return $DinM[$month - 1];
} ## end sub DaysInMonth


##########################################################################
##########################################################################
## ParseUsageFile
##   Rip the monthly totals out of one webalizer usage_YYYYMM.html file.
##     $path            file to read
##     $fallback_month  month (1 .. 12) to use when the month name in the
##                      heading is not recognised; may be undef
##   Returns ($year, $month, \%stats) where %stats may hold the keys
##   HITS FILES PAGES VISITS KBYTES SITES, or the empty list when the file
##   carries no usable "Monats-Statistik" heading.
##   Dies when the file cannot be opened.
##########################################################################
sub ParseUsageFile {
    my ($path, $fallback_month) = @_;

    my $year;       # Set once the monthly heading has been seen
    my $month;
    my $pending;    # Statistic whose figure we are still waiting for
    my %stats;

    open(my $fh, '<', $path) or die "Cannot open file $path: $!";

  FILELINE:
    while (my $line = <$fh>) {

        # The umlaut of "für" is matched loosely (\S{1,6}?) so that ISO-8859-1,
        # UTF-8 and "&uuml;" encoded pages are all recognised.
        if ($line =~ />Monats-Statistik f\S{1,6}?r\s+([^\s<]+)\s+(\d+)/) {
            my ($month_name, $heading_year) = ($1, $2);
            $year  = $heading_year;
            $month = $MonthNumber{lc $month_name};
            if (!defined $month) {
                if (!defined $fallback_month) {
                    warn "$path: unknown month name '$month_name', file skipped\n";
                    last FILELINE;
                }
                warn "$path: unknown month name '$month_name', using month $fallback_month from the file name\n";
                $month = $fallback_month;
            }
            next FILELINE;
        }
        next FILELINE if !defined $month;

        # Exit this file, end of useful info
        last FILELINE if $line =~ />Summe unterschiedlicher URLs</;

        # A label row announces which statistic the next figure belongs to
      LABEL:
        for my $label (@StatLabels) {
            my ($pattern, $field) = @$label;
            if ($line =~ $pattern) {
                $pending = $field;
                last LABEL;
            }
        }

        # The figure normally sits on the line after its label
        if (defined $pending && $line =~ m{<B>\s*(\d+)\s*</B>}i) {
            $stats{$pending} = $1;
            undef $pending;
        }
    } ## end while (my $line = <$fh>)
    close($fh);

    return if !defined $month;
    return ($year, $month, \%stats);
} ## end sub ParseUsageFile


##########################################################################
##########################################################################
## RegenerateHistory
##   Do the hard work - process the data, generate the output
##     $dir  directory holding the usage_YYYYMM.html files; optional,
##           defaults to the --dir option
##   Prints one history line per month to the currently selected output
##   handle (STDOUT unless changed):
##     month year hits files sites kbytes 1 days-in-month pages visits
##   Returns 0 when at least one monthly heading was found, else 2.
##   Dies when the directory or one of the files cannot be opened.
##########################################################################
sub RegenerateHistory {
    my ($dir) = @_;
    $dir = $opt_UsageDir if !defined $dir;

    my %History;        # The hash holding all the ripped data
    my $nofiles = 2;    # Return the value 2 if no files are found

    opendir(my $dh, $dir) or die "Cannot open directory $dir: $!";
    my @usagefiles = sort grep { /^usage_[0-9]{6}\.html$/ } readdir($dh);
    closedir($dh);

  USAGEFILE:
    for my $usagefile (@usagefiles) {

        # The month encoded in the file name backs up an unreadable heading
        my ($file_month) = $usagefile =~ /^usage_[0-9]{4}([0-9]{2})\.html$/;
        $file_month += 0;
        $file_month = undef if $file_month < 1 || $file_month > 12;

        my @parsed = ParseUsageFile("$dir/$usagefile", $file_month);
        next USAGEFILE if !@parsed;
        my ($year, $month, $stats) = @parsed;

        $nofiles = 0;
        next USAGEFILE if !%$stats;    # Heading only, nothing worth a history line
        $History{$year}{$month} = $stats;
    } ## end for my $usagefile (@usagefiles)

    for my $key_year (sort numerically (keys %History)) {
        for my $key_month (sort numerically (keys %{$History{$key_year}})) {
            my $stats = $History{$key_year}{$key_month};

            # A statistic missing from the page is reported as 0
            my @column = map { defined $stats->{$_} ? $stats->{$_} : 0 } qw(HITS FILES SITES KBYTES PAGES VISITS);

            printf("%d %d %d %d %d %d 1 %d %d %d\n",
                   $key_month, $key_year,
                   @column[0 .. 3],
                   DaysInMonth($key_month, $key_year),
                   @column[4, 5]
                  );
        } ## end for my $key_month
    } ## end for my $key_year

    return ($nofiles);
} ## end sub RegenerateHistory


##########################################################################
##########################################################################
## numerically
##   Do a numerical sort
##########################################################################
sub numerically { $a <=> $b }


##########################################################################
##########################################################################
##########################################################################

1;

__END__

=pod

=head1 NAME

awffull_history_regen_german.pl - Generate a history file from old (German) Webalizer usage files

=head1 SYNOPSIS

awffull_history_regen_german.pl [options]

=head1 OPTIONS

=over 8

=item B<--help>

Print a brief help message and exit.

=item B<--man>

Print the manual page and exit.

=item B<--dir directory>

The directory to use, looking for old webalizer usage_YYYYMM.html files. If
not present will use the current directory.

=item B<--date gnu-date-location>

Accepted for backwards compatibility and ignored. Earlier versions needed the
GNU date command to detect leap years; this is now calculated internally.

=back

=head1 DESCRIPTION

Generate a history file from old Webalizer usage files.

The resulting history file is sent only to STDOUT.

If the month name in a page heading is not recognised, the month is taken
from the usage_YYYYMM.html file name and a warning is sent to STDERR.

=head1 EXIT STATUS

0 on success, 2 if no usage file with a monthly statistics heading was found.

=cut