#!/usr/local/bin/perl
#-----------------------------------------------------------------------------
# Allows you to get one unique output log file, sorted on date,
# built from particular sources.
# This tool is part of AWStats log analyzer but can be used
# alone for any other log analyzer.
# See COPYING.TXT file about AWStats GNU General Public License.
#
# Overview of the processing done by this script:
#   1. Command line arguments are parsed into options and a list of input
#      files (wildcards, folders, compressed files and "command |" pipes).
#   2. Every input is opened; the next dated record of each one is kept in
#      %linerecord/%timerecord and ordered in @timerecordorder.
#   3. The oldest pending record is chosen, optionally sent to a reverse DNS
#      lookup, queued, and written to standard output as soon as it is ready.
#-----------------------------------------------------------------------------

use strict; no strict "refs";    # Symbolic refs are needed for the "LOG<n>" file handles
#use diagnostics;
use POSIX qw( strftime );


#-----------------------------------------------------------------------------
# Defines
#-----------------------------------------------------------------------------

# ENABLETHREAD --> COMMENT THIS BLOCK TO USE A THREADED VERSION
my $UseThread=0;
&Check_Thread_Use();
my $NbOfDNSLookupAsked = 0;
my %threadarray = ();
my %MyDNSTable = ();
my %TmpDNSLookup = ();

# ENABLETHREAD --> UNCOMMENT THIS BLOCK TO USE A THREADED VERSION
#my $UseThread=1;
#&Check_Thread_Use();
#my $NbOfDNSLookupAsked : shared = 0;
#my %threadarray : shared = ();
#my %MyDNSTable : shared = ();
#my %TmpDNSLookup : shared = ();


# ---------- Init variables --------
use vars qw/ $REVISION $VERSION /;
$REVISION = '20140126';
$VERSION="1.2 (build $REVISION)";

use vars qw/ $NBOFLINESFORBENCHMARK /;
$NBOFLINESFORBENCHMARK=8192;

use vars qw/
$DIR $PROG $Extension
$Debug $ShowSteps $AddFileNum $AddFileName $LastLogNum $PrintFields
$MaxNbOfThread $DNSLookup $DNSCache $DirCgi $DirData $DNSLookupAlreadyDone
$NbOfLinesShowsteps $AFINET $QueueCursor $StopOnFirstEof $IgnoreMissing
/;
$DIR='';
$PROG='';
$Extension='';
$Debug=0;
$ShowSteps=0;
$AddFileNum=0;
$AddFileName=0;
$LastLogNum=0;
$PrintFields=0;
$MaxNbOfThread=0;
$DNSLookup=0;
$DNSCache='';
$DirCgi='';
$DirData='';
$DNSLookupAlreadyDone=0;
$NbOfLinesShowsteps=0;
$AFINET='';
$StopOnFirstEof=0;
$IgnoreMissing=0;

# ---------- Init arrays --------
use vars qw/
@SkipDNSLookupFor
@ParamFile
@Fields
/;
# ---------- Init hash arrays --------
use vars qw/
%LogFileToDo %linerecord %timerecord %corrupted
%QueueHostsToResolve %QueueRecords
/;
%LogFileToDo = %linerecord = %timerecord = %corrupted = ();
%QueueHostsToResolve = %QueueRecords = ();

# DRA2: the order of timerecords are kept here, each index in the array is the filerecordnumber, which
# DRA2: is used as the key for the other hashes
use vars qw/
@timerecordorder
/;
@timerecordorder = ();

# ---------- External Program variables ----------
# For gzip compression
my $zcat = 'gzip -cd';
my $zcat_file = '\.gz$';
# For bz2 compression
my $bzcat = 'bzip2 -cd';
my $bzcat_file = '\.bz2$';
# For xz compression
my $xzcat = 'xz -cd';
my $xzcat_file = '\.xz$';


#-----------------------------------------------------------------------------
# Functions
#-----------------------------------------------------------------------------

#------------------------------------------------------------------------------
# Function:     Add all plain files of a specific directory to a list
# Parameters:   $dir   Directory path to scan (not recursive)
#               @list  Existing list the files found are appended to
# Input:        None
# Output:       None
# Return:       Array with list of files ("$dir/<name>" for each plain file)
# Dies if the directory can not be opened.
#------------------------------------------------------------------------------
sub addDirectory {
    my ($dir,@list) = @_;
    my $dirH;
    opendir($dirH, $dir) || die ("Can't open '$dir'");
    # Explicit defined() test so an entry named "0" does not end the scan,
    # and a lexical variable so the caller's $_ is left untouched.
    while (defined(my $entry = readdir($dirH))) {
        if (-f "$dir/$entry") {
            push @list, "$dir/$entry";
        }
    }
    closedir($dirH);
    return @list;
}

#------------------------------------------------------------------------------
# Function:     Write an error message on STDERR and exit with status 1
# Parameters:   $message
# Input:        None
# Output:       None
# Return:       None (never returns)
#------------------------------------------------------------------------------
sub error {
    print STDERR "Error: $_[0].\n";
    exit 1;
}

#------------------------------------------------------------------------------
# Function:     Write a debug message on standard output
# Parameters:   $message
#               $level   Minimum value of $Debug required to print (default 1)
# Input:        $Debug
# Output:       None
# Return:       None
#------------------------------------------------------------------------------
sub debug {
    my $level = $_[1] || 1;
    if ($Debug >= $level) {
        my $debugstring = $_[0];
        print "DEBUG $level - ".localtime(time())." : $debugstring\n";
    }
}

#------------------------------------------------------------------------------
# Function:     Write a warning message on STDERR (and in debug output if
#               debug is enabled)
# Parameters:   $message
# Input:        $Debug
# Output:       None
# Return:       None
#------------------------------------------------------------------------------
sub warning {
    my $messagestring=shift;
    if ($Debug) { debug("$messagestring",1); }
    print STDERR "$messagestring\n";
}

#-----------------------------------------------------------------------------
# Function:     Return 1 if string contains only ascii chars
# Input:        String
# Return:       0 or 1
#-----------------------------------------------------------------------------
sub IsAscii {
    my $string=shift;
    if ($Debug) { debug("IsAscii($string)",5); }
    if ($string =~ /^[\w\+\-\/\\\.%,;:=\"\'&?!\s]+$/) {
        if ($Debug) { debug(" Yes",5); }
        return 1;    # Only alphanum chars (and _) or + - / \ . % , ; : = " ' & ? ! space \t
    }
    if ($Debug) { debug(" No",5); }
    return 0;
}

#-----------------------------------------------------------------------------
# DRA Function: Return 1 if DNS lookup should be skipped
# Input:        String (host), @SkipDNSLookupFor (list of regex patterns)
# Return:       0 or 1
#-----------------------------------------------------------------------------
sub SkipDNSLookup {
    foreach my $match (@SkipDNSLookupFor) { if ($_[0] =~ /$match/i) { return 1; } }
    0;    # Not in @SkipDNSLookupFor
}

#-----------------------------------------------------------------------------
# Function:     Function that wait for DNS lookup (can be threaded)
# Input:        String (IPv4 address in dotted notation)
# Output:       $TmpDNSLookup{$ipaddress} is set to the resolved name, or to
#               '*' when the lookup failed, returned an IP address or a name
#               with non ascii chars. $threadarray{$ipaddress} is removed.
# Return:       None
#-----------------------------------------------------------------------------
sub MakeDNSLookup {
    my $ipaddress=shift;
    $NbOfDNSLookupAsked++;
    use Socket; $AFINET=AF_INET;
    my $tid=0;
    # String eval because the 'threads' module is only loaded in threaded mode
    $tid=$MaxNbOfThread?eval("threads->self->tid()"):0;
    if ($Debug) { debug(" ***** Thread id $tid: MakeDNSlookup started (for $ipaddress)",4); }
    my $lookupresult=gethostbyaddr(pack("C4",split(/\./,$ipaddress)),$AFINET);    # This is very slow, may took 20 seconds
    if (! $lookupresult || $lookupresult =~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/ || ! IsAscii($lookupresult)) {
        $TmpDNSLookup{$ipaddress}='*';
    }
    else {
        $TmpDNSLookup{$ipaddress}=$lookupresult;
    }
    if ($Debug) { debug(" ***** Thread id $tid: MakeDNSlookup done ($ipaddress resolved into $TmpDNSLookup{$ipaddress})",4); }
    delete $threadarray{$ipaddress};
    return;
}

#-----------------------------------------------------------------------------
# Function:     WriteRecordsReadyInQueue
#               Print, in queue order, every record at the head of the queue
#               whose host needs no lookup ('*') or is already resolved.
#               Stops at the first record still waiting for its lookup.
# Parameters:   $logfilechosen  Number of the log file of the current record
#                               (used for -addfilenum/-addfilename/-printfields)
# Input:        %QueueRecords %QueueHostsToResolve $QueueCursor
#               %MyDNSTable %TmpDNSLookup
# Return:       0
# NOTE(review): the file number is the one of the record being processed now,
# not the one stored with each queued record, so with threaded lookups a
# delayed record may be labelled with another file's number - confirm.
#-----------------------------------------------------------------------------
sub WriteRecordsReadyInQueue {
    my $logfilechosen=shift;
    if ($Debug) { debug("Check head of queue to write records ready to flush (QueueCursor=$QueueCursor, QueueSize=".(scalar keys %QueueRecords).")",4); }
    while (1) {
        my $host=$QueueHostsToResolve{$QueueCursor};
        # Queue is empty
        last if ! $host;
        # Head of queue is still waiting for its DNS lookup
        last if ($host ne '*' && ! $MyDNSTable{$host} && ! $TmpDNSLookup{$host});

        # $QueueCursor point to a ready record
        if ($host eq '*') {
            if ($Debug) { debug(" First elem in queue is ready. No change on it. We pull it.",4); }
        }
        elsif ($MyDNSTable{$host}) {
            if ($MyDNSTable{$host} ne '*') {
                # \Q..\E : the IP must be matched literally (dots are not wildcards)
                $QueueRecords{$QueueCursor}=~s/\Q$host\E/$MyDNSTable{$host}/;
                if ($Debug) { debug(" First elem in queue has been resolved (found in MyDNSTable $MyDNSTable{$host}). We pull it.",4); }
            }
        }
        elsif ($TmpDNSLookup{$host}) {
            if ($TmpDNSLookup{$host} ne '*') {
                $QueueRecords{$QueueCursor}=~s/\Q$host\E/$TmpDNSLookup{$host}/;
                if ($Debug) { debug(" First elem in queue has been resolved (found in TmpDNSLookup $TmpDNSLookup{$host}). We pull it.",4); }
            }
        }

        # Record is ready, we output it.
        if ($AddFileNum) { print "$logfilechosen "; }
        if ($AddFileName) { print "$LogFileToDo{$logfilechosen} "; }
        # see if we need to dump fields
        if ($PrintFields && $LastLogNum != $logfilechosen){
            print($Fields[$logfilechosen]."\n");
            $LastLogNum = $logfilechosen;
        }
        print "$QueueRecords{$QueueCursor}\n";
        delete $QueueRecords{$QueueCursor};
        delete $QueueHostsToResolve{$QueueCursor};
        $QueueCursor++;
    }
    return 0;
}

#-----------------------------------------------------------------------------
# Function:     Check if thread are enabled or not
#               If a "-dnslookup=n" (or "-dnslookup:n") option is found on the
#               command line, load the 'threads' modules when $UseThread is
#               set, else stop with an error explaining how to enable threads.
# Input:        @ARGV $UseThread
# Return:       -
#-----------------------------------------------------------------------------
sub Check_Thread_Use {
    if ($] >= 5.008) {
        for (0..@ARGV-1) {
            if ($ARGV[$_] =~ /^-dnslookup[:=](\d{1,2})/i) {
                if ($UseThread) {
                    if (!eval ('require "threads.pm";')) { &error("Failed to load perl module 'threads' required for multi-threaded DNS lookup".($@?": $@":"")); }
                    if (!eval ('require "threads/shared.pm";')) { &error("Failed to load perl module 'threads::shared' required for multi-threaded DNS lookup".($@?": $@":"")); }
                }
                else { &error("Multi-thread is disabled in default version of this script.\nYou must manually edit the file '$0' to comment/uncomment all\nlines marked with 'ENABLETHREAD' string to enable multi-threading"); }
            }
        }
    }
}


#-----------------------------------------------------------------------------
# MAIN
#-----------------------------------------------------------------------------
($DIR=$0) =~ s/([^\/\\]*)$//; ($PROG=$1) =~ s/\.([^\.]*)$//; $Extension=$1;

# Get parameters (Note: $MaxNbOfThread is already known)
my $cpt=1;
for (0..@ARGV-1) {
    if ($ARGV[$_] =~ /^-/) {
        if ($ARGV[$_] =~ /debug=(\d)/i) { $Debug=$1; }
        # The file name is taken from the capture so the option name is removed
        # whatever its case is (the match itself is case insensitive)
        elsif ($ARGV[$_] =~ /dnscache=(.*)$/i) { $DNSLookup||=2; $DNSCache=$1; }
        elsif ($ARGV[$_] =~ /dnslookup[:=](\d{1,2})/i) { $DNSLookup||=1; $MaxNbOfThread=$1; }
        elsif ($ARGV[$_] =~ /dnslookup/i) { $DNSLookup||=1; }
        elsif ($ARGV[$_] =~ /showsteps/i) { $ShowSteps=1; }
        elsif ($ARGV[$_] =~ /addfilenum/i) { $AddFileNum=1; }
        elsif ($ARGV[$_] =~ /addfilename/i) { $AddFileName=1; }
        elsif ($ARGV[$_] =~ /stoponfirsteof/i) { $StopOnFirstEof=1; }
        elsif ($ARGV[$_] =~ /printfields/i) { $PrintFields=1; }
        elsif ($ARGV[$_] =~ /ignoremissing/i) { $IgnoreMissing=1; }
        else { print "Unknown argument $ARGV[$_] ignored\n"; }
    }
    elsif ($ARGV[$_] =~ /addfolder=(.*)$/i) {
        @ParamFile = addDirectory($1, @ParamFile);
    }
    else {
        push @ParamFile, $ARGV[$_];
        $cpt++;
    }
}
if ($Debug) { $|=1; }

if ($Debug) {
    debug(ucfirst($PROG)." - $VERSION - Perl $^X $]",1);
    debug("DNSLookup=$DNSLookup");
    debug("DNSCache=$DNSCache");
    debug("MaxNbOfThread=$MaxNbOfThread");
}

# Disallow MaxNbOfThread and Perl < 5.8
if ($] < 5.008 && $MaxNbOfThread) {
    error("Multi-threaded DNS lookup is only supported with Perl 5.8 or higher (not $]). Use -dnslookup option instead");
}

# Warning, there is a memory hole in ActiveState perl version (in delete functions)
if ($^X =~ /activestate/i || $^X =~ /activeperl/i) {
    # TODO Add a warning

}

if (scalar @ParamFile == 0) {
    print "----- $PROG $VERSION (c) Laurent Destailleur -----\n";
    print "$PROG allows you to get one unique output log file, sorted on date,\n";
    print "built from particular sources:\n";
    print " - It can read several input log files,\n";
    print " - It can read .gz/.bz2/.xz log files,\n";
    print " - It can also makes a fast reverse DNS lookup to replace\n";
    print " all IP addresses into host names in resulting log file.\n";
    print "$PROG comes with ABSOLUTELY NO WARRANTY. It's a free software\n";
    print "distributed with a GNU General Public License (See COPYING.txt file).\n";
    print "$PROG is part of AWStats but can be used alone as a log merger\n";
    print "or resolver before using any other log analyzer.\n";
    print "\n";
    print "Usage:\n";
    print " $PROG.$Extension [options] file\n";
    print " $PROG.$Extension [options] file1 ... filen\n";
    print " $PROG.$Extension [options] *.*\n";
    print " $PROG.$Extension [options] addfolder=dirname\n";
    print " perl $PROG.$Extension [options] *.* > newfile\n";
    print "Options:\n";
    print " -dnslookup make a reverse DNS lookup on IP adresses\n";
    print " -dnslookup=n same with a n parallel threads instead of serial requests\n";
    print " -dnscache=file make DNS lookup from cache file first before network lookup\n";
    print " -showsteps print on stderr benchmark information every $NBOFLINESFORBENCHMARK lines\n";
    print " -addfilenum if used with several files, file number can be added in first\n";
    print " -addfilename if used with several files, file name can be added in first\n";
    print " field of output file. This can be used to add a cluster id\n";
    print " when log files come from several load balanced computers.\n";
    print " -stoponfirsteof Stop processing when any logfile reaches end-of-file.\n";
    print " -printfields For IIS or W3C logs, prints the latest field header for\n";
    print " the currentlog file when switching between log file entries\n";
    print " so that the parsercan automatically determine which fields\n";
    print " are avaiable.\n";
    print " -ignoremissing will not fail if a log file is missing\n";
    print "\n";

    print "This runs $PROG in command line to open one or several\n";
    print "server log files to merge them (sorted on date) and/or to make a reverse\n";
    print "DNS lookup (if asked). The result log file is sent on standard output.\n";
    print "Note: $PROG is not a 'sort' tool to sort one file. It's a\n";
    print "software able to output sorted log records (with a reverse DNS lookup\n";
    print "included or not) even if log records are dispatched in several files.\n";
    print "Each of thoose files must be already independently sorted itself\n";
    print "(but that is the case in all web server log files). So you can use it\n";
    print "for load balanced log files or to group several old log files.\n";
    print "\n";
    print "Don't forget that the main goal of logresolvemerge is to send log records to\n";
    print "a log analyzer in a sorted order without merging files on disk (NO NEED\n";
    print "OF DISK SPACE AT ALL) and without loading files into memory (NO NEED\n";
    print "OF MORE MEMORY). Choose of output records is done on the fly.\n";
    print "\n";
    print "So logresolvemerge is particularly useful when you want to output several\n";
    print "and/or large log files in a fast process, with no use of disk or\n";
    print "more memory, and in a chronological order through a pipe (to be used by a log\n";
    print "analyzer).\n";
    print "\n";
    print "Note: If input records are not 'exactly' sorted but 'nearly' sorted (this\n";
    print "occurs with heavy servers), this is not a problem, the output will also\n";
    print "be 'nearly' sorted but a few log analyzers (like AWStats) knowns how to deal\n";
    print "with such logs.\n";
    print "\n";
    print "WARNING: If log files are old MAC text files (lines ended with CR char), you\n";
    print "can't run this tool on Win or Unix platforms.\n";
    print "\n";
    print "WARNING: Because of memory holes in ActiveState Perl version, use another\n";
    print "Perl interpreter if you need to process large log files.\n";
    print "\n";
    print "Now supports/detects:\n";
    print " Automatic detection of log format\n";
    print " Files can be .gz/.bz2/.xz files if gzip/bzip2/xz tools are available in PATH.\n";
    print " Multithreaded reverse DNS lookup (several parallel requests) with Perl 5.8+.\n";
    print "New versions and FAQ at http://www.awstats.org\n";
    exit 0;
}

# Get current time
my $nowtime=time;
my ($nowsec,$nowmin,$nowhour,$nowday,$nowmonth,$nowyear) = localtime($nowtime);
if ($nowyear < 100) { $nowyear+=2000; } else { $nowyear+=1900; }
my $nowsmallyear=$nowyear;$nowsmallyear =~ s/^..//;
if (++$nowmonth < 10) { $nowmonth = "0$nowmonth"; }
if ($nowday < 10) { $nowday = "0$nowday"; }
if ($nowhour < 10) { $nowhour = "0$nowhour"; }
if ($nowmin < 10) { $nowmin = "0$nowmin"; }
if ($nowsec < 10) { $nowsec = "0$nowsec"; }
# Get tomorrow time (will be used to discard some record with corrupted date (future date))
my ($tomorrowsec,$tomorrowmin,$tomorrowhour,$tomorrowday,$tomorrowmonth,$tomorrowyear) = localtime($nowtime+86400);
if ($tomorrowyear < 100) { $tomorrowyear+=2000; } else { $tomorrowyear+=1900; }
my $tomorrowsmallyear=$tomorrowyear;$tomorrowsmallyear =~ s/^..//;
if (++$tomorrowmonth < 10) { $tomorrowmonth = "0$tomorrowmonth"; }
if ($tomorrowday < 10) { $tomorrowday = "0$tomorrowday"; }
if ($tomorrowhour < 10) { $tomorrowhour = "0$tomorrowhour"; }
if ($tomorrowmin < 10) { $tomorrowmin = "0$tomorrowmin"; }
if ($tomorrowsec < 10) { $tomorrowsec = "0$tomorrowsec"; }
my $timetomorrow=$tomorrowyear.$tomorrowmonth.$tomorrowday.$tomorrowhour.$tomorrowmin.$tomorrowsec;

# Init other parameters
$NBOFLINESFORBENCHMARK--;    # 8192 -> 8191, used as a bit mask by -showsteps
if ($ENV{"GATEWAY_INTERFACE"}) { $DirCgi=''; }
if ($DirCgi && !($DirCgi =~ /\/$/) && !($DirCgi =~ /\\$/)) { $DirCgi .= '/'; }
if (! $DirData || $DirData eq '.') { $DirData=$DIR; }    # If not defined or chosen as "." value then DirData is current dir
if (! $DirData) { $DirData='.'; }                        # If current dir not defined then we put it to "."
$DirData =~ s/\/$//;

#my %monthlib = ( "01","$Message[60]","02","$Message[61]","03","$Message[62]","04","$Message[63]","05","$Message[64]","06","$Message[65]","07","$Message[66]","08","$Message[67]","09","$Message[68]","10","$Message[69]","11","$Message[70]","12","$Message[71]" );
# monthnum must be in english because it's used to translate log date in apache log files which are always in english
my %monthnum = (
    "Jan","01","jan","01","Feb","02","feb","02","Mar","03","mar","03",
    "Apr","04","apr","04","May","05","may","05","Jun","06","jun","06",
    "Jul","07","jul","07","Aug","08","aug","08","Sep","09","sep","09",
    "Oct","10","oct","10","Nov","11","nov","11","Dec","12","dec","12"
);

# Load the static DNS cache file ("time ip name" on each line)
if ($DNSCache) {
    if ($Debug) { debug("Load DNS Cache file $DNSCache",2); }
    # 3-arg open so the file name can never be read as an open mode or a pipe
    open(my $cachefh, '<', $DNSCache) or error("Can't open cache file $DNSCache");
    while (<$cachefh>) {
        my ($time, $ip, $name) = split;
        if ($ip && $name) {
            $name="$ip" if $name eq '*';
            $MyDNSTable{$ip}=$name;
        }
    }
    close $cachefh;
}

#-----------------------------------------------------------------------------
# PROCESSING CURRENT LOG(s)
#-----------------------------------------------------------------------------
my $NbOfLinesRead=0;
my $NbOfLinesParsed=0;
my $logfilechosen=0;
my $starttime=time();

# Define the LogFileToDo list
$cpt=1;
foreach my $key (0..(@ParamFile-1)) {
    if (($ParamFile[$key] !~ /\*/ && $ParamFile[$key] !~ /\?/) || $ParamFile[$key] =~ /\|$/) {

        # Plain file name or explicit "command |" pipe
        if ($Debug) { debug("DBG1 Log file $ParamFile[$key] is added to LogFileToDo with number $cpt."); }
        # Check for supported compression
        if ($ParamFile[$key] =~ /$zcat_file/) {
            if ($Debug) { debug("GZIP compression detected for Log file $ParamFile[$key]."); }
            # Modify the name to include the zcat command
            $ParamFile[$key] = $zcat . ' ' . $ParamFile[$key] . ' |';
        }
        elsif ($ParamFile[$key] =~ /$bzcat_file/) {
            if ($Debug) { debug("BZ2 compression detected for Log file $ParamFile[$key]."); }
            # Modify the name to include the bzcat command
            $ParamFile[$key] = $bzcat . ' ' . $ParamFile[$key] . ' |';
        }
        elsif ($ParamFile[$key] =~ /$xzcat_file/) {
            if ($Debug) { debug("XZ compression detected for Log file $ParamFile[$key]."); }
            # Modify the name to include the xzcat command
            $ParamFile[$key] = $xzcat . ' ' . $ParamFile[$key] . ' |';
        }

        $LogFileToDo{$cpt}=$ParamFile[$key];
        $cpt++;

    }
    else {
        # Name with wildcards: turned into a regex and matched against directory content
        my $DirFile=$ParamFile[$key]; $DirFile =~ s/([^\/\\]*)$//;
        $ParamFile[$key] = $1;
        if ($DirFile eq '') { $DirFile = '.'; }
        $ParamFile[$key] =~ s/\./\\\./g;
        $ParamFile[$key] =~ s/\*/\.\*/g;
        $ParamFile[$key] =~ s/\?/\./g;
        if ($Debug) { debug("Search for file \"$ParamFile[$key]\" into \"$DirFile\""); }
        my $dirhandle;
        if (! opendir($dirhandle,"$DirFile")) {
            # Nothing can match in a directory we can not read. Report it and go on
            # with the other parameters (a later check stops if no file was found).
            warning("Warning: Can't open directory \"$DirFile\" to search for log files: $!");
            next;
        }
        my @filearray = sort readdir $dirhandle;
        closedir($dirhandle);
        foreach my $i (0..$#filearray) {
            if ("$filearray[$i]" =~ /^$ParamFile[$key]$/ && "$filearray[$i]" ne "." && "$filearray[$i]" ne "..") {

                if ($Debug) { debug("DBG2 Log file $filearray[$i] is added to LogFileToDo with number $cpt."); }
                # Check for supported compression
                if ($filearray[$i] =~ /$zcat_file/) {
                    if ($Debug) { debug("GZIP compression detected for Log file $filearray[$i]."); }
                    # Modify the name to include the zcat command
                    $LogFileToDo{$cpt}=$zcat . ' ' . "$DirFile/$filearray[$i]" . ' |';
                }
                elsif ($filearray[$i] =~ /$bzcat_file/) {
                    if ($Debug) { debug("BZ2 compression detected for Log file $filearray[$i]."); }
                    # Modify the name to include the bzcat command
                    $LogFileToDo{$cpt}=$bzcat . ' ' . "$DirFile/$filearray[$i]" . ' |';
                }
                elsif ($filearray[$i] =~ /$xzcat_file/) {
                    if ($Debug) { debug("XZ compression detected for Log file $filearray[$i]."); }
                    # Modify the name to include the xzcat command
                    $LogFileToDo{$cpt}=$xzcat . ' ' . "$DirFile/$filearray[$i]" . ' |';
                }
                else {
                    $LogFileToDo{$cpt}="$DirFile/$filearray[$i]";
                }
                $cpt++;

            }
        }
    }
}

# If no files to process
if (scalar keys %LogFileToDo == 0) {
    error("No input log file found");
}

# Open all log files
# NOTE: 2-arg open is used on purpose here, entries of %LogFileToDo can be
# "command |" pipes (decompression or user supplied). As a consequence file
# names are seen by the shell: do not use this tool on untrusted file names.
if ($Debug) { debug("Start of processing ".(scalar keys %LogFileToDo)." log file(s), $MaxNbOfThread threads max"); }
foreach my $logfilenb (keys %LogFileToDo) {
    if ($Debug) { debug("Open log file number $logfilenb: \"$LogFileToDo{$logfilenb}\""); }
    if ($IgnoreMissing){
        if (!open("LOG$logfilenb","$LogFileToDo{$logfilenb}")){
            debug("Couldn't open log file \"$LogFileToDo{$logfilenb}\" : $!");
            delete $LogFileToDo{$logfilenb};
            next;    # Handle was not opened, nothing to binmode
        }
    }else{
        open("LOG$logfilenb","$LogFileToDo{$logfilenb}") || error("Couldn't open log file \"$LogFileToDo{$logfilenb}\" : $!");
    }
    binmode "LOG$logfilenb";    # To avoid pb of corrupted text log files with binary chars.
}

$QueueCursor=1;
STOPONFIRSTEOF: while (1 == 1)
{
    # BEGIN Read new record
    # For each log file if logfilechosen is 0
    # If not, we go directly to log file instead of iterating over all keys for a match
    #----------------------------------------------------------------------------------
    my @readlist;
    if($logfilechosen == 0) {
        @readlist = keys %LogFileToDo;
    } else {
        @readlist = ($logfilechosen);
    }
    foreach my $logfilenb (@readlist)
    {
        if ($Debug) { debug("Search next record in file number $logfilenb",3); }
        # Read chosen log file until we found a record with good date or reaching end of file
        while (1 == 1) {
            my $LOG="LOG$logfilenb";
            $_=<$LOG>;    # Read new line
            if (! defined $_)
            {    # No more records in log file number $logfilenb
                if ($Debug) { debug(" No more records in file number $logfilenb",2); }
                delete $LogFileToDo{$logfilenb};
                if ($StopOnFirstEof)
                {
                    if ($Debug) { debug("Exiting loop due to EOF of logfile $logfilenb",1); }
                    last STOPONFIRSTEOF;
                }
                last;
            }

            # Get the latest Fields header for printing IIS and W3C logs
            if ($PrintFields && $_ =~ m/#Fields:/){
                my $field = $_;
                # strip whitespace
                $field =~ s/^\s+|\s+$//g;
                # String comparison: headers are text, a numeric compare is always "equal"
                if (!$Fields[$logfilenb] || $field ne $Fields[$logfilenb]){
                    $Fields[$logfilenb] = $field;
                    debug("Found new fields in $logfilenb: $Fields[$logfilenb]");
                }
            }

            $NbOfLinesRead++;
            chomp $_; s/\r$//;

            if (/^#/) { next; }     # Ignore comment lines (IIS writes such comments)
            if (/^!!/) { next; }    # Ignore comment lines (Webstar writes such comments)
            if (/^$/) { next; }     # Ignore blank lines (With IIS: happens sometimes, with Apache: possible when editing log file)

            $linerecord{$logfilenb}=$_;

            # Check filters
            #----------------------------------------------------------------------

            # Split YYYY-MM-DD HH:MM:SS
            # or DD/Month/YYYY:HH:MM:SS
            # or MM/DD/YY\tHH:MM:SS
            # or 9999.999
            # or Month DD HH:MM:SS
            my $year=0; my $month=0; my $day=0; my $hour=0; my $minute=0; my $second=0;
            if ($_ =~ /(\d\d\d\d)-(\d\d)-(\d\d)\s(\d\d):(\d\d):(\d\d)/) { $year=$1; $month=$2; $day=$3; $hour=$4; $minute=$5; $second=$6; }
            elsif ($_ =~ /\[(\d?\d)[\/:\s](\w+)[\/:\s](\d\d\d\d)[\/:\s](\d\d)[\/:\s](\d\d)[\/:\s](\d\d) /) { $year=$3; $month=$2; $day=$1; $hour=$4; $minute=$5; $second=$6; }
            elsif ($_ =~ /\w+ (\w+) {1,2}(\d?\d) (\d\d)[\/:\s](\d\d)[\/:\s](\d\d) (\d\d\d\d)/) { $year=$6; $month=$1; $day=$2; $hour=$3; $minute=$4; $second=$5; }
            elsif ($_ =~ /^(\d\d\d\d+\.\d\d\d) /)
            {
                # Epoch time with milliseconds
                my $timetime = strftime('%Y-%m-%d-%T', gmtime($1));
                $timetime =~ /(\d\d\d\d)-(\d\d)-(\d\d)-(\d\d):(\d\d):(\d\d)/;
                $year=$1; $month=$2; $day=$3; $hour=$4; $minute=$5; $second=$6;
            }
            elsif ($_ =~ /(\w+)\s\s?(\d?\d) (\d\d):(\d\d):(\d\d) /) {    # Month DD HH:MM:SS
                $month=$1; $day=$2; $hour=$3; $minute=$4; $second=$5;
                # No year in this format: a date later in the year than today is
                # from last year. $nowmonth is already a month number ("01".."12").
                if (($monthnum{$month}>$nowmonth) || ($monthnum{$month}==$nowmonth && $day>$nowday)) {
                    $year=$nowyear-1;
                }
                else { $year=$nowyear; }
            }
            if (length $day == 1) { $day = "0".$day; }

            if ($monthnum{$month}) { $month=$monthnum{$month}; }    # Change lib month in num month if necessary

            # Create $timerecord like YYYYMMDDHHMMSS
            $timerecord{$logfilenb}=int("$year$month$day$hour$minute$second");
            if ($timerecord{$logfilenb}<10000000000000) {
                if ($Debug) { debug(" This record is corrupted (no date found)",3); }
                $corrupted{$logfilenb}++;
                next;
            }
            if ($Debug) { debug(" This is next record for file $logfilenb : timerecord=$timerecord{$logfilenb}",3); }

            # Sort and insert into timerecordorder, oldest at end/back of array
            # At the beginning, timerecordorder is empty. Then because the first pass is
            # a loop on each file to read each first line, the timerecordorder size is
            # number of input files.
            # After, each new loop, read only one new line, so timerecordorder size increase
            # by one but decrease just after by the pop command later.
            my $inserted=0;
            for(my $c=$#timerecordorder; $c>=0 ; $c--) {
                if($timerecord{$logfilenb} <= $timerecord{$timerecordorder[$c]})
                {
                    # Is older or equal than index at $c, add after
                    $timerecordorder[$c + 1]=$logfilenb;
                    $inserted = 1;
                    last;
                } else {
                    $timerecordorder[$c + 1]=$timerecordorder[$c];
                }
            }
            if(! $inserted) {
                $timerecordorder[0] = $logfilenb;
            }

            last;
        }
    }
    # END Read new lines for each log file. After this, following var are filled
    # $timerecord{$logfilenb}
    # @timerecordorder array

    # We choose which record of which log file to process
    if ($Debug) { debug("Choose which record of which log file to process",3); }
    $logfilechosen=pop(@timerecordorder);
    if(!defined($logfilechosen)) { last; }    # No more record to process

    # Record is chosen
    if ($Debug) { debug(" We chose to qualify record of file number $logfilechosen",3); }
    if ($Debug) { debug(" Record is $linerecord{$logfilechosen}",3); }

    # Record is approved. We found a new line to parse in file number $logfilechosen
    #-------------------------------------------------------------------------------
    $NbOfLinesParsed++;
    if ($ShowSteps) {
        if ((++$NbOfLinesShowsteps & $NBOFLINESFORBENCHMARK) == 0) {
            my $delay=(time()-$starttime)||1;
            print STDERR "$NbOfLinesParsed lines processed (".(1000*$delay)." ms, ".int($NbOfLinesShowsteps/$delay)." lines/seconds)\n";
        }
    }

    # Do DNS lookup
    #--------------------
    my $Host='';
    my $ip=0;
    if ($DNSLookup) {    # DNS lookup is 1 or 2
        if ($linerecord{$logfilechosen} =~ /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/) { $ip=4; $Host=$1; }    # IPv4
        elsif ($linerecord{$logfilechosen} =~ /([0-9A-F]*:)/i) { $ip=6; $Host=$1; }                         # IPv6
        if ($ip) {
            # Check in static DNS cache file
            if ($MyDNSTable{$Host}) {
                if ($Debug) { debug(" DNS lookup asked for $Host and found in static DNS cache file: $MyDNSTable{$Host}",4); }
            }
            elsif ($DNSLookup==1) {
                # Check in session cache (dynamic DNS cache file + session DNS cache)
                if (! $threadarray{$Host} && ! $TmpDNSLookup{$Host}) {
                    if (@SkipDNSLookupFor && &SkipDNSLookup($Host)) {
                        $TmpDNSLookup{$Host}='*';
                        if ($Debug) { debug(" No need of reverse DNS lookup for $Host, skipped at user request.",4); }
                    }
                    else {
                        if ($ip == 4) {
                            # Create or not a new thread
                            if ($MaxNbOfThread) {
                                if (! $threadarray{$Host}) {    # No thread already launched for $Host
                                    while ((scalar keys %threadarray) >= $MaxNbOfThread) {
                                        if ($Debug) { debug(" $MaxNbOfThread thread running reached, so we wait",4); }
                                        sleep 1;
                                    }
                                    $threadarray{$Host}=1;    # Semaphore to tell thread for $Host is active
#                                   my $t = new Thread \&MakeDNSLookup, $Host;
                                    my $t = threads->create(sub { MakeDNSLookup($Host) });
                                    if (! $t) { error("Failed to create new thread"); }
                                    if ($Debug) { debug(" Reverse DNS lookup for $Host queued in thread ".$t->tid,4); }
                                    $t->detach();    # We don't need to keep return code
                                }
                                else {
                                    if ($Debug) { debug(" Reverse DNS lookup for $Host already queued in a thread"); }
                                }
                                # Here, this is the only way, $TmpDNSLookup{$Host} can be not defined
                            } else {
                                &MakeDNSLookup($Host);
                                if ($Debug) { debug(" Reverse DNS lookup for $Host done: $TmpDNSLookup{$Host}",4); }
                            }
                        }
                        elsif ($ip == 6) {
                            $TmpDNSLookup{$Host}='*';
                            if ($Debug) { debug(" Reverse DNS lookup for $Host not available for IPv6",4); }
                        }
                    }
                } else {
                    if ($Debug) { debug(" Reverse DNS lookup already queued or done for $Host: $TmpDNSLookup{$Host}",4); }
                }
            }
            else {
                if ($Debug) { debug(" DNS lookup by static DNS cache file asked for $Host but not found.",4); }
            }
        }
        else {
            if ($Debug) { debug(" DNS lookup asked for $Host but this is not an IP address.",4); }
            $DNSLookupAlreadyDone=$LogFileToDo{$logfilechosen};
        }
    }
    else {
        if ($linerecord{$logfilechosen} =~ /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/) { $ip=4; $Host=$1; }    # IPv4
        elsif ($linerecord{$logfilechosen} =~ /([0-9A-F]*:)/i) { $ip=6; $Host=$1; }                         # IPv6
        if ($Debug) { debug(" No DNS lookup asked.",4); }
    }

    # Put record in record queue
    if ($Debug) { debug("Add record $NbOfLinesParsed in record queue (with host to resolve = ".($Host?$Host:'*').")",4); }
    $QueueRecords{$NbOfLinesParsed}=$linerecord{$logfilechosen};

    # Put record in host queue
    # If there is a host to resolve, we add line to queue with value of host to resolve
    # $Host is '' (no ip found) or is ip
    if ($DNSLookup==0) {
        $QueueHostsToResolve{$NbOfLinesParsed}='*';
    }
    if ($DNSLookup==1) {
        $QueueHostsToResolve{$NbOfLinesParsed}=$Host?$Host:'*';
    }
    if ($DNSLookup==2) {
        $QueueHostsToResolve{$NbOfLinesParsed}=$MyDNSTable{$Host}?$Host:'*';
    }

    # Print all records in head of queue that are ready
    &WriteRecordsReadyInQueue($logfilechosen);

}    # End of processing new record. Loop on next one.

if ($Debug) { debug("End of processing log file(s)"); }

# Close all log files
foreach my $logfilenb (keys %LogFileToDo) {
    if ($Debug) { debug("Close log file number $logfilenb"); }
    close("LOG$logfilenb") || error("Command for pipe '$LogFileToDo{$logfilenb}' failed");
}

# Flush what remains in the queue (records waiting for a threaded DNS lookup).
# We always try to write first: a lookup may have ended since the last flush,
# and testing "head not resolved" before writing would leave such records
# unprinted. We only sleep when the head of the queue is still not ready.
while ( $QueueHostsToResolve{$QueueCursor} ) {
    # Print all records in head of queue that are ready
    &WriteRecordsReadyInQueue($logfilechosen);
    if ($QueueHostsToResolve{$QueueCursor}) { sleep 1; }
}

# Waiting queue is empty
if ($MaxNbOfThread) {
    foreach my $t (threads->list()) {
        if ($Debug) { debug("Join thread $t"); }
        $t->join();
    }
}

# DNSLookup warning
if ($DNSLookup==1 && $DNSLookupAlreadyDone) {
    warning("Warning: $PROG has detected that some host names were already resolved in your logfile $DNSLookupAlreadyDone.\nIf DNS lookup was already made by the logger (web server) in ALL your log files, you should not use -dnslookup option to increase $PROG speed.");
}

if ($Debug) {
    debug("Total nb of read lines: $NbOfLinesRead");
    debug("Total nb of parsed lines: $NbOfLinesParsed");
    debug("Total nb of DNS lookup asked: $NbOfDNSLookupAsked");
}

#if ($DNSCache) {
#    open(CACHE, ">$DNSCache") or die;
#    foreach (keys %TmpDNSLookup) {
#        $TmpDNSLookup{$_}="*" if $TmpDNSLookup{$_} eq "ip";
#        print CACHE "0\t$_\t$TmpDNSLookup{$_}\n";
#    }
#    close CACHE;
#}

0;    # Do not remove this line