1#!/usr/local/bin/perl
2#-----------------------------------------------------------------------------
3# Allows you to get one unique output log file, sorted on date,
4# built from particular sources.
# This tool is part of AWStats log analyzer but can be used
6# alone for any other log analyzer.
7# See COPYING.TXT file about AWStats GNU General Public License.
8#-----------------------------------------------------------------------------
9
10use strict; no strict "refs";
11#use diagnostics;
12use POSIX qw( strftime );
13
14
15#-----------------------------------------------------------------------------
16# Defines
17#-----------------------------------------------------------------------------
18
# ENABLETHREAD --> COMMENT THIS BLOCK TO USE A THREADED VERSION
# Non-threaded defaults. $UseThread must be set before Check_Thread_Use() runs,
# because that sub reads it to decide whether -dnslookup=n is allowed.
my $UseThread=0;
&Check_Thread_Use();
# Counter incremented by MakeDNSLookup for each lookup performed.
my $NbOfDNSLookupAsked = 0;
# Hosts for which a lookup thread is currently running (key = IP address).
my %threadarray = ();
# Static DNS table loaded from the -dnscache file (key = IP address).
my %MyDNSTable = ();
# Results of lookups made during this run (key = IP address, '*' = unresolved).
my %TmpDNSLookup = ();
26
27# ENABLETHREAD --> UNCOMMENT THIS BLOCK TO USE A THREADED VERSION
28#my $UseThread=1;
29#&Check_Thread_Use();
30#my $NbOfDNSLookupAsked : shared = 0;
31#my %threadarray : shared = ();
32#my %MyDNSTable : shared = ();
33#my %TmpDNSLookup : shared = ();
34
35
36# ---------- Init variables --------
# ---------- Version information --------
use vars qw( $REVISION $VERSION );
$REVISION = '20140126';
$VERSION  = "1.2 (build $REVISION)";

# Number of lines between two -showsteps benchmark messages.
use vars qw( $NBOFLINESFORBENCHMARK );
$NBOFLINESFORBENCHMARK = 8192;

# ---------- Scalar settings --------
# $QueueCursor is declared here but only receives its value just before the
# main processing loop.
use vars qw(
	$DIR $PROG $Extension
	$Debug $ShowSteps $AddFileNum $AddFileName $LastLogNum $PrintFields
	$MaxNbOfThread $DNSLookup $DNSCache $DirCgi $DirData $DNSLookupAlreadyDone
	$NbOfLinesShowsteps $AFINET $QueueCursor $StopOnFirstEof $IgnoreMissing
);
# String settings start empty...
($DIR, $PROG, $Extension, $DNSCache, $DirCgi, $DirData, $AFINET) = ('') x 7;
# ...numeric settings and flags start at zero.
(
	$Debug, $ShowSteps, $AddFileNum, $AddFileName, $LastLogNum, $PrintFields,
	$MaxNbOfThread, $DNSLookup, $DNSLookupAlreadyDone, $NbOfLinesShowsteps,
	$StopOnFirstEof, $IgnoreMissing
) = (0) x 12;

# ---------- Arrays --------
use vars qw( @SkipDNSLookupFor @ParamFile @Fields );

# ---------- Hashes --------
use vars qw(
	%LogFileToDo %linerecord %timerecord %corrupted
	%QueueHostsToResolve %QueueRecords
);
%LogFileToDo = ();
%linerecord = ();
%timerecord = ();
%corrupted = ();
%QueueHostsToResolve = ();
%QueueRecords = ();

# DRA2: the order of timerecords is kept here. Each element is a file record
# DRA2: number, which is the key used by the hashes above.
use vars qw( @timerecordorder );
@timerecordorder = ();

# ---------- External program variables ----------
# For each supported compression: the command used to decompress to stdout and
# the regex (as a string) that recognises the file extension.
my $zcat       = q{gzip -cd};
my $zcat_file  = q{\.gz$};
my $bzcat      = q{bzip2 -cd};
my $bzcat_file = q{\.bz2$};
my $xzcat      = q{xz -cd};
my $xzcat_file = q{\.xz$};
101
102
103#-----------------------------------------------------------------------------
104# Functions
105#-----------------------------------------------------------------------------
106
107#------------------------------------------------------------------------------
108# Function:		Add all files of a specific directory
# Parameters:	$dir, @list (directory to scan, existing list to append to)
110# Input:		Directory path
111# Output:		None
112# Return:		Array with list of files
113#------------------------------------------------------------------------------
sub addDirectory {
	# Append the path of every plain file found directly in $dir to @list and
	# return the resulting list. Sub-directories and other non-file entries
	# are skipped; the scan is not recursive. Dies if $dir cannot be opened.
	my ($dir,@list) = @_;
	opendir(my $dirH, $dir) || die ("Can't open '$dir': $!");
	# A lexical entry variable is used so the caller's $_ (the argument loop
	# index in the main program) is left untouched, and defined() is tested
	# explicitly so an entry named "0" cannot end the scan early.
	while (defined(my $entry = readdir($dirH))) {
		if (-f "$dir/$entry") {
			push @list, "$dir/$entry";
		}
	}
	closedir($dirH);
	return @list;
}
126
127#------------------------------------------------------------------------------
128# Function:		Write an error message and exit
129# Parameters:	$message
130# Input:		None
131# Output:		None
132# Return:		None
133#------------------------------------------------------------------------------
sub error {
	# Print "Error: <message>." on STDERR and terminate with exit code 1.
	my ($message) = @_;
	print STDERR "Error: $message.\n";
	exit 1;
}
138
139#------------------------------------------------------------------------------
140# Function:		Write a debug message
141# Parameters:	$message
142# Input:		$Debug
143# Output:		None
144# Return:		None
145#------------------------------------------------------------------------------
sub debug {
	# Print a timestamped debug line on STDOUT when the global $Debug level is
	# at least the level of the message. The level defaults to 1 when it is
	# omitted (or false).
	my ($debugstring, $level) = @_;
	$level ||= 1;
	return if ($Debug < $level);
	my $stamp = localtime(time());	# scalar context: human readable date
	print "DEBUG $level - $stamp : $debugstring\n";
}
153
154#------------------------------------------------------------------------------
155# Function:		Write a warning message
156# Parameters:	$message
157# Input:		$Debug
158# Output:		None
159# Return:		None
160#------------------------------------------------------------------------------
sub warning {
	# Write the message on STDERR; when debugging is enabled it is also sent
	# to the debug output first (level 1).
	my ($messagestring) = @_;
	debug("$messagestring",1) if $Debug;
	print STDERR "$messagestring\n";
}
166
167#-----------------------------------------------------------------------------
168# Function:     Return 1 if string contains only ascii chars
169# Input:        String
170# Return:       0 or 1
171#-----------------------------------------------------------------------------
sub IsAscii {
	# Return 1 when the string is non-empty and made only of word characters,
	# whitespace and the punctuation + - / \ . % , ; : = " ' & ? !
	# Return 0 otherwise. Both the call and the verdict are traced at debug
	# level 5.
	my ($string) = @_;
	debug("IsAscii($string)",5) if $Debug;
	my $onlyascii = ($string =~ /^[\w\+\-\/\\\.%,;:=\"\'&?!\s]+$/) ? 1 : 0;
	debug(($onlyascii ? " Yes" : " No"),5) if $Debug;
	return $onlyascii;
}
182
183#-----------------------------------------------------------------------------
184# DRA Function:     Return 1 if DNS lookup should be skipped
185# Input:        String
186# Return:       0 or 1
187#-----------------------------------------------------------------------------
sub SkipDNSLookup {
	# Return 1 as soon as the host matches (case-insensitively) one of the
	# patterns listed in @SkipDNSLookupFor, 0 when none of them matches.
	my $host = $_[0];
	foreach my $pattern (@SkipDNSLookupFor) {
		return 1 if ($host =~ /$pattern/i);
	}
	return 0; # Not in @SkipDNSLookupFor
}
192
193#-----------------------------------------------------------------------------
194# Function:     Function that wait for DNS lookup (can be threaded)
# Input:        IPv4 address (dotted string)
# Return:       None (result is stored in %TmpDNSLookup, '*' if unresolved)
197#-----------------------------------------------------------------------------
sub MakeDNSLookup {
	# Reverse-resolve one dotted IPv4 address and store the outcome in
	# %TmpDNSLookup{$ipaddress}: the host name, or '*' when the lookup gives
	# nothing, gives back an IP address, or gives a name rejected by IsAscii.
	# The entry for the address is removed from %threadarray when done.
	my ($ipaddress) = @_;
	$NbOfDNSLookupAsked++;
	use Socket;
	$AFINET = AF_INET;
	# Thread id is only asked for when threads are in use (0 otherwise)
	my $tid = 0;
	if ($MaxNbOfThread) { $tid = eval("threads->self->tid()"); }
	else { $tid = 0; }
	debug("  ***** Thread id $tid: MakeDNSlookup started (for $ipaddress)",4) if $Debug;
	my $packedip = pack("C4",split(/\./,$ipaddress));
	my $lookupresult = gethostbyaddr($packedip,$AFINET);	# This is very slow, may took 20 seconds
	my $unresolved = (! $lookupresult
		|| $lookupresult =~ /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/
		|| ! IsAscii($lookupresult));
	$TmpDNSLookup{$ipaddress} = $unresolved ? '*' : $lookupresult;
	debug("  ***** Thread id $tid: MakeDNSlookup done ($ipaddress resolved into $TmpDNSLookup{$ipaddress})",4) if $Debug;
	delete $threadarray{$ipaddress};
	return;
}
216
217#-----------------------------------------------------------------------------
218# Function:     WriteRecordsReadyInQueue
# Input:        $logfilechosen (file number used for -addfilenum/-addfilename/-printfields)
220# Return:       0
221#-----------------------------------------------------------------------------
sub WriteRecordsReadyInQueue {
	# Print, in queue order, every record at the head of the queue that is
	# ready, and stop at the first record whose host is still being resolved.
	# A record is ready when its host entry is '*' (nothing to resolve) or
	# when its host has an entry in %MyDNSTable or %TmpDNSLookup. When the
	# resolved name is not '*', the first occurrence of the IP address in the
	# record is replaced by the name. Printed records are removed from
	# %QueueRecords and %QueueHostsToResolve and $QueueCursor is advanced.
	# Parameter: $logfilechosen - file number used for the -addfilenum,
	#            -addfilename and -printfields prefixes.
	# Return:    0
	# NOTE(review): the prefixes use the file number of the current call, not
	# the file each queued record came from; records flushed late (threaded
	# lookups) may therefore be tagged with another file number - confirm.
	my $logfilechosen=shift;
	if ($Debug) { debug("Check head of queue to write records ready to flush (QueueCursor=$QueueCursor, QueueSize=".(scalar keys %QueueRecords).")",4); }
	while (my $host = $QueueHostsToResolve{$QueueCursor}) {
		if ($host eq '*') {
			if ($Debug) { debug(" First elem in queue is ready. No change on it. We pull it.",4); }
		}
		else {
			# Static cache has priority over lookups made during this run
			my ($source, $name) = $MyDNSTable{$host}
				? ('MyDNSTable', $MyDNSTable{$host})
				: ('TmpDNSLookup', $TmpDNSLookup{$host});
			last if (! $name);		# Head of queue is still waiting for its lookup
			if ($name ne '*') {
				# \Q..\E: the dots of the IP address must match literally, otherwise
				# an earlier look-alike substring could be replaced instead.
				$QueueRecords{$QueueCursor} =~ s/\Q$host\E/$name/;
				if ($Debug) { debug(" First elem in queue has been resolved (found in $source $name). We pull it.",4); }
			}
		}
		# Record is ready, we output it.
		if ($AddFileNum)  { print "$logfilechosen "; }
		if ($AddFileName) { print "$LogFileToDo{$logfilechosen} "; }
		# see if we need to dump fields
		if ($PrintFields && $LastLogNum != $logfilechosen){
			print($Fields[$logfilechosen]."\n");
			$LastLogNum = $logfilechosen;
		}
		print "$QueueRecords{$QueueCursor}\n";
		delete $QueueRecords{$QueueCursor};
		delete $QueueHostsToResolve{$QueueCursor};
		$QueueCursor++;
	}
	return 0;
}
259
260#-----------------------------------------------------------------------------
261# Function:     Check if thread are enabled or not
262# Input:        -
263# Return:       -
264#-----------------------------------------------------------------------------
sub Check_Thread_Use {
	# Scan @ARGV for a "-dnslookup=n" (or "-dnslookup:n") option. For each one
	# found, either load the threads modules (when $UseThread is set) or stop
	# with an error explaining how to enable threading. Nothing is checked on
	# Perl versions older than 5.8.
	return if ($] < 5.008);
	foreach my $argument (@ARGV) {
		next unless ($argument =~ /^-dnslookup[:=](\d{1,2})/i);
		if (! $UseThread) {
			error("Multi-thread is disabled in default version of this script.\nYou must manually edit the file '$0' to comment/uncomment all\nlines marked with 'ENABLETHREAD' string to enable multi-threading");
		}
		else {
			# Modules are loaded at run time so the script still starts without them
			if (!eval ('require "threads.pm";')) { error("Failed to load perl module 'threads' required for multi-threaded DNS lookup".($@?": $@":"")); }
			if (!eval ('require "threads/shared.pm";')) { error("Failed to load perl module 'threads::shared' required for multi-threaded DNS lookup".($@?": $@":"")); }
		}
	}
	return;
}
275
276
277#-----------------------------------------------------------------------------
278# MAIN
279#-----------------------------------------------------------------------------
# Split $0 into directory ($DIR), program name ($PROG) and extension.
($DIR=$0) =~ s/([^\/\\]*)$//;
$PROG=$1;
# $Extension stays empty when the script name has no extension (the capture
# of the previous match must not be reused in that case).
$Extension = ($PROG =~ s/\.([^\.]*)$//) ? $1 : '';

# Get parameters. Arguments starting with "-" are options, "addfolder=dir"
# adds every plain file of a directory, anything else is a log file name
# (or wildcard, or "command |" pipe).
my $cpt=1;
foreach my $arg (@ARGV) {
	if ($arg =~ /^-/) {
		if ($arg =~ /debug=(\d)/i) { $Debug=$1; }
		# The value is captured with the same case-insensitive match that
		# recognises the option, so "-DNSCache=file" gives "file".
		elsif ($arg =~ /dnscache=(.*)$/i) { $DNSLookup||=2; $DNSCache=$1; }
		elsif ($arg =~ /dnslookup[:=](\d{1,2})/i) { $DNSLookup||=1; $MaxNbOfThread=$1; }
		elsif ($arg =~ /dnslookup/i) { $DNSLookup||=1; }
		elsif ($arg =~ /showsteps/i) { $ShowSteps=1; }
		elsif ($arg =~ /addfilenum/i) { $AddFileNum=1; }
		elsif ($arg =~ /addfilename/i) { $AddFileName=1; }
		elsif ($arg =~ /stoponfirsteof/i) { $StopOnFirstEof=1; }
		elsif ($arg =~ /printfields/i) { $PrintFields=1; }
		elsif ($arg =~ /ignoremissing/i) { $IgnoreMissing=1; }
		# STDOUT carries the merged log records, so the notice goes to STDERR
		else { print STDERR "Unknown argument $arg ignored\n"; }
	}
	elsif ($arg =~ /addfolder=(.*)$/i) {
		@ParamFile = addDirectory($1, @ParamFile);
	}
	else {
		push @ParamFile, $arg;
		$cpt++;
	}
}
# In debug mode, output is unbuffered and the run settings are traced
if ($Debug) {
	$|=1;
	debug(ucfirst($PROG)." - $VERSION - Perl $^X $]",1);
	debug("DNSLookup=$DNSLookup");
	debug("DNSCache=$DNSCache");
	debug("MaxNbOfThread=$MaxNbOfThread");
}

# Disallow MaxNbOfThread and Perl < 5.8
if ($MaxNbOfThread && $] < 5.008) {
	error("Multi-threaded DNS lookup is only supported with Perl 5.8 or higher (not $]). Use -dnslookup option instead");
}

# Warning, there is a memory hole in ActiveState perl version (in delete functions)
if ($^X =~ /active(?:state|perl)/i) {
	# TODO Add a warning

}
325
# Without any input file, print the help text on STDOUT and stop
if (! @ParamFile) {
	print <<"END_OF_USAGE";
----- $PROG $VERSION (c) Laurent Destailleur -----
$PROG allows you to get one unique output log file, sorted on date,
built from particular sources:
 - It can read several input log files,
 - It can read .gz/.bz2/.xz log files,
 - It can also makes a fast reverse DNS lookup to replace
   all IP addresses into host names in resulting log file.
$PROG comes with ABSOLUTELY NO WARRANTY. It's a free software
distributed with a GNU General Public License (See COPYING.txt file).
$PROG is part of AWStats but can be used alone as a log merger
or resolver before using any other log analyzer.

Usage:
  $PROG.$Extension [options] file
  $PROG.$Extension [options] file1 ... filen
  $PROG.$Extension [options] *.*
  $PROG.$Extension [options] addfolder=dirname
  perl $PROG.$Extension [options] *.* > newfile
Options:
  -dnslookup      make a reverse DNS lookup on IP adresses
  -dnslookup=n    same with a n parallel threads instead of serial requests
  -dnscache=file  make DNS lookup from cache file first before network lookup
  -showsteps      print on stderr benchmark information every $NBOFLINESFORBENCHMARK lines
  -addfilenum     if used with several files, file number can be added in first
  -addfilename    if used with several files, file name can be added in first
                  field of output file. This can be used to add a cluster id
                  when log files come from several load balanced computers.
  -stoponfirsteof Stop processing when any logfile reaches end-of-file.
  -printfields    For IIS or W3C logs, prints the latest field header for
                  the currentlog file when switching between log file entries
                  so that the parsercan automatically determine which fields
                  are avaiable.
  -ignoremissing  will not fail if a log file is missing

This runs $PROG in command line to open one or several
server log files to merge them (sorted on date) and/or to make a reverse
DNS lookup (if asked). The result log file is sent on standard output.
Note: $PROG is not a 'sort' tool to sort one file. It's a
software able to output sorted log records (with a reverse DNS lookup
included or not) even if log records are dispatched in several files.
Each of thoose files must be already independently sorted itself
(but that is the case in all web server log files). So you can use it
for load balanced log files or to group several old log files.

Don't forget that the main goal of logresolvemerge is to send log records to
a log analyzer in a sorted order without merging files on disk (NO NEED
OF DISK SPACE AT ALL) and without loading files into memory (NO NEED
OF MORE MEMORY). Choose of output records is done on the fly.

So logresolvemerge is particularly useful when you want to output several
and/or large log files in a fast process, with no use of disk or
more memory, and in a chronological order through a pipe (to be used by a log
analyzer).

Note: If input records are not 'exactly' sorted but 'nearly' sorted (this
occurs with heavy servers), this is not a problem, the output will also
be 'nearly' sorted but a few log analyzers (like AWStats) knowns how to deal
with such logs.

WARNING: If log files are old MAC text files (lines ended with CR char), you
can't run this tool on Win or Unix platforms.

WARNING: Because of memory holes in ActiveState Perl version, use another
Perl interpreter if you need to process large log files.

Now supports/detects:
  Automatic detection of log format
  Files can be .gz/.bz2/.xz files if gzip/bzip2/xz tools are available in PATH.
  Multithreaded reverse DNS lookup (several parallel requests) with Perl 5.8+.
New versions and FAQ at http://www.awstats.org
END_OF_USAGE
	exit 0;
}
400
# Get current time, every field except the year being padded to 2 digits
my $nowtime=time;
my ($nowsec,$nowmin,$nowhour,$nowday,$nowmonth,$nowyear) = localtime($nowtime);
$nowyear += ($nowyear < 100) ? 2000 : 1900;
my $nowsmallyear = substr($nowyear, 2);
$nowmonth++;	# localtime months start at 0
foreach my $field ($nowmonth, $nowday, $nowhour, $nowmin, $nowsec) {
	$field = sprintf("%02d", $field);	# $field is an alias on the variable
}
# Get tomorrow time (will be used to discard some record with corrupted date (future date))
my ($tomorrowsec,$tomorrowmin,$tomorrowhour,$tomorrowday,$tomorrowmonth,$tomorrowyear) = localtime($nowtime+86400);
$tomorrowyear += ($tomorrowyear < 100) ? 2000 : 1900;
my $tomorrowsmallyear = substr($tomorrowyear, 2);
$tomorrowmonth++;
foreach my $field ($tomorrowmonth, $tomorrowday, $tomorrowhour, $tomorrowmin, $tomorrowsec) {
	$field = sprintf("%02d", $field);
}
my $timetomorrow = join('', $tomorrowyear, $tomorrowmonth, $tomorrowday, $tomorrowhour, $tomorrowmin, $tomorrowsec);

# Init other parameters
# The benchmark step is decremented so it can be used as a bit mask
$NBOFLINESFORBENCHMARK--;
if ($ENV{"GATEWAY_INTERFACE"}) { $DirCgi=''; }
# Make sure a non-empty DirCgi ends with a path separator
if ($DirCgi && $DirCgi !~ /[\/\\]$/) { $DirCgi .= '/'; }
# If DirData is not defined or chosen as "." then it is the script directory,
# and "." when even that one is empty
if (! $DirData || $DirData eq '.') { $DirData=$DIR; }
if (! $DirData)  { $DirData='.'; }
$DirData =~ s/\/$//;

#my %monthlib =  ( "01","$Message[60]","02","$Message[61]","03","$Message[62]","04","$Message[63]","05","$Message[64]","06","$Message[65]","07","$Message[66]","08","$Message[67]","09","$Message[68]","10","$Message[69]","11","$Message[70]","12","$Message[71]" );
# monthnum must be in english because it's used to translate log date in apache log files which are always in english
# Each month is registered under its capitalised and its lower case name.
my %monthnum = ();
my @monthnames = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
foreach my $monthindex (0..$#monthnames) {
	my $twodigits = sprintf("%02d", $monthindex+1);
	$monthnum{$monthnames[$monthindex]} = $twodigits;
	$monthnum{lc($monthnames[$monthindex])} = $twodigits;
}
433
# Load the static DNS cache file given with -dnscache into %MyDNSTable.
# Each useful line holds three whitespace separated fields: time, ip, name.
# A name of '*' is stored as the IP address itself.
if ($DNSCache) {
	if ($Debug) { debug("Load DNS Cache file $DNSCache",2); }
	# Three-argument open: the file name comes from the command line and must
	# never be interpreted as an open mode or a pipe.
	open(my $cachefh, '<', $DNSCache) or error("Can't open cache file $DNSCache: $!");
	while (my $cacheline = <$cachefh>) {
		my ($time, $ip, $name) = split(' ', $cacheline);
		if ($ip && $name) {
			$name="$ip" if $name eq '*';
			$MyDNSTable{$ip}=$name;
		}
	}
	close $cachefh;
}
446
447#-----------------------------------------------------------------------------
448# PROCESSING CURRENT LOG(s)
449#-----------------------------------------------------------------------------
my $NbOfLinesRead=0;
my $NbOfLinesParsed=0;
my $logfilechosen=0;
my $starttime=time();

# Supported compressions: label used in debug messages, file name pattern
# and command that decompresses to stdout.
my @decompressors = (
	[ 'GZIP', $zcat_file,  $zcat  ],
	[ 'BZ2',  $bzcat_file, $bzcat ],
	[ 'XZ',   $xzcat_file, $xzcat ],
);
# Return (label, command) for the first compression matching the file name,
# or an empty list when the file is not compressed.
my $FindDecompressor = sub {
	my ($filename) = @_;
	foreach my $tool (@decompressors) {
		my ($label, $pattern, $command) = @$tool;
		return ($label, $command) if ($filename =~ /$pattern/);
	}
	return;
};

# Define the LogFileToDo list: key is the file number (starting at 1), value
# is what will be given to open(), either a file name or a "command file |" pipe.
# NOTE(review): file names are inserted as-is in the decompression command,
# so names with spaces or shell metacharacters are not handled.
$cpt=1;
foreach my $key (0..(@ParamFile-1)) {
	if (($ParamFile[$key] !~ /\*/ && $ParamFile[$key] !~ /\?/) || $ParamFile[$key] =~ /\|$/) {
		# Plain file name, or explicit pipe command (ends with "|")
		if ($Debug) { debug("DBG1 Log file $ParamFile[$key] is added to LogFileToDo with number $cpt."); }
		# Check for supported compression
		my ($label, $command) = $FindDecompressor->($ParamFile[$key]);
		if ($command) {
			if ($Debug) { debug("$label compression detected for Log file $ParamFile[$key]."); }
			# Modify the name to include the decompression command
			$ParamFile[$key] = $command . ' ' . $ParamFile[$key] . ' |';
		}

		$LogFileToDo{$cpt}=$ParamFile[$key];
		$cpt++;
	}
	else {
		# Wildcard: split into directory and file mask
		my $DirFile=$ParamFile[$key]; $DirFile =~ s/([^\/\\]*)$//;
		my $wildcard = $1;
		if ($DirFile eq '') { $DirFile = '.'; }
		# Turn the mask into a regex: "*" and "?" are wildcards, every other
		# character (dots, "+", brackets...) must match literally.
		my $filepattern = join('', map {
			  $_ eq '*' ? '.*'
			: $_ eq '?' ? '.'
			:             quotemeta($_)
		} split(//, $wildcard));
		$ParamFile[$key] = $filepattern;
		if ($Debug) { debug("Search for file \"$ParamFile[$key]\" into \"$DirFile\""); }
		my $dirhandle;
		if (! opendir($dirhandle, $DirFile)) {
			warning("Warning: Can't open directory \"$DirFile\" : $!");
			next;
		}
		my @filearray = sort readdir $dirhandle;
		closedir $dirhandle;
		foreach my $filename (@filearray) {
			next if ($filename eq '.' || $filename eq '..');
			next if ($filename !~ /^$filepattern$/);

			if ($Debug) { debug("DBG2 Log file $filename is added to LogFileToDo with number $cpt."); }
			# Check for supported compression
			my ($label, $command) = $FindDecompressor->($filename);
			if ($command) {
				if ($Debug) { debug("$label compression detected for Log file $filename."); }
				# Modify the name to include the decompression command
				$LogFileToDo{$cpt}=$command . ' ' . "$DirFile/$filename" . ' |';
			}
			else {
				$LogFileToDo{$cpt}="$DirFile/$filename";
			}
			$cpt++;
		}
	}
}
522
# If no files to process
if (scalar keys %LogFileToDo == 0) {
	error("No input log file found");
}

# Open all log files. Each one gets a symbolic file handle named "LOG<number>".
if ($Debug) { debug("Start of processing ".(scalar keys %LogFileToDo)." log file(s), $MaxNbOfThread threads max"); }
foreach my $logfilenb (keys %LogFileToDo) {
	if ($Debug) { debug("Open log file number $logfilenb: \"$LogFileToDo{$logfilenb}\""); }
	# Two-argument open is kept on purpose: an entry can be a "command |" pipe
	# (decompression command or pipe given by the user).
	if (! open("LOG$logfilenb","$LogFileToDo{$logfilenb}")) {
		my $openerror = "Couldn't open log file \"$LogFileToDo{$logfilenb}\" : $!";
		if (! $IgnoreMissing) { error($openerror); }
		# With -ignoremissing the file is dropped from the list and there is
		# no handle to put in binary mode.
		debug($openerror);
		delete $LogFileToDo{$logfilenb};
		next;
	}
	binmode "LOG$logfilenb";	# To avoid pb of corrupted text log files with binary chars.
}
542
# Main merge loop. One record per open log file is kept in %linerecord with
# its date in %timerecord (YYYYMMDDHHMMSS); @timerecordorder holds the file
# numbers sorted on that date, oldest at the end. Each turn pops the oldest
# record, starts its DNS lookup if asked, queues it for output and then reads
# the next record of the file it came from.
$QueueCursor=1;
STOPONFIRSTEOF: while (1 == 1)
{
	# BEGIN Read new record
	# For each log file if logfilechosen is 0
	# If not, we go directly to log file instead of iterating over all keys for a match
	#----------------------------------------------------------------------------------
	my @readlist;
	if ($logfilechosen == 0) {
		@readlist = keys %LogFileToDo;
	} else {
		@readlist = ($logfilechosen);
	}
	foreach my $logfilenb (@readlist)
	{
		if ($Debug) { debug("Search next record in file number $logfilenb",3); }
		# Read chosen log file until we found a record with good date or reaching end of file
		while (1 == 1) {
			my $LOG="LOG$logfilenb";
			my $line=<$LOG>;	# Read new line
			# defined() and not truth: a last line made of "0" without end of
			# line is a (corrupted) record, not the end of file.
			if (! defined $line)
			{							# No more records in log file number $logfilenb
				if ($Debug) { debug(" No more records in file number $logfilenb",2); }
				delete $LogFileToDo{$logfilenb};
				if ($StopOnFirstEof)
				{
					if ($Debug) { debug("Exiting loop due to EOF of logfile $logfilenb",1); }
					last STOPONFIRSTEOF;
				}
				last;
			}

			# Get the latest Fields header for printing IIS and W3C logs
			if ($PrintFields && $line =~ m/#Fields:/){
				my $field = $line;
				# strip whitespace
				$field =~ s/^\s+|\s+$//g;
				# String comparison: with a numeric one both headers are worth 0
				# and a changed header would never be recorded.
				if (!$Fields[$logfilenb] || $field ne $Fields[$logfilenb]){
					$Fields[$logfilenb] = $field;
					debug("Found new fields in $logfilenb: $Fields[$logfilenb]");
				}
			}

			$NbOfLinesRead++;
			chomp $line; $line =~ s/\r$//;

			if ($line =~ /^#/) { next; }						# Ignore comment lines (ISS writes such comments)
			if ($line =~ /^!!/) { next; }						# Ignore comment lines (Webstar writes such comments)
			if ($line =~ /^$/) { next; }						# Ignore blank lines (With ISS: happens sometimes, with Apache: possible when editing log file)

			$linerecord{$logfilenb}=$line;

			# Check filters
			#----------------------------------------------------------------------

			# Split YYYY-MM-DD HH:MM:SS
			#    or DD/Month/YYYY:HH:MM:SS
			#    or MM/DD/YY\tHH:MM:SS
			#    or 9999.999
			#    or Month DD HH:MM:SS
			my $year=0; my $month=0; my $day=0; my $hour=0; my $minute=0; my $second=0;
			if ($line =~ /(\d\d\d\d)-(\d\d)-(\d\d)\s(\d\d):(\d\d):(\d\d)/) { $year=$1; $month=$2; $day=$3; $hour=$4; $minute=$5; $second=$6; }
			elsif ($line =~ /\[(\d?\d)[\/:\s](\w+)[\/:\s](\d\d\d\d)[\/:\s](\d\d)[\/:\s](\d\d)[\/:\s](\d\d) /) { $year=$3; $month=$2; $day=$1; $hour=$4; $minute=$5; $second=$6; }
			elsif ($line =~ /\w+ (\w+) {1,2}(\d?\d) (\d\d)[\/:\s](\d\d)[\/:\s](\d\d) (\d\d\d\d)/) { $year=$6; $month=$1; $day=$2; $hour=$3; $minute=$4; $second=$5; }
			elsif ($line =~ /^(\d\d\d\d+\.\d\d\d) /)
			{
				# Timestamp in seconds since epoch, converted in GMT
				my $timetime = strftime('%Y-%m-%d-%T', gmtime($1));
				if ($timetime =~ /(\d\d\d\d)-(\d\d)-(\d\d)-(\d\d):(\d\d):(\d\d)/) {
					$year=$1; $month=$2; $day=$3; $hour=$4; $minute=$5; $second=$6;
				}
			}
			elsif ($line =~ /(\w+)\s\s?(\d?\d) (\d\d):(\d\d):(\d\d) /) {	# Month DD HH:MM:SS
				$month=$1; $day=$2; $hour=$3; $minute=$4; $second=$5;
				# No year in this format: a date later in the year than today is
				# taken from last year. $nowmonth is already a month number, only
				# the month name of the record goes through %monthnum.
				my $monthofrecord = $monthnum{$month} || 0;
				if (($monthofrecord > $nowmonth) || ($monthofrecord == $nowmonth && $day > $nowday)) {
					$year=$nowyear-1;
				}
				else { $year=$nowyear; }
			}
			if (length($day) == 1) { $day = "0".$day; }

			if ($monthnum{$month}) { $month=$monthnum{$month}; }	# Change lib month in num month if necessary

			# Create $timerecord like YYYYMMDDHHMMSS
			$timerecord{$logfilenb}=int("$year$month$day$hour$minute$second");
			if ($timerecord{$logfilenb}<10000000000000) {
				if ($Debug) { debug(" This record is corrupted (no date found)",3); }
				$corrupted{$logfilenb}++;
				next;
			}
			if ($Debug) { debug(" This is next record for file $logfilenb : timerecord=$timerecord{$logfilenb}",3); }

			# Sort and insert into timerecordorder, oldest at end/back of array
			# At the beginning, timerecordorder is empty. Then because the first pass is
			# a loop on each file to read each first line, the timerecordorder size is
			# number of input files.
			# After, each new loop, read only one new line, so timerecordorder size increase
			# by one but decrease just after by the pop command later.
			my $inserted=0;
			for (my $c=$#timerecordorder; $c>=0 ; $c--) {
				if ($timerecord{$logfilenb} <= $timerecord{$timerecordorder[$c]})
				{
					# Is older or equal than index at $c, add after
					$timerecordorder[$c + 1]=$logfilenb;
					$inserted = 1;
					last;
				} else {
					# Is newer: shift the element at $c one slot towards the end
					$timerecordorder[$c + 1]=$timerecordorder[$c];
				}
			}
			if (! $inserted) {
				$timerecordorder[0] = $logfilenb;
			}

			last;
		}
	}
	# END Read new lines for each log file. After this, following var are filled
	# $timerecord{$logfilenb}
	# @timerecordorder array

	# We choose which record of which log file to process
	if ($Debug) { debug("Choose which record of which log file to process",3); }
	$logfilechosen=pop(@timerecordorder);
	if (!defined($logfilechosen)) { last; }              # No more record to process

	# Record is chosen
	if ($Debug) { debug(" We chose to qualify record of file number $logfilechosen",3); }
	if ($Debug) { debug("  Record is $linerecord{$logfilechosen}",3); }

	# Record is approved. We found a new line to parse in file number $logfilechosen
	#-------------------------------------------------------------------------------
	$NbOfLinesParsed++;
	if ($ShowSteps) {
		# $NBOFLINESFORBENCHMARK is used as a bit mask here
		if ((++$NbOfLinesShowsteps & $NBOFLINESFORBENCHMARK) == 0) {
			my $delay=(time()-$starttime)||1;
			print STDERR "$NbOfLinesParsed lines processed (".(1000*$delay)." ms, ".int($NbOfLinesShowsteps/$delay)." lines/seconds)\n";
		}
	}

	# Do DNS lookup
	#--------------------
	my $Host='';
	my $ip=0;
	if ($DNSLookup) {			# DNS lookup is 1 or 2
		if ($linerecord{$logfilechosen} =~ /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/) { $ip=4; $Host=$1; }	# IPv4
		elsif ($linerecord{$logfilechosen} =~ /([0-9A-F]*:)/i) { $ip=6; $Host=$1; }						# IPv6
		if ($ip) {
			# Check in static DNS cache file
			if ($MyDNSTable{$Host}) {
				if ($Debug) { debug("  DNS lookup asked for $Host and found in static DNS cache file: $MyDNSTable{$Host}",4); }
			}
			elsif ($DNSLookup==1) {
				# Check in session cache (dynamic DNS cache file + session DNS cache)
				if (! $threadarray{$Host} && ! $TmpDNSLookup{$Host}) {
					if (@SkipDNSLookupFor && SkipDNSLookup($Host)) {
						$TmpDNSLookup{$Host}='*';
						if ($Debug) { debug("  No need of reverse DNS lookup for $Host, skipped at user request.",4); }
					}
					else {
						if ($ip == 4) {
							# Create or not a new thread
							if ($MaxNbOfThread) {
								if (! $threadarray{$Host}) {	# No thread already launched for $Host
									while ((scalar keys %threadarray) >= $MaxNbOfThread) {
										if ($Debug) { debug(" $MaxNbOfThread thread running reached, so we wait",4); }
										sleep 1;
									}
									$threadarray{$Host}=1;		# Semaphore to tell thread for $Host is active
#									my $t = new Thread \&MakeDNSLookup, $Host;
									my $t = threads->create(sub { MakeDNSLookup($Host) });
									if (! $t) { error("Failed to create new thread"); }
									if ($Debug) { debug(" Reverse DNS lookup for $Host queued in thread ".$t->tid,4); }
									$t->detach();	# We don't need to keep return code
								}
								else {
									if ($Debug) { debug(" Reverse DNS lookup for $Host already queued in a thread"); }
								}
								# Here, this is the only way, $TmpDNSLookup{$Host} can be not defined
							} else {
								MakeDNSLookup($Host);
								if ($Debug) { debug("  Reverse DNS lookup for $Host done: $TmpDNSLookup{$Host}",4); }
							}
						}
						elsif ($ip == 6) {
							$TmpDNSLookup{$Host}='*';
							if ($Debug) { debug("  Reverse DNS lookup for $Host not available for IPv6",4); }
						}
					}
				} else {
					if ($Debug) { debug("  Reverse DNS lookup already queued or done for $Host: $TmpDNSLookup{$Host}",4); }
				}
			}
			else {
				if ($Debug) { debug("  DNS lookup by static DNS cache file asked for $Host but not found.",4); }
			}
		}
		else {
			if ($Debug) { debug("  DNS lookup asked for $Host but this is not an IP address.",4); }
			# Remember the file, it is reported in a warning at the end of the run
			$DNSLookupAlreadyDone=$LogFileToDo{$logfilechosen};
		}
	}
	else {
		# No lookup, the host is only searched for the debug message below
		if ($linerecord{$logfilechosen} =~ /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/) { $ip=4; $Host=$1; }	# IPv4
		elsif ($linerecord{$logfilechosen} =~ /([0-9A-F]*:)/i) { $ip=6; $Host=$1; }						# IPv6
		if ($Debug) { debug("  No DNS lookup asked.",4); }
	}

	# Put record in record queue
	if ($Debug) { debug("Add record $NbOfLinesParsed in record queue (with host to resolve = ".($Host?$Host:'*').")",4); }
	$QueueRecords{$NbOfLinesParsed}=$linerecord{$logfilechosen};

	# Put record in host queue
	# If there is a host to resolve, we add line to queue with value of host to resolve
	# $Host is '' (no ip found) or is ip
	if ($DNSLookup==0) {
		$QueueHostsToResolve{$NbOfLinesParsed}='*';
	}
	if ($DNSLookup==1) {
		$QueueHostsToResolve{$NbOfLinesParsed}=$Host?$Host:'*';
	}
	if ($DNSLookup==2) {
		$QueueHostsToResolve{$NbOfLinesParsed}=$MyDNSTable{$Host}?$Host:'*';
	}

	# Print all records in head of queue that are ready
	WriteRecordsReadyInQueue($logfilechosen);

}	# End of processing new record. Loop on next one.
770
if ($Debug) { debug("End of processing log file(s)"); }

# Close all log files still listed in LogFileToDo
foreach my $logfilenb (keys %LogFileToDo) {
	if ($Debug) { debug("Close log file number $logfilenb"); }
	close("LOG$logfilenb") || error("Command for pipe '$LogFileToDo{$logfilenb}' failed");
}

# Flush what remains in the queue. The flush is attempted before any wait:
# a lookup may have ended after the last flush of the main loop, in which
# case the head record is ready and must still be printed. We only sleep
# while the head record is waiting for its lookup.
while ( $QueueHostsToResolve{$QueueCursor} ) {
	# Print all records in head of queue that are ready
	WriteRecordsReadyInQueue($logfilechosen);
	if ( $QueueHostsToResolve{$QueueCursor} ) { sleep 1; }
}

# Waiting queue is empty
if ($MaxNbOfThread) {
	foreach my $t (threads->list()) {
		if ($Debug) { debug("Join thread $t"); }
		$t->join();
	}
}

# DNSLookup warning
if ($DNSLookup==1 && $DNSLookupAlreadyDone) {
	warning("Warning: $PROG has detected that some host names were already resolved in your logfile $DNSLookupAlreadyDone.\nIf DNS lookup was already made by the logger (web server) in ALL your log files, you should not use -dnslookup option to increase $PROG speed.");
}

if ($Debug) {
	debug("Total nb of read lines: $NbOfLinesRead");
	debug("Total nb of parsed lines: $NbOfLinesParsed");
	debug("Total nb of DNS lookup asked: $NbOfDNSLookupAsked");
}

#if ($DNSCache) {
#	open(CACHE, ">$DNSCache") or die;
#	foreach (keys %TmpDNSLookup) {
#		$TmpDNSLookup{$_}="*" if $TmpDNSLookup{$_} eq "ip";
#		print CACHE "0\t$_\t$TmpDNSLookup{$_}\n";
#	}
#	close CACHE;
#}

0;	# Do not remove this line
814