1#!/usr/local/bin/perl
2#
3# status.pl v1.0  960413 Iain Lea (iain@sbs.de)
4#
5# ChangeLog
6# 960413 IL
7#
8# Produces a HTML 'Search Engine Status' page with last 5 runs
9# and 'Top 10' servers by #URLS indexed.
10#
11# Usage: status.pl [options]
12#        -h       help
13#        -F file  HTML footer
14#        -H file  HTML header
15#        -o file  HTML generated file
16#        -v       verbose
17#
18# TODO
19
20require 'timelocal.pl';
21require 'getopts.pl';
22require '/www/search.sbs.de/bin/sbs.pl';
23
# Locations of the robot's data files and of the search index.
$DataDir = '/www/search.sbs.de/data/robot';
$RunTimeFile = "$DataDir/current-runtime";
$RobotFile = "$DataDir/current-robot";
$IndexFile = '/www/search.sbs.de/test/db/db.wordlist';

# Defaults; ParseCmdLine may replace some of them from the command line.
$DefOutputFile = '/www/search.sbs.de/test/pub/status.html';
$TmpFile = "/tmp/status.$$";
$DefFooter = '';
$DefHeader = '';
$Verbose = 0;
$Top10Servers = 10;		# max rows WriteStatus puts in the server table

&ParseCmdLine;

print "Generating status.html...\n" if $Verbose;

# First record the figures of the current run, then regenerate the page.
&ReadDataFiles ($RunTimeFile, $RobotFile, $IndexFile);
&WriteStatus ($DataDir, $DefOutputFile, $DefHeader, $DefFooter);

# Reaching this point means no step die()d, so report success. The script
# used to exit with status 1 here, which callers such as cron or make
# interpret as a failed run.
exit 0;
44
45#############################################################################
46# Subroutines
47#
48
# ParseCmdLine - apply the command-line switches to the script's globals.
#
# Takes no arguments. Calls &Getopts and then reads the $opt_X globals:
#   -h       print the usage text and exit with status 0
#   -F file  replaces $DefFooter
#   -H file  replaces $DefHeader
#   -o file  replaces $DefOutputFile
#   -v       sets $Verbose to 1
# A switch whose $opt_X is empty (or unset) leaves its global untouched.
sub ParseCmdLine
{
	&Getopts ('F:hH:o:v');

	if ($opt_h ne "") {
		print <<EndOfHelp;
Produce an HTML 'Status' page of last 5 runs and Top 10 servers by #URLS.

Usage: $0 [options]
  -h       help
  -F file  HTML footer
  -H file  HTML header
  -o file  HTML generated file
  -v       verbose

EndOfHelp
		exit 0;
	}

	# Every override is guarded by its OWN switch. The footer used to be
	# guarded by $opt_H, so -F alone was ignored and -H alone clobbered
	# the footer default.
	$DefFooter = $opt_F if ($opt_F ne "");
	$DefHeader = $opt_H if ($opt_H ne "");
	$DefOutputFile = $opt_o if ($opt_o ne "");
	$Verbose = 1 if ($opt_v ne "");
}
73
# ParseRunStamp - private helper of ReadDataFiles: decode one timestamp line
# of the form
#	Sat Apr 13 12:57:52 MET DST 1996
# The day may have one or two digits and any text (one, two or no time zone
# words) may stand between the seconds and the 4-digit year.
#
# Returns the list ($Epoch, $Mon, $Day, $Year) where $Epoch is the
# timelocal() value, $Mon is 0-based and $Year is the year minus 1900.
# Returns the empty list if the line does not look like a timestamp or the
# month name is unknown.
sub ParseRunStamp
{
	my ($Line) = @_;
	my (%Months) = (
		'Jan', '0', 'Feb', '1', 'Mar', '2', 'Apr', '3', 'May',  '4', 'Jun',  '5',
		'Jul', '6', 'Aug', '7', 'Sep', '8', 'Oct', '9', 'Nov', '10', 'Dec', '11' );

	return () unless $Line =~
		/^\S+\s+(\S+)\s+([0-9]{1,2})\s+([0-9]{1,2}):([0-9]{2}):([0-9]{2})\s.*?([0-9]{4})\s*$/;

	# Copy the captures at once; they do not survive the next match.
	my ($MonName, $Day, $Hour, $Min, $Sec, $FullYear) = ($1, $2, $3, $4, $5, $6);
	return () unless defined ($Months{$MonName});

	my ($Mon, $Year) = ($Months{$MonName}, $FullYear - 1900);
	return (timelocal ($Sec, $Min, $Hour, $Day, $Mon, $Year), $Mon, $Day, $Year);
}

# ReadDataFiles - collect the figures of the current robot run and store them
# as one line in "$DataDir/YYMMDD-info", then point the "current-info"
# symlink in the same directory at that file.
#
#   $RunTimeFile  file whose first timestamp line is the start and whose
#                 second timestamp line is the end of the run; lines that
#                 are not timestamps are skipped
#   $RobotFile    robot log; "htdig: <server> <n> documents" lines are
#                 counted per server, a "Read <n> words" line gives the
#                 number of words
#   $IndexFile    file whose size in bytes is recorded as the index size
#
# The line written is RunDate:RunTime:IndexSize:NumWords:NumURLS:NumServers
# with RunDate as YYMMDD and RunTime as HHMM (empty when no end stamp was
# found). Servers with at least one document are also stored in the global
# %Top10ByName under the key "<7-digit count>|<server>" for WriteStatus.
#
# NOTE: $DataDir is not a parameter here; the package global is used.
#
# Dies if an input file cannot be opened, if no start timestamp is found, or
# if the info file or the symlink cannot be written.
sub ReadDataFiles
{
	my ($RunTimeFile, $RobotFile, $IndexFile) = @_;
	my ($IndexSize, $NumWords, $NumURLS, $NumServers);
	my ($BegTime, $EndTime, $RunDate, $RunTime, $Key);
	my ($Epoch, $Mon, $Day, $Year);
	my ($InfoFile, $CurrFile);

	# RunDate : RunTime

	open (TIME, "$RunTimeFile") || die "Error: $RunTimeFile - $!\n";
	while (<TIME>) {
		chomp;
		next if (defined ($BegTime) && defined ($EndTime));

		($Epoch, $Mon, $Day, $Year) = &ParseRunStamp ($_);
		next unless defined ($Epoch);

		if (! defined ($BegTime)) {
			$BegTime = $Epoch;
			# "% 100" keeps the stamp at six digits from the year 2000
			# on (year-1900 is then >= 100), which is what the
			# [0-9]{6}-info filter in WriteStatus expects.
			$RunDate = sprintf ("%02d%02d%02d", $Year % 100, $Mon+1, $Day);
			print "BEG=[$_] [$BegTime] [$RunDate]\n" if $Verbose;
		} else {
			$EndTime = $Epoch;
			$RunTime = $EndTime - $BegTime;
			$RunTime = sprintf ("%02d%02d", $RunTime/3600, ($RunTime%3600)/60);
			print "END=[$_] [$EndTime] [$RunTime]\n" if $Verbose;
		}
	}
	close (TIME);

	# Without a start date there is no sensible file name to write to.
	die "Error: $RunTimeFile - no start time found\n" unless defined ($RunDate);

	# IndexSize

	$IndexSize = (stat ($IndexFile))[7];
	$IndexSize = 0 unless defined ($IndexSize);	# index file missing
	print "SIZE=[$IndexSize]\n" if $Verbose;

	# NumWords : NumURLS : NumServers

	$NumWords = $NumURLS = $NumServers = 0;

	open (ROBOT, "$RobotFile") || die "Error: $RobotFile - $!\n";
	while (<ROBOT>) {
		# The count must have at least one digit; with [0-9]* a line
		# with extra blanks could match with an empty count.
		if (/^htdig:\s+(.*?)\s+([0-9]+)\s+documents$/) {
			my ($Server, $Docs) = ($1, $2);
			$NumURLS += $Docs;
			$NumServers++;
			if ($Docs > 0) {
				# Zero-padded count first, so that a plain string
				# sort orders the keys by number of documents.
				$Key = sprintf ("%07d|%s", $Docs, $Server);
				$Top10ByName{$Key} = $Docs;
			}
			print "SERVER=[$Server] DOCS=[$Docs]\n" if $Verbose;
		} elsif (/^Read\s+([0-9]+)\s+words$/) {
			$NumWords = $1;
			print "WORDS=[$NumWords]\n" if $Verbose;
		}
	}
	close (ROBOT);

	# Write data to YYMMDD-info file

	$InfoFile = "$DataDir/${RunDate}-info";
	$CurrFile = "$DataDir/current-info";

	open (INFO, ">$InfoFile") || die "Error: $InfoFile - $!\n";
	print "$RunDate:$RunTime:$IndexSize:$NumWords:$NumURLS:$NumServers\n" if $Verbose;
	print INFO "$RunDate:$RunTime:$IndexSize:$NumWords:$NumURLS:$NumServers\n";
	# Buffered write errors only show up at close time.
	close (INFO) || die "Error: $InfoFile - $!\n";

	# unlink may fail harmlessly when the link does not exist yet.
	unlink ($CurrFile);
	symlink ($InfoFile, $CurrFile) || die "Error: $CurrFile - $!\n";
}
147
# WriteStatus - generate the HTML status page.
#
#   $DataDir  directory holding "current-info" and the YYMMDD-info files
#   $OutFile  HTML file to (over)write
#   $Header   header file handed to PrintBoilerPlate
#   $Footer   footer file handed to PrintBoilerPlate
#
# The page consists of the header, the string returned by &ReadRobotInfo for
# "$DataDir/current-info", a table with one row per YYMMDD-info file (newest
# first, at most five) and a table of the servers in the global
# %Top10ByName (largest document count first, at most $Top10Servers rows),
# followed by the footer.
#
# Dies if $OutFile, $DataDir or one of the info files cannot be opened, or
# if closing $OutFile reports an error.
sub WriteStatus
{
	my ($DataDir, $OutFile, $Header, $Footer) = @_;
	my ($RobotInfo, $File, $Path, $Line, $Key, $Server, $Shown);
	my ($RunDate, $RunTime, $IndexSize, $NumWords, $NumURLS, $NumServers);
	my (@InfoFiles);
	my ($MaxRuns) = 5;	# rows in the 'last 5 robot runs' table

	$RobotInfo = &ReadRobotInfo ("$DataDir/current-info");

	open (HTML, ">$OutFile") || die "Error: $OutFile - $!\n";

	&PrintBoilerPlate ($Header, 1);

	print HTML <<EOT;
<p>
<strong>$RobotInfo</strong>
<p>
<table border=2 width=400>
<caption>Table of last 5 robot runs.</caption>
<th>Run Date<th>Run Time<th># Servers<th># URL's<th># Words<th>Index (MB)
<tr>
EOT

	# read YYMMDD-info files, newest first
	opendir (DIR, $DataDir) || die "Error: $DataDir - $!\n";
	@InfoFiles = reverse (sort (grep (/^[0-9]{6}-info$/, readdir (DIR))));
	closedir (DIR);

	# Keep at most $MaxRuns entries. A fixed slice [0..4] would pad a
	# shorter list with undef and produce rows for files that do not exist.
	splice (@InfoFiles, $MaxRuns) if (@InfoFiles > $MaxRuns);

	foreach $File (@InfoFiles) {
		$Path = "$DataDir/$File";
		open (INFO, "$Path") || die "Error: $Path - $!\n";
		$Line = <INFO>;
		close (INFO);
		next unless defined ($Line);	# empty file, nothing to show
		chomp ($Line);

		($RunDate, $RunTime, $IndexSize, $NumWords, $NumURLS, $NumServers) = split (/:/, $Line);
		$IndexSize = sprintf ("%.1f", $IndexSize / (1024*1024));
		# HHMM -> HH:MM. The last two characters are the minutes, so
		# hours beyond 99 stay intact; an empty run time stays empty
		# instead of picking up the captures of an earlier match.
		$RunTime = "$1:$2" if ($RunTime =~ /^(.+)(..)$/);
		print HTML <<EOT;
<td align="center">$RunDate</td>
<td align="center">$RunTime</td>
<td align="right">$NumServers</td>
<td align="right">$NumURLS</td>
<td align="right">$NumWords</td>
<td align="right">$IndexSize</td>
<tr>
EOT
	}

	print HTML <<EOT;
</table>
<p>
<p>
<table border=2 width=400>
<caption>Table of Top 10 servers listed by number of indexed documents.</caption>
<th>Top 10 Servers<th># URL's
<tr>
EOT

	# The keys start with the zero-padded document count, so a reversed
	# string sort lists the largest servers first.
	$Shown = 0;
	foreach $Key (reverse (sort (keys (%Top10ByName)))) {
		last if ($Shown >= $Top10Servers);
		$Shown++;
		$NumURLS = $Top10ByName{$Key};
		$Server = $Key;
		$Server =~ s/^[0-9]*\|//;	# drop the sort prefix
		$Server =~ s/:80$//;		# default port is not shown
		print HTML <<EOT;
<td width="80%" align="left"><a href="http://$Server/">$Server</a></td>
<td width="20%" align="right">$NumURLS</td>
<tr>
EOT
	}

	print HTML "</table>\n";

	&PrintBoilerPlate ($Footer, 0);

	# Buffered write errors only show up at close time.
	close (HTML) || die "Error: $OutFile - $!\n";
}
229
# PrintBoilerPlate - write the page header or footer to the HTML filehandle.
#
#   $File      name of a boilerplate file; when the name is non-empty and
#              the file exists its contents are copied through unchanged
#   $IsHeader  selects the fallback used when there is no such file:
#              true prints the built-in header, false calls &PrintFooterHTML
#
# Dies if an existing $File cannot be opened.
sub PrintBoilerPlate
{
	my ($File, $IsHeader) = @_;
	my $HaveFile = ($File ne "" && -e $File);

	if (! $HaveFile) {
		if (! $IsHeader) {
			# Called without parentheses on purpose, exactly as before:
			# in this form the callee sees our own @_.
			&PrintFooterHTML;
		} else {
			print HTML <<EOT;
<html>
<head>
<title>Search Engine Status</title>
</head>
<body>
<h2>Search Engine Status</h2>
<hr>
<p>
EOT
		}
	} else {
		open (FILE, $File) || die "Error: $File - $!\n";
		print HTML $_ while (<FILE>);
		close (FILE);
	}
}
258
259