#!/usr/local/bin/perl
#
# status.pl  v1.0  960413  Iain Lea (iain@sbs.de)
#
# ChangeLog
# 960413 IL
#
# Produces an HTML 'Search Engine Status' page with the last 5 runs
# and the 'Top 10' servers by #URLS indexed.
#
# Usage: status.pl [options]
#        -h       help
#        -F file  HTML footer
#        -H file  HTML header
#        -o file  HTML generated file
#        -v       verbose
#
# Exit status: 0 on success; a fatal error terminates the script via die.
#
# TODO

require 'timelocal.pl';
require 'getopts.pl';
require '/www/search.sbs.de/bin/sbs.pl';

$DataDir       = '/www/search.sbs.de/data/robot';
$RunTimeFile   = "$DataDir/current-runtime";
$RobotFile     = "$DataDir/current-robot";
$IndexFile     = '/www/search.sbs.de/test/db/db.wordlist';

$DefOutputFile = '/www/search.sbs.de/test/pub/status.html';
$TmpFile       = "/tmp/status.$$";
$DefFooter     = '';
$DefHeader     = '';
$Verbose       = 0;
$Top10Servers  = 10;

&ParseCmdLine;

print "Generating status.html...\n" if $Verbose;

&ReadDataFiles ($RunTimeFile, $RobotFile, $IndexFile);
&WriteStatus ($DataDir, $DefOutputFile, $DefHeader, $DefFooter);

# Reaching this point means the page was written; report success.
exit 0;

#############################################################################
# Subroutines
#

# ParseCmdLine
#
# Parses the command line with Getopts and overrides the $DefFooter,
# $DefHeader, $DefOutputFile and $Verbose globals for each option given.
# Prints the usage text and exits with status 0 when -h is present.
sub ParseCmdLine
{
    &Getopts ('F:hH:o:v');

    if ($opt_h ne "") {
        print <<EndOfHelp;
Produce an HTML 'Status' page of last 5 runs and Top 10 servers by #URLS.

Usage: $0 [options]
       -h       help
       -F file  HTML footer
       -H file  HTML header
       -o file  HTML generated file
       -v       verbose

EndOfHelp
        exit 0;
    }
    # Each override is guarded by its own option.
    $DefFooter     = $opt_F if ($opt_F ne "");
    $DefHeader     = $opt_H if ($opt_H ne "");
    $DefOutputFile = $opt_o if ($opt_o ne "");
    $Verbose       = 1      if ($opt_v ne "");
}

# ParseDateStamp
#
# Parses one line in the format written by date(1), for example
#     Sat Apr 13 12:57:52 MET DST 1996
# The day of month may have one or two digits and any number of
# timezone words may precede the four digit year.
#
# Returns the list ($Epoch, $Mon, $MDay, $Year) where $Mon is 0..11 and
# $Year is the year minus 1900, or the empty list if the line does not
# look like a date stamp.
sub ParseDateStamp
{
    my ($Line) = @_;
    my (%Months) = (
        'Jan', 0, 'Feb', 1, 'Mar', 2, 'Apr', 3, 'May', 4,  'Jun', 5,
        'Jul', 6, 'Aug', 7, 'Sep', 8, 'Oct', 9, 'Nov', 10, 'Dec', 11 );
    my ($MonName, $MDay, $Hour, $Min, $Sec, $Year);

    return () unless $Line =~
        /^\S+\s+([A-Za-z]{3})\s+([0-9]{1,2})\s+([0-9]{2}):([0-9]{2}):([0-9]{2})\s+(?:\S+\s+)*([0-9]{4})\s*$/;

    # Copy the captures at once so that no later match can clobber them.
    ($MonName, $MDay, $Hour, $Min, $Sec, $Year) = ($1, $2, $3, $4, $5, $6);
    return () unless defined ($Months{$MonName});

    $Year -= 1900;
    return (timelocal ($Sec, $Min, $Hour, $MDay, $Months{$MonName}, $Year),
            $Months{$MonName}, $MDay, $Year);
}

# ReadDataFiles
#
# Collects the figures for the current robot run and stores them.
#
#   $RunTimeFile  file whose first two date stamps are taken as the
#                 begin and end time of the run
#   $RobotFile    robot log; 'htdig: <server> <n> documents' and
#                 'Read <n> words' lines are evaluated
#   $IndexFile    index file whose size in bytes is reported
#
# Side effects: fills the global %Top10ByName (key 'NNNNNNN|server',
# value number of documents), writes "$DataDir/YYMMDD-info" containing
#     RunDate:RunTime:IndexSize:NumWords:NumURLS:NumServers
# and points the "$DataDir/current-info" symlink at it.  $DataDir is the
# package global, it is not passed in.
#
# Dies if a file cannot be opened or written, or if $RunTimeFile holds
# no parsable begin time.
sub ReadDataFiles
{
    my ($RunTimeFile, $RobotFile, $IndexFile) = @_;
    my ($IndexSize, $NumWords, $NumURLS, $NumServers);
    my ($BegTime, $EndTime, $RunDate, $Key);
    my ($RunTime) = '';
    my ($Stamp, $Mon, $MDay, $Year, $Elapsed);
    my (@StatData, $Server, $Docs, $InfoFile, $CurrFile, $InfoLine);

    # RunDate : RunTime

    open (TIME, "< $RunTimeFile") || die "Error: $RunTimeFile - $!\n";
    while (<TIME>) {
        # chomp, not chop: a last line without a newline keeps its year.
        chomp;
        ($Stamp, $Mon, $MDay, $Year) = &ParseDateStamp ($_);
        next unless defined ($Stamp);

        if (! defined ($BegTime)) {
            $BegTime = $Stamp;
            # Two digit year keeps the name in the YYMMDD form that
            # WriteStatus looks for, also from the year 2000 onwards.
            $RunDate = sprintf ("%02d%02d%02d", $Year % 100, $Mon + 1, $MDay);
            print "BEG=[$_] [$BegTime] [$RunDate]\n" if $Verbose;
        } elsif (! defined ($EndTime)) {
            $EndTime = $Stamp;
            $Elapsed = $EndTime - $BegTime;
            # HHMM; the hour part may grow beyond two digits.
            $RunTime = sprintf ("%02d%02d", $Elapsed / 3600, ($Elapsed % 3600) / 60);
            print "END=[$_] [$EndTime] [$RunTime]\n" if $Verbose;
        }
    }
    close (TIME);

    die "Error: $RunTimeFile - no usable start time found\n"
        unless defined ($BegTime);

    # IndexSize

    @StatData = stat ($IndexFile);
    if (@StatData) {
        $IndexSize = $StatData[7];
    } else {
        warn "Warning: $IndexFile - $!\n";
        $IndexSize = 0;
    }
    print "SIZE=[$IndexSize]\n" if $Verbose;

    # NumWords : NumURLS : NumServers

    $NumWords = $NumURLS = $NumServers = 0;

    open (ROBOT, "< $RobotFile") || die "Error: $RobotFile - $!\n";
    while (<ROBOT>) {
        if (/^htdig:\s+(\S.*?)\s+([0-9]+)\s+documents$/) {
            ($Server, $Docs) = ($1, $2);
            $NumURLS += $Docs;
            $NumServers++;
            if ($Docs > 0) {
                # Zero padded count first, so that a plain string sort
                # orders the servers by number of documents.
                $Key = sprintf ("%07d|%s", $Docs, $Server);
                $Top10ByName{$Key} = $Docs;
            }
            print "SERVER=[$Server] DOCS=[$Docs]\n" if $Verbose;
        } elsif (/^Read\s+([0-9]+)\s+words$/) {
            $NumWords = $1;
            print "WORDS=[$NumWords]\n" if $Verbose;
        }
    }
    close (ROBOT);

    # Write data to YYMMDD-info file

    $InfoFile = "$DataDir/${RunDate}-info";
    $CurrFile = "$DataDir/current-info";
    $InfoLine = "$RunDate:$RunTime:$IndexSize:$NumWords:$NumURLS:$NumServers\n";

    open (INFO, "> $InfoFile") || die "Error: $InfoFile - $!\n";
    print $InfoLine if $Verbose;
    print INFO $InfoLine;
    # Buffered write errors only show up when the handle is closed.
    close (INFO) || die "Error: $InfoFile - $!\n";

    unlink ($CurrFile);
    symlink ($InfoFile, $CurrFile) || warn "Warning: $CurrFile - $!\n";
}

# WriteStatus
#
# Writes the status page to $OutFile: header, the line returned by
# ReadRobotInfo, a table of the newest (at most 5) YYMMDD-info files
# found in $DataDir and a table of the first $Top10Servers entries of
# %Top10ByName, followed by the footer.
#
#   $DataDir  directory holding the YYMMDD-info files
#   $OutFile  HTML file to create
#   $Header   optional header file, see PrintBoilerPlate
#   $Footer   optional footer file, see PrintBoilerPlate
#
# Dies if the output file, the directory or an info file cannot be
# opened.  The info files are ordered by name, so the two digit year
# limits a correct ordering to runs within one century.
sub WriteStatus
{
    my ($DataDir, $OutFile, $Header, $Footer) = @_;
    my (@InfoFiles, $Name, $Path, $Line);
    my ($RunDate, $RunTime, $IndexSize, $NumWords, $NumURLS, $NumServers);
    my ($Listed, $Key, $Server);

    # ReadRobotInfo is not defined in this file (presumably in sbs.pl).
    # $RobotInfo stays a package global in case those helpers read it.
    $RobotInfo = &ReadRobotInfo ("$DataDir/current-info");

    # HTML stays a bareword handle: PrintBoilerPlate writes to it and
    # PrintFooterHTML presumably does too -- TODO confirm in sbs.pl.
    open (HTML, "> $OutFile") || die "Error: $OutFile - $!\n";

    &PrintBoilerPlate ($Header, 1);

    print HTML <<EOT;
<p>
<strong>$RobotInfo</strong>
<p>
<table border=2 width=400>
<caption>Table of last 5 robot runs.</caption>
<th>Run Date<th>Run Time<th># Servers<th># URL's<th># Words<th>Index (MB)
<tr>
EOT

    # read YYMMDD-info files
    opendir (DIR, $DataDir) || die "Error: $DataDir - $!\n";
    @InfoFiles = grep (/^[0-9]{6}-info$/, readdir (DIR));
    closedir (DIR);
    @InfoFiles = reverse (sort (@InfoFiles));

    # Keep the newest five, or all of them when fewer exist.
    splice (@InfoFiles, 5) if (@InfoFiles > 5);

    foreach $Name (@InfoFiles) {
        $Path = "$DataDir/$Name";
        open (INFO, "< $Path") || die "Error: $Path - $!\n";
        $Line = <INFO>;
        close (INFO);
        next unless defined ($Line);    # empty info file, nothing to show
        chomp ($Line);

        ($RunDate, $RunTime, $IndexSize, $NumWords, $NumURLS, $NumServers) =
            split (/:/, $Line);
        $IndexSize = sprintf ("%.1f", $IndexSize / (1024*1024));

        # Stored as HHMM with at least two hour digits; shown as HH:MM.
        if ($RunTime =~ /^([0-9]+)([0-9]{2})$/) {
            $RunTime = "$1:$2";
        } else {
            $RunTime = '--:--';         # run without a recorded end time
        }
        print HTML <<EOT;
<td align="center">$RunDate</td>
<td align="center">$RunTime</td>
<td align="right">$NumServers</td>
<td align="right">$NumURLS</td>
<td align="right">$NumWords</td>
<td align="right">$IndexSize</td>
<tr>
EOT
    }

    print HTML <<EOT;
</table>
<p>
<p>
<table border=2 width=400>
<caption>Table of Top 10 servers listed by number of indexed documents.</caption>
<th>Top 10 Servers<th># URL's
<tr>
EOT

    $Listed = 0;
    foreach $Key (reverse (sort (keys (%Top10ByName)))) {
        last if ($Listed >= $Top10Servers);
        next unless ($Key =~ /^[0-9]*\|(.*)$/);
        $Server = $1;
        $Server =~ s/:80$//;            # default port is not shown
        $NumURLS = $Top10ByName{$Key};
        $Listed++;
        print HTML <<EOT;
<td width="80%" align="left"><a href="http://$Server/">$Server</a></td>
<td width="20%" align="right">$NumURLS</td>
<tr>
EOT
    }

    print HTML "</table>\n";

    &PrintBoilerPlate ($Footer, 0);

    close (HTML);
}

# PrintBoilerPlate
#
# Copies $File to the HTML handle if it is named and exists.  Otherwise
# a built-in header is printed when $IsHeader is true, and
# PrintFooterHTML is called when it is false.
#
#   $File      header or footer file, may be the empty string
#   $IsHeader  true for the page header, false for the footer
#
# Dies if an existing $File cannot be opened.
sub PrintBoilerPlate
{
    my ($File, $IsHeader) = @_;

    if ($File ne "" && -e $File) {
        # Explicit read mode, so the name is never taken as a pipe or
        # an output redirection.
        open (FILE, "< $File") || die "Error: $File - $!\n";
        while (<FILE>) {
            print HTML;
        }
        close (FILE);
    } else {
        if ($IsHeader) {
            print HTML <<EOT;
<html>
<head>
<title>Search Engine Status</title>
</head>
<body>
<h2>Search Engine Status</h2>
<hr>
<p>
EOT
        } else {
            # Not defined in this file (presumably in sbs.pl).  The call
            # form is kept as is: without parentheses it hands on the
            # current @_ -- TODO confirm whether the helper relies on it.
            &PrintFooterHTML;
        }
    }
}