1#!/usr/bin/perl 2my $automcdir = "/usr/local/spamassassin/automc/svn/masses/rule-qa/automc"; 3 4###!/usr/bin/perl 5##my $automcdir = "/home/jm/ftp/spamassassin/masses/rule-qa/automc"; 6 7use strict; 8use warnings; 9 10my $PERL_INTERP = $^X; 11 12our %FREQS_FILENAMES = ( 13 'DETAILS.age' => 'set 0, broken down by message age in weeks', 14 'DETAILS.all' => 'set 0, broken down by contributor', 15 'DETAILS.new' => 'set 0, in aggregate', 16 'NET.age' => 'set 1 (network), by message age in weeks', 17 'NET.all' => 'set 1 (network), by contributor', 18 'NET.new' => 'set 1 (network), in aggregate', 19 'SCOREMAP.new' => 'set 0, score-map', 20 'OVERLAP.new' => 'set 0, overlaps between rules', 21); 22 23my $refresh_cache = ($ARGV[0] and $ARGV[0] eq '-refresh'); 24 25my $self = Mail::SpamAssassin::CGI::RuleQaApp->new(); 26$self->ui_parse_url_base(); 27$self->ui_get_url_switches(); 28$self->ui_get_daterev(); 29$self->ui_get_rules(); 30$self->show_view(); 31exit; 32 33# --------------------------------------------------------------------------- 34 35package Mail::SpamAssassin::CGI::RuleQaApp; 36use CGI; 37use CGI::Carp 'fatalsToBrowser'; 38use Date::Manip; 39use URI::Escape; 40use Time::Local; 41use POSIX qw(); 42use Storable qw(nfreeze thaw); 43use Compress::LZ4 qw(compress decompress); 44 45# daterevs -- e.g. "20060429/r239832-r" -- are aligned to just before 46# the time of day when the mass-check tagging occurs; that's 0850 GMT, 47# so align the daterev to 0800 GMT. 48# 49use constant DATEREV_ADJ => - (8 * 60 * 60); 50 51my $FREQS_LINE_TEMPLATE; 52my $FREQS_LINE_TEXT_TEMPLATE; 53my $FREQS_EXTRA_TEMPLATE; 54our %AUTOMC_CONF; 55 56our @ISA = qw(); 57 58sub new { 59 my $class = shift; 60 $class = ref($class) || $class; 61 my $self = { }; 62 63 $self->{q} = CGI->new(); 64 65 $self->{id_counter} = 0; 66 $self->{include_embedded_freqs_xml} = 1; 67 $self->{cgi_param_order} = [ ]; 68 $self->{cgi_params} = { }; 69 $self->{now} = time(); 70 71 bless ($self, $class); 72 73 # some global configuration 74 $self->set_freqs_templates(); 75 $self->read_automc_global_conf(); 76 77 die "no directory set in automc config for 'html'" unless $AUTOMC_CONF{html}; 78 $self->{cachefile} = "$AUTOMC_CONF{html}/ruleqa.scache"; 79 80 $self->{scache_keep_time} = defined $AUTOMC_CONF{scache_keep_time} ? 81 $AUTOMC_CONF{scache_keep_time} : 60*60*24*14; # default 2 weeks 82 83 if ($refresh_cache) { 84 $self->refresh_cache(); 85 exit; 86 } 87 88 $self->read_cache(); 89 $self->precache_params(); 90 return $self; 91} 92 93# --------------------------------------------------------------------------- 94 95sub read_automc_global_conf { 96 my ($self) = @_; 97 98 open (CF, "<$automcdir/config") or return; 99 while(<CF>) { /^(?!#)(\S+)=(\S+)/ and $AUTOMC_CONF{$1} = $2; } 100 close CF; 101} 102 103# --------------------------------------------------------------------------- 104 105sub ui_parse_url_base { 106 my ($self) = @_; 107 108# Allow path info to become CGI-ish parameters. 109# the two parts of path info double as (a) daterev, (b) rulename, 110# (c) "s_detail=1". 111# CGI parameters "daterev", "rule", "s_detail" override them though 112# 113 $self->{url_abs} = $self->{q}->url(-absolute=>1); 114 $self->{url_with_path} = $self->{q}->url(-absolute=>1, -path_info=>1); 115 116# if we have a valid, full URL (non-cgi use), use that instead of 117# the "path_info" one, since CGI.pm will unhelpfully remove duplicate 118# slashes. this screws up "/FOO" rule grep searches. Also, 119# fix $self->{url_abs} to be correct for the "entire website is web app" case, 120# as CGI.pm gets that wrong, too! 121 122 if ($self->{url_abs} =~ m,^/(?:20\d|last-net|last-preflight|last-night|\d+-days-ago|today),) { 123 $self->{url_with_path} = $self->{url_abs}; 124 $self->{url_abs} = "/"; 125 } else { 126 $self->{url_with_path} =~ s,^\Q$self->{url_abs}\E,,; 127 } 128 129 if ($self->{url_with_path} =~ s,^/*([^/]+),,) { $self->add_cgi_path_param("daterev", $1); } 130 if ($self->{url_with_path} =~ s,^/(/?[^/]+),,) { $self->add_cgi_path_param("rule", $1); } 131 if ($self->{url_with_path} =~ s,^/detail,,) { $self->add_cgi_path_param("s_detail", "1"); } 132 133# cgi_url: used in hrefs from the generated document 134 $self->{cgi_url} = $self->{url_abs}; 135 $self->{cgi_url} =~ s,/ruleqa/ruleqa$,/ruleqa,s; 136 $self->{cgi_url} ||= '/'; 137} 138 139# --------------------------------------------------------------------------- 140 141sub ui_get_url_switches { 142 my ($self) = @_; 143 144 $self->{s} = { }; 145 146# selection of what will be displayed. 147 $self->{s}{detail} = $self->get_url_switch('s_detail', 0); 148 $self->{s}{g_over_time} = $self->get_url_switch('s_g_over_time', 0); 149 $self->{s}{corpus} = $self->get_url_switch('s_corpus', 0); 150 151 # "?q=FOO" is a shortcut for "?rule=FOO&s_detail=1"; good for shortcuts 152 my $q = $self->{q}->param("q"); 153 if ($q) { 154 $self->add_cgi_param("rule", $q); 155 $self->add_cgi_param("s_detail", 1); 156 $self->{s}{detail} = 1; 157 } 158 159 $self->{s}{xml} = $self->get_url_switch('xml', 0); 160 $self->{include_embedded_freqs_xml} = $self->{s}{xml}; 161 162# note: age, new, overlap are all now synonyms for detail ;) 163 if ($self->{s}{age} || $self->{s}{overlap} || $self->{s}{detail}) { 164 $self->{s}{age} = 1; 165 $self->{s}{all} = 1; 166 $self->{s}{new} = 1; 167 $self->{s}{overlap} = 1; 168 $self->{s}{scoremap} = 1; 169 } 170 171 # always show "new" set, though 172 $self->{s}{new} = 1; 173} 174 175sub get_url_switch { 176 my ($self, $name, $defval) = @_; 177 my $val = $self->{q}->param($name); 178 if (!defined $val) { return $defval; } 179 return ($val) ? 1 : 0; 180} 181 182# --------------------------------------------------------------------------- 183 184sub ui_get_daterev { 185 my ($self) = @_; 186 187 # when and what 188 $self->{daterev} = $self->{q}->param('daterev') || ''; 189 190 $self->{daterevs} = $self->{cached}->{daterevs}; 191 192 # sanitise daterev string 193 if (defined $self->{daterev}) { 194 195 # all of these ignore "b" preflight mass-checks, btw 196 if ($self->{daterev} eq 'last-night') { 197 $self->{daterev} = $self->get_daterev_for_days_ago(1); 198 $self->{q}->param('daterev', $self->{daterev}); # make it absolute 199 } 200 elsif ($self->{daterev} =~ /^(\d+)-days-ago$/) { 201 $self->{daterev} = $self->get_daterev_for_days_ago($1); 202 $self->{q}->param('daterev', $self->{daterev}); 203 } 204 elsif ($self->{daterev} eq 'last-preflight') { 205 $self->{daterev} = undef; 206 } 207 elsif ($self->{daterev} eq 'today') { 208 $self->{daterev} = $self->get_daterev_by_date( 209 POSIX::strftime "%Y%m%d", gmtime (($self->{now} + DATEREV_ADJ))); 210 $self->{q}->param('daterev', $self->{daterev}); 211 } 212 elsif ($self->{daterev} eq 'last-net') { 213 $self->{daterev} = $self->get_last_net_daterev(); 214 $self->{q}->param('daterev', $self->{daterev}); 215 } 216 elsif ($self->{daterev} =~ /^(20\d\d[01]\d\d\d)$/) { 217 # a date 218 $self->{daterev} = $self->get_daterev_by_date($1); 219 $self->{q}->param('daterev', $self->{daterev}); 220 } 221 elsif ($self->{daterev} =~ /(\d+)[\/-](r\d+)-(\S+)/ && $2) { 222 $self->{daterev} = "$1-$2-$3"; 223 } else { 224 # default: last-night's 225 $self->{daterev} = $self->get_daterev_for_days_ago(1); 226 } 227 } 228 229 # turn possibly-empty $self->{daterev} into a real date/rev combo (that exists) 230 $self->{daterev} = $self->date_in_direction($self->{daterev}, 0); 231 232 $self->{daterev_md} = $self->get_daterev_metadata($self->{daterev}); 233} 234 235# --------------------------------------------------------------------------- 236 237sub ui_get_rules { 238 my ($self) = @_; 239 240 # which rules? 241 $self->{rule} = $self->{q}->param('rule') || ''; 242 $self->{rule} =~ s/[^_0-9a-zA-Z\/]//gs; # Sanitize 243 $self->{rules_all} = 0; 244 $self->{rules_grep} = 0; 245 $self->{nicerule} = $self->{rule}; 246 if (!$self->{nicerule}) { 247 $self->{rules_all}++; $self->{nicerule} = 'all rules'; 248 } 249 if ($self->{rule} =~ /^\//) { 250 $self->{rules_grep}++; $self->{nicerule} = 'regexp '.$self->{rule}; 251 } 252 253 $self->{srcpath} = $self->{q}->param('srcpath') || ''; 254 $self->{srcpath} =~ s/[^.,_0-9a-zA-Z\/-]//gs; # Sanitize 255 $self->{mtime} = $self->{q}->param('mtime') || ''; 256 $self->{mtime} =~ s/[^0-9]//gs; # Sanitize 257 258 $self->{freqs}{head} = { }; 259 $self->{freqs}{data} = { }; 260 $self->{freqs}{ordr} = { }; 261 $self->{line_counter} = 0; 262} 263 264# --------------------------------------------------------------------------- 265# supported views 266 267sub show_view { 268 my ($self) = @_; 269 270 if ($self->{q}->param('mclog')) { 271 $self->show_mclog($self->{q}->param('mclog')); 272 } 273 274 my $graph = $self->{q}->param('graph'); 275 if ($graph) { 276 if ($graph eq 'over_time') { $self->graph_over_time(); } 277 else { die "graph '$graph' unknown"; } 278 } 279 elsif ($self->{q}->param('longdatelist')) { 280 print $self->{q}->header(); 281 $self->show_daterev_selector_page(); 282 } 283 elsif ($self->{q}->param('shortdatelist')) { 284 $self->{s_shortdatelist} = 1; 285 print $self->{q}->header(); 286 $self->show_default_view(); 287 } 288 else { 289 print $self->{q}->header(); 290 $self->show_default_view(); 291 } 292} 293 294# --------------------------------------------------------------------------- 295 296sub show_default_header { 297 my ($self, $title) = @_; 298 299 # replaced with use of main, off-zone host: 300 # <!-- <link href="/ruleqa.css" rel="stylesheet" type="text/css"> <script src="https://ruleqa.spamassassin.org/sorttable.js"></script> --> 301 302 my $hdr = q{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" 303 "https://www.w3.org/TR/html4/strict.dtd"> 304 <html xmlns="https://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 305 <head><meta http-equiv="Content-type" content="text/html; charset=utf-8"> 306 <link rel="icon" href="https://spamassassin.apache.org/images/favicon.ico"> 307 <title>}.$title.q{: SpamAssassin Rule QA</title> 308 309 <link href="https://ruleqa.spamassassin.org/ruleqa.css" rel="stylesheet" type="text/css"> 310 <script src="https://ruleqa.spamassassin.org/sorttable.js"></script> 311 312 <script type="text/javascript"><!-- 313 314 function hide_header(id) {document.getElementById(id).style.display="none";} 315 function show_header(id) {document.getElementById(id).style.display="block";} 316 317 //--> 318 </script> 319 320 </head><body> 321 322 <table width="100%"> <tr> <td valign=top> 323 <h1>SpamAssassin Rule QA</h1> 324 </td> <td valign=top> 325 <p align="right"> 326 <a href="https://wiki.apache.org/spamassassin/RuleQaApp">help</a> 327 </p> 328 </td> </tr> </table> 329 330 }; 331 #<br> <a href="https://bbmass.spamassassin.org:8011/">preflight mass-check progress</a> 332 return $hdr; 333} 334 335sub show_default_view { 336 my ($self) = @_; 337 338 my $title; 339 if ($self->{s}{detail}) { 340 $title = "Details for $self->{nicerule} in mass-check $self->{daterev}"; 341 } else { 342 $title = "Overview of all rules in mass-check $self->{daterev}"; 343 } 344 print $self->show_default_header($title); 345 346 my $tmpl = q{ 347 348 <div class='updateform'> 349 350 <form action="!THISURL!" method="GET"> 351 <table style="padding-left: 0px" class='datetable'> 352 353 <tr> 354 <th> Commit </th> 355 <th> Preflight Mass-Checks </th> 356 <th> Nightly Mass-Checks </th> 357 <th> Network Mass-Checks </th> 358 </tr> 359 360 <tr> 361 <td colspan="4"> 362 <div class='ui_label'> 363 List <a href="/">just current daterev</a> / 364 <a href="!shortdatelist!">all daterevs within 2 days</a> / 365 <a href="!longdatelist!">most recent 1000</a> / 366 <a href="!fulldatelist!">full list</a> 367 </div> 368 </td> 369 </tr> 370 371 !daylinkstable! 372 373 </table> 374 375 <table width='100%'> 376 <tr> 377 <td width='100%'> 378 <div class='ui_label'> 379 Or, <a href="https://wiki.apache.org/spamassassin/DateRev">DateRev</a> 380 to display: <input type='textfield' name='daterev' value="!daterev!"> 381 </div> 382 <div class='ui_label'> 383 Or, select a recent nightly mass-check by date by entering 384 'YYYYMMDD' in the DateRev text field for a specific date, 385 or <a href='!daterev=last-night!'>last night's nightly run</a>, 386 <a href='!daterev=today!'>today's nightly run</a>, 387 <a href='!daterev=last-net!'>the most recent --net run</a>, or 388 <a href='!daterev=last-preflight!'>the most recent 'preflight' mass-check</a>. 389 </div> 390 </td> 391 </tr> 392 </table> 393 394 <br/> 395 396 <h4> Which Rules?</h4> 397 <div class='ui_label'> 398 Show only these rules (space separated, or regexp with '/' prefix):<br/> 399 </div> 400 <input type='textfield' size='60' name='rule' value="!rule!"><br/> 401 <br/> 402 <div class='ui_label'> 403 Show only rules from source files whose paths contain this string:<br/> 404 </div> 405 <input type='textfield' size='60' name='srcpath' value="!srcpath!"><br/> 406 <br/> 407 408 <!-- <input type='checkbox' name='s_detail' id='s_detail' !s_detail!><label 409 for='s_detail' class='ui_label'>Display full details: message age in weeks, by contributor, as score-map, overlaps with other rules, freshness graphs 410 </label><br/> 411 <br/> --> 412 413<p> 414 <div class='ui_label'> 415 Show only rules from files modified in the 416 <a href='!mtime=1!'>last day</a>, 417 <a href='!mtime=2!'>2</a>, 418 <a href='!mtime=3!'>3</a>, 419 <a href='!mtime=7!'>last week</a> 420 </div> 421</p> 422 423 <div align='right'><input type='submit' name='g' value="Change"></div> 424 </form> 425 </div> 426 427 }; 428 429 my @drs = (); 430 { 431 my $origdr = $self->{daterev} || $self->{daterevs}->[-1]; 432 $origdr =~ /^(\d+)[\/-](\S+)[\/-]/; 433 my $date = $1; 434 my $rev = $2; 435 436 my $dr_after = date_offset($date, -2); 437 my $dr_before = date_offset($date, 2); 438 439 my $origidx; 440 foreach my $dr (@{$self->{daterevs}}) { 441 next unless ($dr =~ /^(\d+)[\/-]/); 442 my $date = $1; 443 444 next unless ($date >= $dr_after); 445 next unless ($date <= $dr_before); 446 push @drs, $dr; 447 448 if ($dr eq $origdr) { 449 $origidx = scalar @drs - 1; 450 } 451 } 452 453 # if we're doing the default UI -- ie. looking at a mass-check -- 454 # cut it down to just a couple around it, for brevity 455 if (!$self->{s_shortdatelist} && defined($origidx)) { 456 my $i = $origidx; 457 while ($i < @drs-1 && $drs[$i] =~ /^${date}-${rev}-/) { $i++; } 458 my $nextrev = $drs[$i]; $nextrev =~ s/-[a-z]$//; 459 while ($i < @drs-1 && $drs[$i] =~ /^${nextrev}-/) { $i++; } 460 if ($i < @drs-1) { splice @drs, $i; } 461 462 $i = $origidx; 463 while ($i > 0 && $drs[$i] =~ /^${date}-${rev}-/) { $i--; } 464 my $prevrev = $drs[$i]; $prevrev =~ s/-[a-z]$//; 465 while ($i > 0 && $drs[$i] =~ /^${prevrev}-/) { $i--; } 466 if ($i > 0) { splice @drs, 0, $i+1; } 467 } 468 } 469 470 $tmpl =~ s{!daylinkstable!}{ 471 $self->get_daterev_html_table(\@drs, 0, 0); 472 }ges; 473 474 my $dranchor = "r".$self->{daterev}; $dranchor =~ s/[^A-Za-z0-9]/_/gs; 475 my $sdlurl = $self->gen_toplevel_url("shortdatelist", 1)."#".$dranchor; 476 my $ldlurl = $self->gen_toplevel_url("longdatelist", 1)."#".$dranchor; 477 my $fdlurl = $self->gen_toplevel_url("longdatelist", 1).'&perpage=999999#'.$dranchor; 478 479 $tmpl =~ s/!longdatelist!/$ldlurl/gs; 480 $tmpl =~ s/!fulldatelist!/$fdlurl/gs; 481 $tmpl =~ s/!shortdatelist!/$sdlurl/gs; 482 $tmpl =~ s/!THISURL!/$self->{cgi_url}/gs; 483 $tmpl =~ s/!daterev!/$self->{daterev}/gs; 484 $tmpl =~ s/!mtime=(.*?)!/ 485 $self->gen_switch_url("mtime", $1); 486 /eg; 487 $tmpl =~ s/!daterev=(.*?)!/ 488 $self->gen_switch_url("daterev", $1); 489 /eg; 490 $tmpl =~ s/!rule!/$self->{rule}/gs; 491 $tmpl =~ s/!srcpath!/$self->{srcpath}/gs; 492 foreach my $opt (keys %{$self->{s}}) { 493 if ($self->{s}{$opt}) { 494 $tmpl =~ s/!s_$opt!/checked /gs; 495 } else { 496 $tmpl =~ s/!s_$opt!/ /gs; 497 } 498 } 499 500 print $tmpl; 501 502 if (!$self->{s}{detail}) { 503 504 print qq{ 505 506 <p class='intro'> <strong>Instructions</strong>: click 507 the rule name to view details of a particular rule. </p> 508 509 }; 510 } 511 512 # debug: log the chosen sets parameters etc. 513 if (0) { 514 print "<!-- ", 515 "{s}{new} = $self->{s}{new}\n", 516 "{s}{age} = $self->{s}{age}\n", 517 "{s}{all} = $self->{s}{all}\n", 518 "{s}{overlap} = $self->{s}{overlap}\n", 519 "{s}{scoremap} = $self->{s}{scoremap}\n", 520 "{s}{xml} = $self->{s}{xml}\n", 521 "-->\n"; 522 } 523 524 $|=1; # turn off buffering from now on 525 526 my $single_rule_displayed = ($self->{s}{detail} && !($self->{rules_all} || $self->{rules_grep})); 527 528 # only display code if it's a single rule page 529 if ($single_rule_displayed) { 530 my $rev = $self->get_rev_for_daterev($self->{daterev}); 531 my $md = $self->get_rule_metadata($rev); 532 my $src = eval { $md->{rulemds}->{$self->{rule}}->{src} } || '(not found)'; 533 my $srchref = "https://svn.apache.org/viewvc/spamassassin/trunk/$src?revision=$rev\&view=markup"; 534 535 my $lastmod = '(unknown)'; 536 if (defined $md->{rulemds}->{$self->{rule}}->{srcmtime}) { 537 $lastmod = eval { 538 POSIX::strftime "%Y-%m-%d %H:%M:%S UTC", gmtime $md->{rulemds}->{$self->{rule}}->{srcmtime} 539 } || '(unknown)'; 540 } 541 542 my $tflags = eval { 543 $md->{rulemds}->{$self->{rule}}->{tf} 544 } || ''; 545 546 # a missing string is now represented as {}, annoyingly 547 if (ref $tflags =~ /HASH/ || $tflags =~ /^HASH/) { $tflags = ''; } 548 549 $tflags = ($tflags =~ /\S/) ? ", tflags $tflags" : ""; 550 551 my $plinkhref = $self->gen_this_url()."#rulemetadata"; 552 553 print qq{ 554 <p class="srcinfo"> 555 Detailed results for rule 556 <a id="rulemetadata"></a><a href="$plinkhref"><b>$self->{rule}</b></a>, 557 from source file <a href="$srchref">$src</a>$tflags. 558 Source file was last modified on $lastmod. 559 </p> 560 }; 561 } 562 563 $self->show_all_sets_for_daterev($self->{daterev}, $self->{daterev}); 564 565 # don't show "graph" link unless only a single rule is being displayed 566 if ($single_rule_displayed) { 567 my $graph_on = qq{ 568 569 <p><a id="over_time_anchor"></a><a id="overtime" 570 href="}.$self->gen_switch_url("s_g_over_time", "0").qq{#overtime" 571 >Hide graph</a></p> 572 <img src="}.$self->gen_switch_url("graph", "over_time").qq{" 573 width='800' height='815' /> 574 575 }; 576 577 my $graph_off = qq{ 578 579 <p><a id="over_time_anchor"></a><a id="overtime" 580 href="}.$self->gen_switch_url("s_g_over_time", "1").qq{#overtime" 581 >Show graph</a></p> 582 583 }; 584 585 print qq{ 586 587 <h3 class='graph_title'>Graph, hit-rate over time</h3> 588 }.($self->{s}{g_over_time} ? $graph_on : $graph_off).qq{ 589 590 </ul> 591 592 }; 593 my $corpus_on = qq{ 594 595 <p><a id="corpus_anchor"></a><a id="corpus" 596 href="}.$self->gen_switch_url("s_corpus", "0").qq{#corpus" 597 >Hide report</a></p> 598 <table> 599 <tr class='freqsextra'> 600 <td><pre class='perruleextra'>}.read_corpus_file().qq{</pre></td> 601 </tr> 602 <table> 603 604 }; 605 606 my $corpus_off = qq{ 607 608 <p><a id="corpus_anchor"></a><a id="corpus" 609 href="}.$self->gen_switch_url("s_corpus", "1").qq{#corpus" 610 >Show report</a></p> 611 612 }; 613 614 print qq{ 615 616 <h3 class='corpus_title'>Corpus quality</h3> 617 }.($self->{s}{corpus} ? $corpus_on : $corpus_off).qq{ 618 619 </ul> 620 621 }; 622 623 my @parms = $self->get_params_except(qw( 624 rule s_age s_overlap s_all s_detail 625 )); 626 my $url_back = $self->assemble_url(@parms); 627 628 print qq{ 629 630 <div class='ui_label'> 631 <p><a href="$url_back">< Back</a> to overview.</p> 632 </div> 633 634 }; 635 } 636 637 print qq{ 638 639 <div class='ui_label'> 640 <p>Note: the freqs tables are sortable. Click on the headers to resort them 641 by that column. <a 642 href="https://www.kryogenix.org/code/browser/sorttable/">(thanks!)</a></p> 643 </div> 644 645 </body></html> 646 647 }; 648 649} 650 651sub date_offset { 652 my ($yyyymmdd, $offset_days) = @_; 653 $yyyymmdd =~ /^(....)(..)(..)$/; 654 my $time = timegm(0,0,0,$3,$2-1,$1); 655 $time += (24 * 60 * 60) * $offset_days; 656 return POSIX::strftime "%Y%m%d", gmtime $time; 657} 658 659sub get_all_daterevs { 660 my ($self) = @_; 661 662 die "no directory set in automc config for 'html'" unless $AUTOMC_CONF{html}; 663 664 return sort map { 665 s/^.*\/(\d+)\/(r\d+-\S+)$/$1-$2/; $_; 666 } grep { /\/\d+\/r\d+-\S+$/ && -d $_ } (<$AUTOMC_CONF{html}/2*/r*>); 667} 668 669sub date_in_direction { 670 my ($self, $origdaterev, $dir) = @_; 671 672 my $orig; 673 if ($origdaterev && $origdaterev =~ /^(\d+)[\/-](r\d+-\S+)$/) { 674 $orig = "$1-$2"; 675 } else { 676 $orig = $self->{daterevs}->[-1]; # the most recent 677 } 678 679 if (!$orig) { 680 die "no daterev found for $origdaterev, with these options: ". 681 join(' ', @{$self->{daterevs}}); 682 } 683 684 my $cur; 685 for my $i (0 .. scalar(@{$self->{daterevs}})) { 686 if (defined $self->{daterevs}->[$i] && $self->{daterevs}->[$i] eq $orig) { 687 $cur = $i; last; 688 } 689 } 690 691 # if it's not in the list, $cur should be the last entry 692 if (!defined $cur) { $cur = scalar(@{$self->{daterevs}})-1; } 693 694 my $new; 695 if ($dir < 0) { 696 if ($cur+$dir >= 0) { 697 $new = $self->{daterevs}->[$cur+$dir]; 698 } 699 } 700 elsif ($dir == 0) { 701 $new = $self->{daterevs}->[$cur]; 702 } 703 else { 704 if ($cur+$dir <= scalar(@{$self->{daterevs}})-1) { 705 $new = $self->{daterevs}->[$cur+$dir]; 706 } 707 } 708 709 if ($new && -d $self->get_datadir_for_daterev($new)) { 710 return $new; 711 } 712 713 return undef; # couldn't find one 714} 715 716sub get_daterev_for_days_ago { 717 my ($self, $days) = @_; 718 719 # don't use a daterev after (now - 12 hours); that's too recent 720 # to be "last night", for purposes of rule-update generation. 721 722 my $notafter = POSIX::strftime "%Y%m%d", 723 gmtime ((($self->{now} + DATEREV_ADJ) + (12*60*60)) - (24*60*60*$days)); 724 return $self->get_daterev_by_date($notafter); 725} 726 727sub get_daterev_by_date { 728 my ($self, $notafter) = @_; 729 730 foreach my $dr (reverse @{$self->{daterevs}}) { 731 my $t = $self->get_daterev_metadata($dr); 732 next unless $t; 733 734 next if ($t->{date} + 0 > $notafter); 735 return $dr if ($t->{tag} eq 'n'); 736 } 737 return undef; 738} 739 740sub get_last_net_daterev { 741 my ($self) = @_; 742 743 foreach my $dr (reverse @{$self->{daterevs}}) { 744 my $t = $self->get_daterev_metadata($dr); 745 next unless $t; 746 return $dr if ($t->{includes_net}); 747 } 748 return undef; 749} 750 751sub show_all_sets_for_daterev { 752 my ($self, $path, $strdate) = @_; 753 754 $strdate = "mass-check date/rev: $path"; 755 756 $self->{datadir} = $self->get_datadir_for_daterev($path); 757 758 $self->showfreqset('DETAILS', $strdate); 759 760 # special case: we only build this for one set, as it's quite slow 761 # to generate 762 $self->{s}{scoremap} and $self->showfreqsubset("SCOREMAP.new", $strdate); 763 $self->{s}{overlap} and $self->showfreqsubset("OVERLAP.new", $strdate); 764} 765 766########################################################################### 767 768sub graph_over_time { 769 my ($self) = @_; 770 771 $self->{datadir} = $self->get_datadir_for_daterev($self->{daterev}); 772 773 # logs are named e.g. 774 # /home/automc/corpus/html/20051028/r328993/LOGS.all-ham-mc-fast.log.gz 775 776 # untaint 777 $self->{rule} =~ /([_0-9a-zA-Z]+)/; my $saferule = $1; 778 $self->{datadir} =~ /([-\.\,_0-9a-zA-Z\/]+)/; my $safedatadir = $1; 779 780 # outright block possibly-hostile stuff here: 781 # no "../" path traversal 782 die "forbidden: $safedatadir .." if ($safedatadir =~ /\.\./); 783 784 exec ("$PERL_INTERP $automcdir/../rule-hits-over-time ". 785 "--cgi --scale_period=200 --rule='$saferule' ". 786 "--ignore_older=180 ". 787 "$safedatadir/LOGS.*.log.gz") 788 or die "exec failed"; 789} 790 791########################################################################### 792 793sub show_mclog { 794 my ($self, $name) = @_; 795 796 print "Content-Type: text/plain\r\n\r\n"; 797 798 $self->{datadir} = $self->get_datadir_for_daterev($self->{daterev}); 799 800 # logs are named e.g. 801 # .../20051028/r328993-n/LOGS.all-ham-mc-fast-20051028-r328993-n.log.gz 802 803 # untaint 804 $name =~ /^([-\.a-zA-Z0-9]+)/; my $safename = $1; 805 $self->{rule} =~ /([_0-9a-zA-Z]+)/; my $saferule = $1; 806 $self->{datadir} =~ /([-\.\,_0-9a-zA-Z\/]+)/; my $safedatadir = $1; 807 808 # logs now include the daterev, too 809 $self->{daterev} =~ /([-\.\,_0-9a-zA-Z\/]+)/; my $safedaterev = $1; 810 $safedaterev =~ s/\//-/gs; 811 $safedaterev =~ s/^\d+-//; # no date in logfile 812 $safedaterev =~ s/-n$//; 813 814 # outright block possibly-hostile stuff here: 815 # no "../" path traversal 816 die "forbidden: $safedatadir .." if ($safedatadir =~ /\.\./); 817 die "forbidden: $safedaterev .." if ($safedaterev =~ /\.\./); 818 die "forbidden: $safename .." if ($safename =~ /\.\./); 819 820 my $gzfile = "$safedatadir/LOGS.all-$safename.$safedaterev.log.gz"; 821 if (!-f $gzfile) { 822 print "cannot open $gzfile\n"; 823 die "cannot open $gzfile"; 824 } 825 826 my $lines = 0; 827 open (GZ, "pigz -cd < $gzfile | grep -F '$saferule' |") or die "cannot gunzip '$gzfile'"; 828 while (<GZ>) { 829 /^[\.Y]\s+\S+\s+\S+\s+(?:\S*,|)\Q$saferule\E[, ]/ or next; 830 831 # sanitise privacy-relevant stuff 832 s/,mid=<.*>,/,mid=<REMOVED_BY_RULEQA>,/gs; 833 834 print; 835 last if ++$lines >= 100; 836 } 837 838 close GZ; 839 exit; 840} 841 842########################################################################### 843 844sub read_corpus_file { 845 return ''; # THERE IS NO CORPUS.all FILE GENERATED ATM 846 847 $self->{datadir} = $self->get_datadir_for_daterev($self->{daterev}); 848 $self->{datadir} =~ /([-\.\,_0-9a-zA-Z\/]+)/; my $safedatadir = $1; 849 850 # outright block possibly-hostile stuff here: 851 # no "../" path traversal 852 die "forbidden: $safedatadir .." if ($safedatadir =~ /\.\./); 853 854 open IN, "<$safedatadir/CORPUS.all" or warn "cannot read $safedatadir/CORPUS.all"; 855 my $text = join('', <IN>); 856 close IN; 857 return $text; 858} 859 860########################################################################### 861 862sub showfreqset { 863 my ($self, $type, $strdate) = @_; 864 $self->{s}{new} and $self->showfreqsubset("$type.new", $strdate); 865 $self->{s}{all} and $self->showfreqsubset("$type.all", $strdate); 866 $self->{s}{age} and $self->showfreqsubset("$type.age", $strdate); 867} 868 869sub showfreqsubset { 870 my ($self, $filename, $strdate) = @_; 871 $self->read_freqs_file($filename); 872 873 if ($filename eq 'DETAILS.new') { 874 # report which sets we used 875 $self->summarise_head($self->{freqs}{head}{$filename}, 876 $filename, $strdate, $self->{rule}); 877 } 878 879 $self->get_freqs_for_rule($filename, $strdate, $self->{rule}); 880} 881 882sub summarise_head { 883 my ($self, $head, $filename, $strdate, $rule) = @_; 884 885 my @mcfiles = (); 886 if ($head =~ /^# ham results used for \S+ \S+ \S+: (.*)$/m) { 887 @mcfiles = split(' ', $1); 888 } 889 890 map { 891 s/^ham-//; s/\.r[0-9]+\.log$//; 892 } @mcfiles; 893 894 my $who = join(', ', @mcfiles); 895 896 print qq{ 897 898 <!-- <em>(Using mass-check data from: $who)</em> --> 899 900 }; 901} 902 903sub read_freqs_file { 904 my ($self, $key, $refresh) = @_; 905 906 $refresh ||= 0; 907 my $file = $self->{datadir}.$key; 908 909 # storable cache file 910 my $scache = "$file.scache"; 911 912 if (!-f $file) { 913 # try gz if not found 914 if (-f "$file.gz") { 915 $file = "$file.gz"; 916 } else { 917 warn "missing file $file"; 918 } 919 } 920 921 if (-f $scache) { 922 # is fresh? 923 if (mtime($scache) >= mtime($file)) { 924 return if $refresh; # just -refresh 925 eval { 926 $self->{freqs} = thaw(decompress(readfile($scache))); 927 }; 928 if ($@ || !defined $self->{freqs}) { 929 warn "cache retrieve failed $scache: $@ $!"; 930 # remove bad file 931 unlink($scache); 932 } 933 else { 934 return; 935 } 936 } 937 else { 938 # remove stale cache 939 unlink($scache); 940 } 941 } 942 943 if ($file =~ /\.gz$/) { 944 $file =~ s/'//gs; 945 if (!open (IN, "pigz -cd < '$file' |")) { 946 warn "cannot read $file"; 947 return; 948 } 949 } 950 elsif (!open (IN, "<$file")) { 951 warn "cannot read $file"; 952 } 953 954 $self->{freqs}{head}{$key}=<IN>; 955 $self->{freqs}{data}{$key} = { }; 956 $self->{freqs}{ordr}{$key} = [ ]; 957 my $lastrule; 958 959 my $subset_is_user = 0; 960 my $subset_is_age = 0; 961 if ($file =~ /\.age/) { $subset_is_age = 1; } 962 if ($file =~ /\.all/) { $subset_is_user = 1; } 963 964 while (<IN>) { 965 if (/^#/ || / \(all messages/ || /OVERALL%/) { 966 $self->{freqs}{head}{$key} .= $_; 967 } 968 elsif (/^\s*MSEC/) { 969 next; # just ignored for now 970 } 971 elsif (/^\s*scoremap (.*)$/) { 972 $self->{freqs}{data}{$key}{$lastrule}{scoremap} .= $_; 973 } 974 elsif (/^\s*overlap (.*)$/) { 975 $self->{freqs}{data}{$key}{$lastrule}{overlap} .= $_; 976 } 977 elsif (/ (?:([\+\-])\s+)?(\S+?)(\:\S+)?\s*$/) { 978 my $promochar = $1; 979 $lastrule = $2; 980 my $subset = $3; 981 if ($subset) { $subset =~ s/^://; } 982 983 my $is_testing = ($lastrule =~ /^T_/); 984 my $is_subrule = ($lastrule =~ /^__/); 985 986 # assume a default based on rule name; turn off explicitly 987 # the rules that are not hitting qual thresholds. list 988 # both testing and core rules. 989 my $promo = (!$is_subrule); 990 if ($promochar eq '-') { 991 $promo = 0; 992 } 993 994 my @vals = split; 995 if (!exists $self->{freqs}{data}{$key}{$lastrule}) { 996 push (@{$self->{freqs}{ordr}{$key}}, $lastrule); 997 $self->{freqs}{data}{$key}{$lastrule} = { 998 lines => [ ] 999 }; 1000 } 1001 1002 my $line = { 1003 name => $lastrule, 1004 msecs => $vals[0], 1005 spampc => $vals[1], 1006 hampc => $vals[2], 1007 so => $vals[3], 1008 rank => $vals[4], 1009 score => $vals[5], 1010 username => ($subset_is_user ? $subset : undef), 1011 age => ($subset_is_age ? $subset : undef), 1012 promotable => $promo ? '1' : '0', 1013 }; 1014 push @{$self->{freqs}{data}{$key}{$lastrule}{lines}}, $line; 1015 } 1016 elsif (!/\S/) { 1017 # silently ignore empty lines 1018 } 1019 else { 1020 warn "warning: unknown freqs line in $file: '$_'"; 1021 } 1022 } 1023 close IN; 1024 1025 if ($refresh && !-f $scache) { 1026 eval { 1027 open (OUT, ">$scache.$$") or die "open failed: $@"; 1028 print OUT compress(nfreeze(\%{$self->{freqs}})); 1029 close OUT; 1030 }; 1031 if ($@ || !rename("$scache.$$", $scache)) { 1032 warn "cache store failed $scache: $@"; 1033 unlink("$scache.$$"); 1034 } 1035 } 1036} 1037 1038sub get_freqs_for_rule { 1039 my ($self, $key, $strdate, $ruleslist) = @_; 1040 1041 my $desc = $FREQS_FILENAMES{$key}; 1042 my $file = $self->{datadir}.$key; 1043 1044 my $titleplinkold = "$key.$strdate"; 1045 $titleplinkold =~ s/[^A-Za-z0-9]+/_/gs; 1046 1047 my $titleplinknew = "t".$key; 1048 $titleplinknew =~ s/[^A-Za-z0-9]+/_/gs; 1049 $titleplinknew =~ s/^tDETAILS_//; 1050 1051 my $titleplinkhref = $self->{q}->url(-base=>1).$self->gen_this_url()."#".$titleplinknew; 1052 1053 my $comment = qq{ 1054 1055 <!-- freqs start $key --> 1056 <h3 class='freqs_title'>$desc</h3> 1057 <!-- <h4>$strdate</h4> --> 1058 1059 }; 1060 1061 my $heads = $self->sub_freqs_head_line($self->{freqs}{head}{$key}); 1062 my $header_context = $self->extract_freqs_head_info($self->{freqs}{head}{$key}); 1063 1064 my $headers_id = $key; $headers_id =~ s/[^A-Za-z0-9]/_/gs; 1065 1066 $comment .= qq{ 1067 1068 <div id="$headers_id" class='headdiv' style='display: none'> 1069 <p class='headclosep' align='right'><a 1070 href="javascript:hide_header('$headers_id')">[close]</a></p> 1071 <pre class='head'>$heads</pre> 1072 </div> 1073 1074 <div id="txt_$headers_id" class='headdiv' style='display: none'> 1075 <p class='headclosep' align='right'><a 1076 href="javascript:hide_header('txt_$headers_id')">[close]</a></p> 1077 <pre class='head'><<<TEXTS>>></pre> 1078 </div> 1079 1080 <br clear="all"/> 1081 <p class='showfreqslink'><a 1082 href="javascript:show_header('txt_$headers_id')">(pasteable)</a> <a 1083 href="javascript:show_header('$headers_id')">(source details)</a> 1084 <a name='$titleplinknew' href='$titleplinkhref' class='title_permalink'>(#)</a> 1085 <a name='$titleplinkold'><!-- backwards compat --></a> 1086 </p> 1087 1088 <table class='sortable' id='freqs_${headers_id}' class='freqs'> 1089 <tr class='freqshead'> 1090 <th>MSECS</th> 1091 <th>SPAM%</th> 1092 <th>HAM%</th> 1093 <th>S/O</th> 1094 <th>RANK</th> 1095 <th>SCORE</th> 1096 <th>NAME</th> 1097 <th>WHO/AGE</th> 1098 </tr> 1099 1100 }; 1101 1102 $ruleslist ||= ''; 1103 my @rules = split (' ', $ruleslist); 1104 1105 if (ref $self->{freqs}{ordr}{$key} ne 'ARRAY') { 1106 print qq( 1107 <h3 class='freqs_title'>$desc</h3> 1108 <table><p><i>('$key' not yet available)</i></p></table> 1109 ); 1110 return; 1111 } 1112 1113 if ($self->{rules_all}) { 1114 push @rules, @{$self->{freqs}{ordr}{$key}}; 1115 } 1116 elsif ($self->{rules_grep} && $ruleslist =~ /^\/(.*)$/) { 1117 my $regexp = $1; 1118 foreach my $r (@{$self->{freqs}{ordr}{$key}}) { 1119 next unless ($r =~/${regexp}/i); 1120 push @rules, $r; 1121 } 1122 } 1123 1124 my $srcpath = $self->{srcpath}; 1125 my $mtime = $self->{mtime}; 1126 my $no_net_rules = (!$self->{daterev_md}->{includes_net}); 1127 1128 if ($srcpath || $mtime) { 1129 my $rev = $self->get_rev_for_daterev($self->{daterev}); 1130 my $md = $self->get_rule_metadata($rev); 1131 $md = $md->{rulemds}; 1132 1133 # use Data::Dumper; print Dumper $md; 1134 1135 if ($srcpath) { # bug 4984 1136 @rules = grep { 1137 $md->{$_}->{src} and 1138 ($md->{$_}->{src} =~ /\Q$srcpath\E/); 1139 } @rules; 1140 } 1141 1142 if ($mtime) { # bug 4985 1143 my $target = $self->{now} - ($mtime * 24 * 60 * 60); 1144 @rules = grep { 1145 $md->{$_}->{srcmtime} and 1146 ($md->{$_}->{srcmtime} >= $target); 1147 } @rules; 1148 } 1149 1150 if ($no_net_rules) { # bug 5047 1151 @rules = grep { 1152 !$md->{$_}->{tf} or 1153 ($md->{$_}->{tf} !~ /\bnet\b/); 1154 } @rules; 1155 } 1156 } 1157 1158 if ($self->{include_embedded_freqs_xml} == 0) { 1159 $FREQS_LINE_TEMPLATE =~ s/<!--\s+<rule>.*?-->//gs; 1160 } 1161 1162 my $texts = $titleplinkhref." :\n\n". 1163 " MSECS SPAM% HAM% S/O RANK SCORE NAME WHO/AGE\n"; 1164 # 0 0.0216 0.0763 0.221 0.52 2.84 X_IP 1165 1166 foreach my $rule (@rules) { 1167 if ($rule && defined $self->{freqs}{data}{$key}{$rule}) { 1168 $comment .= $self->rule_anchor($key,$rule); 1169 $comment .= $self->output_freqs_data_line($self->{freqs}{data}{$key}{$rule}, 1170 \$FREQS_LINE_TEMPLATE, 1171 $header_context); 1172 $texts .= $self->output_freqs_data_line($self->{freqs}{data}{$key}{$rule}, 1173 \$FREQS_LINE_TEXT_TEMPLATE, 1174 $header_context); 1175 } 1176 else { 1177 $comment .= $self->rule_anchor($key,$rule); 1178 $comment .= " 1179 <tr><td colspan=8> 1180 (no data found) 1181 </td></tr> 1182 "; 1183 $texts .= "(no data found)\n"; 1184 } 1185 } 1186 1187 # insert the text into that template 1188 $comment =~ s/<<<TEXTS>>>/$texts/gs; 1189 1190 print $comment; 1191 print "</table>"; 1192} 1193 1194sub rule_anchor { 1195 my ($self, $key, $rule) = @_; 1196 return "<a name='".uri_escape($key."_".$rule)."'></a>". 1197 "<a name='$rule'></a>"; 1198} 1199 1200sub sub_freqs_head_line { 1201 my ($self, $str) = @_; 1202 $str = "<em><tt>".($str || '')."</tt></em><br/>"; 1203 return $str; 1204} 1205 1206sub set_freqs_templates { 1207 my ($self) = @_; 1208 1209 $FREQS_LINE_TEMPLATE = qq{ 1210 1211 <tr class='freqsline_promo[% PROMO %]'> 1212 <td>[% MSECS %]</td> 1213 <td><a class='ftd' [% SPAMLOGHREF %]>[% SPAMPC %]<span>[% SPAMPCDETAIL %]</span></a> 1214 <td><a class='ftd' [% HAMLOGHREF %]>[% HAMPC %]<span>[% HAMPCDETAIL %]</span></a> 1215 <td>[% SO %]</td> 1216 <td>[% RANK %]</td> 1217 <td>[% SCORE %]</td> 1218 <td style='text-align: left'><a href="[% NAMEREF %]">[% NAME %]</a></td> 1219 <td>[% USERNAME %][% AGE %][% CORPUSAHREF %]</td> 1220 <!-- 1221 <rule><test>[% NAME %]</test><promo>[% PROMO %]</promo> <spc>[% SPAMPC %]</spc><hpc>[% HAMPC %]</hpc><so>[% SO %]</so> <detailhref esc='1'>[% NAMEREFENCD %]</detailhref></rule> 1222 --> 1223 </tr> 1224 1225 }; 1226 1227 $FREQS_LINE_TEXT_TEMPLATE = 1228 qq{[% MSECS %] [% SPAMPC %] [% HAMPC %] }. 1229 qq{[% SO %] [% RANK %] [% SCORE %] }. 1230 qq{[% NAME %] [% USERNAME %][% AGE %] }. 1231 "\n"; 1232 1233 $FREQS_EXTRA_TEMPLATE = qq{ 1234 1235 <tr class='freqsextra'> 1236 <td colspan=7><pre class='perruleextra'>[% EXTRA %]</pre></td> 1237 </tr> 1238 1239 }; 1240 1241 $FREQS_LINE_TEMPLATE =~ s/^\s+//gm; 1242 $FREQS_EXTRA_TEMPLATE =~ s/^\s+//gm; 1243 1244 $FREQS_LINE_TEMPLATE =~ s/\s+/ /gs; # no <pre> stuff in this, shrink it 1245} 1246 1247sub extract_freqs_head_info { 1248 my ($self, $headstr) = @_; 1249 my $ctx = { }; 1250 1251 # extract the "real" numbers of mails for particular classes, for 1252 # some of the report types: 1253 # 0 1000 1000 0.500 0.00 0.00 (all messages):mc-fast 1254 # 0 4983 4995 0.499 0.00 0.00 (all messages):mc-med 1255 # 0 9974 9995 0.499 0.00 0.00 (all messages):mc-slow 1256 # 0 19972 19994 0.500 0.00 0.00 (all messages):mc-slower 1257 # or just: 1258 # 0 35929 35984 0.500 0.00 0.00 (all messages) 1259 while ($headstr =~ m/^ 1260 \s+\d+\s+(\d+)\s+(\d+)\s+\S+\s+\S+\s+\S+\s+\(all\smessages\)(|:\S+)\s* 1261 $/gmx) 1262 { 1263 $ctx->{'message_count'.$3} = { 1264 nspam => $1, 1265 nham => $2 1266 }; 1267 } 1268 1269 return $ctx; 1270} 1271 1272sub create_spampc_detail { 1273 my ($self, $percent, $isspam, $ctx, $line) = @_; 1274 1275 # optimization: no need to look anything up if it's 0.0000% 1276 # disabled; this info may be pretty useful after all 1277 ## if ($percent == 0.0) { return qq{ 0\ messages }; } 1278 1279 my $who = $line->{username} || $line->{age}; 1280 my $obj; 1281 if ($who) { 1282 $obj = $ctx->{'message_count:'.$who}; 1283 } else { 1284 $obj = $ctx->{'message_count'}; 1285 } 1286 1287 if (!$obj) { 1288 return "???"; # no data found for that submitter, stop here! 1289 } 1290 1291 my $outof = ($isspam ? $obj->{nspam} : $obj->{nham}); 1292 my $count = int ((($percent/100.0) * $outof) + 0.5); # round to nearest int 1293 return qq{ 1294 $count\ of\ $outof\ messages 1295 }; 1296} 1297 1298sub create_mclog_href { 1299 my ($self, $percent, $isspam, $ctx, $line) = @_; 1300 1301 # optimization: no need to look anything up if it's 0.0000% 1302 return '' if ($percent == 0.0); 1303 1304 # also, does nothing unless there's a username 1305 my $who = $line->{username}; 1306 return '' unless $who; 1307 1308 #my $net = ($self->{daterev_md}->{includes_net}) ? '-net' : ''; 1309 1310 my $href = $self->assemble_url( 1311 "mclog=".(($isspam ? "spam" : "ham")."-$who"), 1312 "rule=".$line->{name}, 1313 "daterev=".$self->{daterev}, 1314 $self->get_params_except(qw( mclog rule s_detail ))); 1315 1316 return qq{ 1317 href='$href' 1318 }; 1319} 1320 1321sub output_freqs_data_line { 1322 my ($self, $obj, $template, $header_context) = @_; 1323 1324 # normal freqs lines, with optional subselector after rule name 1325 my $out = ''; 1326 foreach my $line (@{$obj->{lines}}) { 1327 1328 my $detailurl = ''; 1329 if (!$self->{s}{detail}) { # not already in "detail" mode 1330 $detailurl = $self->create_detail_url($line->{name}); 1331 } 1332 1333 my $score = $line->{score}; 1334 if ($line->{name} =~ /^__/) { 1335 $score = '(n/a)'; 1336 } 1337 1338 my $SPAMPCDETAIL = $self->create_spampc_detail( 1339 $line->{spampc}, 1, $header_context, $line); 1340 my $HAMPCDETAIL = $self->create_spampc_detail( 1341 $line->{hampc}, 0, $header_context, $line); 1342 my $SPAMLOGHREF = $self->create_mclog_href( 1343 $line->{spampc}, 1, $header_context, $line); 1344 my $HAMLOGHREF = $self->create_mclog_href( 1345 $line->{hampc}, 0, $header_context, $line); 1346 1347 $self->process_template($template, { 1348 RULEDETAIL => $detailurl, 1349 MSECS => $line->{msecs}+0 ? sprintf("%7s", $line->{msecs}) : " 0", 1350 SPAMPC => $line->{spampc}+0 ? sprintf("%7s", $line->{spampc}) : " 0", 1351 HAMPC => $line->{hampc}+0 ? sprintf("%7s", $line->{hampc}) : " 0", 1352 SPAMPCDETAIL => $SPAMPCDETAIL, 1353 HAMPCDETAIL => $HAMPCDETAIL, 1354 SPAMLOGHREF => $SPAMLOGHREF, 1355 HAMLOGHREF => $HAMLOGHREF, 1356 SO => sprintf("%6s", $line->{so}), 1357 RANK => sprintf("%6s", $line->{rank}), 1358 SCORE => sprintf("%6s", $score), 1359 NAME => $line->{name}, 1360 NAMEREF => $self->create_detail_url($line->{name}), 1361 NAMEREFENCD => uri_escape($self->create_detail_url($line->{name})), 1362 USERNAME => $line->{username} || '', 1363 CORPUSAHREF => $self->create_corpus_href($line->{name}, $line->{username}), 1364 AGE => $line->{age} || '', 1365 PROMO => $line->{promotable}, 1366 }, \$out); 1367 1368 $self->{line_counter}++; 1369 } 1370 1371 # add scoremap using the FREQS_EXTRA_TEMPLATE if it's present 1372 if ($obj->{scoremap}) { 1373 my $smap = $obj->{scoremap} || ''; 1374 # scoremap spam: 16 12.11% 777 **** 1375 1376 $self->process_template(\$FREQS_EXTRA_TEMPLATE, { 1377 EXTRA => $smap, 1378 }, \$out); 1379 1380 $self->generate_scoremap_chart($smap, \$out); 1381 } 1382 1383 # add overlap using the FREQS_EXTRA_TEMPLATE if it's present 1384 if ($obj->{overlap}) { 1385 $self->process_template(\$FREQS_EXTRA_TEMPLATE, { 1386 EXTRA => $self->format_overlap($obj->{overlap} || '') 1387 }, \$out); 1388 } 1389 1390 return $out; 1391} 1392 1393sub generate_scoremap_chart { 1394 my ($self, $smap, $outref) = @_; 1395 1396 my %chart; 1397 foreach my $l (split (/^/m, $smap)) { 1398 # scoremap spam: 16 12.11% 777 **** 1399 $l =~ /^\s*scoremap\s+(\S+):\s+(\S+)\s+(\S+)\%\s+\d+/ 1400 or $$outref .= "chart: failed to parse scoremap line: $l<br>"; 1401 1402 my ($type, $idx, $pc) = ($1,$2,$3); 1403 next unless $type; 1404 1405 $chart{$type}{$idx} = $pc; 1406 } 1407 1408 my %uniq=(); 1409 my $max_x = 0; 1410 my $max_y = 0; 1411 for my $i (keys %{$chart{'spam'}}, keys %{$chart{'ham'}}) { 1412 next if exists $uniq{$i}; undef $uniq{$i}; 1413 if (($chart{'spam'}{$i}||0) > $max_y) { $max_y = $chart{'spam'}{$i}; } 1414 if (($chart{'ham'}{$i}||0) > $max_y) { $max_y = $chart{'ham'}{$i}; } 1415 if ($i > $max_x) { $max_x = $i; } 1416 } 1417 $max_y ||= 0.001; 1418 1419 # ensure 0 .. $max_x are always set 1420 foreach my $i (0 .. $max_x) { $uniq{$i} = undef; } 1421 1422 my @idxes = sort { $a <=> $b } keys %uniq; 1423 if (!scalar @idxes) { 1424 $max_x = 1; @idxes = ( 0 ); 1425 } 1426 my $min_x = $idxes[0]; 1427 1428 # normalize to [0,100] and set default to 0 1429 my @ycoords_s = map { sprintf "%.2f", (100/$max_y) * ($chart{'spam'}{$_}||0) } @idxes; 1430 my @ycoords_h = map { sprintf "%.2f", (100/$max_y) * ($chart{'ham'}{$_}||0) } @idxes; 1431 my @xcoords = map { sprintf "%.2f", (100/($max_x||0.0001)) * $_ } @idxes; 1432 1433 my $xgrid = (100/($max_x||0.0001)) * 5; 1434 my $ygrid = (100/($max_y||0.0001)) * 10; 1435 1436 # https://code.google.com/apis/chart/ , woo 1437 my $chartsetup = 1438 "cht=lxy" # line chart with x- and y-axis coords 1439 ."\&chs=400x200" 1440 ."\&chd=t:".join(",", @xcoords)."|".join(",", @ycoords_h) 1441 ."|".join(",", @xcoords)."|".join(",", @ycoords_s) 1442 ."\&chts=ff0000,18" 1443 ."\&chdl=Ham|Spam" 1444 ."\&chco=ff0000,0000ff,00ff00" 1445 ."\&chg=$xgrid,$ygrid" 1446 ."\&chxl=0:|$min_x+points|$max_x+points|1:|0\%|$max_y\%" 1447 ."\&chxt=x,y"; 1448 1449 $$outref .= "<div class='scoremap_chart'> 1450 <img src='https://chart.apis.google.com/chart?$chartsetup' 1451 class='scoremap_chart' width='400' height='200' align='right' 1452 /></div>\n"; 1453} 1454 1455sub format_overlap { 1456 my ($self, $ovl) = @_; 1457 1458 # list the subrules last; they're noisy and typically nonuseful 1459 my $out_fullrules = ''; 1460 my $out_subrules = ''; 1461 1462 foreach my $line (split(/^/m, $ovl)) { 1463 my $issubrule = ($line =~ /\d+\%\s+of __/ 1464 || $line =~ /\(meta rule and subrule\)/); 1465 1466 $line =~ s{^(\s+overlap\s+(?:ham|spam):\s+\d+% )(\S.+?)$}{ 1467 my $str = "$1"; 1468 foreach my $rule (split(' ', $2)) { 1469 if ($rule =~ /^(?:[(]?[a-z]{1,6}[)]?|\d+\%[)]?)$/) { # "of", "hits" etc. 1470 $str .= $rule." "; 1471 } else { 1472 my $post = ''; 1473 $rule =~ s/(\;\s*)$// and $post = $1; 1474 $str .= $self->gen_rule_link($rule,$rule).$post." "; 1475 } 1476 } 1477 $str; 1478 }gem; 1479 1480 if ($issubrule) { 1481 $out_subrules .= $line; 1482 } else { 1483 $out_fullrules .= $line; 1484 } 1485 } 1486 1487 return "OVERLAP WITH FULL RULES:\n".$out_fullrules."\n". 1488 "OVERLAP WITH SUBRULES:\n".$out_subrules; 1489} 1490 1491# get rid of slow, overengineered Template::Toolkit. This replacement 1492# is extremely simple-minded, but doesn't call time() on every invocation, 1493# which makes things just a little bit faster 1494sub process_template { 1495 my ($self, $tmplref, $keys, $outref) = @_; 1496 my $buf = $$tmplref; 1497 foreach my $k (keys %{$keys}) { 1498 $buf =~ s/\[\% \Q$k\E \%\]/$keys->{$k}/gs; 1499 } 1500 $$outref .= $buf; 1501} 1502 1503sub create_detail_url { 1504 my ($self, $rulename) = @_; 1505 1506 if (!$self->{create_detail_url_template}) { 1507 my @parms = ( 1508 $self->get_params_except(qw( 1509 rule s_age s_overlap s_all s_detail daterev 1510 )), 1511 "daterev=".$self->{daterev}, 1512 "s_detail=1", 1513 "rule=__create_detail_url_template__", 1514 ); 1515 $self->{create_detail_url_template} = $self->assemble_url(@parms); 1516 } 1517 1518 my $ret = $self->{create_detail_url_template}; 1519 $rulename = uri_escape($rulename); 1520 $ret =~ s/__create_detail_url_template__/${rulename}/gs; 1521 return $ret; 1522} 1523 1524sub create_corpus_href { 1525 my ($self, $rulename, $username) = @_; 1526 1527 if (!$self->{s}{detail} || !$username) { # not already in "detail" mode 1528 return ''; 1529 } 1530 my $url = $self->assemble_url( 1531 "s_corpus=1", 1532 "s_detail=1", 1533 "rule=".$rulename, 1534 "daterev=".$self->{daterev}, 1535 $self->get_params_except(qw( mclog rule s_detail s_corpus daterev ))) 1536 ."#corpus"; 1537 return " <a href='$url' class='mcloghref'>[corpus]</a>"; 1538} 1539 1540sub gen_rule_link { 1541 my ($self, $rule, $linktext) = @_; 1542 return "<a href='".$self->create_detail_url($rule)."'>$linktext</a>"; 1543} 1544 1545sub gen_switch_url { 1546 my ($self, $switch, $newval) = @_; 1547 1548 my @parms = $self->get_params_except($switch); 1549 $newval ||= ''; 1550 if (!defined $switch) { warn "switch '$switch'='$newval' undef value"; } 1551 push (@parms, 1552 $switch."=".$newval, 1553 "daterev=".$self->{daterev} 1554 ); 1555 return $self->assemble_url(@parms); 1556} 1557 1558sub gen_this_url { 1559 my ($self) = @_; 1560 my @parms = $self->get_params_except("__nonexistent__"); 1561 return $self->assemble_url(@parms); 1562} 1563 1564sub gen_toplevel_url { 1565 my ($self, $switch, $newval) = @_; 1566 1567 my @parms = $self->get_params_except($switch, qw( 1568 rule s_age s_overlap s_all s_detail daterev 1569 )); 1570 $newval ||= ''; 1571 if (!defined $switch) { warn "switch '$switch'='$newval' undef value"; } 1572 push (@parms, $switch."=".$newval); 1573 return $self->assemble_url(@parms); 1574} 1575 1576sub get_rev_for_daterev { 1577 my ($self, $daterev) = @_; 1578 # '20060120-r370897-b' 1579 $daterev =~ /-r(\d+)-/ or return undef; 1580 return $1; 1581} 1582 1583sub assemble_url { 1584 my ($self, @orig) = @_; 1585 1586 # e.g. https://buildbot.spamassassin.org/ruleqa? 1587 # daterev=20060120-r370897-b&rule=T_PH_SEC&s_detail=1 1588 1589 # we support special treatment for 'daterev' and 'rule' 1590 my %path = (); 1591 my @parms = (); 1592 $path{daterev} = ''; 1593 $path{rule} = ''; 1594 foreach my $p (@orig) { 1595 # some ignored parameter noise, from the form 1596 if (!$p) { next; } 1597 elsif ($p =~ /^keywords=$/) { next; } 1598 elsif ($p =~ /^g=Change$/) { next; } 1599 # default values that can be omitted 1600 elsif ($p =~ /^srcpath=$/) { next; } 1601 elsif ($p =~ /^mtime=$/) { next; } 1602 # the ones we can put in the path 1603 elsif ($p =~ /^rule=(.*)$/) { $path{rule} = $1; } 1604 elsif ($p =~ /^daterev=(.*)$/) { $path{daterev} = $1; } 1605 elsif ($p =~ /^s_detail=(?:1|on)$/) { $path{s_detail} = 1; } 1606 # and all the rest 1607 else { push (@parms, $p); } 1608 } 1609 1610 # ensure "/FOO" rule greps are encoded as "%2FFOO" 1611 $path{rule} =~ s,^/,\%2F,; 1612 1613 my $url = $self->{cgi_url}. 1614 ($path{daterev} ? '/'.$path{daterev} : ''). 1615 ($path{rule} ? '/'.$path{rule} : ''). 1616 ($path{s_detail} ? '/detail' : ''). 1617 '?'.join('&', sort @parms); 1618 1619 # no need for a trailing ? if there were no parms 1620 $url =~ s/\?$//; 1621 1622 # ensure local URL (not starting with "//", which confuses Firefox) 1623 $url =~ s,^/+,/,; 1624 1625 # now, a much more readable 1626 # https://ruleqa.spamassassin.org/ 1627 # 20060120-r370897-b/T_PH_SEC/detail 1628 1629 return $url; 1630} 1631 1632sub precache_params { 1633 my ($self) = @_; 1634 1635 @{$self->{cgi_param_order}} = $self->{q}->param(); 1636 foreach my $k (@{$self->{cgi_param_order}}) { 1637 next unless defined ($k); 1638 next if ($k eq 'q'); # a shortcut, ignore for future refs 1639 my $v = $self->{q}->param($k); 1640 if (!defined $v) { $v = ''; } 1641 $k =~ s/[<>]//gs; 1642 $v =~ s/[<>]//gs; 1643 $self->{cgi_params}{$k} = uri_escape($k)."=".uri_escape($v); 1644 } 1645} 1646 1647sub add_cgi_path_param { # assumes already escaped unless $not_escaped 1648 my ($self, $k, $v, $not_escaped) = @_; 1649 $k =~ s/[<>]//gs; 1650 $v =~ s/[<>]//gs; 1651 if (!defined $self->{cgi_params}{$k}) { 1652 push (@{$self->{cgi_param_order}}, $k); 1653 } 1654 if ($not_escaped) { 1655 $self->{cgi_params}{$k} = uri_escape($k)."=".uri_escape($v); 1656 $self->{q}->param(-name=>$k, -value=>$v); 1657 } else { 1658 $self->{cgi_params}{$k} = $k."=".$v; 1659 $self->{q}->param(-name=>$k, -value=>uri_unescape($v)); 1660 } 1661} 1662 1663sub add_cgi_param { # a variant for unescaped data 1664 my ($self, $k, $v) = @_; 1665 return $self->add_cgi_path_param($k, $v, 1); 1666} 1667 1668sub get_params_except { 1669 my ($self, @excepts) = @_; 1670 1671 my @str = (); 1672 foreach my $p (@{$self->{cgi_param_order}}) { 1673 foreach my $skip (@excepts) { 1674 next unless defined $skip && defined $self->{cgi_params}{$p}; 1675 goto nextnext if 1676 ($skip eq $p || $self->{cgi_params}{$p} =~ /^\Q$skip\E=/); 1677 } 1678 push (@str, $self->{cgi_params}{$p}); 1679nextnext: ; 1680 } 1681 @str; 1682} 1683 1684sub get_datadir_for_daterev { 1685 my ($self, $npath) = @_; 1686 $npath =~ s/-/\//; 1687 return $AUTOMC_CONF{html}."/".$npath."/"; 1688} 1689 1690sub get_daterev_metadata { 1691 my ($self, $dr) = @_; 1692 return $self->{cached}->{daterev_metadata}->{$dr} || { }; 1693} 1694 1695sub get_mds_as_text { 1696 my ($self, $mclogmds) = @_; 1697 1698 # 'mclogmd' => [ 1699 # { 1700 # 'daterev' => '20060430/r398298-n', 1701 # 'mcstartdate' => '20060430T122405Z', 1702 # 'mtime' => '1146404744', 1703 # 'rev' => '398298', 1704 # 'file' => 'ham-cthielen.log', 1705 # 'fsize' => '3036336' 1706 # }, [...] 1707 1708 # $mds_as_text = XMLout($mclogmds); # debug, as XML 1709 1710 # use Data::Dumper; $mds_as_text = Dumper($mclogmds); # debug, as perl data 1711 1712 my $all = ''; 1713 if (ref $mclogmds && $mclogmds->{mclogmd}) { 1714 foreach my $f (@{$mclogmds->{mclogmd}}) { 1715 my $started = $f->{mcstartdate}; 1716 my $subtime = POSIX::strftime "%Y%m%dT%H%M%SZ", gmtime $f->{mtime}; 1717 1718 $all .= qq{ 1719 1720 <p> <b>$f->{file}</b>:<br /> 1721 started: $started;<br /> 1722 submitted: $subtime;<br /> 1723 size: $f->{fsize} bytes 1724 </p> 1725 1726 }; 1727 } 1728 } 1729 1730 my $id = "mclogmds_".($self->{id_counter}++); 1731 1732 return qq{ 1733 1734 <a href="javascript:show_header('$id')">[+]</a> 1735 <div id='$id' class='mclogmds' style='display: none'> 1736 <p class='headclosep' align='right'><a 1737 href="javascript:hide_header('$id')">[-]</a></p> 1738 1739 $all 1740 </div> 1741 1742 }; 1743} 1744 1745sub get_daterev_code_description { 1746 my ($self, $dr) = @_; 1747 my $meta = $self->get_daterev_metadata($dr); 1748 1749 return qq{ 1750 1751 <td class="daterevcommittd" width='30%'> 1752 <span class="daterev_code_description"> 1753 <p> 1754 <a title="$meta->{author}: $meta->{drtitle} ($meta->{cdate})" 1755 href="!drhref!"><strong>$meta->{rev}</strong>: $meta->{cdate}</a> 1756 </p> 1757 <p><div class='commitmsgdiv'> 1758 $meta->{author}: $meta->{drtitle} 1759 </div></p> 1760 </span> 1761 </td> 1762 1763 }; 1764} 1765 1766sub get_daterev_masscheck_description { 1767 my ($self, $dr) = @_; 1768 my $meta = $self->get_daterev_metadata($dr); 1769 my $net = $meta->{includes_net} ? "[net]" : ""; 1770 1771 my $isvishtml = ''; 1772 my $isvisclass = ''; 1773 if ($self->{daterev} eq $dr) { 1774 $isvishtml = '<b>(Viewing)</b>'; 1775 $isvisclass = 'mcviewing'; 1776 } 1777 1778 my $mds_as_text = ''; 1779 if ($meta->{mclogmds}) { 1780 $mds_as_text = $self->get_mds_as_text($meta->{mclogmds}) || ''; 1781 } 1782 1783 my $submitters = $meta->{submitters}; 1784 # remove daterevs, they're superfluous in this table 1785 $submitters =~ s/\.\d+-r\d+-[a-z]\b//gs; 1786 1787 return qq{ 1788 1789 <td class="daterevtd $isvisclass" width='20%'> 1790 <span class="daterev_masscheck_description $isvisclass"> 1791 <p> 1792 <a name="$meta->{dranchor}" 1793 href="!drhref!"><strong> 1794 <span class="dr">$dr</span> 1795 </strong></a> $isvishtml 1796 </p><p> 1797 <em><span class="mcsubmitters">$submitters</span></em> 1798 $mds_as_text</x> 1799 </p> 1800 <!-- <span class="mctype">$meta->{type}</span> --> 1801 <!-- <span class="mcwasnet">$net</span> --> 1802 <!-- <span class="mcauthor">$meta->{author}</span> --> 1803 <!-- <span class="date">$meta->{date}</span> --> 1804 <!-- tag=$meta->{tag} --> 1805 </span> 1806 </td> 1807 1808 }; 1809} 1810 1811sub get_daterev_html_table { 1812 my ($self, $daterev_list, $reverse) = @_; 1813 1814 my $rows = { }; 1815 foreach my $dr (@{$daterev_list}) { 1816 next unless $dr; 1817 my $meta = $self->get_daterev_metadata($dr); 1818 1819 my $colidx; 1820 my $type = $meta->{type}; 1821 if ($type eq 'preflight') { 1822 $colidx = 0; 1823 } elsif ($type eq 'net') { 1824 $colidx = 2; 1825 } else { 1826 $colidx = 1; 1827 } 1828 1829 # use the daterev number as the row key 1830 $rows->{$meta->{daterev}} ||= [ ]; 1831 $rows->{$meta->{daterev}}->[$colidx] = $meta; 1832 } 1833 1834 my @rowkeys = sort keys %{$rows}; 1835 if ($reverse) { @rowkeys = reverse @rowkeys; } 1836 1837 my @html = (); 1838 foreach my $rowdate (@rowkeys) { 1839 my $row = $rows->{$rowdate}; 1840 1841 my $meta; 1842 foreach my $col (0 .. 2) { 1843 if ($row->[$col]) { 1844 $meta = $row->[$col]; 1845 last; 1846 } 1847 } 1848 1849 next unless $meta; # no entries in the row 1850 1851 push @html, qq{ 1852 1853 <tr class='daterevtr'> 1854 1855 }, $self->gen_daterev_html_commit_td($meta); 1856 1857 foreach my $col (0 .. 2) { 1858 $meta = $row->[$col]; 1859 if ($meta) { 1860 push @html, $self->gen_daterev_html_table_td($meta); 1861 } 1862 else { 1863 push @html, qq{ 1864 1865 <td class='daterevtdempty' width='20%'></td> 1866 1867 }; 1868 } 1869 } 1870 push @html, qq{ 1871 1872 </tr> 1873 1874 }; 1875 } 1876 1877 return join '', @html; 1878} 1879 1880sub gen_daterev_html_commit_td { 1881 my ($self, $meta) = @_; 1882 1883 my $dr = $meta->{daterev}; 1884 my @parms = $self->get_params_except(qw( 1885 daterev longdatelist shortdatelist 1886 )); 1887 my $drhref = $self->assemble_url("daterev=".$dr, @parms); 1888 1889 my $text = $self->get_daterev_code_description($dr) || ''; 1890 $text =~ s/!drhref!/$drhref/gs; 1891 1892 return $text; 1893} 1894 1895sub gen_daterev_html_table_td { 1896 my ($self, $meta) = @_; 1897 1898 my $dr = $meta->{daterev}; 1899 my @parms = $self->get_params_except(qw( 1900 daterev longdatelist shortdatelist 1901 )); 1902 my $drhref = $self->assemble_url("daterev=".$dr, @parms); 1903 1904 my $text = $self->get_daterev_masscheck_description($dr) || ''; 1905 $text =~ s/!drhref!/$drhref/gs; 1906 return $text; 1907} 1908 1909sub show_daterev_selector_page { 1910 my ($self) = @_; 1911 1912 my $title = "Rule QA: all recent mass-check results"; 1913 print $self->show_default_header($title); 1914 1915 my $max_listings = $self->{q}->param('perpage') || 1000; # def. 1000 1916 my @drs = @{$self->{daterevs}}; 1917 if ($max_listings > 0 && scalar @drs > $max_listings) { 1918 splice @drs, 0, -$max_listings; 1919 } 1920 1921 print qq{ 1922 1923 <h3> All Mass-Checks </h3> 1924 <br/> <a href='#net' name='net'>#</a> 1925 1926 <div class='updateform'> 1927 <table style="padding-left: 0px" class='datetable'> 1928 <tr> 1929 <th> Commit </th> 1930 <th> Preflight Mass-Checks </th> 1931 <th> Nightly Mass-Checks </th> 1932 <th> Network Mass-Checks </th> 1933 </tr> 1934 1935 }. $self->get_daterev_html_table(\@drs, 1, 1); 1936} 1937 1938 1939sub get_rule_metadata { 1940 my ($self, $rev) = @_; 1941 1942 if ($self->{rule_metadata}->{$rev}) { 1943 return $self->{rule_metadata}->{$rev}; 1944 } 1945 1946 my $meta = $self->{rule_metadata}->{$rev} = { }; 1947 $meta->{rev} = $rev; 1948 1949 my $fname = $AUTOMC_CONF{html}."/rulemetadata/$rev/rulemetadata.xml"; 1950 if (-f $fname) { 1951 eval { 1952 $meta->{rulemds} = parse_rulemetadataxml($fname); 1953 #use Data::Dumper; print STDERR Dumper $meta->{rulemds}; 1954 1955 # '__CTYPE_HTML' => { 1956 # 'srcmtime' => '1154348696', 1957 # 'src' => 'rulesrc/core/20_ratware.cf' 1958 # }, 1959 1960 }; 1961 1962 if ($@ || !defined $meta->{rulemds}) { 1963 warn "rev rulemetadata.xml read failed: $@"; 1964 } else { 1965 return $meta; 1966 } 1967 } 1968 1969 # if that failed, just return empty 1970 if (1) { 1971 print "<!-- WARN: Failed to read rule metadata file: $fname -->\n"; 1972 } 1973 1974 $meta->{rulemds} = {}; 1975 return $meta; 1976} 1977 1978# --------------------------------------------------------------------------- 1979 1980sub read_cache { 1981 my ($self) = @_; 1982 if (!-f $self->{cachefile}) { 1983 warn "missing $self->{cachefile}, run -refresh"; 1984 return; 1985 } 1986 eval { 1987 $self->{cached} = thaw(decompress(readfile($self->{cachefile}))); 1988 }; 1989 if ($@ || !defined $self->{cached}) { 1990 warn "cannot read $self->{cachefile}: $@ $!"; 1991 } 1992} 1993 1994# --------------------------------------------------------------------------- 1995 1996sub refresh_cache { 1997 my ($self) = @_; 1998 1999 $self->{cached} = { }; 2000 2001 # all known date/revision combos. 2002 @{$self->{cached}->{daterevs}} = $self->get_all_daterevs(); 2003 2004 foreach my $dr (@{$self->{cached}->{daterevs}}) { 2005 $self->refresh_daterev_metadata($dr); 2006 } 2007 2008 eval { 2009 open (OUT, ">".$self->{cachefile}.".$$") or die "open failed: $@"; 2010 print OUT compress(nfreeze(\%{$self->{cached}})); 2011 close OUT; 2012 }; 2013 if ($@ || !rename($self->{cachefile}.".$$", $self->{cachefile})) { 2014 unlink($self->{cachefile}.".$$"); 2015 die "cannot write $self->{cachefile}: $@"; 2016 } 2017} 2018 2019sub refresh_daterev_metadata { 2020 my ($self, $dr) = @_; 2021 2022 my $meta = $self->{cached}->{daterev_metadata}->{$dr} = { }; 2023 $meta->{daterev} = $dr; 2024 2025 my $dranchor = "r".$dr; $dranchor =~ s/[^A-Za-z0-9]/_/gs; 2026 $meta->{dranchor} = $dranchor; 2027 2028 $dr =~ /^(\d+)-r(\d+)-(\S+)$/; 2029 my $date = $1; 2030 my $rev = $2; 2031 my $tag = $3; 2032 2033 my $datadir = $self->get_datadir_for_daterev($dr); 2034 $self->{datadir} = $datadir; 2035 2036 # update scache for all freqfiles 2037 foreach my $f (keys %FREQS_FILENAMES) { 2038 my $file = -f $datadir.$f ? $datadir.$f : 2039 -f $datadir."$f.gz" ? $datadir."$f.gz" : undef; 2040 if (defined $file) { 2041 if (time - mtime($file) <= $self->{scache_keep_time}) { 2042 $self->read_freqs_file($f, 1); 2043 } 2044 else { 2045 # remove too old cachefiles 2046 $file =~ s/\.gz$//; 2047 unlink("$file.scache"); 2048 } 2049 } 2050 } 2051 2052 my $fname = "$datadir/info.xml"; 2053 my $fastfname = "$datadir/fastinfo.xml"; 2054 2055 if (-f $fname && -f $fastfname) { 2056 eval { 2057 my $fastinfo = parse_infoxml($fastfname); 2058 $meta->{rev} = $rev; 2059 $meta->{tag} = $tag; 2060 $meta->{mclogmds} = $fastinfo->{mclogmds}; 2061 $meta->{includes_net} = $fastinfo->{includes_net}; 2062 $meta->{date} = $fastinfo->{date}; 2063 $meta->{submitters} = $fastinfo->{submitters}; 2064 2065 if ($rev ne $fastinfo->{rev}) { 2066 warn "dr and fastinfo disagree: ($rev ne $fastinfo->{rev})"; 2067 } 2068 2069 my $type; 2070 if ($meta->{tag} && $meta->{tag} eq 'b') { 2071 $type = 'preflight'; 2072 } elsif ($meta->{includes_net}) { 2073 $type = 'net'; 2074 } else { 2075 $type = 'nightly'; 2076 } 2077 $meta->{type} = $type; 2078 2079 2080 my $info = parse_infoxml($fname); 2081 # use Data::Dumper; print Dumper $info; 2082 my $cdate = $info->{checkin_date}; 2083 $cdate =~ s/T(\S+)\.\d+Z$/ $1/; 2084 2085 my $drtitle = ($info->{msg} ? $info->{msg} : ''); 2086 $drtitle =~ s/[\"\'\&\>\<]/ /gs; 2087 $drtitle =~ s/\s+/ /gs; 2088 $drtitle =~ s/^(.{0,160}).*$/$1/gs; 2089 2090 $meta->{cdate} = $cdate; 2091 $meta->{drtitle} = $drtitle; 2092 $meta->{author} = $info->{author}; 2093 }; 2094 2095 if ($@) { 2096 warn "daterev info.xml: $@"; 2097 } 2098 2099 return $meta; 2100 } 2101 2102 # if that failed, just use the info that can be gleaned from the 2103 # daterev itself. 2104 my $drtitle = "(no info)"; 2105 2106 { 2107 $meta->{rev} = $rev; 2108 $meta->{cdate} = $date; 2109 $meta->{drtitle} = '(no info available yet)'; 2110 $meta->{includes_net} = 0; 2111 $meta->{date} = $date; 2112 $meta->{submitters} = ""; 2113 $meta->{author} = "nobody"; 2114 $meta->{tag} = $tag; 2115 $meta->{type} = 'preflight'; # default 2116 } 2117} 2118 2119# return file modification time 2120sub mtime { 2121 return (stat $_[0])[9]; 2122} 2123 2124# slurp'a'file 2125sub readfile { 2126 my $file = shift; 2127 my $str; 2128 eval { 2129 open(IN, $file) or die $@; 2130 { local($/); $str = <IN> } 2131 close(IN); 2132 }; 2133 if ($@) { 2134 warn "read failed $file: $@"; 2135 return undef; 2136 } 2137 return $str; 2138} 2139 2140# fast simple xml parser, since we know what to expect 2141sub parse_rulemetadataxml { 2142 my $file = shift; 2143 my $xmlstr = readfile($file); 2144 my $md = {}; 2145 while ($xmlstr =~ m!<rulemetadata>(.*?)</rulemetadata>!gs) { 2146 my $rmd = $1; 2147 my %attrs; 2148 while ($rmd =~ m!<([A-Za-z0-9_]{1,50})>(.*?)</\1>!gs) { 2149 $attrs{$1} = $2; 2150 } 2151 if (defined $attrs{name}) { 2152 foreach (keys %attrs) { 2153 next if $_ eq 'name'; 2154 $md->{$attrs{name}}->{$_} = $attrs{$_}; 2155 } 2156 } 2157 } 2158 if (!%$md) { 2159 warn "xml parse failed $file"; 2160 } 2161 return $md; 2162} 2163 2164sub parse_infoxml { 2165 my $file = shift; 2166 my $xmlstr = readfile($file); 2167 my $opt = {}; 2168 if ($xmlstr =~ m!<opt ([^>]*?)>!s) { 2169 my $optstr = $1; 2170 my %attrs; 2171 while ($optstr =~ m!\b([A-Za-z0-9_]{1,50})="([^"]*)"!gs) { 2172 $opt->{$1} = $2; 2173 } 2174 } 2175 if (!%$opt) { 2176 warn "xml parse failed $file"; 2177 } 2178 return $opt; 2179} 2180 2181=cut 2182 2183to install, add this line to httpd.conf: 2184 2185 ScriptAlias /ruleqa "/path/to/spamassassin/automc/ruleqa.cgi" 2186 2187 2188