1#!/usr/bin/perl -w
2
3# settings are located in $HOME/.corpus
4
5use strict;
6use Getopt::Long;
7use File::Path;
8use File::Copy;
9use Time::ParseDate;
10use POSIX qw(nice strftime);
11use Cwd;
12
13our ( $opt_dir, $opt_override, $opt_tag );
14GetOptions(
15    "tag=s" => \$opt_tag,
16    "dir=s" => \$opt_dir,
17    "override=s" => \$opt_override,
18);
19
20$opt_override ||= '';
21$opt_tag ||= 'n';       # nightly is the default
22
23nice(15);
24
25# daterevs -- e.g. "20060429/r239832-r" -- are aligned to 0800 UTC, just before
26# the time of day when the mass-check tagging occurs; see
27# http://wiki.apache.org/spamassassin/DateRev for more details.
28use constant DATEREV_ADJ => - (8 * 60 * 60);
29
30# ---------------------------------------------------------------------------
31
32my $configuration = "$ENV{HOME}/.corpus";
33my %cf;
34
35configure();
36init();
37
38if (!$opt_dir) {
39  $opt_dir = $cf{corpus};
40  update_rsync();
41}
42
43chdir $opt_dir;
44print "reading logs from '$opt_dir'\n";
45
46my $linkdir = "$cf{html}/logs";
47(-d $linkdir) or mkdir $linkdir;
48
49locate_and_link();
50exit;
51
52sub configure {
53  # does rough equivalent of source
54  open(C, $configuration) || die "open failed: $configuration: $!\n";
55  my $pwd = Cwd::getcwd;
56
57  # add 'override' options
58  my @lines = (<C>, split(/\|/, $opt_override));
59
60  foreach $_ (@lines) {
61    chomp;
62    s/#.*//;
63    if (/^\s*(.*?)\s*=\s*(.*?)\s*$/) {
64      my ($key, $val) = ($1, $2);
65      $val =~ s/\$PWD/$pwd/gs;
66      $cf{$key} = $val;
67    }
68  }
69  close(C);
70}
71
72sub init {
73  $ENV{RSYNC_PASSWORD} = $cf{password};
74  $ENV{TIME} = '%e,%U,%S';
75  $ENV{TZ} = 'UTC';
76}
77
78sub update_rsync {
79  chdir $opt_dir;
80  if (!$cf{rsync_command}) { die "no 'rsync_command' set"; }
81  system $cf{rsync_command};
82  system "chmod +r *.log > /dev/null 2>&1";
83}
84
85sub locate_and_link {
86  opendir(CORPUS, $opt_dir);
87  my @files = sort readdir(CORPUS);
88  closedir(CORPUS);
89
90  print "Found ", $#files + 1, " files in $opt_dir\n";
91
92  @files = grep {
93    /^(?:spam|ham)-(?:net-)?[-\w.]+\.log$/ && -f "$opt_dir/$_" && -M _ < 30
94  } @files;
95
96  print "Kept ", $#files + 1, " files\n";
97
98  foreach my $file (@files) {
99    my $ftime;
100    my $frevision;
101
102    print "Processing $opt_dir/$file\n";
103    open(FILE, "$opt_dir/$file") or warn "cannot read $opt_dir/$file";
104    while (my $line = <FILE>) {
105      last if $line !~ /^#/;
106      if ($line =~ /, on (... ... .. )(..)(:..:.. ... ....)/) {
107        my ($datepre, $hh, $datepost) = ($1,$2,$3);
108
109        $ftime = Time::ParseDate::parsedate($datepre.$hh.$datepost,
110                    GMT => 1, PREFER_PAST => 1);
111      }
112      elsif ($line =~ m/^# Date:\s*(\S+)/) {
113        # a better way to do the above.  TODO: parse it instead
114      }
115      elsif ($line =~ m/^# SVN revision:\s*(\S+)/) {
116        $frevision = $1;
117      }
118    }
119    close(FILE);
120
121    if (!defined $ftime) {
122      warn "$opt_dir/$file: no time found, ignored\n"; next;
123    }
124    if (!defined $frevision) {
125      warn "$opt_dir/$file: no revision found, ignored\n"; next;
126    }
127    if ($frevision eq 'unknown') {
128      warn "$opt_dir/$file: not tagged with a revision, ignored\n"; next;
129    }
130
131    my $daterev = mk_daterev($ftime, $frevision, $opt_tag);
132    link_file($file, $daterev);
133  }
134}
135
136sub mk_daterev {
137  my ($timet, $rev, $tag) = @_;
138  return strftime("%Y%m%d", gmtime($timet + DATEREV_ADJ)) . "/r$rev-$tag";
139}
140
141sub link_file {
142  my ($file, $daterev) = @_;
143
144  my $f = "$opt_dir/$file";
145
146  # /^(?:spam|ham)-(?:net-)?[-\w]+\.log$/
147  my $linkfile = $file;
148  my $dr = $daterev; $dr =~ s/\//-/gs; $linkfile =~ s/\.log$/.$dr.log/i;
149  my $t = "$linkdir/$linkfile";
150
151  print "ln $f $t\n";
152  (-f $t) and unlink $t;
153  # cannot hardlink unless we have ownership or RW perms on the file
154  symlink $f, $t or die "cannot ln";
155  system "/usr/bin/touch -h -r '$f' '$t'";       # preserve modtimes
156}
157
158