1#! --PERL-- 2# -*- indent-tabs-mode: nil; -*- 3# vim:ft=perl:et:sw=4 4# $Id$ 5 6# Sympa - SYsteme de Multi-Postage Automatique 7# 8# Copyright (c) 1997, 1998, 1999 Institut Pasteur & Christophe Wolfhugel 9# Copyright (c) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 10# 2006, 2007, 2008, 2009, 2010, 2011 Comite Reseau des Universites 11# Copyright (c) 2011, 2012, 2013, 2014, 2015, 2016, 2017 GIP RENATER 12# 13# This program is free software; you can redistribute it and/or modify 14# it under the terms of the GNU General Public License as published by 15# the Free Software Foundation; either version 2 of the License, or 16# (at your option) any later version. 17# 18# This program is distributed in the hope that it will be useful, 19# but WITHOUT ANY WARRANTY; without even the implied warranty of 20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 21# GNU General Public License for more details. 22# 23# You should have received a copy of the GNU General Public License 24# along with this program. If not, see <http://www.gnu.org/licenses/>. 25 26use lib '--modulesdir--'; 27use strict; 28use warnings; 29use English qw(-no_match_vars); 30use Getopt::Long; 31 32use Conf; 33use Sympa::Constants; 34use Sympa::DatabaseManager; 35use Sympa::List; 36use Sympa::Message; 37use Sympa::Spool::Archive; 38 39my %month_idx = qw(jan 1 40 fev 2 41 feb 2 42 fv 2 43 mar 3 44 avr 4 45 apr 4 46 mai 5 47 may 5 48 jun 6 49 jul 7 50 aug 8 51 aou 8 52 sep 9 53 oct 10 54 nov 11 55 dec 12 56 dc 12); 57 58my $msg_count = 0; 59 60# load options ? 61#$main::options{'debug'} = 1; 62#$main::options{'debug2'} = 1 if ($main::options{'debug'}); 63 64$OUTPUT_AUTOFLUSH = 1; 65 66my %opt; 67unless (GetOptions(\%opt, 'input-directory=s')) { 68 die("Unknown options."); 69} 70 71die 72 "Usage : $ARGV[-1] [-input-directory=<directory containing individual messages>] <listname> [robot]" 73 unless ($#ARGV >= 0); 74my $listname = $ARGV[0]; 75my $robot = $ARGV[1]; 76 77## Check UID 78unless ([getpwuid $UID]->[0] eq Sympa::Constants::USER) { 79 printf 80 "You should run this script as user \"%s\", ignore ? (y/CR)", 81 Sympa::Constants::USER; 82 my $s = <STDIN>; 83 die unless ($s =~ /^y$/i); 84} 85 86## Load sympa config 87unless (Conf::load()) { 88 die 'config_error'; 89} 90 91Sympa::DatabaseManager::probe_db(); 92 93chdir $Conf::Conf{'home'}; 94 95my $list = Sympa::List->new($listname, $robot) 96 or die 'Cannot create List object'; 97 98my $home_sympa; 99if ($robot) { 100 $home_sympa = $Conf::Conf{'home'} . '/' . $robot; 101} else { 102 $home_sympa = $Conf::Conf{'home'}; 103} 104my $dest_dir = Conf::get_robot_conf($robot, 'arc_path') . '/' . $list->get_id; 105 106unless (-d "$home_sympa/$listname") { 107 die "No directory for list $listname"; 108} 109 110unless (-d "$home_sympa/$listname/archives") { 111 die "No archives for list $listname"; 112} 113 114if (-d $dest_dir) { 115 print "Web archives already exist for list $listname\nGo on (<CR>|n) ?"; 116 my $s = <STDIN>; 117 die if ($s eq 'n'); 118} else { 119 mkdir $dest_dir, 0755 or die; 120} 121 122if ($opt{'input-directory'}) { 123 unless (-d $opt{'input-directory'}) { 124 die "Parameter input-directory (%s) is not a directory", 125 $opt{'input-directory'}; 126 } 127 128 opendir DIR, $opt{'input-directory'} || die; 129 foreach my $file (sort grep (!/^\.\.?$/, readdir(DIR))) { 130 my @msgs; #FIXME: Not used anymore. 131 132 open ARCFILE, $opt{'input-directory'} . '/' . $file; 133 my @msg = <ARCFILE>; 134 push @msgs, \@msg; 135 $msg_count++; 136 close ARCFILE; 137 } 138 closedir DIR; 139 140} else { 141 142 print STDERR "Bursting archives\n"; 143 foreach my $arc_file (<$home_sympa/$listname/archives/log*>) { 144 my ($first, $new); 145 my $msg = []; 146 my @msgs; 147 148 ## Split the archives file 149 print '.'; 150 open ARCFILE, $arc_file; 151 while (<ARCFILE>) { 152 if (/^------- THIS IS A RFC934 (COMPILANT|COMPLIANT) DIGEST/) { 153 $first = 1; 154 $new = 1; 155 next; 156 } elsif (!$first) { 157 next; 158 } elsif (/^$/ && $new) { 159 next; 160 } elsif (/^------- CUT --- CUT/) { 161 push @msgs, $msg; 162 $msg_count++; 163 $msg = []; 164 $new = 1; 165 } else { 166 push @{$msg}, $_; 167 undef $new; 168 } 169 } 170 close ARCFILE; 171 172 ##Dump 173 #foreach my $i (0..$#msgs) { 174 # printf "******** Message %d *******\n", $i; 175 # print @{$msgs[$i]}; 176 #} 177 178 ## Store messages in web arc 179 store_messages(\@msgs, $dest_dir); 180 181 } 182} 183 184print STDERR "\nFound $msg_count messages\n"; 185 186# Rebuild web archives 187print STDERR "Rebuilding HTML\n"; 188my $arc_message = Sympa::Message->new( 189 sprintf("\nrebuildarc %s *\n\n", $list->{'name'}), 190 context => $list->{'domain'}, 191 sender => sprintf('listmaster@%s', $list->{'domain'}), 192 date => time 193); 194Sympa::Spool::Archive->new->store($arc_message); 195 196print STDERR "\nHave a look in $dest_dir/-/ directory for messages dateless 197Now, you should add a web_archive parameter in the config file to make it accessible from the web\n"; 198 199## Analyze message header fields and store them in web archives 200sub store_messages { 201 my ($list_of_msg, $dest_dir) = @_; 202 my @msgs = @{$list_of_msg}; 203 204 my %nummsg; 205 206 ## Analyzing Date header fields 207 #print STDERR "Analysing Date: header fields\n"; 208 foreach my $msg (@msgs) { 209 my $incorrect = 0; 210 my ($date, $year, $month); 211 212 print '.'; 213 foreach (@{$msg}) { 214 if (/^Date:\s+(.*)$/) { 215 #print STDERR "#$_#\n"; 216 $date = $1; 217 218 # Date type : Mon, 8 Dec 97 13:33:47 +0100 219 if ($date =~ 220 /^\w{2,3},\s+\d{1,2}\s+([\w\x80-\xFF]{2,3})\s+(\d{2,4})/) 221 { 222 $month = $1; 223 $year = $2; 224 #print STDERR "$month/$year\n"; 225 226 # Date type : 8 Dec 97 13:33:47+0100 227 } elsif ($date =~ /^\d{1,2}\s+(\w{3}) (\d{2,4})/) { 228 $month = $1; 229 $year = $2; 230 231 # Date type : 8-DEC-1997 13:33:47 +0100 232 } elsif ($date =~ /^\d{1,2}-(\w{3})-(\d{4})/) { 233 $month = $1; 234 $year = $2; 235 236 # Date type : Mon Dec 8 13:33:47 1997 237 } elsif ($date =~ 238 /^\w+\s+(\w+)\s+\d{1,2} \d+:\d+:\d+ (GMT )?(\d{4})/) { 239 $month = $1; 240 $year = $3; 241 242 # unknown date format 243 } else { 244 $incorrect = 1; 245 last; 246 } 247 248 # Month format 249 if ($month !~ /^\d+$/) { 250 $month =~ y/\xe9\xfb/eu/; #FIXME 251 $month =~ y/A-Z/a-z/; 252 if (!$month_idx{$month}) { 253 $incorrect = 1; 254 } else { 255 $month = $month_idx{$month}; 256 } 257 } elsif (($month < 1) or ($month > 12)) { 258 $incorrect = 1; 259 } 260 $month = "0" . $month if $month =~ /^\d$/; 261 262 # Checking Year format 263 if ($year =~ /^[89]\d$/) { 264 $year = "19" . $year; 265 } elsif ($year !~ /^19[89]\d|20[0-9][0-9]$/) { 266 $incorrect = 1; 267 } 268 269 last; 270 } 271 272 # empty line => end of header 273 if (/^\s*$/) { 274 last; 275 } 276 } 277 278 # Unknown date format/No date 279 if ($incorrect || !$month || !$year) { 280 $year = 'UN'; 281 $month = 'KNOWN'; 282 } 283 284 # New month 285 if (!-d "$dest_dir/$year-$month") { 286 print "\nNew directory $year-$month\n"; 287 `mkdir $dest_dir/$year-$month`; 288 } 289 290 if (!-d "$dest_dir/$year-$month/arctxt") { 291 `mkdir $dest_dir/$year-$month/arctxt`; 292 } 293 294 $nummsg{$year}{$month}++ 295 while (-e "$dest_dir/$year-$month/arctxt/$nummsg{$year}{$month}"); 296 297 # Save message 298 open DESTFILE, 299 ">$dest_dir/$year-$month/arctxt/$nummsg{$year}{$month}"; 300 print DESTFILE @{$msg}; 301 close DESTFILE; 302 # `mv $m $dest_dir/$year-$month/arctxt/$nummsg{$year}{$month}`; 303 $nummsg{$year}{$month}++; 304 } 305 306 return 1; 307} 308