1#! --PERL--
2# -*- indent-tabs-mode: nil; -*-
3# vim:ft=perl:et:sw=4
4# $Id$
5
6# Sympa - SYsteme de Multi-Postage Automatique
7#
8# Copyright (c) 1997, 1998, 1999 Institut Pasteur & Christophe Wolfhugel
9# Copyright (c) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
10# 2006, 2007, 2008, 2009, 2010, 2011 Comite Reseau des Universites
11# Copyright (c) 2011, 2012, 2013, 2014, 2015, 2016, 2017 GIP RENATER
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program.  If not, see <http://www.gnu.org/licenses/>.
25
26use lib '--modulesdir--';
27use strict;
28use warnings;
29use English qw(-no_match_vars);
30use Getopt::Long;
31
32use Conf;
33use Sympa::Constants;
34use Sympa::DatabaseManager;
35use Sympa::List;
36use Sympa::Message;
37use Sympa::Spool::Archive;
38
39my %month_idx = qw(jan 1
40    fev 2
41    feb 2
42    fv  2
43    mar 3
44    avr 4
45    apr 4
46    mai 5
47    may 5
48    jun 6
49    jul 7
50    aug 8
51    aou 8
52    sep 9
53    oct 10
54    nov 11
55    dec 12
56    dc  12);
57
58my $msg_count = 0;
59
60# load options ?
61#$main::options{'debug'} = 1;
62#$main::options{'debug2'} = 1 if ($main::options{'debug'});
63
64$OUTPUT_AUTOFLUSH = 1;
65
66my %opt;
67unless (GetOptions(\%opt, 'input-directory=s')) {
68    die("Unknown options.");
69}
70
71die
72    "Usage : $ARGV[-1] [-input-directory=<directory containing individual messages>] <listname> [robot]"
73    unless ($#ARGV >= 0);
74my $listname = $ARGV[0];
75my $robot    = $ARGV[1];
76
77## Check UID
78unless ([getpwuid $UID]->[0] eq Sympa::Constants::USER) {
79    printf
80        "You should run this script as user \"%s\", ignore ? (y/CR)",
81        Sympa::Constants::USER;
82    my $s = <STDIN>;
83    die unless ($s =~ /^y$/i);
84}
85
86## Load sympa config
87unless (Conf::load()) {
88    die 'config_error';
89}
90
91Sympa::DatabaseManager::probe_db();
92
93chdir $Conf::Conf{'home'};
94
95my $list = Sympa::List->new($listname, $robot)
96    or die 'Cannot create List object';
97
98my $home_sympa;
99if ($robot) {
100    $home_sympa = $Conf::Conf{'home'} . '/' . $robot;
101} else {
102    $home_sympa = $Conf::Conf{'home'};
103}
104my $dest_dir = Conf::get_robot_conf($robot, 'arc_path') . '/' . $list->get_id;
105
106unless (-d "$home_sympa/$listname") {
107    die "No directory for list $listname";
108}
109
110unless (-d "$home_sympa/$listname/archives") {
111    die "No archives for list $listname";
112}
113
114if (-d $dest_dir) {
115    print "Web archives already exist for list $listname\nGo on (<CR>|n) ?";
116    my $s = <STDIN>;
117    die if ($s eq 'n');
118} else {
119    mkdir $dest_dir, 0755 or die;
120}
121
122if ($opt{'input-directory'}) {
123    unless (-d $opt{'input-directory'}) {
124        die "Parameter input-directory (%s) is not a directory",
125            $opt{'input-directory'};
126    }
127
128    opendir DIR, $opt{'input-directory'} || die;
129    foreach my $file (sort grep (!/^\.\.?$/, readdir(DIR))) {
130        my @msgs;    #FIXME: Not used anymore.
131
132        open ARCFILE, $opt{'input-directory'} . '/' . $file;
133        my @msg = <ARCFILE>;
134        push @msgs, \@msg;
135        $msg_count++;
136        close ARCFILE;
137    }
138    closedir DIR;
139
140} else {
141
142    print STDERR "Bursting archives\n";
143    foreach my $arc_file (<$home_sympa/$listname/archives/log*>) {
144        my ($first, $new);
145        my $msg = [];
146        my @msgs;
147
148        ## Split the archives file
149        print '.';
150        open ARCFILE, $arc_file;
151        while (<ARCFILE>) {
152            if (/^------- THIS IS A RFC934 (COMPILANT|COMPLIANT) DIGEST/) {
153                $first = 1;
154                $new   = 1;
155                next;
156            } elsif (!$first) {
157                next;
158            } elsif (/^$/ && $new) {
159                next;
160            } elsif (/^------- CUT --- CUT/) {
161                push @msgs, $msg;
162                $msg_count++;
163                $msg = [];
164                $new = 1;
165            } else {
166                push @{$msg}, $_;
167                undef $new;
168            }
169        }
170        close ARCFILE;
171
172        ##Dump
173        #foreach my $i (0..$#msgs) {
174        #    printf "******** Message %d *******\n", $i;
175        #    print @{$msgs[$i]};
176        #}
177
178        ## Store messages in web arc
179        store_messages(\@msgs, $dest_dir);
180
181    }
182}
183
184print STDERR "\nFound $msg_count messages\n";
185
186# Rebuild web archives
187print STDERR "Rebuilding HTML\n";
188my $arc_message = Sympa::Message->new(
189    sprintf("\nrebuildarc %s *\n\n", $list->{'name'}),
190    context => $list->{'domain'},
191    sender  => sprintf('listmaster@%s', $list->{'domain'}),
192    date    => time
193);
194Sympa::Spool::Archive->new->store($arc_message);
195
196print STDERR "\nHave a look in $dest_dir/-/ directory for messages dateless
197Now, you should add a web_archive parameter in the config file to make it accessible from the web\n";
198
199## Analyze message header fields and store them in web archives
200sub store_messages {
201    my ($list_of_msg, $dest_dir) = @_;
202    my @msgs = @{$list_of_msg};
203
204    my %nummsg;
205
206    ## Analyzing Date header fields
207    #print STDERR "Analysing Date: header fields\n";
208    foreach my $msg (@msgs) {
209        my $incorrect = 0;
210        my ($date, $year, $month);
211
212        print '.';
213        foreach (@{$msg}) {
214            if (/^Date:\s+(.*)$/) {
215                #print STDERR "#$_#\n";
216                $date = $1;
217
218                # Date type : Mon, 8 Dec 97 13:33:47 +0100
219                if ($date =~
220                    /^\w{2,3},\s+\d{1,2}\s+([\w\x80-\xFF]{2,3})\s+(\d{2,4})/)
221                {
222                    $month = $1;
223                    $year  = $2;
224                    #print STDERR "$month/$year\n";
225
226                    # Date type : 8 Dec 97 13:33:47+0100
227                } elsif ($date =~ /^\d{1,2}\s+(\w{3}) (\d{2,4})/) {
228                    $month = $1;
229                    $year  = $2;
230
231                    # Date type : 8-DEC-1997 13:33:47 +0100
232                } elsif ($date =~ /^\d{1,2}-(\w{3})-(\d{4})/) {
233                    $month = $1;
234                    $year  = $2;
235
236                    # Date type : Mon Dec 8 13:33:47 1997
237                } elsif ($date =~
238                    /^\w+\s+(\w+)\s+\d{1,2} \d+:\d+:\d+ (GMT )?(\d{4})/) {
239                    $month = $1;
240                    $year  = $3;
241
242                    # unknown date format
243                } else {
244                    $incorrect = 1;
245                    last;
246                }
247
248                # Month format
249                if ($month !~ /^\d+$/) {
250                    $month =~ y/\xe9\xfb/eu/;    #FIXME
251                    $month =~ y/A-Z/a-z/;
252                    if (!$month_idx{$month}) {
253                        $incorrect = 1;
254                    } else {
255                        $month = $month_idx{$month};
256                    }
257                } elsif (($month < 1) or ($month > 12)) {
258                    $incorrect = 1;
259                }
260                $month = "0" . $month if $month =~ /^\d$/;
261
262                # Checking Year format
263                if ($year =~ /^[89]\d$/) {
264                    $year = "19" . $year;
265                } elsif ($year !~ /^19[89]\d|20[0-9][0-9]$/) {
266                    $incorrect = 1;
267                }
268
269                last;
270            }
271
272            # empty line => end of header
273            if (/^\s*$/) {
274                last;
275            }
276        }
277
278        # Unknown date format/No date
279        if ($incorrect || !$month || !$year) {
280            $year  = 'UN';
281            $month = 'KNOWN';
282        }
283
284        # New month
285        if (!-d "$dest_dir/$year-$month") {
286            print "\nNew directory $year-$month\n";
287            `mkdir $dest_dir/$year-$month`;
288        }
289
290        if (!-d "$dest_dir/$year-$month/arctxt") {
291            `mkdir $dest_dir/$year-$month/arctxt`;
292        }
293
294        $nummsg{$year}{$month}++
295            while (-e "$dest_dir/$year-$month/arctxt/$nummsg{$year}{$month}");
296
297        # Save message
298        open DESTFILE,
299            ">$dest_dir/$year-$month/arctxt/$nummsg{$year}{$month}";
300        print DESTFILE @{$msg};
301        close DESTFILE;
302        #    `mv $m $dest_dir/$year-$month/arctxt/$nummsg{$year}{$month}`;
303        $nummsg{$year}{$month}++;
304    }
305
306    return 1;
307}
308