1#!/net/nf/bin/perl
2#$Id: prsfrom.pl 1.2 1998/01/21 12:09:26 aburgers Exp aburgers $
3# parse command-line arguments
4
# Parse the command line.  -o takes an argument (the output mailbox);
# any other single-letter switch, such as -h, is a boolean flag.
#
# Getopt::Std replaces the Perl 4 library getopt.pl, which is no longer
# shipped with perl (removed from the core distribution in 5.16).
# getopt('o') sets $opt_o / $opt_h in the same way &Getopt('o') did and
# leaves the remaining arguments (the input mailbox) in @ARGV.
use Getopt::Std;
getopt('o');
7
8# print a help message
9
# With -h: print the usage summary to the currently selected output
# handle (standard output at this point) and stop without processing
# any mailbox.
#
# The separator example is indented on purpose: the script recognises a
# separator as a line starting with "From " (see the main loop), and an
# unindented example line of that form gets rewritten to ">From " by mail
# software whenever this file itself is stored in a mailbox.
if ($opt_h) {
    print <<"HELP";
usage:
	$0 -h
	$0 [-o output_mailbox] [input_mailbox]

mhonarc extracts the date from a message from Date: or Received:
fields from the message-header. The sender is extracted from a From:
field. There are several cases when these fields are missing ( e.g.
out-boxes of Eudora, DEC-mailx). In all these cases it is possible to
extract the sender and the date from the message separator line.

$0 checks messages in mailbox input_mailbox (or standard input if
input_mailbox is not specified) for the presence of Date:, Received:
and From: fields.  If information is missing $0 attempts to construct
these fields from the message separator.  $0 assumes the messages are
separated by a line of the following form.

    From sender date

The new Date: and From: fields are written
directly after the message separator. A new mailbox is written to
standard output or the file specified with the -o option. If the -o
option is used some statistics are reported to standard output.
HELP
    exit 0;
}
36
37# open output-file
38
# With -o: create/truncate the output mailbox and make it the default
# handle for every subsequent bare print.
if ($opt_o) {
    # Three-argument open: the file name is taken literally, so names
    # with leading/trailing blanks or mode characters cannot be
    # misinterpreted.  $! tells the user why the open failed.
    open(OUT, '>', $opt_o) || die "Error opening file $opt_o: $!\n";
    select OUT;
}
43
# Counters reported by the statistics section at the end of the script.
# They stay package globals because that section reads them.
$msg        = 0;    # number of "From " separator lines seen
$print_date = 0;    # number of Date: fields added
$print_from = 0;    # number of From: fields added

# method
#
# The message header is assumed to start at a line starting with /^From /
# and end at the next blank line.
# The sender and the date are extracted from the /^From / line. The
# lines of the header are stored in array @headerlines and checked
# for the presence of Date:, Received: and From: fields.
#
# $inheader=1 means we are processing a header
# $inheader=0 means we are outside a header

# emit_header($date, $adress, $have_date, $have_from, @headerlines)
#
# Writes one buffered header to the currently selected output handle.
#   $date, $adress  date and sender taken from the separator line
#   $have_date      true if the header already has a Date: or Received: field
#   $have_from      true if the header already has a From: field
#   @headerlines    the header lines as read, newlines included
# A missing Date:/From: field is written in front of the original header
# lines (i.e. directly after the separator, which was already printed)
# and counted in $print_date / $print_from.  If the separator did not
# supply the value, a warning is issued and nothing is added.
sub emit_header {
    my ($date, $adress, $have_date, $have_from, @headerlines) = @_;

    unless ($have_date) {
        if ($date) {
            print "Date: $date\n";
            $print_date++;
        }
        else {
            warn "No date in From field\n";
        }
    }

    unless ($have_from) {
        if ($adress) {
            print "From: $adress\n";
            $print_from++;
        }
        else {
            warn "No adress in From field\n";
        }
    }

    # Copy header to new mailbox
    print @headerlines;
}

my $inheader       = 0;
my $date_found     = 0;
my $received_found = 0;
my $from_found     = 0;
my ($date, $adress);    # taken from the most recent separator line
my @headerlines;        # header lines buffered until the header ends

while (my $line = <>) {
    if ($inheader) {    # process message-header
        push(@headerlines, $line);
        if ($line =~ /^date:/i) {
            $date_found = 1;
        }
        elsif ($line =~ /^received:/i) {
            $received_found = 1;
        }
        elsif ($line =~ /^from:/i) {
            $from_found = 1;
        }
        elsif ($line =~ /^\s*$/) {    # blank line ending header
            emit_header($date, $adress, $date_found || $received_found,
                $from_found, @headerlines);

            # Reset state for the next message
            $inheader       = 0;
            @headerlines    = ();
            $date_found     = 0;
            $received_found = 0;
            $from_found     = 0;
        }
    }
    else {    # process message-body and message separator
        if ($line =~ /^From /) {    # separator: a header follows
            # "From sender date...": the limit of 3 keeps the whole date,
            # blanks included, in the last field.
            (undef, $adress, $date) = split(' ', $line, 3);
            $adress = '' unless defined $adress;
            $date   = '' unless defined $date;
            $adress =~ s/\s*$//;
            $date   =~ s/\s*$//;    # drops the trailing newline
            $inheader = 1;
            $msg++;
        }
        print $line;
    }
}

# The input may end while a header is still being buffered (last message
# has no blank line after its header).  Flush it so those lines are not
# lost from the new mailbox.
if ($inheader) {
    emit_header($date, $adress, $date_found || $received_found,
        $from_found, @headerlines);
}
114
115# print statistics
116
# With -o the new mailbox went to a file, so standard output is free for
# a short report.
if ($opt_o) {
    select STDOUT;

    # Buffered write errors (e.g. a full disk) only surface when the
    # handle is closed, so check the close instead of silently leaving a
    # truncated mailbox behind.
    close(OUT) || die "Error writing file $opt_o: $!\n";

    print "Total number of messages found: $msg\n";
    print "Added a Date field to $print_date messages\n" if ($print_date);
    print "Added a From field to $print_from messages\n" if ($print_from);
}
123
124__END__
125
126=head1 NAME
127
128B<prsfrom> - supply missing Date: and From: fields to mailboxes
129
130=head1 SYNOPSIS
131
132B<prsfrom> [B<-o> F<output_mailbox>] [F<input_mailbox>]
133
134B<prsfrom> [B<-h>]
135
136=head1 DESCRIPTION
137
138B<prsfrom> is a tool meant to be used in conjunction with B<mhonarc>.
139B<mhonarc> extracts the date from a message from Date: or Received:
140fields from the message-header. The sender is extracted from a From:
141field. There are several cases when these fields are missing ( e.g.
142out-boxes of Eudora, DEC-mailx). In all these cases it is possible to
143extract the sender and the date from the message separator line.
144
145B<prsfrom> checks messages in mailbox F<input_mailbox> (or standard input
146if F<input_mailbox> is not specified) for the presence of Date:,
147Received:  and From: fields. If information is missing B<prsfrom>
148attempts to construct these fields from the message separator.
B<prsfrom> assumes the messages are separated by a line of the following
form.

 From sender date
153
154The new Date: and From: fields are written directly after the message
155separator. A new mailbox is written to standard output or the file
156specified with the -o option. If the -o option is used some statistics
157are reported to standard output.
158
159If the -h option is specified a usage summary is written to standard
160output.
161
162=head2 Options
163
164=over
165
166=item B<-h>
167
A usage summary is written to standard output. No further processing is
done.
170
171=item B<-o> F<output_mailbox>
172
By default the new mailbox is written to standard output. With the
174-o option a file to receive the new mailbox can be specified.
175If the -o option is specified, some statistics are written
176to standard output.
177
178=back
179
180=head1 RESTRICTIONS
181
B<prsfrom> also changes the headers of messages in
mailboxes included as attachments in other messages.
184
185=head1 RETURN VALUE
186
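The return value of B<prsfrom> is 0, unless the output file given with
B<-o> cannot be opened, in which case B<prsfrom> exits with a non-zero
status.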
188
189=head1 SEE ALSO
190
191=for html
192See the <a href="http://www.oac.uci.edu/indiv/ehood/mhonarc.html">mhonarc home-page</a>.
193
194=head1 AUTHOR
195
196=begin latex
197
198A.R. Burgers\\
199Netherlands Energy Research Foundation ECN\\
200P.O. Box 1, 1755 ZG Petten, The Netherlands\\
201e-mail: burgers@ecn.nl
202
203=end latex
204
205=for text
206 A.R. Burgers
207 Netherlands Energy Research Foundation ECN
208 P.O. Box 1, 1755 ZG Petten, The Netherlands
209 e-mail: burgers@ecn.nl
210
211=for html
212A.R. Burgers <br>
213Netherlands Energy Research Foundation ECN <br>
214P.O. Box 1, 1755 ZG Petten, The Netherlands <br>
215e-mail: <a href="mailto:burgers@ecn.nl">burgers@ecn.nl </a>
216