#!/net/nf/bin/perl
#$Id: prsfrom.pl 1.2 1998/01/21 12:09:26 aburgers Exp aburgers $

# prsfrom - supply missing Date: and From: fields to the messages of a
# mailbox, using the information on the "From sender date" separator lines.

use strict;
use warnings;
use Getopt::Std;

# Method
#
# A message header is assumed to start at a line matching /^From / and
# to end at the next blank line (or at end of input).
# The sender and the date are extracted from the /^From / line. The
# lines of the header are buffered and checked for the presence of
# Date:, Received: and From: fields. When the header is complete, any
# missing Date:/From: field is written directly after the separator
# line, followed by the buffered header lines.

# Split a "From sender date" separator line into its sender and date.
# Returns the list (sender, date); a part that is absent from the line
# is returned as the empty string. Trailing whitespace is removed.
sub parse_separator {
    my ($line) = @_;

    # Limit 3 keeps the date (which itself contains blanks) in one piece.
    my (undef, $address, $date) = split ' ', $line, 3;
    for my $part ($address, $date) {
        $part = '' unless defined $part;
        $part =~ s/\s*$//;
    }
    return ($address, $date);
}

# Write one buffered message header to $out.
# $header_lines is an array ref with the original header lines, $seen a
# hash ref with true values under 'date', 'received' and 'from' for the
# fields found in them. A Date: field is synthesized from $date when
# neither Date: nor Received: was seen, a From: field from $address when
# From: was not seen; a warning is issued on STDERR when the separator
# did not supply the needed value.
# Returns the list (date_added, from_added), each 0 or 1.
sub write_header {
    my ($out, $header_lines, $seen, $address, $date) = @_;
    my ($date_added, $from_added) = (0, 0);

    if (!($seen->{date} || $seen->{received})) {
        if (length $date) {
            print {$out} "Date: $date\n";
            $date_added = 1;
        }
        else {
            warn "No date in From field\n";
        }
    }

    if (!$seen->{from}) {
        if (length $address) {
            print {$out} "From: $address\n";
            $from_added = 1;
        }
        else {
            warn "No adress in From field\n";
        }
    }

    # Copy the original header to the new mailbox.
    print {$out} @{$header_lines};

    return ($date_added, $from_added);
}

# Copy the mailbox read from filehandle $in to filehandle $out, adding
# missing Date: and From: fields to every message header.
# Returns the list (messages found, Date fields added, From fields added).
sub process_mailbox {
    my ($in, $out) = @_;

    my ($messages, $dates_added, $froms_added) = (0, 0, 0);
    my $in_header = 0;          # true while the lines of a header are read
    my @header_lines;           # header lines buffered so far
    my %seen;                   # fields found in the current header
    my ($address, $date) = ('', '');

    # Emit the buffered header and reset the per-message state.
    my $finish_header = sub {
        my ($date_added, $from_added)
            = write_header($out, \@header_lines, \%seen, $address, $date);
        $dates_added += $date_added;
        $froms_added += $from_added;
        $in_header    = 0;
        @header_lines = ();
        %seen         = ();
    };

    LINE:
    while (defined(my $line = <$in>)) {
        if (!$in_header) {      # message body or message separator
            if ($line =~ /^From /) {
                ($address, $date) = parse_separator($line);
                $in_header = 1;
                $messages++;
            }
            print {$out} $line;
            next LINE;
        }

        # Inside a header: buffer the line and note the fields of interest.
        push @header_lines, $line;
        if    ($line =~ /^date:/i)     { $seen{date}     = 1 }
        elsif ($line =~ /^received:/i) { $seen{received} = 1 }
        elsif ($line =~ /^from:/i)     { $seen{from}     = 1 }
        elsif ($line =~ /^\s*$/)       { $finish_header->() }
    }

    # Input ended inside a header (no terminating blank line): the
    # buffered lines must still be written, otherwise they would be lost.
    $finish_header->() if $in_header;

    return ($messages, $dates_added, $froms_added);
}

# Return the text of the help message printed for the -h option.
sub help_text {
    # The separator example shows a plain "From " line, which is what
    # process_mailbox() matches; ">From " is the escaped body-line form.
    return <<HELP;
usage:
    $0 -h
    $0 [-o output_mailbox] [input_mailbox]

mhonarc extracts the date from a message from Date: or Received:
fields from the message-header. The sender is extracted from a From:
field. There are several cases when these fields are missing ( e.g.
out-boxes of Eudora, DEC-mailx). In all these cases it is possible to
extract the sender and the date from the message separator line.

$0 checks messages in mailbox input_mailbox (or standard input if
input_mailbox is not specified) for the presence of Date:, Received:
and From: fields. If information is missing $0 attempts to construct
these fields from the message separator. $0 assumes the message are
separated by a line of the following form.

    From sender date

The new Date: and From: fields are written
directly after the message separator. A new mailbox is written to
standard output or the file specified with the -o option. If the -o
option is used some statistics are reported to standard output.
HELP
}

# Program entry point: parse the command line, convert the mailbox named
# in @ARGV (or standard input) and, with -o, report statistics on
# standard output. Returns the exit status 0; dies on errors.
sub main {
    my %opt;
    getopts('ho:', \%opt)
        or die "usage: $0 [-h] [-o output_mailbox] [input_mailbox]\n";

    if ($opt{h}) {
        print help_text();
        return 0;
    }

    my $to_file = defined $opt{o} && length $opt{o};
    my $out;
    if ($to_file) {
        # Three-argument open: the file name can never be taken as a mode.
        open $out, '>', $opt{o}
            or die "Error opening file $opt{o}: $!\n";
    }
    else {
        $out = \*STDOUT;
    }

    # Reading from ARGV gives the usual <> behaviour: the files named on
    # the command line, or standard input when there are none.
    my ($messages, $dates_added, $froms_added)
        = process_mailbox(\*ARGV, $out);

    if ($to_file) {
        # Buffered write errors only show up when the file is closed.
        close $out
            or die "Error closing file $opt{o}: $!\n";

        print "Total number of messages found: $messages\n";
        print "Added a Date field to $dates_added messages\n" if $dates_added;
        print "Added a From field to $froms_added messages\n" if $froms_added;
    }

    return 0;
}

# Run only when executed as a program, so the subs above can be loaded
# and exercised on their own.
exit main() unless caller;

1;

__END__

=head1 NAME

B<prsfrom> - supply missing Date: and From: fields to mailboxes

=head1 SYNOPSIS

B<prsfrom> [B<-o> F<output_mailbox>] [F<input_mailbox>]

B<prsfrom> [B<-h>]

=head1 DESCRIPTION

B<prsfrom> is a tool meant to be used in conjunction with B<mhonarc>.
B<mhonarc> extracts the date from a message from Date: or Received:
fields from the message-header.
The sender is extracted from a From:
field. There are several cases when these fields are missing (e.g.
out-boxes of Eudora, DEC-mailx). In all these cases it is possible to
extract the sender and the date from the message separator line.

B<prsfrom> checks messages in mailbox F<input_mailbox> (or standard input
if F<input_mailbox> is not specified) for the presence of Date:,
Received: and From: fields. If information is missing B<prsfrom>
attempts to construct these fields from the message separator.
B<prsfrom> assumes the messages are separated by a line of the following
form.

    From sender date

The new Date: and From: fields are written directly after the message
separator. A new mailbox is written to standard output or the file
specified with the -o option. If the -o option is used some statistics
are reported to standard output.

If the -h option is specified a usage summary is written to standard
output.

=head2 Options

=over

=item B<-h>

A usage summary is written to standard output. No further processing is
done.

=item B<-o> F<output_mailbox>

By default the new mailbox is written to standard output. With the
-o option a file to receive the new mailbox can be specified.
If the -o option is specified, some statistics are written
to standard output.

=back

=head1 RESTRICTIONS

B<prsfrom> also changes the headers of messages in
mailboxes included as attachments in other messages.

=head1 RETURN VALUE

The return value of B<prsfrom> is 0, unless an error occurs (for
example, the output mailbox cannot be opened); in that case B<prsfrom>
stops with an error message and a non-zero return value.

=head1 SEE ALSO

=for html
See the <a href="http://www.oac.uci.edu/indiv/ehood/mhonarc.html">mhonarc home-page</a>.

=head1 AUTHOR

=begin latex

A.R. Burgers\\
Netherlands Energy Research Foundation ECN\\
P.O. Box 1, 1755 ZG Petten, The Netherlands\\
e-mail: burgers@ecn.nl

=end latex

=for text
  A.R. Burgers
  Netherlands Energy Research Foundation ECN
  P.O. Box 1, 1755 ZG Petten, The Netherlands
  e-mail: burgers@ecn.nl

=for html
A.R. Burgers <br>
Netherlands Energy Research Foundation ECN <br>
P.O. Box 1, 1755 ZG Petten, The Netherlands <br>
e-mail: <a href="mailto:burgers@ecn.nl">burgers@ecn.nl </a>