1# -*-perl-*-
2#
3# Copyright (c) 1996-1998 Kevin Johnson <kjj@pobox.com>.
4#
5# All rights reserved. This program is free software; you can
6# redistribute it and/or modify it under the same terms as Perl
7# itself.
8#
9# $Id: Mbox.pm,v 1.6 1998/04/05 17:21:53 kjj Exp $
10
11require 5.00397;
12
13package Mail::Folder::Mbox;
14use strict;
15use vars qw($VERSION @ISA $folder_id);
16
17@ISA = qw(Mail::Folder);
18$VERSION = "0.07";
19
20Mail::Folder->register_type('mbox');
21
22=head1 NAME
23
24Mail::Folder::Mbox - A Unix mbox interface for Mail::Folder.
25
26B<WARNING: This code is in alpha release. Expect the interface to
27change.>
28
29=head1 SYNOPSIS
30
31C<use Mail::Folder::Mbox;>
32
33=head1 DESCRIPTION
34
35This module provides an interface to unix B<mbox> folders.
36
37The B<mbox> folder format is the standard monolithic folder structure
38prevalent on Unix.  A single folder is contained within a single file.
39Each message starts with a line matching C</^From /> and ends with a
40blank line.
41
42The folder architecture does not provide any persistantly stored
43current message variable, so the current message in this folder
44interface defaults to C<1> and is not retained between C<open>s of a
45folder.
46
47If the C<Timeout> option is specified when the object is created, that
48value will be used to determine the timeout for attempting to aquire a
49folder lock.  The default is 10 seconds.
50
51If the C<DotLock> option is specified when the object is created, that
52value will be used to determine whether or not to use 'C<.lock>' style
53folder locking.  The default value is C<1>.
54
55If the C<Flock> option is specified when the object is created, that
56value will be used to determined whether or not to use C<flock> style
57folder locking.  By default, the option is not set.
58
59If the C<NFSLock> option is specified when the object is created, that
60value will be used to determine whether or not special measures are
61taken when doing C<DotLock>ing.  These special measures consist of
62constructing the lock file in a special manner that is more immune to
63atomicity problems with NFS when creating a folder lock file.  By
64default, the option is not set.  This option necessitates the ability
65to use long filenames.
66
67It is currently a fatal error to have both C<DotLock> and C<Flock>
68disabled.
69
70**NOTE** flock locking is currently disabled until I can sift out the
71'right way'. **NOTE**
72
73=cut
74
75use Mail::Folder;
76use Mail::Internet;
77use Mail::Header;
78use Mail::Address;
79use Date::Format;
80use Date::Parse;
81# use File::BasicFlock;
82use IO::File;
83use DirHandle;
84use Sys::Hostname;		# for NFSLock option
85use Carp;
86
87$folder_id = 0;			# used to generate a unique id per open folder
88
89=head1 METHODS
90
91=head2 open($folder_name)
92
93=over 2
94
95=item * Call the superclass C<open> method.
96
97=item * Check to see if it is a valid mbox folder.
98
99=item * Mark it as readonly if the folder is not writable.
100
101=item * Lock the folder.
102
103=item * Split the folder into individual messages in a temporary
104working directory.
105
106=item * Unlock the folder.
107
108=item * Cache all the headers.
109
110=item * Update the appropriate labels with information in the
111C<Status> fields.
112
113=item * Set C<current_message> to C<1>.
114
115=back
116
117=cut
118
119sub open {
120  my $self = shift;
121  my $foldername = shift;
122
123  return 0 unless $self->SUPER::open($foldername);
124
125  is_valid_folder_format($foldername) || (-z $foldername)
126    or croak "$foldername isn't an mbox folder";
127
128  if (($< == 0) || ($> == 0)) {	# if we're root we have to check it by hand
129    $self->set_readonly unless ((stat($foldername))[2] & 0200);
130  } else {
131    $self->set_readonly unless (-w $foldername);
132  }
133  # $self->set_readonly unless (-w $foldername);
134
135  $self->_lock_folder or return 0;
136
137  my $fh = new IO::File $foldername or croak "can't open $foldername: $!";
138  $fh->seek(0, 2);
139  $self->{MBOX_OldSeekPos} = $fh->tell;
140  $fh->close;
141
142  my $qty_new_msgs = $self->_absorb_mbox($foldername, 0);
143  unless (defined($qty_new_msgs) && $self->_unlock_folder) {
144    $self->_clean_working_dir;
145    return 0;
146  }
147  $self->current_message(1);
148
149  return $qty_new_msgs;
150}
151
152=head2 close
153
154Deletes the internal working copy of the folder and calls the
155superclass C<close> method.
156
157=cut
158
159sub close {
160  my $self = shift;
161
162  $self->_clean_working_dir;
163  return $self->SUPER::close;
164}
165
166=head2 sync
167
168=over 2
169
170=item * Call the superclass C<sync> method.
171
172=item * Lock the folder.
173
174=item * Extract into the temporary working directory any new messages
175that have been appended to the folder since the last time the folder
176was either C<open>ed or C<sync>ed.
177
178=item * Create a new copy of the folder and populate it with the
179messages in the working copy that are not flagged for deletion and
180update the C<Status> fields appropriately.
181
182=item * Move the original folder to a temp location
183
184=item * Move the new folder into place
185
186=item * Delete the old original folder
187
188=item * Unlock the folder
189
190=back
191
192=cut
193
194sub sync {
195  my $self = shift;
196
197  my @statary;
198  my $folder = $self->foldername;
199  my $tmpfolder = "$folder.$$";
200  my $infh;
201  my $outfh;
202
203  return -1 if ($self->SUPER::sync == -1);
204
205  my $last_msgnum = $self->last_message;
206
207  return -1 unless ($self->_lock_folder);
208
209  unless ($infh = new IO::File($folder)) {
210    $self->_unlock_folder;
211    croak "can't open $folder: $!";
212  }
213  $infh->close;
214
215  my $qty_new_msgs = $self->_absorb_mbox($folder, $self->{MBOX_OldSeekPos});
216  unless (defined($qty_new_msgs)) {
217    $self->_unlock_folder;
218  }
219
220  unless ($self->is_readonly) {
221    # we need to diddle current_message if it's pointing to a deleted msg
222    my $msg = $self->current_message;
223    while ($msg >= $self->first_message) {
224      last if (!$self->label_exists($msg, 'deleted'));
225      $msg = $self->prev_message($msg);
226    }
227    $self->current_message($msg);
228
229    for my $msg ($self->select_label('deleted')) {
230      unlink("$self->{MBOX_WorkingDir}/$msg");
231      $self->forget_message($msg);
232    }
233    $self->clear_label('deleted');
234
235    unless (@statary = stat($folder)) {
236      $self->_unlock_folder;
237      croak "can't stat $folder: $!";
238    }
239
240    unless ($outfh = new IO::File $tmpfolder, O_CREAT|O_WRONLY, 0600) {
241      $self->_unlock_folder;
242      croak "can't create $tmpfolder: $!";
243    }
244
245    # match the permissions of the original folder
246    unless (chmod(($statary[2] & 0777), $tmpfolder)) {
247      unlink($tmpfolder);
248      $self->_unlock_folder;
249      croak "can't chmod $tmpfolder: $!";
250    }
251
252    for my $msg (sort { $a <=> $b } $self->message_list) {
253      my $mref = $self->get_message($msg);
254      my $href = $self->get_header($msg);
255
256      unless ($self->get_option('NotMUA')) {
257	my $status = 'O';
258	$status = 'RO' if $self->label_exists($msg, 'seen');
259	$href->replace('Status', $status, -1);
260      }
261
262      my $from = $href->get('Mail-From') || $href->get('From ');
263
264      # we dup them cuz we're going to modify them
265      my $dup_href = $href->dup;
266      my $dup_mref = $mref->dup;
267      $dup_href->delete('Mail-From') if ($dup_href->count('Mail-From'));
268
269      $outfh->print("From $from");
270      $dup_href->print($outfh);
271      $outfh->print("\n");
272      $dup_mref->escape_from unless $self->get_option('Content-Length');
273      $dup_mref->print_body($outfh);
274      $outfh->print("\n");
275    }
276    $outfh->close;
277
278    # Move the original folder to a temp location
279
280    unless (rename($folder, "$folder.tmp")) {
281      $self->_unlock_folder;
282      croak "can't move $folder out of the way: $!";
283    }
284
285    # Move the new folder into place
286
287    unless (rename($tmpfolder, $folder)) {
288      $self->_unlock_folder;
289      croak "gack! can't rename $folder.tmp to $folder: $!"
290	unless (rename("$folder.tmp", $folder));
291      croak "can't move $folder to $folder.tmp: $!";
292    }
293
294    # Delete the old original folder
295
296    unless (unlink("$folder.tmp")) {
297      $self->_unlock_folder;
298      croak "can't unlink $folder.tmp: $!";
299    }
300  }
301
302  $self->_unlock_folder;
303
304  return $qty_new_msgs;
305}
306
307=head2 pack
308
309Calls the superclass C<pack> method.
310
311Renames the message list to that there are no gaps in the numbering
312sequence.
313
314It also tweaks the current_message accordingly.
315
316=cut
317
318sub pack {
319  my $self = shift;
320
321  my $newmsg = 0;
322  my $curmsg = $self->current_message;
323
324  return 0 if (!$self->SUPER::pack);
325
326  for my $msg (sort { $a <=> $b } $self->message_list) {
327    $newmsg++;
328    if ($msg > $newmsg) {
329      $self->current_message($newmsg) if ($msg == $curmsg);
330      $self->remember_message($newmsg);
331      $self->cache_header($newmsg, $self->{Messages}{$msg}{Header});
332      $self->forget_message($msg);
333    }
334  }
335
336  return 1;
337}
338
339=item get_message ($msg_number)
340
341Calls the superclass C<get_message> method.
342
343Retrieves the given mail message file into a B<Mail::Internet> object
344reference, sets the 'C<seen>' label, and returns the reference.
345
346If the 'Content-Length' option is not set, then C<get_message> will
347unescape 'From ' lines in the body of the message.
348
349=cut
350
351sub get_message {
352  my $self = shift;
353  my $key = shift;
354
355  return undef unless $self->SUPER::get_message($key);
356
357  my $file = "$self->{MBOX_WorkingDir}/$key";
358
359  my $fh = new IO::File $file or croak "whoa! can't open $file: $!";
360  my $mref = new Mail::Internet($fh,
361				Modify => 0,
362				MailFrom => 'COERCE');
363  $mref->unescape_from unless $self->get_option('Content-Length');
364  $fh->close;
365
366  my $href = $mref->head;
367  $self->cache_header($key, $href);
368
369  $self->add_label($key, 'seen');
370
371  return $mref;
372}
373
374=item get_message_file ($msg_number)
375
376Calls the superclass C<get_message_file> method.
377
378Retrieves the given mail message file and returns the name of the file.
379
380Returns C<undef> on failure.
381
382This method does NOT currently do any 'From ' unescaping.
383
384=cut
385
386sub get_message_file {
387  my $self = shift;
388  my $key = shift;
389
390  return undef unless $self->SUPER::get_message($key);
391
392  return "$self->{MBOX_WorkingDir}/$key";
393}
394
395=head2 get_header($msg_number)
396
397If the particular header has never been retrieved then C<get_header>
398loads (in a manner similar to C<get_message>) the header of the given
399mail message into C<$self-E<gt>{Messages}{$msg_number}{Header}> and
400returns the object reference.
401
402If the header for the given mail message has already been retrieved in
403a prior call to C<get_header>, then the cached entry is returned.
404
405It also calls the superclass C<get_header> method.
406
407=cut
408
409sub get_header {
410  my $self = shift;
411  my $key = shift;
412
413  my $hdr = $self->SUPER::get_header($key);
414  return $hdr if defined($hdr);
415
416  # return undef unless ($self->SUPER::get_header($key));
417
418  # return $self->{Messages}{$key}{Header} if ($self->{Messages}{$key}{Header});
419
420  my $file = "$self->{MBOX_WorkingDir}/$key";
421
422  my $fh = new IO::File $file or croak "can't open $file: $!";
423  my $href = new Mail::Header($fh,
424			      Modify => 0,
425			      MailFrom => 'COERCE');
426  $fh->close;
427
428  $self->cache_header($key, $href);
429
430  return $href;
431}
432
433=head2 append_message($mref)
434
435=over 2
436
437Calls the superclass C<append_message> method.
438
439Creates a new mail message file, in the temporary working directory,
440with the contents of the mail message contained in C<$mref>.
441It will synthesize a 'From ' line if one is not present in
442C<$mref>.
443
444If the 'Content-Length' option is not set, then C<get_message> will
445escape 'From ' lines in the body of the message.
446
447=cut
448
449sub append_message {
450  my $self = shift;
451  my $mref = shift;
452
453  my $msgnum = $self->last_message;
454
455  my $dup_mref = $mref->dup;
456
457  return 0 unless $self->SUPER::append_message($dup_mref);
458
459  my $dup_href = $mref->head->dup;
460  $dup_mref->escape_from unless ($self->get_option('Content-Length'));
461
462  $msgnum++;
463  my $fh = new IO::File("$self->{MBOX_WorkingDir}/$msgnum",
464			O_CREAT|O_WRONLY, 0600)
465    or croak "can't create $self->{MBOX_WorkingDir}/$msgnum: $!";
466  _coerce_header($dup_href);
467  $dup_href->print($fh);
468  $fh->print("\n");
469  $dup_mref->print_body($fh);
470  $fh->close;
471
472  $self->remember_message($msgnum);
473
474  return 1;
475}
476
477=head2 update_message($msg_number, $mref)
478
479Calls the superclass C<update_message> method.
480
481Replaces the message pointed to by C<$msg_number> with the contents of
482the C<Mail::Internet> object reference C<$mref>.
483
484It will synthesize a 'From ' line if one is not present in
485$mref.
486
487If the 'Content-Length' option is not set, then C<get_message> will
488escape 'From ' lines in the body of the message.
489
490=cut
491
492sub update_message {
493  my $self = shift;
494  my $key = shift;
495  my $mref = shift;
496
497  my $file_pos = 0;
498  my $filename = "$self->{MBOX_WorkingDir}/$key";
499
500  my $dup_mref = $mref->dup;
501  my $dup_href = $dup_mref->head->dup;
502
503  return 0 unless $self->SUPER::update_message($key, $dup_mref);
504
505  $dup_mref->escape_from unless $self->get_option('Content-Length');
506
507  my $fh = new IO::File "$filename.new", O_CREAT|O_WRONLY, 0600
508    or croak "can't create $filename.new: $!";
509  _coerce_header($dup_href);
510  $dup_href->print($fh);
511  $fh->print("\n");
512  $dup_mref->print_body($fh);
513  $fh->close;
514
515  rename("$filename.new", $filename) or
516    croak "can't rename $filename.new to $filename: $!";
517
518  return 1;
519}
520
521=head2 init
522
523Initializes various items specific to B<Mbox>.
524
525=over 2
526
527=item * Determines an appropriate temporary directory.  If the
528C<TMPDIR> environment variable is set, it uses that, otherwise it uses
529C</tmp>.  The working directory will be a subdirectory in that
530directory.
531
532=item * Bumps a sequence number used for unique temporary filenames.
533
534=item * Initializes C<$self-E<gt>{WorkingDir}> to the name of a
535directory that will be used to hold the working copies of the messages
536in the folder.
537
538=back
539
540=cut
541
542sub init {
543  my $self = shift;
544
545  my $tmpdir = $ENV{TMPDIR} ? $ENV{TMPDIR} : "/tmp";
546
547  $self->{MBOX_WorkingDir} = undef;
548  $folder_id++;
549  for my $i ($folder_id .. ($folder_id + 10)) {
550    if (! -e "$tmpdir/mbox$folder_id.$$") {
551      $self->{MBOX_WorkingDir} = "$tmpdir/mbox.$folder_id.$$";
552      last;
553    }
554    $folder_id++;
555  }
556  croak "can't seem to be able to create a working directory\n"
557    unless (defined($self->{MBOX_WorkingDir}));
558  $self->set_option('DotLock', 1)
559    unless defined($self->get_option('DotLock'));
560
561  croak "flock locking currently not implemented - sorry..."
562    if ($self->get_option('Flock'));
563
564  return 1;
565}
566
567=head2 is_valid_folder_format($foldername)
568
569Returns C<1> if the folder is a plain file and starts with the string
570'C<From >', otherwise it returns C<0>.
571
572Returns C<1> if the folder is a zero-length file and the
573C<$Mail::Format::DefaultEmptyFileFormat> class variable is set to
574'C<mbox>'.
575
576Otherwise it returns C<0>.
577
578=cut
579
580sub is_valid_folder_format {
581  my $foldername = shift;
582
583  return 0 if (! -f $foldername);
584  if (-z $foldername) {
585    return 1 if ($Mail::Folder::DefaultEmptyFileFormat eq 'mbox');
586    return 0;
587  }
588
589  my $fh = new IO::File $foldername or return 0;
590  my $line = <$fh>;
591  $fh->close;
592  return($line =~ /^From /);
593}
594
595=head2 create($foldername)
596
597Creates a new folder named C<$foldername>.  Returns C<0> if the folder
598already exists, otherwise returns C<1>.
599
600=cut
601
602sub create {
603  my $self = shift;
604  my $foldername = shift;
605
606  return 0 if (-e $foldername);
607  my $fh = new IO::File $foldername, O_CREAT|O_WRONLY, 0600
608    or croak "can't create $foldername: $!";
609  $fh->close;
610  return 1;
611}
612###############################################################################
613sub DESTROY {
614  my $self = shift;
615
616  # all of these are just in case...
617  # the appropriate methods should have removed them already...
618  if ($self->{Creator} == $$) {
619    $self->_unlock_folder;
620    $self->_clean_working_dir;
621  }
622}
623###############################################################################
624sub _absorb_mbox {
625  my $self = shift;
626  my $folder = shift;
627  my $seek_pos = shift;
628
629  my $qty_new_msgs = 0;
630  my $last_was_blank = 0;
631  my $is_blank = 0;
632  my $last_msgnum = $self->last_message;
633  my $new_msgnum = $last_msgnum;
634  my $outfile_is_open = 0;
635  my $outfh;
636
637  if (! -e $self->{MBOX_WorkingDir}) {
638    mkdir($self->{MBOX_WorkingDir}, 0700)
639      or (carp "can't create $self->{MBOX_WorkingDir}: $!" and return undef);
640  } elsif (! -d $self->{MBOX_WorkingDir}) {
641    carp "$self->{MBOX_WorkingDir} isn't a directory!";
642    return undef;
643  }
644
645  my $infh = new IO::File $folder or croak "can't open $folder: $!";
646  $infh->seek($seek_pos, 0)
647    or (carp "can't seek to $seek_pos in $folder: $!" and return undef);
648  while (<$infh>) {
649    $is_blank = /^$/ ? 1 : 0;
650    if (/^From /) {
651      $outfh->close if ($outfile_is_open);
652      $outfile_is_open = 0;
653      $new_msgnum++;
654      $qty_new_msgs++;
655      $self->remember_message($new_msgnum);
656      $outfh = new IO::File("$self->{MBOX_WorkingDir}/$new_msgnum",
657			    O_CREAT|O_WRONLY, 0600)
658	or (carp "can't create $self->{MBOX_WorkingDir}/$new_msgnum: $!"
659	    and return undef);
660      $outfile_is_open++;
661    } else {
662      $outfh->print("\n") if ($last_was_blank);
663    }
664    $last_was_blank = $is_blank ? 1 : 0;
665    $outfh->print($_) if !$is_blank;
666  }
667  $outfh->close if ($outfile_is_open);
668  $self->{MBOX_OldSeekPos} = $infh->tell;
669  $infh->close;
670
671  for my $msg (($last_msgnum + 1) .. $self->last_message) {
672    my $href = $self->get_header($msg);
673    my $status = $href->get('Status') or next;
674    $self->add_label($msg, 'seen') if ($status =~ /R/);
675  }
676
677  return $qty_new_msgs;
678}
679
680# Mbox files must have a 'From ' line at the beginning of each
681# message.  This routine will synthesize one from the 'From:' and
682# 'Date:' fields.  Original solution and code of the following
683# subroutine provided by Andreas Koenig
684
685# Since Mail::Header could have been told to coerce the 'From ' into a
686# Mail-From field, we look for both, and neither is found then
687# synthesize one.  In either case, a 'From ' string is returned.
688
689sub _coerce_header {
690  my $href = shift;
691  my $from = '';
692  my $date = '';
693
694  my $mailfrom = $href->get('From ') || $href->get('Mail-From');
695
696  unless ($mailfrom) {
697    if ($from =
698	$href->get('Reply-To') ||
699	$href->get('From') ||
700	$href->get('Sender') ||
701	$href->get('Return-Path')) { # this is dubious
702      my @addrs = Mail::Address->parse($from);
703      $from = $addrs[0]->address();
704    } else {
705      $from = 'NOFROM';
706    }
707
708    if ($date = $href->get('Date')) {
709      chomp($date);
710      $date = gmtime(str2time($date));
711    } else {
712      # There was no date field. Let's just stuff today's date in there
713      # for lack of a better value. I think it should be gmtime - someone
714      # correct me if this is wrong.
715      $date = gmtime;
716    }
717    chomp($date);
718    $mailfrom = "$from $date\n";
719  }
720
721  $href->delete('From ');
722  $href->delete('Mail-From');
723
724  $href->mail_from('KEEP');
725  $href->add('From ', $mailfrom, 0);
726  $href->mail_from('COERCE');
727
728  return $href;
729}
730
731sub _clean_working_dir {
732  my $self = shift;
733  # unlink(glob("$self->{MBOX_WorkingDir}/*"));
734  # maybe this should filter out directories, just to be safe...
735  my $dir = DirHandle->new($self->{MBOX_WorkingDir})
736    or croak "yeep! can't read $self->{MBOX_WorkingDir} disappeared: $!\n";
737  for my $file ($dir->read) {
738    next if (($file eq '.') || ($file eq '..'));
739    next if (-d "$self->{MBOX_WorkingDir}/$file");
740    unlink "$self->{MBOX_WorkingDir}/$file";
741  }
742  $dir->close;
743  rmdir($self->{MBOX_WorkingDir});
744}
745
746sub _lock_folder {
747  my $self = shift;
748  my $folder = $self->foldername;
749
750  croak "DotLock and Flock are both disabled\n"
751    unless ($self->get_option('DotLock') || $self->get_option('Flock'));
752
753  my $timeout = $self->get_option('Timeout');
754  $timeout ||= 10;
755  my $sleep = 1.0;		# maybe this should be configurable
756
757  if ($self->get_option('DotLock')) {
758    my $nfshack = 0;
759    my $lockfile = "$folder.lock";
760    if ($self->get_option('NFSLock')) {
761      my $host = hostname;
762      $nfshack++;
763      my $time = time;
764      $lockfile .= ".$time.$$.$host";
765    }
766    for my $num (1 .. int($timeout / $sleep)) {
767      my $fh;
768      if ($fh = new IO::File $lockfile, O_CREAT|O_EXCL|O_WRONLY, 0600) {
769	$fh->close;
770	if ($nfshack) {
771	  # Whhheeeee!!!!!
772	  # In NFS, the O_CREAT|O_EXCL isn't guaranteed to be atomic.
773	  # So we create a temp file that is probably unique in space
774	  # and time ($folder.lock.$time.$pid.$host).
775	  # Then we use link to create the real lock file. Since link
776	  # is atomic across nfs, this works.
777	  # It loses if it's on a filesystem that doesn't do long filenames.
778	  link $lockfile, "$folder.lock"
779	    or carp "link return: $!\n";
780	  my @statary = stat($lockfile);
781	  unlink $lockfile;
782	  if (!defined(@statary) || $statary[3] != 2) { # failed to link?
783	    goto RETRY;
784	  }
785	}
786	return 1;
787      }
788    RETRY:
789      last if ($! =~ /denied/);	# failure due to permissions
790      select(undef, undef, undef, $sleep);
791    }
792    return 0;
793  }
794
795  # return lock($folder) if ($self->get_option('Flock'));
796  return 0;
797}
798
799sub _unlock_folder {
800  my $self = shift;
801  my $folder = $self->foldername;
802
803  if ($self->get_option('DotLock')) {
804    return unlink("$folder.lock") if (-e "$folder.lock");
805    return 1;
806  }
807
808  # return unlock($folder) if ($self->get_option('Flock'));
809  return 0;
810}
811
812=head1 AUTHOR
813
814Kevin Johnson E<lt>F<kjj@pobox.com>E<gt>
815
816=head1 COPYRIGHT
817
818Copyright (c) 1996-1998 Kevin Johnson <kjj@pobox.com>.
819
820All rights reserved. This program is free software; you can
821redistribute it and/or modify it under the same terms as Perl itself.
822
823=cut
824
8251;
826