1# -*-perl-*- 2# 3# Copyright (c) 1996-1998 Kevin Johnson <kjj@pobox.com>. 4# 5# All rights reserved. This program is free software; you can 6# redistribute it and/or modify it under the same terms as Perl 7# itself. 8# 9# $Id: Mbox.pm,v 1.6 1998/04/05 17:21:53 kjj Exp $ 10 11require 5.00397; 12 13package Mail::Folder::Mbox; 14use strict; 15use vars qw($VERSION @ISA $folder_id); 16 17@ISA = qw(Mail::Folder); 18$VERSION = "0.07"; 19 20Mail::Folder->register_type('mbox'); 21 22=head1 NAME 23 24Mail::Folder::Mbox - A Unix mbox interface for Mail::Folder. 25 26B<WARNING: This code is in alpha release. Expect the interface to 27change.> 28 29=head1 SYNOPSIS 30 31C<use Mail::Folder::Mbox;> 32 33=head1 DESCRIPTION 34 35This module provides an interface to unix B<mbox> folders. 36 37The B<mbox> folder format is the standard monolithic folder structure 38prevalent on Unix. A single folder is contained within a single file. 39Each message starts with a line matching C</^From /> and ends with a 40blank line. 41 42The folder architecture does not provide any persistantly stored 43current message variable, so the current message in this folder 44interface defaults to C<1> and is not retained between C<open>s of a 45folder. 46 47If the C<Timeout> option is specified when the object is created, that 48value will be used to determine the timeout for attempting to aquire a 49folder lock. The default is 10 seconds. 50 51If the C<DotLock> option is specified when the object is created, that 52value will be used to determine whether or not to use 'C<.lock>' style 53folder locking. The default value is C<1>. 54 55If the C<Flock> option is specified when the object is created, that 56value will be used to determined whether or not to use C<flock> style 57folder locking. By default, the option is not set. 58 59If the C<NFSLock> option is specified when the object is created, that 60value will be used to determine whether or not special measures are 61taken when doing C<DotLock>ing. These special measures consist of 62constructing the lock file in a special manner that is more immune to 63atomicity problems with NFS when creating a folder lock file. By 64default, the option is not set. This option necessitates the ability 65to use long filenames. 66 67It is currently a fatal error to have both C<DotLock> and C<Flock> 68disabled. 69 70**NOTE** flock locking is currently disabled until I can sift out the 71'right way'. **NOTE** 72 73=cut 74 75use Mail::Folder; 76use Mail::Internet; 77use Mail::Header; 78use Mail::Address; 79use Date::Format; 80use Date::Parse; 81# use File::BasicFlock; 82use IO::File; 83use DirHandle; 84use Sys::Hostname; # for NFSLock option 85use Carp; 86 87$folder_id = 0; # used to generate a unique id per open folder 88 89=head1 METHODS 90 91=head2 open($folder_name) 92 93=over 2 94 95=item * Call the superclass C<open> method. 96 97=item * Check to see if it is a valid mbox folder. 98 99=item * Mark it as readonly if the folder is not writable. 100 101=item * Lock the folder. 102 103=item * Split the folder into individual messages in a temporary 104working directory. 105 106=item * Unlock the folder. 107 108=item * Cache all the headers. 109 110=item * Update the appropriate labels with information in the 111C<Status> fields. 112 113=item * Set C<current_message> to C<1>. 114 115=back 116 117=cut 118 119sub open { 120 my $self = shift; 121 my $foldername = shift; 122 123 return 0 unless $self->SUPER::open($foldername); 124 125 is_valid_folder_format($foldername) || (-z $foldername) 126 or croak "$foldername isn't an mbox folder"; 127 128 if (($< == 0) || ($> == 0)) { # if we're root we have to check it by hand 129 $self->set_readonly unless ((stat($foldername))[2] & 0200); 130 } else { 131 $self->set_readonly unless (-w $foldername); 132 } 133 # $self->set_readonly unless (-w $foldername); 134 135 $self->_lock_folder or return 0; 136 137 my $fh = new IO::File $foldername or croak "can't open $foldername: $!"; 138 $fh->seek(0, 2); 139 $self->{MBOX_OldSeekPos} = $fh->tell; 140 $fh->close; 141 142 my $qty_new_msgs = $self->_absorb_mbox($foldername, 0); 143 unless (defined($qty_new_msgs) && $self->_unlock_folder) { 144 $self->_clean_working_dir; 145 return 0; 146 } 147 $self->current_message(1); 148 149 return $qty_new_msgs; 150} 151 152=head2 close 153 154Deletes the internal working copy of the folder and calls the 155superclass C<close> method. 156 157=cut 158 159sub close { 160 my $self = shift; 161 162 $self->_clean_working_dir; 163 return $self->SUPER::close; 164} 165 166=head2 sync 167 168=over 2 169 170=item * Call the superclass C<sync> method. 171 172=item * Lock the folder. 173 174=item * Extract into the temporary working directory any new messages 175that have been appended to the folder since the last time the folder 176was either C<open>ed or C<sync>ed. 177 178=item * Create a new copy of the folder and populate it with the 179messages in the working copy that are not flagged for deletion and 180update the C<Status> fields appropriately. 181 182=item * Move the original folder to a temp location 183 184=item * Move the new folder into place 185 186=item * Delete the old original folder 187 188=item * Unlock the folder 189 190=back 191 192=cut 193 194sub sync { 195 my $self = shift; 196 197 my @statary; 198 my $folder = $self->foldername; 199 my $tmpfolder = "$folder.$$"; 200 my $infh; 201 my $outfh; 202 203 return -1 if ($self->SUPER::sync == -1); 204 205 my $last_msgnum = $self->last_message; 206 207 return -1 unless ($self->_lock_folder); 208 209 unless ($infh = new IO::File($folder)) { 210 $self->_unlock_folder; 211 croak "can't open $folder: $!"; 212 } 213 $infh->close; 214 215 my $qty_new_msgs = $self->_absorb_mbox($folder, $self->{MBOX_OldSeekPos}); 216 unless (defined($qty_new_msgs)) { 217 $self->_unlock_folder; 218 } 219 220 unless ($self->is_readonly) { 221 # we need to diddle current_message if it's pointing to a deleted msg 222 my $msg = $self->current_message; 223 while ($msg >= $self->first_message) { 224 last if (!$self->label_exists($msg, 'deleted')); 225 $msg = $self->prev_message($msg); 226 } 227 $self->current_message($msg); 228 229 for my $msg ($self->select_label('deleted')) { 230 unlink("$self->{MBOX_WorkingDir}/$msg"); 231 $self->forget_message($msg); 232 } 233 $self->clear_label('deleted'); 234 235 unless (@statary = stat($folder)) { 236 $self->_unlock_folder; 237 croak "can't stat $folder: $!"; 238 } 239 240 unless ($outfh = new IO::File $tmpfolder, O_CREAT|O_WRONLY, 0600) { 241 $self->_unlock_folder; 242 croak "can't create $tmpfolder: $!"; 243 } 244 245 # match the permissions of the original folder 246 unless (chmod(($statary[2] & 0777), $tmpfolder)) { 247 unlink($tmpfolder); 248 $self->_unlock_folder; 249 croak "can't chmod $tmpfolder: $!"; 250 } 251 252 for my $msg (sort { $a <=> $b } $self->message_list) { 253 my $mref = $self->get_message($msg); 254 my $href = $self->get_header($msg); 255 256 unless ($self->get_option('NotMUA')) { 257 my $status = 'O'; 258 $status = 'RO' if $self->label_exists($msg, 'seen'); 259 $href->replace('Status', $status, -1); 260 } 261 262 my $from = $href->get('Mail-From') || $href->get('From '); 263 264 # we dup them cuz we're going to modify them 265 my $dup_href = $href->dup; 266 my $dup_mref = $mref->dup; 267 $dup_href->delete('Mail-From') if ($dup_href->count('Mail-From')); 268 269 $outfh->print("From $from"); 270 $dup_href->print($outfh); 271 $outfh->print("\n"); 272 $dup_mref->escape_from unless $self->get_option('Content-Length'); 273 $dup_mref->print_body($outfh); 274 $outfh->print("\n"); 275 } 276 $outfh->close; 277 278 # Move the original folder to a temp location 279 280 unless (rename($folder, "$folder.tmp")) { 281 $self->_unlock_folder; 282 croak "can't move $folder out of the way: $!"; 283 } 284 285 # Move the new folder into place 286 287 unless (rename($tmpfolder, $folder)) { 288 $self->_unlock_folder; 289 croak "gack! can't rename $folder.tmp to $folder: $!" 290 unless (rename("$folder.tmp", $folder)); 291 croak "can't move $folder to $folder.tmp: $!"; 292 } 293 294 # Delete the old original folder 295 296 unless (unlink("$folder.tmp")) { 297 $self->_unlock_folder; 298 croak "can't unlink $folder.tmp: $!"; 299 } 300 } 301 302 $self->_unlock_folder; 303 304 return $qty_new_msgs; 305} 306 307=head2 pack 308 309Calls the superclass C<pack> method. 310 311Renames the message list to that there are no gaps in the numbering 312sequence. 313 314It also tweaks the current_message accordingly. 315 316=cut 317 318sub pack { 319 my $self = shift; 320 321 my $newmsg = 0; 322 my $curmsg = $self->current_message; 323 324 return 0 if (!$self->SUPER::pack); 325 326 for my $msg (sort { $a <=> $b } $self->message_list) { 327 $newmsg++; 328 if ($msg > $newmsg) { 329 $self->current_message($newmsg) if ($msg == $curmsg); 330 $self->remember_message($newmsg); 331 $self->cache_header($newmsg, $self->{Messages}{$msg}{Header}); 332 $self->forget_message($msg); 333 } 334 } 335 336 return 1; 337} 338 339=item get_message ($msg_number) 340 341Calls the superclass C<get_message> method. 342 343Retrieves the given mail message file into a B<Mail::Internet> object 344reference, sets the 'C<seen>' label, and returns the reference. 345 346If the 'Content-Length' option is not set, then C<get_message> will 347unescape 'From ' lines in the body of the message. 348 349=cut 350 351sub get_message { 352 my $self = shift; 353 my $key = shift; 354 355 return undef unless $self->SUPER::get_message($key); 356 357 my $file = "$self->{MBOX_WorkingDir}/$key"; 358 359 my $fh = new IO::File $file or croak "whoa! can't open $file: $!"; 360 my $mref = new Mail::Internet($fh, 361 Modify => 0, 362 MailFrom => 'COERCE'); 363 $mref->unescape_from unless $self->get_option('Content-Length'); 364 $fh->close; 365 366 my $href = $mref->head; 367 $self->cache_header($key, $href); 368 369 $self->add_label($key, 'seen'); 370 371 return $mref; 372} 373 374=item get_message_file ($msg_number) 375 376Calls the superclass C<get_message_file> method. 377 378Retrieves the given mail message file and returns the name of the file. 379 380Returns C<undef> on failure. 381 382This method does NOT currently do any 'From ' unescaping. 383 384=cut 385 386sub get_message_file { 387 my $self = shift; 388 my $key = shift; 389 390 return undef unless $self->SUPER::get_message($key); 391 392 return "$self->{MBOX_WorkingDir}/$key"; 393} 394 395=head2 get_header($msg_number) 396 397If the particular header has never been retrieved then C<get_header> 398loads (in a manner similar to C<get_message>) the header of the given 399mail message into C<$self-E<gt>{Messages}{$msg_number}{Header}> and 400returns the object reference. 401 402If the header for the given mail message has already been retrieved in 403a prior call to C<get_header>, then the cached entry is returned. 404 405It also calls the superclass C<get_header> method. 406 407=cut 408 409sub get_header { 410 my $self = shift; 411 my $key = shift; 412 413 my $hdr = $self->SUPER::get_header($key); 414 return $hdr if defined($hdr); 415 416 # return undef unless ($self->SUPER::get_header($key)); 417 418 # return $self->{Messages}{$key}{Header} if ($self->{Messages}{$key}{Header}); 419 420 my $file = "$self->{MBOX_WorkingDir}/$key"; 421 422 my $fh = new IO::File $file or croak "can't open $file: $!"; 423 my $href = new Mail::Header($fh, 424 Modify => 0, 425 MailFrom => 'COERCE'); 426 $fh->close; 427 428 $self->cache_header($key, $href); 429 430 return $href; 431} 432 433=head2 append_message($mref) 434 435=over 2 436 437Calls the superclass C<append_message> method. 438 439Creates a new mail message file, in the temporary working directory, 440with the contents of the mail message contained in C<$mref>. 441It will synthesize a 'From ' line if one is not present in 442C<$mref>. 443 444If the 'Content-Length' option is not set, then C<get_message> will 445escape 'From ' lines in the body of the message. 446 447=cut 448 449sub append_message { 450 my $self = shift; 451 my $mref = shift; 452 453 my $msgnum = $self->last_message; 454 455 my $dup_mref = $mref->dup; 456 457 return 0 unless $self->SUPER::append_message($dup_mref); 458 459 my $dup_href = $mref->head->dup; 460 $dup_mref->escape_from unless ($self->get_option('Content-Length')); 461 462 $msgnum++; 463 my $fh = new IO::File("$self->{MBOX_WorkingDir}/$msgnum", 464 O_CREAT|O_WRONLY, 0600) 465 or croak "can't create $self->{MBOX_WorkingDir}/$msgnum: $!"; 466 _coerce_header($dup_href); 467 $dup_href->print($fh); 468 $fh->print("\n"); 469 $dup_mref->print_body($fh); 470 $fh->close; 471 472 $self->remember_message($msgnum); 473 474 return 1; 475} 476 477=head2 update_message($msg_number, $mref) 478 479Calls the superclass C<update_message> method. 480 481Replaces the message pointed to by C<$msg_number> with the contents of 482the C<Mail::Internet> object reference C<$mref>. 483 484It will synthesize a 'From ' line if one is not present in 485$mref. 486 487If the 'Content-Length' option is not set, then C<get_message> will 488escape 'From ' lines in the body of the message. 489 490=cut 491 492sub update_message { 493 my $self = shift; 494 my $key = shift; 495 my $mref = shift; 496 497 my $file_pos = 0; 498 my $filename = "$self->{MBOX_WorkingDir}/$key"; 499 500 my $dup_mref = $mref->dup; 501 my $dup_href = $dup_mref->head->dup; 502 503 return 0 unless $self->SUPER::update_message($key, $dup_mref); 504 505 $dup_mref->escape_from unless $self->get_option('Content-Length'); 506 507 my $fh = new IO::File "$filename.new", O_CREAT|O_WRONLY, 0600 508 or croak "can't create $filename.new: $!"; 509 _coerce_header($dup_href); 510 $dup_href->print($fh); 511 $fh->print("\n"); 512 $dup_mref->print_body($fh); 513 $fh->close; 514 515 rename("$filename.new", $filename) or 516 croak "can't rename $filename.new to $filename: $!"; 517 518 return 1; 519} 520 521=head2 init 522 523Initializes various items specific to B<Mbox>. 524 525=over 2 526 527=item * Determines an appropriate temporary directory. If the 528C<TMPDIR> environment variable is set, it uses that, otherwise it uses 529C</tmp>. The working directory will be a subdirectory in that 530directory. 531 532=item * Bumps a sequence number used for unique temporary filenames. 533 534=item * Initializes C<$self-E<gt>{WorkingDir}> to the name of a 535directory that will be used to hold the working copies of the messages 536in the folder. 537 538=back 539 540=cut 541 542sub init { 543 my $self = shift; 544 545 my $tmpdir = $ENV{TMPDIR} ? $ENV{TMPDIR} : "/tmp"; 546 547 $self->{MBOX_WorkingDir} = undef; 548 $folder_id++; 549 for my $i ($folder_id .. ($folder_id + 10)) { 550 if (! -e "$tmpdir/mbox$folder_id.$$") { 551 $self->{MBOX_WorkingDir} = "$tmpdir/mbox.$folder_id.$$"; 552 last; 553 } 554 $folder_id++; 555 } 556 croak "can't seem to be able to create a working directory\n" 557 unless (defined($self->{MBOX_WorkingDir})); 558 $self->set_option('DotLock', 1) 559 unless defined($self->get_option('DotLock')); 560 561 croak "flock locking currently not implemented - sorry..." 562 if ($self->get_option('Flock')); 563 564 return 1; 565} 566 567=head2 is_valid_folder_format($foldername) 568 569Returns C<1> if the folder is a plain file and starts with the string 570'C<From >', otherwise it returns C<0>. 571 572Returns C<1> if the folder is a zero-length file and the 573C<$Mail::Format::DefaultEmptyFileFormat> class variable is set to 574'C<mbox>'. 575 576Otherwise it returns C<0>. 577 578=cut 579 580sub is_valid_folder_format { 581 my $foldername = shift; 582 583 return 0 if (! -f $foldername); 584 if (-z $foldername) { 585 return 1 if ($Mail::Folder::DefaultEmptyFileFormat eq 'mbox'); 586 return 0; 587 } 588 589 my $fh = new IO::File $foldername or return 0; 590 my $line = <$fh>; 591 $fh->close; 592 return($line =~ /^From /); 593} 594 595=head2 create($foldername) 596 597Creates a new folder named C<$foldername>. Returns C<0> if the folder 598already exists, otherwise returns C<1>. 599 600=cut 601 602sub create { 603 my $self = shift; 604 my $foldername = shift; 605 606 return 0 if (-e $foldername); 607 my $fh = new IO::File $foldername, O_CREAT|O_WRONLY, 0600 608 or croak "can't create $foldername: $!"; 609 $fh->close; 610 return 1; 611} 612############################################################################### 613sub DESTROY { 614 my $self = shift; 615 616 # all of these are just in case... 617 # the appropriate methods should have removed them already... 618 if ($self->{Creator} == $$) { 619 $self->_unlock_folder; 620 $self->_clean_working_dir; 621 } 622} 623############################################################################### 624sub _absorb_mbox { 625 my $self = shift; 626 my $folder = shift; 627 my $seek_pos = shift; 628 629 my $qty_new_msgs = 0; 630 my $last_was_blank = 0; 631 my $is_blank = 0; 632 my $last_msgnum = $self->last_message; 633 my $new_msgnum = $last_msgnum; 634 my $outfile_is_open = 0; 635 my $outfh; 636 637 if (! -e $self->{MBOX_WorkingDir}) { 638 mkdir($self->{MBOX_WorkingDir}, 0700) 639 or (carp "can't create $self->{MBOX_WorkingDir}: $!" and return undef); 640 } elsif (! -d $self->{MBOX_WorkingDir}) { 641 carp "$self->{MBOX_WorkingDir} isn't a directory!"; 642 return undef; 643 } 644 645 my $infh = new IO::File $folder or croak "can't open $folder: $!"; 646 $infh->seek($seek_pos, 0) 647 or (carp "can't seek to $seek_pos in $folder: $!" and return undef); 648 while (<$infh>) { 649 $is_blank = /^$/ ? 1 : 0; 650 if (/^From /) { 651 $outfh->close if ($outfile_is_open); 652 $outfile_is_open = 0; 653 $new_msgnum++; 654 $qty_new_msgs++; 655 $self->remember_message($new_msgnum); 656 $outfh = new IO::File("$self->{MBOX_WorkingDir}/$new_msgnum", 657 O_CREAT|O_WRONLY, 0600) 658 or (carp "can't create $self->{MBOX_WorkingDir}/$new_msgnum: $!" 659 and return undef); 660 $outfile_is_open++; 661 } else { 662 $outfh->print("\n") if ($last_was_blank); 663 } 664 $last_was_blank = $is_blank ? 1 : 0; 665 $outfh->print($_) if !$is_blank; 666 } 667 $outfh->close if ($outfile_is_open); 668 $self->{MBOX_OldSeekPos} = $infh->tell; 669 $infh->close; 670 671 for my $msg (($last_msgnum + 1) .. $self->last_message) { 672 my $href = $self->get_header($msg); 673 my $status = $href->get('Status') or next; 674 $self->add_label($msg, 'seen') if ($status =~ /R/); 675 } 676 677 return $qty_new_msgs; 678} 679 680# Mbox files must have a 'From ' line at the beginning of each 681# message. This routine will synthesize one from the 'From:' and 682# 'Date:' fields. Original solution and code of the following 683# subroutine provided by Andreas Koenig 684 685# Since Mail::Header could have been told to coerce the 'From ' into a 686# Mail-From field, we look for both, and neither is found then 687# synthesize one. In either case, a 'From ' string is returned. 688 689sub _coerce_header { 690 my $href = shift; 691 my $from = ''; 692 my $date = ''; 693 694 my $mailfrom = $href->get('From ') || $href->get('Mail-From'); 695 696 unless ($mailfrom) { 697 if ($from = 698 $href->get('Reply-To') || 699 $href->get('From') || 700 $href->get('Sender') || 701 $href->get('Return-Path')) { # this is dubious 702 my @addrs = Mail::Address->parse($from); 703 $from = $addrs[0]->address(); 704 } else { 705 $from = 'NOFROM'; 706 } 707 708 if ($date = $href->get('Date')) { 709 chomp($date); 710 $date = gmtime(str2time($date)); 711 } else { 712 # There was no date field. Let's just stuff today's date in there 713 # for lack of a better value. I think it should be gmtime - someone 714 # correct me if this is wrong. 715 $date = gmtime; 716 } 717 chomp($date); 718 $mailfrom = "$from $date\n"; 719 } 720 721 $href->delete('From '); 722 $href->delete('Mail-From'); 723 724 $href->mail_from('KEEP'); 725 $href->add('From ', $mailfrom, 0); 726 $href->mail_from('COERCE'); 727 728 return $href; 729} 730 731sub _clean_working_dir { 732 my $self = shift; 733 # unlink(glob("$self->{MBOX_WorkingDir}/*")); 734 # maybe this should filter out directories, just to be safe... 735 my $dir = DirHandle->new($self->{MBOX_WorkingDir}) 736 or croak "yeep! can't read $self->{MBOX_WorkingDir} disappeared: $!\n"; 737 for my $file ($dir->read) { 738 next if (($file eq '.') || ($file eq '..')); 739 next if (-d "$self->{MBOX_WorkingDir}/$file"); 740 unlink "$self->{MBOX_WorkingDir}/$file"; 741 } 742 $dir->close; 743 rmdir($self->{MBOX_WorkingDir}); 744} 745 746sub _lock_folder { 747 my $self = shift; 748 my $folder = $self->foldername; 749 750 croak "DotLock and Flock are both disabled\n" 751 unless ($self->get_option('DotLock') || $self->get_option('Flock')); 752 753 my $timeout = $self->get_option('Timeout'); 754 $timeout ||= 10; 755 my $sleep = 1.0; # maybe this should be configurable 756 757 if ($self->get_option('DotLock')) { 758 my $nfshack = 0; 759 my $lockfile = "$folder.lock"; 760 if ($self->get_option('NFSLock')) { 761 my $host = hostname; 762 $nfshack++; 763 my $time = time; 764 $lockfile .= ".$time.$$.$host"; 765 } 766 for my $num (1 .. int($timeout / $sleep)) { 767 my $fh; 768 if ($fh = new IO::File $lockfile, O_CREAT|O_EXCL|O_WRONLY, 0600) { 769 $fh->close; 770 if ($nfshack) { 771 # Whhheeeee!!!!! 772 # In NFS, the O_CREAT|O_EXCL isn't guaranteed to be atomic. 773 # So we create a temp file that is probably unique in space 774 # and time ($folder.lock.$time.$pid.$host). 775 # Then we use link to create the real lock file. Since link 776 # is atomic across nfs, this works. 777 # It loses if it's on a filesystem that doesn't do long filenames. 778 link $lockfile, "$folder.lock" 779 or carp "link return: $!\n"; 780 my @statary = stat($lockfile); 781 unlink $lockfile; 782 if (!defined(@statary) || $statary[3] != 2) { # failed to link? 783 goto RETRY; 784 } 785 } 786 return 1; 787 } 788 RETRY: 789 last if ($! =~ /denied/); # failure due to permissions 790 select(undef, undef, undef, $sleep); 791 } 792 return 0; 793 } 794 795 # return lock($folder) if ($self->get_option('Flock')); 796 return 0; 797} 798 799sub _unlock_folder { 800 my $self = shift; 801 my $folder = $self->foldername; 802 803 if ($self->get_option('DotLock')) { 804 return unlink("$folder.lock") if (-e "$folder.lock"); 805 return 1; 806 } 807 808 # return unlock($folder) if ($self->get_option('Flock')); 809 return 0; 810} 811 812=head1 AUTHOR 813 814Kevin Johnson E<lt>F<kjj@pobox.com>E<gt> 815 816=head1 COPYRIGHT 817 818Copyright (c) 1996-1998 Kevin Johnson <kjj@pobox.com>. 819 820All rights reserved. This program is free software; you can 821redistribute it and/or modify it under the same terms as Perl itself. 822 823=cut 824 8251; 826