1#!/usr/local/bin/perl -Tw
2require 5.006_001;
3use strict;
4use sigtrap;
5
6=head1 NAME
7
8bfproxy - performs bogofilter functions via email
9
10=cut
11
12my $version = "0.3.5";
13
14################################################
15############### Copyleft Notice ################
16################################################
17
18# Copyright � 2003 Order amid Chaos, Inc.
19# Author: Tom Anderson
20# neo+bfproxy@orderamidchaos.com
21#
22# This program is open-source software; you can redistribute it
23# and/or modify it under the terms of the GNU General Public
24# License, v2, as published by the Free Software Foundation.
25#
26# This program is distributed in the hope that it will be useful,
27# but WITHOUT ANY WARRANTY; without even the implied warranty of
28# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
29# GNU General Public License for more details.
30#
31# You should have received a copy of the GNU General Public
32# License along with this program; if not, write to:
33#
34# Free Software Foundation
35# 59 Temple Place, Suite 330
36# Boston, MA 02111-1307  USA
37#
38# http://www.gnu.org/
39
40#################################################
41################# Documentation #################
42#################################################
43
44# use "perldoc bfproxy" or "bfproxy -h" to read this
45
46=head1 SYNOPSIS
47
48=head2 Command line usage:
49
50B<bfproxy> [I<options>] < [I<rfc822_email>]
51
52=head2 Procmail usage (recommended):
53
54Add to ~/.procmailrc the following recipe.  This recipe needs to
55be BEFORE the filtering recipe to prevent reclassifying all of
56the forwarded messages again.
57
58  :0fw
59  * ^To: $USER\+bfproxyID
60  | $HOME/.bogofilter/bfproxy
61
62Where I<$HOME> is your home directory, if not set in the environment,
63I<$USER> is your login name, if not set, and "I<bfproxyID>" is any
64string by which you choose to identify mail that should be
65processed by bfproxy.  Essentially, you are matching your regular
66email address with "I<+bfproxyID>" after it. Eg:
67
68  joe.shmoe+mybfproxystring@somedomain.com
69
70Joe's procmail recipe would then be:
71
72  :0fw
73  * ^To: joe.shmoe\+mybfproxystring
74  | $HOME/.bogofilter/bfproxy
75
76Don't use the default "I<bfproxyID>" though, and keep yours
77confidential so that people can't send bogofilter commands on your
78behalf.  The from address would have to be spoofed, and you'd get a
79report anyway, but why tempt fate?  Using your own unique
80"I<bfproxyID>" helps prevent automated attacks by spammers on
81bogofilter users.
82
83When entering the bogofilter recipe after the bfproxy recipe, you
84should add the C<E> flag so that bogofilter does not filter the
85results of running bfproxy.  Eg:
86
87  :0Efw
88  | bogofilter -uep
89
90It's not necessary, but suggested.  If you use the C<-u> option with
91bogofilter, these bfproxy-trained emails would be trained again
92otherwise, which is generally not what you want.  You would also
93risk having the results thrown in with your spam, which would also
94be undesireable.  Therefore, it's best to use the C<E> flag in the
95bogofilter recipe.
96
97Just remember to make sure that the bogofilter recipe is AFTER the
98bfproxy recipe to prevent screwing up your database by double-
99classifying those attached emails.
100
101=head2 Email usage:
102
103When using an MDA such as Procmail to handle running bfproxy, as
104intended, you'll need to send yourself emails with the special
105"I<bfproxyID>" included, followed by any options. Eg:
106
107[I<USER>]+[I<bfproxyID>]-[I<options>]@[I<DOMAIN>]
108
109The email you send to this address should contain any emails you
110want to correct as attachments.  Using your email client's forward-
111as-attachment functionality should suffice.  All such attached
112emails will be processed according to address-line options.
113
114=head1 OPTIONS:
115
116=head2 Address-line options:
117
118=over 4
119
120=item B<C>
121
122all emails with an original bogosity of I<spam> or I<ham> will
123be unregistered and then re-registered as the opposite bogosity
124(C<-Sn>, C<-Ns>).  This is useful if the emails were originally
125auto registered with the C<-u> flag or if a user error occurred.
126
127=item B<R>
128
129all emails with an original bogosity of I<spam> or I<ham> will
130be registered as the opposite bogosity (C<-n>, C<-s>).
131
132=item B<N>
133
134unregister all attached emails from the ham database
135
136=item B<S>
137
138unregister all attached emails from the spam database
139
140=item B<n>
141
142register all attached emails as ham
143
144=item B<s>
145
146register all attached emails as spam
147
148=item B<x>
149
150Repeatedly register emails that are within the "unsure" zone
151of spamicity values (between ham_cutoff and spam_cutoff) until
152they are correctly classified or until I<rmax> has been reached.
153
154=item B<v>
155
156Verbose output.  Show per-email registration information in
157addition to the subject lines and summary.
158
159=item B<b>
160
161Show benchmarking information in the output.
162
163=item B<c>
164
165Display bogofilter configuration info in the output.
166
167=back
168
169
170If no address-line command is provided, it defaults to C<C> for
171"correction" mode.  If you choose to use C<C> or C<R> and wish to
172register unsures at the same time, you can add a secondary command
173after the C<C> or C<R> to register unsures as either spam (C<s>)
174or ham (C<n>).  Eg:
175
176    joe.shmoe+mybfproxystring-Rs@somedomain.com
177
178The result of the above command string will be that all emails
179previously classified as spams will be registered as hams, all
180emails previously classified as hams will be registered as spams,
181and all emails previously classified as unsures will be registered
182as spams.  If a secondary command is not used with C<C> or C<R>,
183then unsures are not registered.
184
185=head2 Command-line options:
186
187=over 4
188
189=item B<h>
190
191display this help file
192
193=back
194
195=head1 REQUIRES
196
197=over 4
198
199=item *
200
201Perl 5.6.1
202
203=item *
204
205bogofilter
206
207=back
208
209=head1 DESCRIPTION
210
211Bfproxy accepts an email on stdin (generally from procmail or some
212other MDA) containing one or more forwarded-as-attachment emails.
213It will extract the attached emails and remove [SPAM] indicators
214from the subject line.  Then, it'll determine the original
215classification of each email from its X-Bogosity field, and pass
216the original emails to bogofilter with the appropriate flags as
217determined by address-line options.
218
219The attachments containing the emails should be message/rfc822
220format, which is how most email clients will do
221forwarding-by-attachment by default.  If they are not in this
222format, then it is quite likely that important header information
223is being omitted, and bfproxy will ignore such attachments.
224Moreover, bfproxy will only recognize attachments in encapsulating
225MIME types multipart/mixed or multipart/digest.  Quoted-text
226attachments will not work, nor will inline attachments which do not
227conform to the previous requirements (rfc822 and mixed or digest).
228Just about any MUA should be able to meet these requirements for
229forwarding emails.  If there is one that uses a slightly different,
230but possibly acceptable format, please send a bug report to
231neo+bfproxy@orderamidchaos.com.
232
233Also, bfproxy will not parse attached emails multiple MIME levels.
234If an attachment has an attachment, then the whole thing will get
235parsed as a single email rather than accounting for the headers in
236the attached email.  Do not nest corrections within each other.
237
238This scheme uses "subaddressing" to direct the MDA to run the
239email through bfproxy without requiring a new user or alias on the
240system.  Moreover, the output of the operation will arrive in the
241user's mailbox without having to resend a second email.  This also
242allows bfproxy to run with the permissions of the user whose
243database is to be altered by the command, and access his/her
244environment, including the appropriate config files and db files.
245See RFC3598 (http://www.ietf.org/) for details on "subaddressing".
246It is supported by common MTA's such as Sendmail.
247
248=head1 FAQ
249
250=head2 Why isn't it working?
251
252If you get an error the first time you try it, such as bfproxy
253saying it received zero emails for correction, then view the source
254of the email you sent and make sure the attachments use
255message/rfc822 format, and that the attachments are contained in a
256MIME part of multipart/mixed or multipart/digest.  If not, check your
257email client's options to see if you can get it into that format.  If
258you get any other errors/problems, please submit a bug report.
259
260=head2 Why call it 'bfproxy' instead of 'bogoproxy', etc.?
261
262Because "bogo" refers to bogons which are the elementary particle
263of bogosity which bogofilter filters.  We're not proxying bogons
264but commands to bogofilter.  Therefore, despite the lyrical
265synergy that "bogoproxy" would have, it simply wouldn't make any
266logical sense, whereas the less elegant bfproxy is more
267descriptive of its actual function... proxying "bf" commands.
268
269=head2 What happens to X-headers from my client/proxy/etc?
270
271X-headers are left as-is now, unlike initial bfproxy behavior.  The
272reason is that bogofilter's Bayesian algorithm will essentially make
273those X-headers irrelevant to classifications since an equal number
274will come from spams as from hams.  Therefore emails will end up
275being classified based on other tokens instead.  And this way,
276X-headers from senders' clients/proxies/servers/etc may be useful in
277classifying.  Furthermore, this reduces the number of ad hoc rules
278that bfproxy applies and would need to be maintained.  Bogofilter
279already strips the X-Bogosity header, so it is unnecessary to do so
280in bfproxy.
281
282=head1 BUGS
283
284=over 4
285
286=item Please report any.
287
288=back
289
290=head1 TODO
291
292=over 4
293
294=item Add more verbosity options
295
296=item Use bogofilter -Q to obtain non-standard tags, formats, etc.
297
298=item Suggestions welcome.
299
300=back
301
302=head1 SEE ALSO
303
304L<bogofilter>
305
306=head1 AUTHOR
307
308Tom Anderson <neo+bfproxy@orderamidchaos.com>
309
310=cut
311
312#################################################
313############### User Variables  #################
314#################################################
315
316# please edit according to your setup
317
318# default path
319our $path = "/bin:/usr/bin:/usr/local/bin";
320
321# default shell
322our $shell = "/bin/sh";
323
324# address-line command string (after the "+", before the "@")
325our $command = "bfproxyID";
326
327# maximum number of recursions for train-to-exhaustion
328our $rmax = 10;
329
330# delivery username; leave blank unless your MTA is unable
331# to deliver to individual virtual host users but instead
332# delivers to a single shared virtual host user
333our $mailbox = "";  # most setups will not need this
334
335# of course, modify the first line of this file,
336# the shebang, to point to your perl interpreter
337
338# do not edit below this line unless you really
339# know what you're doing
340
341#################################################
342############## Include Libraries ################
343#################################################
344
345use IO::Select;
346use IPC::Open2;
347use Fcntl qw(:flock);
348use Benchmark;
349
350#################################################
351############## Default Globals ##################
352#################################################
353
354$> = $<; # set effective user ID to real UID
355$) = $(; # set effective group ID to real GID
356
357# Make %ENV safer
358delete @ENV{qw(IFS CDPATH ENV BASH_ENV PATH SHELL)};
359
360# Set the environment explicitely
361$ENV{PATH} = $path;
362$ENV{SHELL} = $shell;
363
364################################################
365##################### Main #####################
366################################################
367
368# print help info
369if (defined @ARGV && $ARGV[0] =~ /^-+h.*/)
370{
371	my $bfproxy = $1 if $0 =~ /^([\w\/.\-~]*)$/;
372	system("perldoc $bfproxy"); exit(0);
373}
374
375# determine bogofilter settings
376our ($robx, $robs, $min_dev, $ham_cutoff, $spam_cutoff) = get_config();
377
378# extract the header
379my ($header, $boundary, $from, $options) = extract_header($command, $version);
380
381# run bogofilter on the extracted emails and gather the results
382my $results = process_emails($boundary, $from, $options);
383
384# output the new email containing the results
385print "$header\n$results\n";
386
387################################################
388############## Get Configuration  ##############
389################################################
390
391sub get_config
392{
393	my ($robx, $robs, $min_dev, $ham_cutoff, $spam_cutoff) = (0,0,0,0,0);
394
395	open (CONF, "bogofilter -Q |") or error("warn", "Could not get bogofilter settings");
396
397	while (<CONF>)
398	{
399		my $line = $_;
400
401		$robx = ($1+0) if $line =~ /robx\s*?=\s*?([0-9\.]+?)\s/i;
402		$robs = ($1+0) if $line =~ /robs\s*?=\s*?([0-9\.]+?)\s/i;
403		$min_dev = ($1+0) if $line =~ /min_dev\s*?=\s*?([0-9\.]+?)\s/i;
404		$ham_cutoff = ($1+0) if $line =~ /ham_cutoff\s*?=\s*?([0-9\.]+?)\s/i;
405		$spam_cutoff = ($1+0) if $line =~ /spam_cutoff\s*?=\s*?([0-9\.]+?)\s/i;
406	}
407	close CONF;
408
409	return $robx, $robs, $min_dev, $ham_cutoff, $spam_cutoff;
410}
411
412################################################
413############### Extract Header  ################
414################################################
415
416sub extract_header
417{
418	my $command = shift;
419	my $version = shift;
420	my $header = "";
421	my $boundary = "";
422	my $multipart = 0;
423	my $from = "";
424	my $to = "";
425
426	while (<STDIN>)
427	{
428		my $line = $_;
429
430		# record the boundary of the multipart/mixed or digest MIME message
431		$multipart = 1 if $line =~ /Content-Type: multipart\/(?:mixed|digest);/i;
432		$boundary = $1 if $multipart && $line =~ /boundary="(.*?)"/i;
433
434		# replace the from, subject, and content-type lines in the headers
435		$from = $1 if $line =~ s/^From:\s(.*)$/From: Bfproxy v$version/i;
436		$line =~ s/^Subject:\s.*$/Subject: training results/i;
437		$line =~ s/^Content-type:\s.*$/Content-Type: text\/plain/i;
438		$to = $1 if $line =~ /^To:\s(.*)$/i;
439
440		$header .= $line; # if $line =~ /^\w*?:\s|^From\s/i; # unless $line =~ /^X-[^\s]*?:/i;
441
442		# we're done with the header when we've found a blank line
443		last if $line !~ /[^\s]/i;
444	}
445
446	# extract the "user" portion of the "from" address
447	my $user = $1 if $from =~ /.*?((?:\w|-|\.)+?)\@.*$/i;
448
449	# parse any address-line flags
450	my $options = ($to =~ /.*?$user\+$command\-(.*?)\@.*$/i)? $1 : "";
451
452	return $header, $boundary, $from, $options;
453}
454
455################################################
456############### Process Emails  ################
457################################################
458
459sub process_emails
460{
461	# 1) Read the email from STDIN
462	# 2) Discard the enveloping email and process only the "message/rfc822" MIME parts
463	#    representing the forwarded-as-attachment incorrectly classified emails
464	# 3) Send emails to bogofilter for registration
465	# 4) Output the results
466
467	my $boundary=shift||""; # multipart boundary
468	my $from = shift || "";	# address of sender
469	my $options=shift ||"";	# address-line flags
470
471	my @count = (0,0,0,0);	# count of emails (found, processed, lines, words)
472	my $locked = 0;		# lock the boundary at the shallowest level where rfc822 content found
473	my %email;		# hash to temporarily hold a single email
474	my $found = 0;		# indicator of when to record an email
475					# 0: no rfc822 messages found yet
476					# 1: we've got a message/rfc822 header
477					# 2: we're out of the header part
478
479	# start timing the process
480	my $start_time = new Benchmark if $options =~ /b/;
481
482	# begin generating output
483	my $results .= "Bfproxy has processed the following emails with option $options:\n\n";
484
485	while (<STDIN>)
486	{
487		my $line = $_;
488		$count[2]++;
489
490		if ($found < 2) # try to find any new attached emails
491		{
492			# record the boundary of the multipart/mixed or digest MIME message
493			unless ($locked) { $boundary = $1 if $line =~ /Content-Type: multipart\/(?:mixed|digest); boundary="(.*?)"/i; }
494
495			# we've found a new attached email if the content-type is message/rfc822 and we have a boundary
496			if ($line =~ /Content-Type: message\/rfc822/i && $boundary)
497			{
498				$found++;
499				$locked = 1; # lock the boundary on first attachment
500			}
501
502			# we're ready to start recording if we're out of the attachment header (found a blank line)
503			$found++ if ($line !~ /[^\s]/i && $found == 1);
504		}
505		else # if we're inside an attached email, record it
506		{
507			if ($line !~ /\Q$boundary\E/)
508			{
509				# strip [SPAM] token from subject
510				$line =~ s/^Subject: (.*?)\[(?:SPAM|UNSURE)\]/Subject: $1/i;
511
512				# escape From lines in body
513				$line =~ s/^(From) />$1 /i;
514
515				# record properties of this email
516				$email{'spam'} = 'U' if $line =~ /^X-Bogosity: (?:Unsure)/i;
517				$email{'spam'} = 'S' if $line =~ /^X-Bogosity: (?:Yes|Spam)/i;
518				$email{'spam'} = 'H' if $line =~ /^X-Bogosity: (?:No|Ham|Clean)/i;
519				$email{'spamicity'} = $1 if $line =~ /spamicity=((?:\d|\.)*?), /;
520				$email{'content'} .= "$line" unless $line =~ /^X-[^\s]*?:/i;
521				$email{'return-path'} = $1 if $line =~ /^Return-Path: <?(.*?)(?=>|\n)/i;
522				$email{'from'} = $1 if !defined $email{'from'} && $line =~ /^From: (?:.*?<)?(.*?)(?=>|\n)/i;
523				$email{'subject'} = $1 if $line =~ /^Subject: (.{0, 40})(.*?)/i;
524				$email{'subject'} .= "..." if $line =~ /^Subject: .{40}.+?/i;
525			}
526			else # the email is finished once we've come to a boundary -- send it to bogofilter
527			{
528				$count[0]++;
529				$email{'subject'} = "No Subject" unless $email{'subject'};
530				$email{'spam'} = 'U' unless defined $email{'spam'};
531				$email{'spamicity'} = "None" unless $email{'spamicity'};
532
533				my $training = train(\%email, $options, 0, \@count);
534
535				# per-email output
536				$results .= "subject: $email{'subject'}\n";
537				$results .= "original spamicity: $email{'spamicity'}\n" . $training if $options =~ /v/;
538
539				# close this email and advance to the next
540				undef %email;
541				$found = 0;
542			}
543		}
544	}
545
546	$results .= "\n";
547	$results .= "$count[0] emails found, containing $count[2] lines total.\n";
548	$results .= "$count[3] words from $count[1] emails were registered.\n\n";
549
550	if ($options =~ /b/)
551	{
552		# calculate total running time
553		my $end_time = new Benchmark;
554		my $td = timediff($end_time, $start_time);
555		my $cpu = $td->[1]+$td->[2]+$td->[3]+$td->[4];
556		my $wall = $td->[0];
557		my $per_line = $cpu / $count[2]; $per_line = int(($per_line*1000) + .5 * ($per_line <=> 0)) / 1000;
558		my $per_mail = $cpu / $count[0]; $per_mail = int(($per_mail*1000) + .5 * ($per_mail <=> 0)) / 1000;
559
560		$results .= "Total running time was $wall wallclock secs, $cpu CPU secs.\n";
561		$results .= "$per_line CPU secs/line, $per_mail CPU secs/email.\n";
562		$results .= "Bfproxy required ".$td->[1]." usr + ".$td->[2]." sys = ".($td->[1]+$td->[2])." CPU secs.\n";
563		$results .= "Bogofilter required ".$td->[3]." usr + ".$td->[4]." sys = ".($td->[3]+$td->[4])." CPU secs.\n\n";
564	}
565
566	if ($options =~ /c/)
567	{
568		$results .= "robx=$robx, robs=$robs, min_dev=$min_dev, spam_cutoff=$spam_cutoff, ham_cutoff=$ham_cutoff, rmax=$rmax\n";
569	}
570
571	return $results;
572}
573
574################################################
575############### Perform Training ###############
576################################################
577
578sub train
579{
580	my $email = shift or error("warn", "No email provided");
581	my $options = shift || "C";
582	my $r = shift || 0; # recursion counter for exhaustive training
583	my $count = shift || (0,0,0,0);
584	my $results = "";
585
586	if ($options =~ /[CR]/) # if action is corrective, check the bogosity to decide what to do
587	{
588		if ($email->{'spam'} == 'S') # email was classified as SPAM
589		{
590			my $flag = ($options =~ /C/)? "Sn" : "n";
591			$results .= "user classification: ham\ncommand: bogofilter -${\$flag}\n" unless $r;
592
593			my ($status, $words, $output) = bogofilter("-${\$flag}evD 2>&1", $email->{'content'});
594			unless ($status || $r) { $count->[1]++; $count->[3]+=$words; $results .= "words: $words\n"; }
595			elsif ($status) {$results .= "could not process this email: $status\n";}
596		}
597		elsif ($email->{'spam'} == 'H') # email was classified as HAM
598		{
599			my $flag = ($options =~ /C/)? "Ns" : "s";
600			$results .= "user classification: spam\ncommand: bogofilter -${\$flag}\n" unless $r;
601
602			my ($status, $words, $output) = bogofilter("-${\$flag}evD 2>&1", $email->{'content'});
603			unless ($status || $r) { $count->[1]++; $count->[3]+=$words; $results .= "words: $words\n"; }
604			elsif ($status) {$results .= "could not process this email: $status\n";}
605		}
606		elsif ($email->{'spam'} == 'U') # email was classified as UNSURE
607		{
608			my $new_options = $options; $new_options =~ s/[CR]//gs;
609			$results .= train($email, $new_options, $r, $count);
610			return $results;
611		}
612	}
613	elsif ($options =~ /[NSns]/) # if action is direct, just do it
614	{
615		my $flag = $options; $flag =~ s/[^NSns]//gs;
616
617		my $class = ($flag =~ /s/)? "spam" : ($flag =~ /n/)? "ham" : "none";
618		$results .= "user classification: $class\ncommand: bogofilter -${\$flag}\n" unless $r;
619
620		my ($status, $words, $output) = bogofilter("-${\$flag}evD 2>&1", $email->{'content'});
621		unless ($status || $r) { $count->[1]++; $count->[3]+=$words; $results .= "words: $words\n"; }
622		elsif ($status) { $results .= "could not process this email: $status\n"; }
623	}
624	else { $results .= "could not process this email: no option passed in for this condition\n"; }
625
626	# check new spamicity
627	my ($status, $words, $output) = bogofilter("-te 2>&1", $email->{'content'});
628	unless ($status) { $output =~ s/^.*?((?:\d|\.)*?)$/$1/;	$results .= "new spamicity: $output"; }
629	else {$results .= "could not classify this email: $status\n";}
630
631	# train-to-exhaustion
632	$options =~ s/[^nsx]//gis; # only do registration on subsequent recursions
633	$results .= train($email, $options, $r+1, $count) if $options =~ /x/ && $r < $rmax && (($options =~ /s/ && $output < $spam_cutoff) || ($options =~ /n/ && $output > $ham_cutoff));
634
635	$results .= "\n" unless $r;
636
637	return $results;
638}
639
640################################################
641################ Run Bogofilter ################
642################################################
643
644sub bogofilter
645{
646	my $options = shift || "";
647	my $email = shift || "";
648	my $status = 0;
649	my $output = "";
650	my $words = "";
651
652	# for environments that don't split virtual host users into system users
653	$options = "-d $ENV{HOME}/.bogofilter/$mailbox " . $options if $mailbox;
654
655	# trap broken pipes
656	$SIG{PIPE} = \&sig_trap;
657
658	# fork pipe to bogofilter
659	my $pid = open2(\*R, \*W, "bogofilter $options") or error("die", "Could not open pipe to bogofilter: $!");
660
661	# lock filehandle
662	assert_dominance (\*W, LOCK_EX);
663
664	# create filehandle references
665	my $R = *R{IO};	my $W = *W{IO};
666
667	# create select object and add handles
668	my $sel = IO::Select->new($R, $W) or error("die", "Cannot create IO::Select object: $!");
669
670	# set autoflush
671	select((select(W), $| = 1)[0]);
672
673	# select strings
674	my $ex = join(', ', $sel->has_exception(0));
675	my $cw = join(', ', $sel->can_write(0));
676	my $w  = "$W";
677
678	# send email to bogofilter
679	unless ($ex =~ /\Q$w\E/) 	# unless there was an exception on this filehandle
680	{
681		if ($cw =~ /\Q$w\E/) 	# if this filehandle is writeable
682		{
683			syswrite W, $email or error("die", "Cannot write to pipe: bogofilter $options: $email: $!");
684		}
685		else {error("warn", "Filehandle $w is not in ready list: $cw: $!");}
686	}
687	else {error("warn", "Filehandle $w had an exception: $!");}
688
689	# close write filehandle to flush the buffer and read from the process outputs
690	if (close W)
691	{
692		$status = $? >> 8;  # exit status
693		sysread R, $output, 32 or error("warn", "Cannot read from pipe: $!");
694		$words = $1 if $output =~ /^#\s*?(\d*?) words.*/;
695	}
696	else {error("warn", "Could not flush output to bogofilter: $!");}
697
698	# close read filehandle
699	unless (close R) {error("warn", "Could not close input from bogofilter: $!");}
700
701	# terminate child processes
702	waitpid $pid, 0;
703
704	return $status, $words, $output;
705}
706
707################################################
708################ Error Handling ################
709################################################
710
711sub error
712{
713	my ($action, $msg) = @_;
714
715	die $msg if $action eq "die";
716	warn $msg unless $action eq "die";
717	# add other actions if you like
718}
719
720sub sig_trap
721{
722	my $sig = shift;
723	my ($action, $more) = ("warn", "");
724
725	sig:
726	{
727		$action = "warn", last sig if $sig =~ /ALRM/;
728		$action = "warn", last sig if $sig =~ /PIPE/;
729		$action = "warn", last sig if $sig =~ /CHLD/;
730		$action = "die" , last sig if $sig =~ /INT/;
731		$action = "die" , last sig if $sig =~ /HUP/;
732		$action = "warn";
733	}
734
735	my $waitedpid = wait;
736	$more = "; Reaped pid $waitedpid, exited with status " . ($? >> 8) if $waitedpid;
737
738	$SIG{$sig} = \&sig_trap;
739
740	error ($action, "Trapped signal SIG$sig$more");
741}
742
743################################################
744################# File Locking #################
745################################################
746
747sub assert_dominance
748{
749	my ($handle, $type) = @_;
750
751	# assert yourself
752	unless (flock ($handle, $type))
753	{
754		# get impatient
755		sleep 3;
756
757		# reassert yourself or give up
758		unless (flock ($handle, $type)) { error ("die", "File lock error: $!"); }
759	}
760
761	seek $handle, 0, 2;
762}
763