1#
2#   MailScanner - SMTP Email Processor
3#   Copyright (C) 2002  Julian Field
4#
5#   $Id: Message.pm 5099 2011-06-27 10:40:34Z sysjkf $
6#
7#   This program is free software; you can redistribute it and/or modify
8#   it under the terms of the GNU General Public License as published by
9#   the Free Software Foundation; either version 2 of the License, or
10#   (at your option) any later version.
11#
12#   This program is distributed in the hope that it will be useful,
13#   but WITHOUT ANY WARRANTY; without even the implied warranty of
14#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15#   GNU General Public License for more details.
16#
17#   You should have received a copy of the GNU General Public License
18#   along with this program; if not, write to the Free Software
19#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20#
21#      https://www.mailscanner.info
22#
23
24package MailScanner::Message;
25
26use strict 'vars';
27use strict 'refs';
28no  strict 'subs'; # Allow bare words for parameter %'s
29
30use DirHandle;
31use Time::localtime qw/ctime/;
32use Time::HiRes qw/time/;
33use MIME::Parser;
34use MIME::Decoder::UU;
35use MIME::Decoder::BinHex;
36use MIME::WordDecoder;
37use POSIX qw(:signal_h setsid);
38use HTML::TokeParser;
39use HTML::Parser;
40use HTML::Entities qw(decode_entities);
41use Archive::Zip qw( :ERROR_CODES );
42use Filesys::Df;
43use Digest::MD5;
44use OLE::Storage_Lite;
45use Fcntl;
46use File::Path;
47use File::Temp;
48use MailScanner::FileInto;
49use IO::Pipe;
50use IO::File;
51#use Data::Dumper;
52
53# Install an extra MIME decoder for badly-header uue messages.
54install MIME::Decoder::UU 'uuencode';
55# Install an extra MIME decoder for binhex-encoded attachments.
56install MIME::Decoder::BinHex 'binhex','binhex40','mac-binhex40','mac-binhex';
57
58use vars qw($VERSION);
59
60### The package version, both in 1.23 style *and* usable by MakeMaker:
61$VERSION = substr q$Revision: 5099 $, 10;
62
63# Attributes are
64#
65# $id			set by new
66# $store		set by new (is a SMDiskStore for now)
67# #$hpath		set by new
68# #$dpath		set by new
69# $size			set by new (copy of $store->{size})
70# $maxmessagesize	set in SweepContent.pm, copied out of configuration
71# #$inhhandle		set by new
72# #$indhandle		set by new
73# $from			set by ReadQf
74# $fromdomain		set by new
75# $fromuser		set by new
76# @to			set by new
77# @todomain		set by new
78# @touser		set by new
79# $subject		set by ReadQf
80# @headers		set by ReadQf # just the headers, with /^H/ removed
81#                       Note @headers is read-only!
82# @metadata             set by ReadQf # the entire qf file excluding final "."
83# $returnpathflags	set by ReadQf # Only used for sendmail at the moment
84# $clientip		set by ReadQf
85# $scanme		set by NeedsScanning (from MsgBatch constructor)
86# $workarea		set by new
87# @archiveplaces	set by new (addresses and dirs)
88# @quarantineplaces	set by Quarantine.pm
89# $spamwhitelisted      set by IsSpam
90# $spamblacklisted      set by IsSpam
91# $isspam               set by IsSpam
92# $issaspam             set by IsSpam
93# $isrblspam            set by IsSpam
94# $rblspamreport      set by IsSpam
95# $ishigh               set by IsSpam
96# $sascore		set by IsSpam
97# $spamreport           set by IsSpam
98# $sarules		set by IsSpam (ref to hash of rulenames hit)
99# $saruleaction		set by IsSpam (hash of triggered actions by rulename)
100# $mcpwhitelisted       set by IsMCP
101# $ismcp                set by IsMCP
102# $issamcp              set by IsMCP
103# $ishighmcp            set by IsMCP
104# $mcpsascore		set by IsMCP
105# $mcpreport            set by IsMCP
106# $deleted		set by delivery functions
107# $headerspath          set by WriterHeaderFile # file is read-only
108# $cantparse		set by Explode
109# $toomanyattach	set by Explode
110# $cantdisinfect	set by ExplodeArchive
111# $entity		set by Explode
112# $tnefentity		set by Explode (only set if it's a TNEF message)
113# $tnefname		set by Explode (contains the type indicator)
114# $badtnef		set by Explode
115# $entity		set by Explode
116# %name2entity		set by Explode
117# %file2parent		set by Explode (parent will have type indicator)
118# $virusinfected	set by new and ScanBatch
119# $nameinfected		set by new and ScanBatch
120# JKF 19/12/2007 $passwordinfected	set by new and ScanBatch
121# $otherinfected	set by new and ScanBatch
122# $sizeinfected		set by new and ScanBatch
123# %virusreports         set by TryCommercial (key is filename)
124# %virustypes           set by TryCommercial (key is filename)
125# %namereports		set by filename trap checker
126# %nametypes		set by filename trap checker
127# %otherreports		set by TryOther (key is filename)
128# %othertypes		set by TryOther (key is filename)
129# %entityreports        set by TryOther (key is entity)
130# %oldviruses		set by DisinfectAndDeliver
131# $infected             set by CombineReports
132# %allreports		set by CombineReports
133# %alltypes		set by CombineReports
134# %entity2parent	set by CreateEntitiesHelpers
135# %entity2file		set by CreateEntitiesHelpers
136# %entity2safefile	set by CreateEntitiesHelpers
137# %file2entity		set by CreateEntitiesHelpers (maps original evil names)
138# %file2safefile	set by CreateEntitiesHelpers (evil==>safe)
139# %safefile2file	set by CreateEntitiesHelpers (safe==>evil)
140# $numberparts		set by CreateEntitiesHelpers
141# $signed               set by Clean
142# $externalsigned       set by DeliverModifiedBody
143# $bodymodified         set by Clean and SignUninfected
144# $silent		set by FindSilentAndNoisyInfections
145#				if infected with a silent virus
146# $noisy		set by FindSilentAndNoisyInfections
147#				if infected with a noisy virus
148# $needsstripping       set by HandleSpam and HandleMCP
149# $stillwarn		set by new # Still send warnings even if deleted
150# $needsencapsulating	set by HandleSpam and HAndleMCP
151# %postfixrecips	set by ReadQf in Postfix support only. Hash of all the
152#				'R' addresses in the message to aid rebuilding.
153# %originalrecips	set by ReadQf in Postfix support only. Hash of all the
154#				'O' addresses in the message to aid rebuilding.
155# %deleteattach		set by ScanBatch and CheckFiletypeRules. True if
156#                              attachment is to be deleted rather than stored.
157# $tagstoconvert	set by ??? is list of HTML tags to dis-arm
158# $gonefromdisk		set by calls to DeleteUnlock
159# $subjectwasunsafe	set by SweepContent.pm
160# $safesubject		set by SweepContent.pm
161# $mcpdelivering        set by HandleMCP
162# $salongreport		set by SA::Checks (longest version of SA report)
163# @spamarchive          set by HandleHamAndSpam, list of places we have
164#                              quarantined spam/mcp message. Used later to
165#                              delete infected spam from spam quarantine.
166# $dontdeliver          set by HandleHamAndSpam, true if the message was put
167#                              in the spam/mcp archive, but still needs to be
168#                              virus-scanned so we can remove it again if
169#                              necessary. But it doesn't need repairing, as we
170#                              won't be delivering it anyway.
171# $datenumber		set by new
172# $hournumber		set by new
173# $datestring		set by new
174# $messagedisarmed	set by DisarmHTMLTree
175# @disarmedtags                All the HTML tags (incl. phishing) that we found
176#                              and disarmed or highlighted.
177# $quarantinedinfectionsset by QuarantineInfections, has this message already
178#                              been quarantined, so doesn't need quarantining
179#                              in QuarantineModifiedBodies.
180# $actions		set by HandleHamAndSpam, saves action list.
181# $ret                  set by new, true if BarricadeMX RET hash is valid
182# $utf8subject		set by ReadQf, UTF8 rep'n of 'unsafe' subject, used by
183#			       MailWatch v2
184# $mtime                set by ReadQf, mtime from stat of the message qfile
185# $sigimagepresent  set by DisarmHTML, did we find a MailScanner signature image?
186# $isreply        set by SignCleanMessage, did we find signs this is a reply
187# $dkimfriendly		set by new if we are running DKIM-friendly.
188# $newheadersattop	set by new if 'New Headers At Top' = yes at all.
189# $archivesare		set by new, regexp showing what types are Archived.
190# $spamvirusreport	set by virus checking, is comma-separated list of
191#                 	spam-virus names.
192#
193
194# Constructor.
195# Takes id.
196# Takes options $fake which is just used for making an object for
197# the command-line testing.
198# This isn't specific to the MTA at all, so is all done here.
199sub new {
200  my $type = shift;
201  my($id, $queuedirname, $getipfromheader, $fake) = @_;
202  my $this = {};
203  my($mta, $addr, $user, $domain);
204  my($archiveplaces);
205  my $rejectflag = 0;
206  my $rejectmsg = '';
207  my $rejectheader = '';
208  my $orgname = '';
209
210  #print STDERR "Creating message $id\n";
211
212  $this->{id} = $id;
213  @{$this->{archiveplaces}}    = ();
214  @{$this->{spamarchive}}      = ();
215  @{$this->{quarantineplaces}} = ();
216
217  if ($fake) {
218    bless $this, $type;
219    $this->{store} = new MailScanner::SMDiskStore($id, $queuedirname);
220    return $this;
221  }
222
223  # Create somewhere to store the message
224  $this->{store} = new MailScanner::SMDiskStore($id, $queuedirname);
225
226  # Try to open and exclusive-lock this message. Return undef if failed.
227  #print STDERR "Trying to lock message " . $this->{id} . "\n";
228  $this->{store}->Lock() or return undef;
229  #print STDERR "Locked message\n";
230
231  # getipfromheader used to be a yes or no option
232  # It is now a number. yes = 1, no = 0.
233  $getipfromheader = 1 if $getipfromheader =~ /y/i;
234  $getipfromheader = 0 if $getipfromheader =~ /n/i || $getipfromheader eq "";
235  MailScanner::Log::WarnLog("Illegal value for Read IP Address From Received Header, should be a number") unless $getipfromheader =~ /^\d+$/;
236
237  # Now try to fill as much of the structure as possible
238  $this->{size} = $this->{store}->size();
239  if ($global::MS->{mta}->ReadQf($this, $getipfromheader) != 1) {
240    bless $this, $type;
241    $this->{INVALID} = 1;
242    return $this;
243  }
244  #  or return 'INVALID'; # Return empty if fails
245
246  # Are we in milter mode, and has the message been requeued
247  # from a local relay reject?
248  my $mta = MailScanner::Config::Value('mta');
249  if ($mta =~ /^msmail/i) {
250      my($header_line);
251      $orgname = MailScanner::Config::DoPercentVars('%org-name%');
252      $rejectheader = "X-$orgname-MailScanner-Relay-Reject:";
253      my $pos = 0;
254      foreach $header_line (@{$this->{headers}}) {
255          if($header_line =~ /^$rejectheader/) {
256              # Message was rejected at relay
257              # Rewrite header for quarantine so that it doesn't get
258              # flagged if released and allows for easy troubleshooting
259              $header_line =~ s/^.*: //;
260              $rejectmsg = $header_line;
261              splice @{$this->{headers}}, $pos, 1, 0, 'X-' . $orgname . "-MailScanner-Relay-Quarantine: " . $rejectmsg;
262              splice @{$this->{headerflags}}, $pos, 1, 0, "H" if $this->{headerflags};
263              # Flag and carry on
264              $rejectflag = 1;
265          }
266          $pos++;
267      }
268  }
269
270
271  # Work out the user @ domain components
272  ($user, $domain) = address2userdomain($this->{from});
273  $this->{fromuser} = $user;
274  $this->{fromdomain} = $domain;
275  foreach $addr (@{$this->{to}}) {
276    ($user, $domain) = address2userdomain($addr);
277    push @{$this->{touser}}, $user;
278    push @{$this->{todomain}}, $domain;
279  }
280
281  # BarricadeMX mods
282  # Automatically detect if BarricadeMX is in use (clientip=127.0.0.1) and
283  # the second Received header contains 'ret-id'.  If this is true then:
284  # 1)  Override $this->{clientip} with the IP from the 2nd Received header.
285  # 2)  If 'ret-id pass' is in the 2nd Received header, set $this->{ret}.
286  #
287  # Example:
288  #
289  # Received: from xxx.xxx.com (localhost.localdomain [127.0.0.1])
290  #   by mail.fsg.com (8.13.1/8.13.1) with SMTP id xxxxxxxxxxxxxx
291  #   for <xxx@xxx.com>; Sat, 13 Jan 2007 17:02:49 -0500
292  # Received: from xxxxxxxxxx.net (xxxxxxxx.xxxxxxxxxx.xxx [111.111.11.11])
293  #   by xxxx.xxx.com (xxxx.xxx.com [192.168.111.11])
294  #   id xxxxxxxxxxxxxxxxxx ret-id none; Sat, 13 Jan 2007 17:03:09 -0500
295  #
296  $this->{ret} = 0;
297  #  my($header_line, $last_rcvd, $last_rcvd_ip);
298  if($this->{clientip} eq '127.0.0.1') {
299    my($header_line, $last_rcvd, $last_rcvd_ip);
300    my($rcvd_count) = 0;
301    foreach $header_line (@{$this->{headers}}) {
302      # print STDERR "DEBUG: Header line: $header_line\n";
303      if($header_line =~ /ret-id/ && $rcvd_count == 2) {
304        $this->{clientip} = $last_rcvd_ip if ($last_rcvd_ip);
305        # print STDERR "DEBUG: Using received header $rcvd_count - IP: $last_rcvd_ip\n";
306        $this->{ret} = 1 if($header_line =~ /ret-id pass/i);
307        last;
308      }
309      if($header_line =~ /Received:/) {
310        $rcvd_count++;
311        $last_rcvd = $header_line;
312        #my($rcvd_ip) = $last_rcvd =~ /\(.*\[(.+)\]\)/;
313        my($rcvd_ip) = $last_rcvd =~ /\(.*\[(.+)\].*\)|\[(.+)\]/;
314        # print STDERR "DEBUG: $last_rcvd - IP: $rcvd_ip\n";
315        $rcvd_ip = $2 unless $rcvd_ip;
316        $last_rcvd_ip = $rcvd_ip;
317        last if $rcvd_count > 2;
318      }
319    }
320  }
321
322  $this->{mshmacnullvalid} = 1;
323  $this->{mshmacskipvalid} = 0;
324  my $usewatermark = (MailScanner::Config::Value('usewatermarking', $this) =~ /1/)?1:0;
325  my $mshmackey = MailScanner::Config::Value('mshmac', $this);
326  if (($usewatermark) && (length $mshmackey)) {
327    #print STDERR "You are using the Watermark blocking\n";
328    my ($subject, $date, $from, $to, $useragent, $hash, $msgid);
329    my ($chkmshmacnull, $addmshmac, $chkmshmacskip, $mshamcexp, $header_line, $skiphmac, @WholeBody);
330    $this->{addmshmac} = 0;
331    $this->{mshmac} = "";
332    $chkmshmacnull = MailScanner::Config::Value('checkmshmac', $this);
333    $chkmshmacskip = MailScanner::Config::Value('checkmshmacskip', $this);
334    $addmshmac = MailScanner::Config::Value('addmshmac', $this);
335    $mshamcexp = MailScanner::Config::Value('mshmacvalid', $this);
336    $chkmshmacnull = ($chkmshmacnull =~ /1/)?1:0;
337    $chkmshmacskip = ($chkmshmacskip =~ /1/)?1:0;
338    $addmshmac = ($addmshmac =~ /1/)?1:0;
339    my $mshmacheader = MailScanner::Config::Value('mshmacheader', $this);
340    $mshmacheader .= ':' unless $mshmacheader =~ /:$/;
341
342    # So do we need to look for a header in the message body?
343    # Don't check if there was no client IP address, as we must have made it.
344    if ($chkmshmacnull &&
345        $this->{fromuser} eq "" &&
346        $this->{clientip} ne '0.0.0.0') {
347	#print STDERR "\tI am checking for a valid Watermark\n";
348      $this->{store}->ReadBody(\@WholeBody, 6000);
349      foreach (@WholeBody) {
350        $date=Date::Parse::str2time($1) if /^Date: (.*)/i;
351        $msgid=$1 if /^Message-ID: (.*)/i;
352        $hash=$1 if /^$mshmacheader (.*)/i;
353        # If we have our headers then end
354        last if defined($date) && defined($hash) && defined($msgid);
355        # If we have some our headers and a blank line then end
356        last if $_ eq '' && (defined($date) || defined($hash) || defined($msgid));
357      }
358      undef(@WholeBody);
359      if (!defined($hash)) {
360        #print STDERR "\tNo hash found\n";
361        $this->{mshmacnullvalid} = 0;
362        $this->{mshmacnullpresent} = 1;
363      }
364      else {
365        $this->{mshmacnullpresent} = 1;
366        $this->{mshmacnullvalid} = checkHMAC($hash, $this->{touser}[0] . "\@" . $this->{todomain}[0], $date, $mshmackey, $msgid);
367      }
368    }
369    # Now check to see if we need to add a header
370    if ($chkmshmacskip) {
371      my @hashes;
372      foreach (@{$this->{headers}}) {
373        $date=Date::Parse::str2time($1) if /^Date: (.*)/i;
374        $msgid=$1 if /^Message-ID: (.*)/i;
375        push(@hashes, $1) if /^$mshmacheader (.*)/i;
376      }
377      #print STDERR "I got $hash\n";
378      if ($chkmshmacskip) {
379        foreach (@hashes) {
380          if (checkHMAC($_,$this->{fromuser} . "\@" . $this->{fromdomain}, $date, $mshmackey, $msgid)) {
381            $this->{mshmacskipvalid}=1;
382            last;
383          }
384        }
385      }
386    }
387    if ($addmshmac)  {
388      my $expiry=time()+$mshamcexp;
389      $hash= createHMAC($expiry, $this->{fromuser} . "\@" . $this->{fromdomain}, $date, $mshmackey, $msgid);
390      #$global::MS->{mta}->AppendHeader($this, $mshmacheader, "$expiry\@$hash");
391      $this->{addmshmac} = 1;
392      $this->{mshmac} = "$expiry\@$hash";
393    }
394  }
395
396
397  # Reset the infection counters to 0
398  $this->{virusinfected} = 0;
399  $this->{nameinfected}  = 0;
400  $this->{otherinfected} = 0;
401  $this->{sizeinfected}  = 0;
402  # JKF 19/12/2007 $this->{passwordinfected} = 0;
403  $this->{stillwarn}     = 0;
404
405  # Set the date string and number
406  $this->{datestring} = scalar localtime;
407  my($hour, $day, $month, $year, $date);
408  ($hour, $day, $month, $year) = (localtime)[2,3,4,5];
409  $date = sprintf("%04d%02d%02d", $year+1900, $month+1, $day);
410  $this->{datenumber} = $date;
411  $this->{hournumber} = sprintf("%02d", $hour);
412
413  # Work out where to archive/copy this message.
414  # Could do all the archiving in a different separate place.
415  $archiveplaces = MailScanner::Config::Value('archivemail', $this);
416  if ($archiveplaces =~ /_DATE_/) {
417    # Only do the work for the date substitution if we really have to
418    $archiveplaces =~ s/_DATE_/$date/g;
419    #print STDERR "Archive location is $archiveplaces\n";
420  }
421  $archiveplaces =~ s/_HOUR_/$hour/g;
422  @{$this->{archiveplaces}} = ((defined $archiveplaces)?split(" ", $archiveplaces):());
423
424  # Decide if we want to scan this message at all
425  $this->{scanmail} = MailScanner::Config::Value('scanmail', $this);
426  if ($this->{scanmail} =~ /[1]/) {
427    $this->{scanmail} = 1;
428    $this->{scanvirusonly} = 0;
429  } elsif ($this->{scanmail} =~ /[2]/) {
430  	$this->{scanmail} = 1;
431  	$this->{scanvirusonly} = 1;
432  } else {
433    # Make sure it is set to something, and not left as undef.
434    $this->{scanmail} = 0;
435    $this->{scanvirusonly} = 0;
436  }
437
438  # Are we running in DKIM-friendly mode?
439  # Require Multiple Headers = add
440  # and Add New Headers At Top = yes
441  my $multhead = MailScanner::Config::Value('multipleheaders', $this);
442  my $attop    = MailScanner::Config::Value('newheadersattop', $this);
443  $this->{dkimfriendly} = ($multhead =~ /add/ && $attop =~ /1/)?1:0;
444  $this->{newheadersattop} = 1 if $attop =~ /1/;
445
446  # Work out what types of file are archives
447  my $ArchivesAre = MailScanner::Config::Value('archivesare', $this);
448  my @ArchivesAre = split " ", $ArchivesAre;
449  # Reduce each word to the first letter
450  @ArchivesAre    = map { substr($_,0,1) } @ArchivesAre;
451  $ArchivesAre    = join '', @ArchivesAre;
452  # And turn the first letters into a regexp
453  $ArchivesAre    = '[' . $ArchivesAre . ']' if $ArchivesAre;
454  $this->{archivesare} = $ArchivesAre;
455
456  if ($rejectflag == 1) {
457      # Log Relay rejects as other
458      $this->{otherinfected} = 1;
459  }
460
461  bless $this, $type;
462  return $this;
463}
464
465sub checkHMAC {
466
467  my ($hash, $email, $date, $secret, $msgid)=@_;
468
469  my ($expiry, $newhash)=split(/\@/, $hash);
470  return 0 if ($expiry<time());
471
472  #print STDERR "I am checking $hash using input of: $email, $date, $secret, $msgid\n";
473
474  $hash = createHMAC($expiry, $email, $date, $secret, $msgid);
475  return 0 unless ($hash eq $newhash);
476
477  return 1;
478}
479
480sub createHMAC {
481  my ($expiry, $email, $date, $secret, $msgid)=@_;
482
483  #print STDERR "I am generating a hash using the input of: $expiry, $email, $date, $secret, $msgid\n";
484
485  # JKF Watermark fix June 2008 return Digest::MD5::md5_base64(join("\$\%", $expiry, $email, $date, $secret, $msgid));
486  return Digest::MD5::md5_base64(join("\$\%", $expiry, $date, $secret, $msgid));
487}
488
489# Delete a named attachment (filename supplied) from this message
490sub DeleteFile {
491  my $this = shift;
492  my $safefile = shift;
493
494  #print STDERR "Been asked to delete $safefile\n";
495  $global::MS->{work}->DeleteFile($this, $safefile);
496}
497
498
499# Take an email address. Return (user, domain).
500sub address2userdomain {
501  my($addr) = @_;
502
503  my($user, $domain);
504
505  $addr = lc($addr);
506  $addr =~ s/^<\s*//; # Delete leading and
507  $addr =~ s/\s*>$//; # trailing <>
508
509  $user   = $addr;
510  $domain = $addr;
511
512  if ($addr =~ /@/) {
513    $user   =~ s/@[^@]*$//;
514    $domain =~ s/^.*@//;
515  }
516
517  return ($user, $domain);
518}
519
520
521# Print a message
522sub print {
523  my $this = shift;
524
525  print STDERR "Message " . $this->{id} . "\n";
526  print STDERR "  Size = " . $this->{size} . "\n";
527  print STDERR "  From = " . $this->{from} . "\n";
528  print STDERR "  To   = " . join(',',@{$this->{to}}) . "\n";
529  print STDERR "  Subj = " . $this->{subject} . "\n";
530}
531
532
533# Get/Set "scanme" flag
534sub NeedsScanning {
535  my($this, $value) = @_;
536
537  $this->{scanme} = $value if @_ > 1;
538  return $this->{scanme};
539}
540
541
542# Write the file containing all the message headers.
543# Called by the MessageBatch constructor.
544# Notes: assumes the directories required already exist.
545sub WriteHeaderFile {
546  my $this = shift;
547
548  #my @headers;
549  my $header = new FileHandle;
550  my $filename = $global::MS->{work}->{dir} . '/' . $this->{id} . '.header';
551  $this->{headerspath} = $filename;
552
553  MailScanner::Lock::openlock($header, ">$filename", "w")
554    or MailScanner::Log::DieLog("Cannot create + lock headers file %s, %s",
555                                $filename, $!);
556
557  #@headers = $global::MS->{mta}->OriginalMsgHeaders($this);
558  #print STDERR "Headers are " . join(', ', @headers) . "\n";
559  #foreach (@headers) {
560  foreach ($global::MS->{mta}->OriginalMsgHeaders($this)) {
561    tr/\r/\n/; # Work around Outlook [Express] bug allowing viruses in headers
562    print $header "$_\n";
563  }
564  print $header "\n";
565  MailScanner::Lock::unlockclose($header);
566
567  # Set the owner of the header file
568  $filename =~ /^(.*)$/;
569  $filename = $1;
570  chown $global::MS->{work}->{uid}, $global::MS->{work}->{gid}, $filename # TAINT
571    if $global::MS->{work}->{changeowner};
572}
573
574
575# Is this message spam? Try to build the spam report and store it in
576# the message.
577sub IsSpam {
578  my $this = shift;
579  my($includesaheader, $iswhitelisted, $usegsscanner, $mshmacreport);
580
581  my $spamheader    = "";
582  my $rblspamheader = "";
583  my $gsreport      = "";
584  my $saspamheader  = "";
585  my $RBLsaysspam   = 0;
586  my $rblcounter    = 0;
587  my $LogSpam = MailScanner::Config::Value('logspam');
588  my $LogNonSpam = MailScanner::Config::Value('lognonspam');
589  my $LocalSpamText = MailScanner::Config::LanguageValue($this, 'spam');
590  my $LocalNotSpamText = MailScanner::Config::LanguageValue($this, 'notspam');
591
592  #print STDERR "MTime{" . $this->{id} . "} = " . $this->{mtime} . "\n";
593
594  # Construct a pretty list of all the unique domain names for logging
595  my(%todomain, $todomain);
596  foreach $todomain (@{$this->{todomain}}) {
597    $todomain{$todomain} = 1;
598  }
599  $todomain = join(',', keys %todomain);
600  my $recipientcount = @{$this->{to}};
601
602  # $spamwhitelisted      set by IsSpam
603  # $spamblacklisted      set by IsSpam
604  # $isspam               set by IsSpam
605  # $ishigh               set by IsSpam
606  # $spamreport           set by IsSpam
607
608  $this->{spamwhitelisted} = 0;
609  $this->{spamblacklisted} = 0;
610  $this->{isspam} = 0;
611  $this->{ishigh} = 0;
612  $this->{spamreport} = "";
613  $this->{sascore} = 0;
614
615  # Work out if they always want the SA header
616  $includesaheader = MailScanner::Config::Value('includespamheader', $this);
617  # If they want the GS scanner then we must carry on too
618  $usegsscanner = MailScanner::Config::Value('gsscanner', $this);
619
620  # Do the whitelist check before the blacklist check.
621  # If anyone whitelists it, then everyone gets the message.
622  # If no-one has whitelisted it, then consider the blacklist.
623  $iswhitelisted = 0;
624  my $maxrecips = MailScanner::Config::Value('whitelistmaxrecips');
625  $maxrecips = 999999 unless $maxrecips;
626
627  # BarricadeMX mods
628  # Skip SpamAssassin if a valid RET hash is found ($this->{ret} == true)
629  if ($this->{ret}) {
630   MailScanner::Log::InfoLog("Valid RET hash found in Message %s, skipping Spam Checks",$this->{id});
631   return 0;
632  }
633
634  # Skip Spam Checks if Watermark is valid
635  if ($this->{mshmacskipvalid}) {
636    MailScanner::Log::InfoLog("Valid Watermark HASH found in Message %s Header, skipping Spam Checks", $this->{id});
637    return 0;
638  }
639
640  # MailScanner NULL sender mods
641  if ($this->{mshmacnullpresent} && $this->{mshmacnullvalid}) {
642    MailScanner::Log::InfoLog("Message %s from %s has valid watermark",
643                              $this->{id}, $this->{clientip});
644  } elsif ($this->{mshmacnullpresent} && $this->{mshmacnullvalid}==0) {
645    # If the sender is empty then treat unmarked messages as spam perhaps?
646    my $mshmacnull = lc(MailScanner::Config::Value('mshmacnull', $this));
647    #print STDERR "mshmacnull = $mshmacnull\n";
648    # This can be "none", "spam" or "high-scoring spam"
649    #$mshmacnull =~ s/[^a-z]//g;
650    if ($mshmacnull =~ /delete/) {
651      $this->{deleted} = 1;
652      $this->{dontdeliver} = 1;
653      MailScanner::Log::InfoLog("Message %s from %s has no (or invalid) watermark or sender address, deleted", $this->{id}, $this->{clientip}) if $LogSpam;
654    }
655    elsif ($mshmacnull =~ /high/) {
656      my $highscore = MailScanner::Config::Value('highspamassassinscore', $this);
657      $this->{isspam} = 1;
658      $this->{ishigh} = 1;
659      $this->{sascore} = $highscore if $this->{sascore} < $highscore;
660      $this->{spamreport} = $LocalSpamText . "(no watermark or sender address)";
661      MailScanner::Log::InfoLog("Message %s from %s has no (or invalid) watermark or sender address, marked as high-scoring spam", $this->{id}, $this->{clientip}) if $LogSpam;
662      return 1;
663    }
664    elsif ($mshmacnull =~ /spam/) {
665      my $reqscore = MailScanner::Config::Value('reqspamassassinscore', $this);
666      $this->{isspam} = 1;
667      $this->{sascore} = $reqscore if $this->{sascore} < $reqscore;
668      $this->{spamreport} = $LocalSpamText . "(no watermark or sender address)";
669      MailScanner::Log::InfoLog("Message %s from %s has no (or invalid) watermark or sender address, marked as spam", $this->{id}, $this->{clientip}) if $LogSpam;
670      return 1;
671    }
672    # spam/high/normal can also be a number, which is added to the Spam Score
673    elsif (($mshmacnull+0.0) > 0.01) {
674      $this->{sascore} += $mshmacnull+0.0;
675      MailScanner::Log::InfoLog("Message %s had bad watermark, added %s to spam score", $this->{id}, $mshmacnull+0.0) if $LogSpam;
676
677      my($mshspam, $mshhigh) = MailScanner::SA::SATest_spam($this, 0.0, $this->{sascore}+0.0);
678      $this->{isspam} = 1 if $mshspam;
679      $this->{ishigh} = 1 if $mshhigh;
680      $this->{spamreport} = ($mshspam?$LocalSpamText:$LocalNotSpamText) . " (no watermark or sender address)";
681      $mshmacreport = " (no watermark or sender address)";
682    } elsif ($this->{mshmacnullpresent}) {
683      MailScanner::Log::InfoLog("Message %s from %s has no (or invalid) watermark or sender address", $this->{id}, $this->{clientip});
684    }
685  }
686
687  # Only allow whitelisting if there are few enough recipients.
688  if ($recipientcount<=$maxrecips) {
689    if (MailScanner::Config::Value('spamwhitelist', $this)) {
690      # Whitelisted, so get out unless they want SA header
691      #print STDERR "Message is whitelisted\n";
692      MailScanner::Log::InfoLog("Message %s from %s (%s) is whitelisted",
693                                $this->{id}, $this->{clientip}, $this->{from})
694        if $LogSpam || $LogNonSpam;
695      $iswhitelisted = 1;
696      $this->{spamwhitelisted} = 1;
697      # whitelisted and doesn't want SA header so get out
698      return 0 unless $includesaheader || $usegsscanner;
699    }
700  } else {
701    # Had too many recipients, ignoring the whitelist
702    MailScanner::Log::InfoLog("Message %s from %s (%s) ignored whitelist, " .
703                              "had %d recipients (>%d)", $this->{id},
704                              $this->{clientip}, $this->{from},
705                              $recipientcount, $maxrecips)
706      if $LogSpam || $LogNonSpam;
707  }
708
709  # If it's a blacklisted address, don't bother doing any checks at all
710  if (!$iswhitelisted && MailScanner::Config::Value('spamblacklist', $this)) {
711    $this->{spamblacklisted} = 1;
712    $this->{isspam} = 1;
713    $this->{ishigh} = 1
714      if MailScanner::Config::Value('blacklistedishigh', $this);
715    $this->{spamreport} = $LocalSpamText . ' (' .
716                   MailScanner::Config::LanguageValue($this, 'blacklisted') .
717                   ')';
718    MailScanner::Log::InfoLog("Message %s from %s (%s) to %s" .
719                              " is spam (blacklisted)",
720                              $this->{id}, $this->{clientip},
721                              $this->{from}, $todomain)
722      if $LogSpam;
723    return 1;
724  }
725
726  my $whitelistreport = '';
727  if ($iswhitelisted) {
728    $whitelistreport = ' (' .
729                  MailScanner::Config::LanguageValue($this, 'whitelisted') .
730                  ')';
731  }
732
733  #
734  # Check to see if message is too large to be likely to be spam.
735  #
736  my $maxtestsize = MailScanner::Config::Value('maxspamchecksize',$this);
737  if ($this->{size} > $maxtestsize) {
738    $this->{spamreport} = MailScanner::Config::LanguageValue($this, 'skippedastoobig');
739    $this->{spamreport} = $this->ReflowHeader(
740                  MailScanner::Config::Value('spamheader',$this),
741                  $this->{spamreport});
742    MailScanner::Log::InfoLog("Message %s from %s (%s) to %s is too big for spam checks (%d > %d bytes)",
743                              $this->{id}, $this->{clientip},
744                              $this->{from}, $todomain,
745                              $this->{size}, $maxtestsize);
746    return 0;
747  }
748
749  my $isauthenticated = 0;
750  if (MailScanner::Config::Value('mta') == "postfix" && MailScanner::Config::Value('spamlistskipifauthenticated')) {
751    # MailScanner::Log::InfoLog(Dumper($metadata));
752    # Test if sender is authenticated on mta
753    foreach my $metadata (@{$this->{metadata}}) {
754      #Postfix
755      if ($metadata =~ m/^Asasl_method=(PLAIN|LOGIN)$/) {
756        MailScanner::Log::InfoLog("Sender was authenticated - Not checking RBLs");
757        $isauthenticated = 1;
758      }
759    }
760  } elsif (MailScanner::Config::Value('mta') == "exim" && MailScanner::Config::Value('spamlistskipifauthenticated')) {
761    if (exists $this->{metadata}->{dv_auth_id}) {
762        MailScanner::Log::InfoLog("Sender was authenticated - Not checking RBLs");
763        $isauthenticated = 1;
764    }
765  }
766
767  if (!$iswhitelisted && !$isauthenticated) {
768    # Not whitelisted, so do the RBL checks
769    $0 = 'MailScanner: checking with Spam Lists';
770    ($rblcounter, $rblspamheader) = MailScanner::RBLs::Checks($this);
771    my $rblthreshold = MailScanner::Config::Value('normalrbls', $this);
772    my $highrblthreshold = MailScanner::Config::Value('highrbls', $this);
773    $rblthreshold = 1 if $rblthreshold <= 1;
774    $highrblthreshold = 1 if $highrblthreshold <= 1;
775    $RBLsaysspam       = 1 if $rblcounter >= $rblthreshold;
776    # Add leading "spam, " if RBL says it is spam. This will be at the
777    # front of the spam report.
778    $this->{isspam}    = 1 if $RBLsaysspam;
779    $this->{isrblspam} = 1 if $RBLsaysspam;
780    $this->{rblspamreport} = "";
781    $this->{rblspamreport} = $rblspamheader if $RBLsaysspam;
782    $this->{ishigh}    = 1 if $rblcounter >= $highrblthreshold;
783  }
784  # rblspamheader is useful start to spamreport if RBLsaysspam.
785
786  # Do the Custom Spam Checker
787  my($gsscore, $gsreport);
788  #print STDERR "In Message.pm about to look at gsscanner\n";
789  if ($usegsscanner) {
790    #print STDERR "In Message.pm about to run gsscanner\n";
791
792    ($gsscore, $gsreport) = MailScanner::GenericSpam::Checks($this);
793    #print STDERR "In Message.pm we got $gsscore, $gsreport\n";
794    $this->{gshits} = $gsscore;
795    $this->{gsreport} = $gsreport;
796    $this->{sascore} += $gsscore; # Add the score
797    MailScanner::Log::InfoLog("Custom Spam Scanner for message %s from %s " .
798                              "(%s) to %s report is %s %s",
799                              $this->{id}, $this->{clientip},
800                              $this->{from}, $todomain, $gsscore, $gsreport)
801      if $LogSpam && ($gsscore!=0 || $gsreport ne "");
802  }
803
804  # Don't do the SA checks if they have said no.
805  unless (MailScanner::Config::Value('usespamassassin', $this)) {
806    $this->{spamwhitelisted} = $iswhitelisted;
807    $this->{isspam} = 1
808      if $this->{sascore}+0.0 >=
809         MailScanner::Config::Value('reqspamassassinscore',$this)+0.0;
810    $this->{ishigh} = 1
811      if $this->{sascore}+0.0 >=
812         MailScanner::Config::Value('highspamassassinscore',$this)+0.0;
813    MailScanner::Log::InfoLog("Message %s from %s (%s) to %s is %s",
814                              $this->{id}, $this->{clientip},
815                              $this->{from}, $todomain, $rblspamheader)
816      if $RBLsaysspam && $LogSpam;
817    # Replace start of report if it wasn't spam from rbl but now is.
818    $this->{spamreport} = ($this->{isspam})?$LocalSpamText:$LocalNotSpamText;
819    $this->{spamreport} .= $mshmacreport;
820    $this->{spamreport} .= $whitelistreport;
821    $this->{spamreport} .= ', ' if $this->{spamreport};
822    $this->{spamreport} .= $rblspamheader if $rblspamheader;
823    $this->{spamreport} .= ', ' if $this->{spamreport} && $rblspamheader;
824    $this->{spamreport} .= $gsscore+0.0 if $gsscore!=0;
825    $this->{spamreport} .= ', ' if $this->{spamreport} && $gsscore!=0;
826    $this->{spamreport} .= $gsreport if $gsreport ne "";
827    $this->{spamreport} = $this->ReflowHeader(
828                  MailScanner::Config::Value('spamheader',$this),
829                  $this->{spamreport});
830    return $this->{isspam};
831  }
832
833  # If it's spam and they dont want to check SA as well
834  if ($this->{isspam} &&
835      !MailScanner::Config::Value('checksaifonspamlist', $this)) {
836    $this->{spamwhitelisted} = $iswhitelisted;
837    MailScanner::Log::InfoLog("Message %s from %s (%s) to %s is %s",
838                              $this->{id}, $this->{clientip},
839                              $this->{from}, $todomain, $rblspamheader)
840      if $RBLsaysspam && $LogSpam;
841    # Replace start of report if it wasn't spam from rbl but now is.
842    $this->{spamreport} = ($this->{isspam})?$LocalSpamText:$LocalNotSpamText;
843    $this->{spamreport} .= $mshmacreport;
844    $this->{spamreport} .= $whitelistreport;
845    $this->{spamreport} .= ', ' if $this->{spamreport};
846    $this->{spamreport} .= $rblspamheader if $rblspamheader;
847    $this->{spamreport} .= ', ' if $this->{spamreport} && $rblspamheader;
848    $this->{spamreport} .= $gsscore+0.0 if $gsscore!=0;
849    $this->{spamreport} .= ', ' if $this->{spamreport} && $gsscore!=0;
850    $this->{spamreport} .= $gsreport if $gsreport ne "";
851    $this->{spamreport} = $this->ReflowHeader(
852                  MailScanner::Config::Value('spamheader',$this),
853                  $this->{spamreport});
854    return $RBLsaysspam;
855  }
856
857  # They must want the SA checks doing.
858
859  my $SAsaysspam = 0;
860  my $SAHighScoring = 0;
861  my $saheader = "";
862  my $sascore  = 0;
863  my $salongreport = "";
864  $0 = 'MailScanner: checking with SpamAssassin';
865  ($SAsaysspam, $SAHighScoring, $saheader, $sascore, $salongreport)
866    = MailScanner::SA::Checks($this);
867  # Cannot trust the SAsaysspam and SAHighScoring from the previous test as
868  # they depend solely on what SpamAssassin finds, and not what the Watermark
869  # and GS scanner found previously, the scores for which are already in
870  # $this->{sascore}. So recalculate the SAsaysspam and SAHighScoring based
871  # on *all* the evidence we have so far.
872  ($SAsaysspam, $SAHighScoring) = MailScanner::SA::SATest_spam($this, $this->{sascore}+0.0, $sascore+0.0);
873  $this->{sascore} += $sascore; # Save the actual figure for use later...
874  # Trim all the leading rubbish off the long SA report and turn it back
875  # into a multi-line string, then store it in the message properties.
876  $salongreport =~ s/^.* pts rule name/ pts rule name/;
877  $salongreport =~ tr/\0/\n/;
878  $this->{salongreport} = $salongreport;
879  #print STDERR $salongreport . "\n";
880
881  # Fix the return values
882  $SAsaysspam = 0 unless $saheader;    # Solve bug with empty SAreports
883  $saheader =~ s/\s+$//g if $saheader; # Solve bug with trailing space
884
885  # Build the hash containing all the rules hit as keys, values are 1
886  # $saheader looks like this: score=11.12, required 6, DATE_IN_PAST_12_24 1.77, INVALID_DATE 1.65, INVALID_MSGID 2.60, RCVD_IN_NJABL_SPAM 3.10, SPF_HELO_NEUTRAL 2.00
887  my(@hitslist, %names);
888  @hitslist = split(/\s*,\s*/, $saheader);
889  shift @hitslist; # Remove total score
890  shift @hitslist; # Remove required score
891  foreach (@hitslist) {
892    $names{lc($1)} = 1 if /^\s*(\S+)\s+/;
893  }
894  $this->{sarules} = \%names;
895
896  #print STDERR "SA report is \"$saheader\"\n";
897  #print STDERR "SAsaysspam = $SAsaysspam\n";
898  $saheader = MailScanner::Config::LanguageValue($this, 'spamassassin') .
899              " ($saheader)" if $saheader;
900
901  # The message really is spam if SA says so (unless it's been whitelisted)
902  unless ($iswhitelisted) {
903    $this->{isspam} |= $SAsaysspam;
904    $this->{issaspam} = $SAsaysspam;
905  }
906
907  # If it's spam...
908  if ($this->{isspam}) {
909    #print STDERR "It is spam\nInclude SA = $includesaheader\n";
910    #print STDERR "SAHeader = $saheader\n";
911    # If it's SA spam as well, or they always want the SA header
912    #if ($SAsaysspam || $includesaheader) {
913      #print STDERR "Spam or Add SA Header\n";
914      $this->{ishigh} = 1 if $SAHighScoring;
915      $this->{spamreport} = ($this->{isspam})?$LocalSpamText:$LocalNotSpamText;
916      $this->{spamreport} .= $mshmacreport;
917      $this->{spamreport} .= $whitelistreport;
918      $this->{spamreport} .= ', ' if $this->{spamreport};
919      $this->{spamreport} .= $rblspamheader if $rblspamheader;
920      $this->{spamreport} .= ', ' if $this->{spamreport} && $rblspamheader;
921      $this->{spamreport} .= $gsscore+0.0 if $gsscore!=0;
922      $this->{spamreport} .= ', ' if $this->{spamreport} && $gsscore!=0;
923      $this->{spamreport} .= $gsreport if $gsreport ne "";
924      #$this->{spamreport} .= ', ' if $this->{spamreport} && $gsreport;
925      #$this->{spamreport} .= $saheader if $saheader ne "";
926    #}
927    if ($SAsaysspam || $includesaheader) {
928      $this->{spamreport} .= ', ' if $this->{spamreport} && $gsreport;
929      $this->{spamreport} .= $saheader if $saheader ne "";
930    }
931  } else {
932    # It's not spam...
933    #print STDERR "It's not spam\n";
934    #print STDERR "SAHeader = $saheader\n";
935    $this->{spamreport} = ($this->{isspam})?$LocalSpamText:$LocalNotSpamText;
936    $this->{spamreport} .= $mshmacreport;
937    $this->{spamreport} .= $whitelistreport;
938    $this->{spamreport} .= ', ' if $this->{spamreport};
939    $this->{spamreport} .= $rblspamheader if $rblspamheader;
940    $this->{spamreport} .= ', ' if $this->{spamreport} && $rblspamheader;
941    $this->{spamreport} .= $gsscore+0.0 if $gsscore!=0;
942    $this->{spamreport} .= ', ' if $this->{spamreport} && $gsscore!=0;
943    $this->{spamreport} .= $gsreport if $gsreport ne "";
944    $this->{spamreport} .= ', ' if $this->{spamreport} && $gsreport;
945    $this->{spamreport} .= $saheader if $saheader ne "";
946  }
947
948  # Do the spam logging here so we can log high-scoring spam too
949  if (($LogSpam && $this->{isspam}) || ($LogNonSpam && !$this->{isspam})) {
950    my $ReportText = $this->{spamreport};
951    $ReportText =~ s/\s+/ /sg;
952    MailScanner::Log::InfoLog("Message %s from %s (%s) to %s is %s",
953                              $this->{id}, $this->{clientip},
954                              $this->{from}, $todomain, $ReportText);
955  }
956
957  # Now just reflow and log the results
958  if ($this->{spamreport} ne "") {
959    $this->{spamreport} = $this->ReflowHeader(
960                  MailScanner::Config::Value('spamheader',$this),
961                  $this->{spamreport});
962  }
963
964  return $this->{isspam};
965}
966
967
968# Do whatever is necessary with this message to deal with spam.
969# We can assume the message passed is indeed spam (isspam==true).
970# Call it with either 'spam' or 'nonspam'. Don't use 'ham'!
971sub HandleHamAndSpam {
972  my($this, $HamSpam) = @_;
973
974  my($actions, $action, @actions, %actions);
975  my(@extraheaders, $actionscopy, $actionkey);
976
977  # Set default action for DMX/MailWatch reporting
978  $this->{actions} = 'deliver';
979
980  # Get a space-separated list of all the actions
981  if ($HamSpam eq 'nonspam') {
982	#print STDERR "Looking up hamactions\n";
983	$actions = MailScanner::Config::Value('hamactions', $this);
984	# Fast bail-out if it's just the simple "deliver" case that 99% of
985	# people will use
986	# Can't do this with SA rule actions: return if $actions eq 'deliver';
987  } else {
988	# It must be spam as it's not ham
989	if ($this->{ishigh}) {
990	  #print STDERR "Looking up highscorespamactions\n";
991	  $actions = MailScanner::Config::Value('highscorespamactions', $this);
992	} else {
993	  #print STDERR "Looking up spamactions\n";
994	  $actions = MailScanner::Config::Value('spamactions', $this);
995	}
996  }
997
998  # Find all the bits in quotes, with their spaces
999  $actionscopy = $actions;
1000  #print STDERR "Actions = \'$actions\'\n";
1001  while ($actions =~ s/\"([^\"]+)\"//) {
1002    $actionkey = $1;
1003    #print STDERR "ActionKey = $actionkey and $1\n";
1004    push @extraheaders, $actionkey;
1005    MailScanner::Log::WarnLog("Syntax error in \"header\" action in spam " .
1006                              "actions, missing \":\" in %s", $actionkey)
1007      unless $actionkey =~ /:/;
1008  }
1009  @{$this->{extraspamheaders}} = @extraheaders;
1010  #$actions = lc($actions);
1011  $actions =~ s/^\s*//;
1012  $actions =~ s/\s*$//;
1013  $actions =~ s/\s+/ /g;
1014  #print STDERR "Actions after = \'$actions\'\n";
1015  #print STDERR "Extra headers are \"" . join(',',@extraheaders) . "\"\n";
1016
1017  MailScanner::Log::WarnLog('Syntax error: missing " in spam actions %s',
1018                            $actionscopy) if $actions =~ /\"/;
1019
1020  $actions =~ tr/,//d; # Remove all commas in case they put any in
1021  @actions = split(" ", $actions);
1022  #print STDERR "Actions are $actions\n";
1023
1024  # The default action if they haven't specified anything is to
1025  # deliver spam like normal mail.
1026  # Can't do this with SA rule actions: return unless @actions;
1027
1028  # If they have just specified a filename, then something is wrong
1029  if ($#actions==0 && $actions[0] =~ /\// && $actions[0] !~ /^store-\//) {
1030    MailScanner::Log::WarnLog('Your spam actions "%s" looks like a filename.' .
1031        ' If this is a ruleset filename, it must end in .rule or .rules',
1032        $actions[0]);
1033    $actions[0] = 'deliver';
1034  }
1035
1036  #print STDERR "Message: HandleHamSpam has actions " . join(',',@actions) .
1037  #             "\n";
1038
1039  # Save actions for DMX/MailWatch reporting
1040  $this->{actions} = join(',', @actions);
1041
1042  my(%lintoptions, $custom);
1043  foreach $action (@actions) {
1044    # Allow for store-mcp, store-nonspam, etc.
1045    #$action =~ s/^store\W(\w+).*$/store-$1/;
1046    if ($action =~ /^custom\((.*)\)/) {
1047      MailScanner::Config::CallCustomAction($this, 'yes', $1);
1048      $action = 'custom';
1049    }
1050
1051    $lintoptions{$action} = 1 unless $action =~ /-\//;
1052
1053    # If the message is a MCP message then don't do the ham/spam "deliver"
1054    # as the MCP actions will have provided a "deliver" if they want one.
1055    next if $this->{ismcp} && $action eq 'deliver';
1056
1057    $actions{$action} = 1;
1058    #print STDERR "Message: HandleSpam action is $action\n";
1059    if ($action =~ /\@/) {
1060      #print STDERR "Message " . $this->{id} . " : HandleSpam() adding " .
1061      #             "$action to archiveplaces\n";
1062      push @{$this->{archiveplaces}}, $action;
1063      $actions{'forward'} = 1;
1064      delete $lintoptions{$action}; # Can't syntax-check email addresses
1065    }
1066    if ($action =~ /-\//) {
1067      delete $lintoptions{$action}; # Can't syntax-check dir paths
1068    }
1069  }
1070
1071  #############################################
1072  ### SpamAssassin Rule Actions starts here ###
1073  #############################################
1074  my $sarule = MailScanner::Config::Value('saactions', $this);
1075  my $logsaactions = MailScanner::Config::Value('logsaactions', $this);
1076  if ($sarule) {
1077    #print STDERR "SArule = $sarule\n";
1078    $logsaactions = 1 if $logsaactions =~ /1/;
1079    my @sarule = split /\s*,\s*/, $sarule;
1080    my %sarule = ();
1081    my @sascorerules; # List of extra rules of the spamscore>10 variety
1082    my $lastrule = ""; # Allows multiple actions per rule name
1083    my $thisaction; # Just for debug output
1084    # Loop through each x=>y in the saactions config setting
1085    foreach my $rule (@sarule) {
1086      if ($rule =~ /^(\S+)\s*=\>\s*(.*)$/) {
1087        # It's a new RULE=>action
1088        $sarule{lc($1)} .= "\0$2";
1089        $lastrule = $1;
1090        $thisaction = $2;
1091        #print STDERR "Added rule $1 ==> action $2\n";
1092      } else {
1093        # No '=>', it's just an action,
1094        # so make the RULE a copy of the previous one.
1095        $sarule{lc($lastrule)} .= "\0$rule";
1096        $thisaction = $rule;
1097        #print STDERR "(Added rule $lastrule ==> action $rule)\n";
1098      }
1099
1100      #print STDERR "Breaking up sarule into $lastrule => $thisaction\n";
1101
1102      # Look for SpamScore>n and other tests
1103      my $rulename = lc($1); # This will look like spamscore>10
1104      if ($rulename =~ /^spamscore\s*(\>|\>=|==|\<=|\<)\s*([0-9.]+)/) {
1105        my($test, $threshold) = ($1, $2);
1106        my $spamscore = $this->{sascore} + 0.0; # Be wary of Perl bug
1107        my $result = 0;
1108        #print STDERR 'Evaling $result=1 if ' . $spamscore . $test .
1109        #             $threshold . ';' . "\n";
1110        eval '$result=1 if ' . $spamscore . $test . $threshold . ';';
1111        #print STDERR "  Result was $result\n";
1112        push @sascorerules, $rulename if $result; # These rules are all hits
1113      }
1114    }
1115
1116    # Loop through each SA rule we hit with this message
1117    foreach my $looprule ((keys %{$this->{sarules}}), @sascorerules) {
1118      # Bail out if we're not interested in this rule
1119      #print STDERR "*Looking for sarule $looprule\n";
1120      foreach $action (split(/\0+/,$sarule{$looprule})) {
1121        #my $action = $sarule{$looprule};
1122        $action =~ s/^\s+//;
1123        $action =~ s/\s+$//;
1124        next unless $action;
1125        #print STDERR "*sarule $looprule gave action $action\n";
1126        if (defined($this->{saruleaction}{$looprule})) {
1127          $this->{saruleaction}{$looprule} .= ','.$action;
1128        } else {
1129          $this->{saruleaction}{$looprule} = $action;
1130        }
1131        MailScanner::Log::InfoLog("SpamAssassin Rule Actions: rule %s caused action %s in message %s", $looprule, $action, $this->{id}) if $logsaactions;
1132        if ($action !~ /^notify/ && $action =~ s/^no\w?\W*//) { # Anything started no, not not, etc.
1133          #
1134          # It's a NOT action so remove the action
1135          #
1136          #print STDERR "It's a NOT action $action\n";
1137          $action =~ s/forward\s*|header\s*//g;
1138          if ($action =~ /\@/) {
1139            # Remove the address from the list of @{$this->{archiveplaces}}
1140            my @places;
1141            foreach (@{$this->{archiveplaces}}) {
1142              push @places, $_ unless /^$action$/i;
1143            }
1144            $this->{archiveplaces} = \@places;
1145            #print STDERR "Removed $action from archiveplaces to give " . join(',',@places) . "\n";
1146          } elsif ($action =~ /\"([^\"]+)\"/) {
1147            # Remove the header from the list of @{$this->{extraspamheaders}}
1148            my @headers;
1149            foreach (@{$this->{extraspamheaders}}) {
1150              push @headers, $_ unless /^$action$/i;
1151            }
1152            $this->{extraspamheaders} = \@headers;
1153            #print STDERR "Removed $action from extraspamheaders to give " . join(',',@headers) . "\n";
1154          } elsif ($action =~ /^custom\((.*)\)/) {
1155            # Call the "no" custom action
1156            MailScanner::Config::CallCustomAction($this, 'no', $1);
1157          } else {
1158            #print STDERR "Removed $action from actions list\n";
1159            # Support store-mcp, store-nonspam etc.
1160            #$action =~ s/^store\W(\w+).*$/store-$1/;
1161            delete $actions{$action};
1162            $lintoptions{$action} = 1 unless $action =~ /-\//;
1163          }
1164        } else {
1165          #
1166          # It's a normal action so add the action
1167          #
1168          #print STDERR "SArule normal action $action\n";
1169          # Need to handle 'forward' and 'header' specially
1170          $action =~ s/forward\s*|header\s*//g;
1171          if ($action =~ /\@/) {
1172            # It's a forward
1173            #print STDERR "Adding $action to archiveplaces\n";
1174            push @{$this->{archiveplaces}}, $action;
1175            $actions{'forward'} = 1;
1176            #delete $lintoptions{$action};
1177          } elsif ($action =~ /\"([^\"]+)\"/) {
1178            # It's a header
1179            $actionkey = $1;
1180            #print STDERR "Adding $actionkey to extraspamheaders\n";
1181            push @{$this->{extraspamheaders}}, $actionkey;
1182            MailScanner::Log::WarnLog("Syntax error in \"header\" action in " .
1183                                      "SpamAssassin rule actions, missing " .
1184                                      "\":\" in %s", $actionkey)
1185              unless $action =~ /:/;
1186            #delete $lintoptions{$action};
1187          } elsif ($action =~ /^custom\((.*)\)/) {
1188            # Call the "no" custom action
1189            MailScanner::Config::CallCustomAction($this, 'yes', $1);
1190          } else {
1191            # It's some other action
1192            #print STDERR "Adding action $action\n";
1193            # Support store-mcp, store-nonspam etc.
1194            #$action =~ s/^store\W(\w+).*$/store-$1/;
1195            #print STDERR "Adding action $action after cleaning up stores\n";
1196            $actions{$action} = 1;
1197            $lintoptions{$action} = 1 unless $action =~ /-\//;
1198          }
1199        }
1200      }
1201      # "delete" ==> "no-deliver"
1202      delete $actions{'deliver'} if $actions{'delete'};
1203    }
1204  }
1205  ###########################################
1206  ### SpamAssassin Rule Actions ends here ###
1207  ###########################################
1208
1209  delete $actions{''}; # Delete any null records that crept in
1210  #print STDERR "Actions are: " . join(',',keys %actions) . "\n";
1211
1212  # Do the syntax check
1213  delete $lintoptions{'deliver'};
1214  delete $lintoptions{'delete'};
1215  delete $lintoptions{'store'};
1216  delete $lintoptions{'store-nonmcp'};
1217  delete $lintoptions{'store-mcp'};
1218  delete $lintoptions{'store-nonspam'};
1219  delete $lintoptions{'store-spam'};
1220  delete $lintoptions{'bounce'};
1221  delete $lintoptions{'forward'};
1222  delete $lintoptions{'striphtml'};
1223  delete $lintoptions{'attachment'};
1224  delete $lintoptions{'notify'};
1225  delete $lintoptions{'header'};
1226  delete $lintoptions{'custom'};
1227  my $lintstring = join(' ', keys %lintoptions);
1228  if ($lintstring ne '') {
1229    my $lints = ($lintstring =~ / /)?'s':'';
1230    my $linttype;
1231    if ($HamSpam eq 'nonspam') {
1232      $linttype = 'Non-Spam';
1233    } else {
1234      if ($this->{ishigh}) {
1235        $linttype = 'High-Scoring Spam';
1236      } else {
1237        $linttype = 'Spam';
1238      }
1239    }
1240    MailScanner::Log::WarnLog("Message %s produced illegal %s Action%s " .
1241                              "\"%s\", so message is being delivered",
1242                              $this->{id}, $linttype, $lints, $lintstring);
1243
1244    #print STDERR sprintf("Message %s produced illegal %s Action%s " .
1245    #                     "\"%s\", so message is being delivered\n",
1246    #                     $this->{id}, $linttype, $lints, $lintstring);
1247
1248    # We found an error so fail-safe by delivering the message
1249    $actions{'deliver'} = 1;
1250  }
1251
1252
1253  # Now we are left with deliver, bounce, delete, store and striphtml.
1254  #print STDERR "Archive places are " . join(',', keys %actions) . "\n";
1255
1256  # Log every message not being delivered
1257  if (MailScanner::Config::Value('logdelivery')) {
1258    if (!$actions{'deliver'}) {
1259      MailScanner::Log::NoticeLog(
1260        "Non-delivery of \u%s: message %s from %s to %s with subject %s",
1261        $HamSpam,
1262        $this->{id},
1263        lc($this->{from}),
1264        lc(join(',',@{$this->{to}})),
1265        $this->{subject}
1266      );
1267    }
1268    # Log every message being delivered
1269    if ($actions{'deliver'}) { # || $this->{mcpdelivering}) {
1270      MailScanner::Log::NoticeLog(
1271        "Delivery of \u%s: message %s from %s to %s with subject %s",
1272        $HamSpam,
1273        $this->{id},
1274        lc($this->{from}),
1275        lc(join(',',@{$this->{to}})),
1276        $this->{subject}
1277      );
1278    }
1279  }
1280
1281  # Split this job into 2.
1282  # 1) The message is being delivered to at least 1 address,
1283  # 2) The message is not being delivered to anyone.
1284  # The extra addresses for forward it to have already been added.
1285  if ($actions{'deliver'} || $actions{'forward'} || $this->{mcpdelivering}) {
1286    #
1287    # Message is going to original recipient and/or extra recipients
1288    #
1289
1290    MailScanner::Log::NoticeLog("Spam Actions: message %s actions are %s",
1291                              $this->{id}, join(',', keys %actions))
1292      if $HamSpam eq 'spam' && MailScanner::Config::Value('logspam');
1293
1294    # Delete the original recipient if they are only forwarding it
1295    $global::MS->{mta}->DeleteRecipients($this) if !$actions{'deliver'};
1296
1297    # Delete action is over-ridden as we are sending it somewhere
1298    delete $actions{'delete'};
1299
1300    # Message still exists, so it will be delivered to its new recipients
1301  } else {
1302    #
1303    # Message is not going to be delivered anywhere
1304    #
1305
1306    MailScanner::Log::NoticeLog("Spam Actions: message %s actions are %s",
1307                              $this->{id}, join(',', keys %actions))
1308      if $HamSpam eq 'spam' && MailScanner::Config::Value('logspam');
1309
1310    # Mark the message so it won't get cleaned up or delivered, but just dropped
1311    #print STDERR "Setting DontDeliver for " . $this->{id} . "\n";
1312    $this->{dontdeliver} = 1;
1313    # Optimisation courtesy of Yavor.Trapkov@wipo.int
1314    $this->{deleted} = 1 if (keys %actions) == 1 && $actions{'delete'};
1315    ## Mark the message as deleted, so it won't get delivered
1316    #$this->{deleted} = 1;
1317  }
1318
1319  # All delivery will now happen correctly.
1320
1321  # Bounce a message back to the sender if they want that
1322  if ($actions{'bounce'}) {
1323    if ($HamSpam eq 'nonspam') {
1324      MailScanner::Log::WarnLog("Does not make sense to bounce non-spam");
1325    } else {
1326      #MailScanner::Log::WarnLog('The "bounce" Spam Action no longer exists');
1327      if ($this->{ishigh}) {
1328        MailScanner::Log::NoticeLog("Will not bounce high-scoring spam")
1329      } else {
1330        $this->HandleSpamBounce()
1331          if MailScanner::Config::Value('enablespambounce', $this);
1332      }
1333    }
1334  }
1335
1336  # Notify the recipient if they want that
1337  if ($actions{'notify'}) {
1338    if ($HamSpam eq 'nonspam') {
1339      MailScanner::Log::WarnLog("Does not make sense to notify recipient about non-spam");
1340    } else {
1341      $this->HandleSpamNotify();
1342    }
1343  }
1344
1345  # Store it if they want that
1346  my($store, @stores);
1347  push @stores, $HamSpam  if $actions{'store'};
1348  push @stores, 'nonmcp'  if $actions{'store-nonmcp'};
1349  push @stores, 'mcp'     if $actions{'store-mcp'};
1350  push @stores, 'nonspam' if $actions{'store-nonspam'};
1351  push @stores, 'spam'    if $actions{'store-spam'};
1352  $this->{ismcp} = 1      if $actions{'store-mcp'}; # For MailWatch
1353  # Find all the absolute dir path stores
1354  foreach $store (keys %actions) {
1355    next unless $store =~ s/^store-//;
1356    push @stores, $store if $store =~ /^\//;
1357  }
1358
1359  my %storealready;
1360  foreach $store (@stores) {
1361    my($dir, $dir2, $spamdir, $uid, $gid, $changeowner);
1362    $uid = $global::MS->{quar}->{uid};
1363    $gid = $global::MS->{quar}->{gid};
1364    $changeowner = $global::MS->{quar}->{changeowner};
1365    #print STDERR "Store is $store\n";
1366    if ($store =~ /^\//) {
1367      #print STDERR "Absolute store $store\n";
1368      # It's an absolute store, so just store it in there
1369      $store =~ s/_HOUR_/$this->{hournumber}/;
1370      $store =~ s/_DATE_/$this->{datenumber}/;
1371      $store =~ s/_FROMUSER_/$this->{fromuser}/;
1372      $store =~ s/_FROMDOMAIN_/$this->{fromdomain}/;
1373      if ($store =~ /_TOUSER_|_TODOMAIN_/) {
1374        # It contains a substitution so we need to loop through all the recips
1375        my $numrecips = scalar (@{$this->{to}});
1376        foreach my $recip (0..$numrecips-1) {
1377          my $storecopy = $store;
1378          my $u = $this->{touser}[$recip];
1379          my $d = $this->{todomain}[$recip];
1380          $storecopy =~ s/_TOUSER_/$u/g;
1381          $storecopy =~ s/_TODOMAIN_/$d/g;
1382          umask $global::MS->{quar}->{dirumask};
1383          mkpath $storecopy unless -d $storecopy;
1384          chown $uid, $gid, $storecopy if $changeowner;
1385          umask $global::MS->{quar}->{fileumask};
1386          push @{$this->{spamarchive}},
1387               $this->{store}->CopyEntireMessage($this, $storecopy, $this->{id},
1388                                                 $uid, $gid, $changeowner)
1389            unless $storealready{$storecopy};
1390          $storealready{$storecopy} = 1;
1391          chown $uid, $gid, "$storecopy/" . $this->{id};
1392        }
1393      } else {
1394        # It doesn't contian _TOUSER_ or _TODOMAIN_ so is a simple one
1395        umask $global::MS->{quar}->{dirumask};
1396        mkpath $store unless -d $store;
1397        chown $uid, $gid, $store if $changeowner;
1398        umask $global::MS->{quar}->{fileumask};
1399        push @{$this->{spamarchive}},
1400             $this->{store}->CopyEntireMessage($this, $store, $this->{id},
1401                                               $uid, $gid, $changeowner)
1402          unless $storealready{$store};
1403        $storealready{$store} = 1;
1404        chown $uid, $gid, "$store/" . $this->{id};
1405      }
1406    } else {
1407      $dir = MailScanner::Config::Value('quarantinedir', $this);
1408      #$dir2 = $dir . '/' .  MailScanner::Quarantine::TodayDir();
1409      $dir2 = $dir . '/' .  $this->{datenumber};
1410      $spamdir = $dir2 . '/' . $store;
1411      #print STDERR "dir = $dir\ndir2 = $dir2\nspamdir = $spamdir\n";
1412      umask $global::MS->{quar}->{dirumask};
1413      unless (-d $dir) {
1414        mkdir $dir, 0777;
1415        chown $uid, $gid, $dir if $changeowner;
1416      }
1417      unless (-d $dir2) {
1418        mkdir $dir2, 0777;
1419        chown $uid, $gid, $dir2 if $changeowner;
1420      }
1421      unless (-d $spamdir) {
1422      mkdir $spamdir, 0777;
1423        chown $uid, $gid, $spamdir if $changeowner;
1424      }
1425      #print STDERR "Storing spam to $spamdir/" . $this->{id} . "\n";
1426      #print STDERR "uid=$uid gid=$gid changeowner=$changeowner\n";
1427      umask $global::MS->{quar}->{fileumask};
1428      my @paths;
1429      @paths = $this->{store}->CopyEntireMessage($this, $spamdir, $this->{id},
1430                                                    $uid, $gid, $changeowner)
1431        unless $storealready{$spamdir};
1432      # Remember where we have stored the spam in an archive, so we never
1433      # archive infected messages
1434      #print STDERR "Added " . join(',', @paths) . " to spamarchive\n";
1435      push @{$this->{spamarchive}}, @paths unless $storealready{$spamdir};
1436      $spamdir =~ /^(.*)$/;
1437      $spamdir = $1;
1438      my $tempid = $this->{id};
1439      $tempid =~ /^(.*)$/;
1440      $tempid = $1;
1441      chown $uid, $gid, "$spamdir/" . $tempid; # Harmless if this fails # TAINT
1442    }
1443  }
1444  umask 0077; # Safety net
1445
1446  # If they want to strip the HTML tags out of it,
1447  # then just tag it as we can only do this later.
1448  $this->{needsstripping} = 1 if $actions{'striphtml'};
1449
1450  # If they want to encapsulate the message in an RFC822 part,
1451  # then tag it so we can do this later.
1452  $this->{needsencapsulating} = 1 if $actions{'attachment'};
1453}
1454
1455
1456# We want to send a message back to the sender saying that their junk
1457# email has been rejected by our site.
1458# Send a message back to the sender which has the local postmaster as
1459# the header sender, but <> as the envelope sender. This means it
1460# cannot bounce.
1461# Now have 3 different message file settings:
1462# 1. Is spam according to RBL's
1463# 2. Is spam according to SpamAssassin
1464# 3. Is spam according to both
1465sub HandleSpamBounce {
1466  my $this = shift;
1467
1468  my($from,$to,$subject,$date,$spamreport,$longspamreport,$hostname);
1469  my($emailmsg, $line, $messagefh, $filename, $localpostmaster, $id);
1470  my($postmastername);
1471
1472  $from = $this->{from};
1473
1474  # Don't ever send a message to "" or "<>"
1475  return if $from eq "" || $from eq "<>";
1476
1477  # Do we want to send the sender a warning at all?
1478  # If nosenderprecedence is set to non-blank and contains this
1479  # message precedence header, then just return.
1480  my(@preclist, $prec, $precedence, $header);
1481  @preclist = split(" ",
1482                  lc(MailScanner::Config::Value('nosenderprecedence', $this)));
1483  $precedence = "";
1484  foreach $header (@{$this->{headers}}) {
1485    $precedence = lc($1) if $header =~ /^precedence:\s+(\S+)/i;
1486  }
1487  if (@preclist && $precedence ne "") {
1488    foreach $prec (@preclist) {
1489      if ($precedence eq $prec) {
1490        MailScanner::Log::InfoLog("Skipping sender of precedence %s",
1491                                  $precedence);
1492        return;
1493      }
1494    }
1495  }
1496
1497  # Setup other variables they can use in the message template
1498  $id = $this->{id};
1499  #$to = join(', ', @{$this->{to}});
1500  $localpostmaster = MailScanner::Config::Value('localpostmaster', $this);
1501  $postmastername  = MailScanner::Config::LanguageValue($this, 'mailscanner');
1502  $hostname = MailScanner::Config::Value('hostname', $this);
1503  $subject = $this->{subject};
1504  $date = $this->{datestring}; # scalar localtime;
1505  $spamreport = $this->{spamreport};
1506  $longspamreport = $this->{salongreport};
1507  #print STDERR "longspamreport = \"$longspamreport\"\n";
1508  my($to, %tolist);
1509  foreach $to (@{$this->{to}}) {
1510    $tolist{$to} = 1;
1511  }
1512  $to = join(', ', sort keys %tolist);
1513
1514  # Delete everything in brackets after the SA report, if it exists
1515  $spamreport =~ s/(spamassassin)[^(]*\([^)]*\)/$1/i;
1516
1517  # Work out which of the 3 spam reports to send them.
1518  $filename = "";
1519  if ($this->{isrblspam} && !$this->{issaspam}) {
1520    $filename = MailScanner::Config::Value('senderrblspamreport', $this);
1521    MailScanner::Log::NoticeLog("Spam Actions: (RBL) Bounce to %s", $from)
1522      if MailScanner::Config::Value('logspam');
1523  } elsif ($this->{issaspam} && !$this->{isrblspam}) {
1524    $filename = MailScanner::Config::Value('sendersaspamreport', $this);
1525    MailScanner::Log::NoticeLog("Spam Actions: (SpamAssassin) Bounce to %s",
1526                              $from)
1527      if MailScanner::Config::Value('logspam');
1528  }
1529  if ($filename eq "") {
1530    $filename = MailScanner::Config::Value('senderbothspamreport', $this);
1531    MailScanner::Log::NoticeLog("Spam Actions: (RBL,SpamAssassin) Bounce to %s",
1532                              $from)
1533      if MailScanner::Config::Value('logspam');
1534  }
1535
1536  $messagefh = new FileHandle;
1537  $messagefh->open($filename)
1538    or MailScanner::Log::WarnLog("Cannot open message file %s, %s",
1539                                 $filename, $!);
1540  $emailmsg = "X-MailScanner-Bounce: yes\n";
1541  while(<$messagefh>) {
1542    chomp;
1543    s#"#\\"#g;
1544    s#@#\\@#g;
1545    # Boring untainting again...
1546    /(.*)/;
1547    # Bug fix by Martin Hepworth
1548    $line = eval "\"$1\"";
1549    $emailmsg .= MailScanner::Config::DoPercentVars($line) . "\n";
1550  }
1551  $messagefh->close();
1552
1553  if (MailScanner::Config::Value('bouncespamasattachment', $this)) {
1554    $this->HandleSpamBounceAttachment($emailmsg);
1555  } else {
1556    # Send the message to the spam sender, but ensure the envelope
1557    # sender address is "<>" so that it can't be bounced.
1558    $global::MS->{mta}->SendMessageString($this, $emailmsg, '<>')
1559      or MailScanner::Log::WarnLog("Could not send sender spam bounce, %s", $!);
1560  }
1561}
1562
1563
1564# Like encapsulating and sending a message to the recipient, take the
1565# passed text as the text and headers of an email message and attach
1566# the original message as an rfc/822 attachment.
1567sub HandleSpamBounceAttachment {
1568  my($this, $plaintext) = @_;
1569
1570  my $parser = MIME::Parser->new;
1571  my $explodeinto = $global::MS->{work}->{dir} . '/' . $this->{id};
1572  #print STDERR "Extracting spam bounce message into $explodeinto\n";
1573  my $filer  = MIME::Parser::MailScanner->new($explodeinto);
1574  $parser->filer($filer);
1575
1576  my $bounce = eval { $parser->parse_data(\$plaintext) };
1577  if (!$bounce) {
1578    MailScanner::Log::WarnLog("Cannot parse spam bounce report, %s", $!);
1579    return;
1580  }
1581  #print STDERR "Successfully parsed bounce report\n";
1582
1583  # Now make it multipart and push the report into a child
1584  $bounce->make_multipart('report');
1585
1586  # Now turn the original message into a string and attach it
1587  my(@original);
1588  #my $original = $this->{entity}->stringify;
1589  @original = $global::MS->{mta}->OriginalMsgHeaders($this, "\n");
1590  push(@original, "\n");
1591  $this->{store}->ReadBody(\@original, MailScanner::Config::Value(
1592                                          'maxspamassassinsize'));
1593
1594  $bounce->add_part(MIME::Entity->build(Type        => 'message/rfc822',
1595                                        Disposition => 'attachment',
1596                                        Top         => 0,
1597                                        'X-Mailer'  => undef,
1598                                        Data        => \@original));
1599
1600  # Prune all the dead branches off the tree
1601  PruneEntityTree($bounce);
1602  # Stringify the message and send it -- this could be VERY large!
1603  my $bouncetext = $bounce->stringify;
1604  #print STDERR "Spam bounce message is this:\n$bouncetext";
1605  if ($bouncetext) {
1606    $global::MS->{mta}->SendMessageString($this, $bouncetext, '<>')
1607      or MailScanner::Log::WarnLog(
1608           "Could not send sender spam bounce attachment, %s", $!);
1609  } else {
1610    MailScanner::Log::WarnLog(
1611      "Failed to create sender spam bounce attachment, %s", $!);
1612  }
1613}
1614
1615
1616# We want to send a message to the recipient saying that their spam
1617# mail has not been delivered.
1618# Send a message to the recipients which has the local postmaster as
1619# the sender.
1620sub HandleSpamNotify {
1621  my $this = shift;
1622
1623  my($from,$to,$subject,$date,$spamreport,$hostname,$day,$month,$year);
1624  my($emailmsg, $line, $messagefh, $filename, $localpostmaster, $id);
1625  my($postmastername);
1626
1627  $from = $this->{from};
1628
1629  # Don't ever send a message to "" or "<>"
1630  return if $from eq "" || $from eq "<>";
1631
1632  # Do we want to send the sender a warning at all?
1633  # If nosenderprecedence is set to non-blank and contains this
1634  # message precedence header, then just return.
1635  my(@preclist, $prec, $precedence, $header);
1636  @preclist = split(" ",
1637                  lc(MailScanner::Config::Value('nosenderprecedence', $this)));
1638  $precedence = "";
1639  foreach $header (@{$this->{headers}}) {
1640    $precedence = lc($1) if $header =~ /^precedence:\s+(\S+)/i;
1641  }
1642  if (@preclist && $precedence ne "") {
1643    foreach $prec (@preclist) {
1644      if ($precedence eq $prec) {
1645        MailScanner::Log::InfoLog("Skipping sender of precedence %s",
1646                                  $precedence);
1647        return;
1648      }
1649    }
1650  }
1651
1652  # Setup other variables they can use in the message template
1653  $id = $this->{id};
1654  $localpostmaster = MailScanner::Config::Value('localpostmaster', $this);
1655  $postmastername  = MailScanner::Config::LanguageValue($this, 'mailscanner');
1656  $hostname = MailScanner::Config::Value('hostname', $this);
1657  $subject = $this->{subject};
1658  $date = $this->{datestring}; # scalar localtime;
1659  $spamreport = $this->{spamreport};
1660  # And let them put the date number in there too
1661  #($day, $month, $year) = (localtime)[3,4,5];
1662  #$month++;
1663  #$year += 1900;
1664  #my $datenumber = sprintf("%04d%02d%02d", $year, $month, $day);
1665  my $datenumber = $this->{datenumber};
1666
1667
1668  my($to, %tolist);
1669  foreach $to (@{$this->{to}}) {
1670    $tolist{$to} = 1;
1671  }
1672  $to = join(', ', sort keys %tolist);
1673
1674  # Delete everything in brackets after the SA report, if it exists
1675  $spamreport =~ s/(spamassassin)[^(]*\([^)]*\)/$1/i;
1676
1677  # Work out which of the 3 spam reports to send them.
1678  $filename = MailScanner::Config::Value('recipientspamreport', $this);
1679  MailScanner::Log::NoticeLog("Spam Actions: Notify %s", $to)
1680    if MailScanner::Config::Value('logspam');
1681
1682  $messagefh = new FileHandle;
1683  $messagefh->open($filename)
1684    or MailScanner::Log::WarnLog("Cannot open message file %s, %s",
1685                                 $filename, $!);
1686  $emailmsg = "";
1687  while(<$messagefh>) {
1688    chomp;
1689    s#"#\\"#g;
1690    s#@#\\@#g;
1691    # Boring untainting again...
1692    /(.*)/;
1693    $line = eval "\"$1\"";
1694    $emailmsg .= MailScanner::Config::DoPercentVars($line) . "\n";
1695  }
1696  $messagefh->close();
1697
1698  # Send the message to the spam sender, but ensure the envelope
1699  # sender address is "<>" so that it can't be bounced.
1700  $global::MS->{mta}->SendMessageString($this, $emailmsg, $localpostmaster)
1701    or MailScanner::Log::WarnLog("Could not send sender spam notify, %s", $!);
1702}
1703
1704sub RejectMessage {
1705  my $this = shift;
1706
1707  my($from,$to,%tolist,$subject,$date,$hostname);
1708  my($emailmsg, $line, $messagefh, $filename, $localpostmaster, $id);
1709  my($postmastername);
1710
1711  $from = $this->{from};
1712
1713  # Don't ever send a message to "" or "<>"
1714  return if $from eq "" || $from eq "<>";
1715
1716  # Setup other variables they can use in the message template
1717  $id = $this->{id};
1718  $localpostmaster = MailScanner::Config::Value('localpostmaster', $this);
1719  $postmastername  = MailScanner::Config::LanguageValue($this, 'mailscanner');
1720  $hostname = MailScanner::Config::Value('hostname', $this);
1721  $subject = $this->{subject};
1722  $date = $this->{datestring}; # scalar localtime;
1723  foreach $to (@{$this->{to}}) {
1724    $tolist{$to} = 1;
1725  }
1726  $to = join(', ', sort keys %tolist);
1727
1728  # Work out which of the 3 spam reports to send them.
1729  $filename = MailScanner::Config::Value('rejectionreport', $this);
1730  MailScanner::Log::NoticeLog("Reject message %s from %s with report %s",
1731                            $id, $from, $filename);
1732  return if $filename eq "";
1733
1734  #print STDERR "Rejecting message $id with $filename\n";
1735  $messagefh = new FileHandle;
1736  $messagefh->open($filename)
1737    or MailScanner::Log::WarnLog("Cannot open message file %s, %s",
1738                                 $filename, $!);
1739  $emailmsg = "X-MailScanner-Rejected: yes\n";
1740
1741  while(<$messagefh>) {
1742    chomp;
1743    s#"#\\"#g;
1744    s#@#\\@#g;
1745    # Boring untainting again...
1746    /(.*)/;
1747    $line = eval "\"$1\"";
1748    $emailmsg .= MailScanner::Config::DoPercentVars($line) . "\n";
1749  }
1750  $messagefh->close();
1751
1752  #print STDERR "Rejection is:\n-----SNIP-----\n$emailmsg-----SNIP-----\n";
1753  # Send the message to the spam sender, but ensure the envelope
1754  # sender address is "<>" so that it can't be bounced.
1755  $global::MS->{mta}->SendMessageString($this, $emailmsg, '<>')
1756    or MailScanner::Log::WarnLog("Could not send rejection report for %s, %s",
1757                                 $id, $!);
1758  $this->{deleted} = 1;
1759  $this->{dontdeliver} = 1;
1760
1761}
1762
1763
1764# Like encapsulating and sending a message to the recipient, take the
1765
1766# Deliver a message that doesn't want to be touched at all in any way.
1767# Take an out queue dir.
1768sub DeliverUntouched {
1769  my $this = shift;
1770  my($OutQ) = @_;
1771
1772  return if $this->{deleted};
1773
1774  #my $OutQ = MailScanner::Config::Value('outqueuedir', $this);
1775  my $store = $this->{store};
1776
1777  # Link the queue data file from in to out
1778  $store->LinkData($OutQ);
1779
1780  # Add the headers onto the metadata in the message store
1781  $global::MS->{mta}->AddHeadersToQf($this);
1782
1783  # Don't add the same extra recipient twice
1784  my %alreadydone = ();
1785
1786  # Add the secret archive recipients
1787  my($extra, @extras);
1788  foreach $extra (@{$this->{archiveplaces}}) {
1789    # Email archive recipients include a '@'
1790    next if $extra =~ /^\//;
1791    next unless $extra =~ /@/;
1792    $extra =~ s/_HOUR_/$this->{hournumber}/g;
1793    $extra =~ s/_DATE_/$this->{datenumber}/g;
1794    $extra =~ s/_FROMUSER_/$this->{fromuser}/g;
1795    $extra =~ s/_FROMDOMAIN_/$this->{fromdomain}/g;
1796    if ($extra !~ /_TOUSER_|_TODOMAIN_/) {
1797      # It's a simple email address
1798      push @extras, $extra unless $alreadydone{$extra};
1799      $alreadydone{$extra} = 1;
1800    } else {
1801      # It contains a substitution so we need to loop through all the recips
1802      my $numrecips = scalar (@{$this->{to}});
1803      foreach my $recip (0..$numrecips-1) {
1804        my $extracopy = $extra;
1805        my $u = $this->{touser}[$recip];
1806        my $d = $this->{todomain}[$recip];
1807        $extracopy =~ s/_TOUSER_/$u/g;
1808        $extracopy =~ s/_TODOMAIN_/$d/g;
1809        push @extras, $extracopy unless $alreadydone{$extracopy};
1810        $alreadydone{$extracopy} = 1; # Dont add the same address twice
1811      }
1812    }
1813  }
1814  $global::MS->{mta}->AddRecipients($this, @extras) if @extras;
1815
1816  # Write the new qf file, delete originals and unlock the message
1817  $store->WriteHeader($this, $OutQ);
1818  unless ($this->{gonefromdisk}) {
1819    $store->DeleteUnlock();
1820    $this->{gonefromdisk} = 1;
1821  }
1822
1823  # Note this does not kick the MTA into life here any more
1824}
1825
1826# Deliver a message that doesn't need scanning at all
1827# Takes an out queue dir.
1828sub DeliverUnscanned {
1829  my $this = shift;
1830  my($OutQ) = @_;
1831
1832  return if $this->{deleted};
1833
1834  #my $OutQ = MailScanner::Config::Value('outqueuedir', $this);
1835  my $store = $this->{store};
1836
1837  # Link the queue data file from in to out
1838  $store->LinkData($OutQ);
1839
1840  # Add the headers onto the metadata in the message store
1841  $global::MS->{mta}->AddHeadersToQf($this);
1842
1843  # Remove duplicate subject: lines
1844  $global::MS->{mta}->UniqHeader($this, 'Subject:');
1845
1846  # Add the information/help X- header
1847  my $infoheader = MailScanner::Config::Value('infoheader', $this);
1848  if ($infoheader) {
1849    my $infovalue = MailScanner::Config::Value('infovalue', $this);
1850    $global::MS->{mta}->ReplaceHeader($this, $infoheader, $infovalue);
1851  }
1852  my $idheader = MailScanner::Config::Value('idheader', $this);
1853  if ($idheader) {
1854    $global::MS->{mta}->ReplaceHeader($this, $idheader, $this->{id});
1855  }
1856
1857  # Add the Unscanned X- header
1858  if (MailScanner::Config::Value('signunscannedmessages', $this)) {
1859    $global::MS->{mta}->AddMultipleHeader($this, 'mailheader',
1860                 MailScanner::Config::Value('unscannedheader', $this), ', ');
1861  }
1862
1863  # Remove any headers we don't want in the message
1864  my(@removeme, $remove);
1865  @removeme = split(/[,\s]+/, MailScanner::Config::Value('removeheaders', $this));
1866  foreach $remove (@removeme) {
1867    # Add a : if there isn't one already, it's needed for DeleteHeader()
1868    # 20090312 Done in DeleteHeader: $remove .= ':' unless $remove =~ /:$/;
1869    $global::MS->{mta}->DeleteHeader($this, $remove);
1870  }
1871
1872  # Leave old content-length: headers as we aren't changing body.
1873
1874  # Add IPv6 or IPv4 protocol version header
1875  my $ipverheader = MailScanner::Config::Value('ipverheader', $this);
1876  $global::MS->{mta}->ReplaceHeader($this, $ipverheader,
1877                             ( ($this->{clientip} =~ /:/)?'IPv6':'IPv4' ))
1878    if $ipverheader;
1879
1880  # Add the MCP headers if necessary
1881  $global::MS->{mta}->AddMultipleHeader($this, 'mcpheader',
1882                                        $this->{mcpreport}, ', ')
1883    if $this->{ismcp} ||
1884       MailScanner::Config::Value('includemcpheader', $this);
1885  # Add spam header if it's spam or they asked for it
1886  #$global::MS->{mta}->AddHeader($this,
1887  #                              MailScanner::Config::Value('spamheader',$this),
1888  #                              $this->{spamreport})
1889  # JKF 3/10/2005
1890  $global::MS->{mta}->AddMultipleHeader($this, 'spamheader',
1891                                        $this->{spamreport}, ', ')
1892    if MailScanner::Config::Value('includespamheader', $this) ||
1893       ($this->{spamreport} && $this->{isspam});
1894
1895  # Add the spam stars if they want that. Limit it to 60 characters to avoid
1896  # a potential denial-of-service attack.
1897  my($stars,$starcount,$scoretext,$minstars,$scorefmt);
1898  $starcount = int($this->{sascore}) + 0;
1899  $starcount = 0 if $this->{spamwhitelisted}; # 0 stars if white-listed
1900  $scorefmt = MailScanner::Config::Value('scoreformat', $this);
1901  $scorefmt = '%d' if $scorefmt eq '';
1902  $scoretext = sprintf($scorefmt, $this->{sascore}+0);
1903  $minstars = MailScanner::Config::Value('minstars', $this);
1904  $starcount = $minstars if $this->{isrblspam} && $minstars &&
1905                            $starcount<$minstars;
1906  if (MailScanner::Config::Value('spamscorenotstars', $this)) {
1907    $stars = $scoretext; # int($starcount);
1908  } else {
1909    $starcount = 60 if $starcount>60;
1910    $stars = MailScanner::Config::Value('spamstarscharacter') x $starcount;
1911  }
1912  if (MailScanner::Config::Value('spamstars', $this) =~ /1/ && $starcount>0) {
1913    $global::MS->{mta}->AddMultipleHeader($this, 'spamstarsheader',
1914                                          $stars, ', ');
1915  }
1916
1917  # Add the Envelope to and from headers
1918  AddFromAndTo($this);
1919
1920  # Repair the subject line
1921  $global::MS->{mta}->ReplaceHeader($this, 'Subject:', $this->{safesubject})
1922    if $this->{subjectwasunsafe};
1923
1924  # Modify the subject line for Disarming
1925  my $disarmtag = MailScanner::Config::Value('disarmsubjecttext',$this);
1926  #if ($this->{messagedisarmed} &&
1927  #    MailScanner::Config::Value('disarmprependsubject',$this) =~ /1/ &&
1928  #    !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $disarmtag)) {
1929  #  $global::MS->{mta}->PrependHeader($this, 'Subject:', $disarmtag, ' ');
1930  #}
1931  if ($this->{messagedisarmed}) {
1932    #print STDERR "Message has been disarmed at 1346.\n";
1933    my $where = MailScanner::Config::Value('disarmmodifysubject',$this);
1934    if ($where =~ /end/ && !$global::MS->{mta}->TextEndsHeader($this, 'Subject:', $disarmtag)) {
1935      $global::MS->{mta}->AppendHeader($this, 'Subject:', $disarmtag, ' ');
1936    } elsif ($where =~ /start|1/ && !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $disarmtag)) {
1937      $global::MS->{mta}->PrependHeader($this, 'Subject:', $disarmtag, ' ');
1938    }
1939  }
1940
1941
1942  # Modify the subject line for spam
1943  # if it's spam AND they want to modify the subject line AND it's not
1944  # already been modified by another of your MailScanners.
1945  my $spamtag = MailScanner::Config::Value('spamsubjecttext', $this);
1946  $spamtag =~ s/_SCORE_/$scoretext/;
1947  $spamtag =~ s/_STARS_/$stars/i;
1948  #if ($this->{isspam} && !$this->{ishigh} &&
1949  #    MailScanner::Config::Value('spamprependsubject',$this) &&
1950  #    !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $spamtag)) {
1951  #  $global::MS->{mta}->PrependHeader($this, 'Subject:', $spamtag, ' ');
1952  #}
1953  if ($this->{isspam} && !$this->{ishigh}) {
1954    my $where = MailScanner::Config::Value('spammodifysubject',$this);
1955    if ($where =~ /end/ && !$global::MS->{mta}->TextEndsHeader($this, 'Subject:', $spamtag)) {
1956      $global::MS->{mta}->AppendHeader($this, 'Subject:', $spamtag, ' ');
1957    } elsif ($where =~ /start|1/ && !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $spamtag)) {
1958      $global::MS->{mta}->PrependHeader($this, 'Subject:', $spamtag, ' ');
1959    }
1960  }
1961
1962  # If it is high-scoring spam, then add a different bit of text
1963  $spamtag = MailScanner::Config::Value('highspamsubjecttext', $this);
1964  $spamtag =~ s/_SCORE_/$scoretext/;
1965  $spamtag =~ s/_STARS_/$stars/i;
1966  #if ($this->{isspam} && $this->{ishigh} &&
1967  #    MailScanner::Config::Value('highspamprependsubject',$this) &&
1968  #    !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $spamtag)) {
1969  #  $global::MS->{mta}->PrependHeader($this, 'Subject:', $spamtag, ' ');
1970  #}
1971  if ($this->{isspam} && $this->{ishigh}) {
1972    my $where = MailScanner::Config::Value('highspammodifysubject',$this);
1973    if ($where =~ /end/ && !$global::MS->{mta}->TextEndsHeader($this, 'Subject:', $spamtag)) {
1974      $global::MS->{mta}->AppendHeader($this, 'Subject:', $spamtag, ' ');
1975    } elsif ($where =~ /start|1/ && !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $spamtag)) {
1976      $global::MS->{mta}->PrependHeader($this, 'Subject:', $spamtag, ' ');
1977    }
1978  }
1979
1980
1981  # Modify the subject line for MCP
1982  # if it's MCP AND they want to modify the subject line AND it's not
1983  # already been modified by another of your MailScanners.
1984  $starcount = int($this->{mcpsascore}) + 0;
1985  $starcount = 0 if $this->{mcpwhitelisted}; # 0 stars if white-listed
1986  $scorefmt = MailScanner::Config::Value('scoreformat', $this);
1987  $scorefmt = '%d' if $scorefmt eq '';
1988  $scoretext = sprintf($scorefmt, $this->{mcpsascore}+0);
1989  my $mcptag = MailScanner::Config::Value('mcpsubjecttext', $this);
1990  $mcptag =~ s/_SCORE_/$scoretext/;
1991  $mcptag =~ s/_STARS_/$stars/i;
1992  #if ($this->{ismcp} && !$this->{ishighmcp} &&
1993  #    MailScanner::Config::Value('mcpprependsubject',$this) &&
1994  #    !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $mcptag)) {
1995  #  $global::MS->{mta}->PrependHeader($this, 'Subject:', $mcptag, ' ');
1996  #}
1997  if ($this->{ismcp} && !$this->{ishighmcp}) {
1998    my $where = MailScanner::Config::Value('mcpmodifysubject',$this);
1999    if ($where =~ /end/ && !$global::MS->{mta}->TextEndsHeader($this, 'Subject:', $mcptag)) {
2000      $global::MS->{mta}->AppendHeader($this, 'Subject:', $mcptag, ' ');
2001    } elsif ($where =~ /start|1/ && !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $mcptag)) {
2002      $global::MS->{mta}->PrependHeader($this, 'Subject:', $mcptag, ' ');
2003    }
2004  }
2005
2006
2007  # If it is high-scoring MCP, then add a different bit of text
2008  $mcptag = MailScanner::Config::Value('highmcpsubjecttext', $this);
2009  $mcptag =~ s/_SCORE_/$scoretext/;
2010  $mcptag =~ s/_STARS_/$stars/i;
2011  #if ($this->{ismcp} && $this->{ishighmcp} &&
2012  #    MailScanner::Config::Value('highmcpprependsubject',$this) &&
2013  #    !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $mcptag)) {
2014  #  $global::MS->{mta}->PrependHeader($this, 'Subject:', $mcptag, ' ');
2015  #}
2016  if ($this->{ismcp} && $this->{ishighmcp}) {
2017    my $where = MailScanner::Config::Value('highmcpmodifysubject',$this);
2018    if ($where =~ /end/ && !$global::MS->{mta}->TextEndsHeader($this, 'Subject:', $mcptag)) {
2019      $global::MS->{mta}->AppendHeader($this, 'Subject:', $mcptag, ' ');
2020    } elsif ($where =~ /start|1/ && !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $mcptag)) {
2021      $global::MS->{mta}->PrependHeader($this, 'Subject:', $mcptag, ' ');
2022    }
2023  }
2024
2025
2026  # Add the extra headers they want for MCP and spam messages
2027  my(@extraheaders, $extraheader);
2028  my($key, $value);
2029  @extraheaders = @{$this->{extramcpheaders}} if $this->{extramcpheaders};
2030  push @extraheaders, @{$this->{extraspamheaders}} if $this->{extraspamheaders};
2031  foreach $extraheader (@extraheaders) {
2032    next unless $extraheader =~ /:/;
2033    ($key, $value) = split(/:\s*/, $extraheader, 2);
2034    $key =~ s/\s+/-/g; # Replace spaces in header name with dashes
2035
2036    # Replace _TO_ in the header value with a comma-separated list of recips
2037    if ($value =~ /_TO_/) {
2038      # Get the actual text for the header value
2039      my($recipient, %tolist);
2040      foreach $recipient (@{$this->{to}}) {
2041        $tolist{$recipient} = 1;
2042      }
2043      $recipient = join(', ', sort keys %tolist);
2044      # Now reflow the To list in case it is very long
2045      $recipient = $this->ReflowHeader($key . ':', $recipient);
2046      $value =~ s/_TO_/$recipient/g;
2047    }
2048
2049    $global::MS->{mta}->AddMultipleHeaderName($this, $key . ':', $value, ', ');
2050  }
2051
2052  # Add watermark header if chosen to do so.
2053  if ($this->{addmshmac}) {
2054    my $mshmacheader = MailScanner::Config::Value('mshmacheader', $this);
2055    my $mshmac = $this->{mshmac};
2056
2057    $global::MS->{mta}->ReplaceHeader($this, $mshmacheader, $mshmac);
2058  }
2059
2060  # Add the secret archive recipients
2061  my($extra, @extras, %alreadydone);
2062  foreach $extra (@{$this->{archiveplaces}}) {
2063    # Email archive recipients include a '@'
2064    next if $extra =~ /^\//;
2065    next unless $extra =~ /@/;
2066    $extra =~ s/_HOUR_/$this->{hournumber}/g;
2067    $extra =~ s/_DATE_/$this->{datenumber}/g;
2068    $extra =~ s/_FROMUSER_/$this->{fromuser}/g;
2069    $extra =~ s/_FROMDOMAIN_/$this->{fromdomain}/g;
2070    if ($extra !~ /_TOUSER_|_TODOMAIN_/) {
2071      # It's a simple email address
2072      push @extras, $extra unless $alreadydone{$extra};
2073      $alreadydone{$extra} = 1;
2074    } else {
2075      # It contains a substitution so we need to loop through all the recips
2076      my $numrecips = scalar (@{$this->{to}});
2077      foreach my $recip (0..$numrecips-1) {
2078        my $extracopy = $extra;
2079        my $u = $this->{touser}[$recip];
2080        my $d = $this->{todomain}[$recip];
2081        $extracopy =~ s/_TOUSER_/$u/g;
2082        $extracopy =~ s/_TODOMAIN_/$d/g;
2083        push @extras, $extracopy unless $alreadydone{$extracopy};
2084        $alreadydone{$extracopy} = 1; # Dont add the same address twice
2085      }
2086    }
2087  }
2088  $global::MS->{mta}->AddRecipients($this, @extras) if @extras;
2089
2090  # Write the new qf file, delete originals and unlock the message
2091  $store->WriteHeader($this, $OutQ);
2092  unless ($this->{gonefromdisk}) {
2093    $store->DeleteUnlock();
2094    $this->{gonefromdisk} = 1;
2095  }
2096
2097  # Note this does not kick the MTA into life here any more
2098}
2099
2100# Add the X-Envelope-From and X-Envelope-To headers
2101sub AddFromAndTo {
2102  my $this = shift;
2103
2104  my($to, %tolist, $from, $envtoheader);
2105
2106  # Do they all want the From header
2107  if (MailScanner::Config::Value('addenvfrom', $this) !~ /0/) {
2108    $from = $this->{from};
2109    $global::MS->{mta}->ReplaceHeader($this,
2110                        MailScanner::Config::Value('envfromheader', $this),
2111                        $from);
2112  }
2113
2114  # Do they all want the To header
2115  if (MailScanner::Config::Value('addenvto', $this) =~ /^[1\s]+$/) {
2116    # Get the actual text for the header value
2117    foreach $to (@{$this->{to}}) {
2118      $tolist{$to} = 1;
2119    }
2120    $to = join(', ', sort keys %tolist);
2121
2122    $envtoheader = MailScanner::Config::Value('envtoheader', $this);
2123    # Now reflow the To list in case it is very long
2124    $to = $this->ReflowHeader($envtoheader, $to);
2125
2126    $global::MS->{mta}->ReplaceHeader($this, $envtoheader, $to);
2127  }
2128}
2129
2130# Replace the attachments of the message with a zip archive
2131# containing them all.
2132sub ZipAttachments {
2133  my $this = shift;
2134
2135  return if $this->{deleted};
2136
2137  return unless MailScanner::Config::Value('zipattachments', $this) =~ /1/;
2138
2139  my $workarea = $global::MS->{work};
2140  my $explodeinto = $workarea->{dir} . "/" . $this->{id};
2141
2142  #print STDERR "Processing files in $explodeinto\n";
2143  chdir $explodeinto;
2144  my $dir = new DirHandle $explodeinto;
2145  unless ($dir) {
2146    MailScanner::Log::WarnLog("Error: could not open message dir %s", $explodeinto);
2147    return;
2148  }
2149
2150  # Build a regexp of the filename suffixes to ignore
2151  my($suffix,$suffixes,@suffixes,$regexp,@escaped);
2152  $suffixes = MailScanner::Config::Value('attachzipignore', $this);
2153  @suffixes = split " ", $suffixes;
2154  foreach $suffix (@suffixes) {
2155    push @escaped, quotemeta($suffix) . '$';
2156  }
2157  $regexp = join('|', @escaped);
2158  #print STDERR "Regexp is \"$regexp\"\n";
2159
2160  # Build a list of attachment entities
2161  my($file,@files,$entity,@entitylist,@entitiestodelete,$unsafefile);
2162  $this->ListLeafEntities($this->{entity}, \@entitylist);
2163
2164  my $totalsize = 0; #  Track total size of all attachments added to zip
2165  foreach $entity (@entitylist) {
2166    $file = $this->{entity2safefile}{$entity};
2167    next if $file eq ''; # Has this attachment been removed from the message?
2168    #print STDERR "Looking for $file\n";
2169    next unless -f "$explodeinto/$file";
2170    #print STDERR "Possibly adding file $file\n";
2171    #print STDERR "Nasty filename is " . $this->{entity2file}{$entity} . "\n";
2172    #$entity = $this->{file2entity}{$this->{safefile2file}{$file}};
2173    #print STDERR "Entity is $entity\n";
2174    next unless $entity;
2175    # Don't add the file if it's the winmail.dat file
2176    unless ($entity eq $this->{tnefentity} && $this->{tnefentity}) {
2177      # Add the file if it is an attachment, not an inline file
2178      if ($entity->head->mime_attr("content-disposition") =~ /attachment/i) {
2179        unless ($file =~ /$regexp/i) {
2180          push @files, $file;
2181          push @entitiestodelete, $entity;
2182          $totalsize += -s "$explodeinto/$file";
2183          #print STDERR "Added $file to attachment list\n";
2184        }
2185      }
2186    }
2187  }
2188
2189  # If no files in the archive, don't create it.
2190  return unless @files;
2191
2192  # If the total file sizes are too small, don't zip them
2193  return if $totalsize < MailScanner::Config::Value('attachzipminsize', $this);
2194
2195  # Find the name of the new zip file, if there is one
2196  my $newzipname = MailScanner::Config::Value('attachzipname', $this);
2197  #print STDERR "New zip name = $newzipname\n";
2198  return unless $newzipname;
2199
2200  # Create a new zip archive
2201  my $zip = Archive::Zip->new();
2202  foreach $file (@files) {
2203    #JKF 20080331 $zip->addFile("$explodeinto/$file", $file);
2204    $unsafefile = $this->{safefile2file}{$file};
2205    #print STDERR "Adding $file as $unsafefile\n";
2206    MailScanner::Log::InfoLog("Adding zip member name \"%s\"", $file);
2207    $zip->addFile("$explodeinto/$file", $unsafefile);
2208  }
2209  # The new zip file is a normal attachment.
2210  my $safezipname = $this->MakeNameSafe('n'.$newzipname, $explodeinto);
2211  #print STDERR "Writing to zip $safezipname\n";
2212  my $result = $zip->writeToFileNamed($explodeinto . '/' . $safezipname);
2213  unless($result == AZ_OK) {
2214    #print STDERR "Error: Zip file could not be created!\n";
2215    MailScanner::Log::WarnLog("Zip file %s for message %s could not be created",
2216                              $safezipname, $this->{id});
2217    return;
2218  }
2219
2220  # Add the new zipfile entity
2221  $entity = $this->{entity};
2222  $entity->make_multipart;
2223  my $newentity = MIME::Entity->build(Path => "$explodeinto/$safezipname",
2224                                      Top => 0,
2225                                      Type => "application/zip",
2226                                      Encoding => "base64",
2227                                      Filename => $newzipname,
2228                                      Disposition => "attachment");
2229  $entity->add_part($newentity);
2230  $this->{bodymodified} = 1;
2231
2232  # Create all the Helpers for the new attachment
2233  $this->{entity2file}{$newentity} = $newzipname;
2234  $this->{entity2safefile}{$newentity} = $safezipname;
2235  $this->{entity2parent}{$newentity} = 0;
2236  $this->{file2entity}{$newzipname} = $newentity;
2237  $this->{name2entity}{scalar($newentity)} = $newentity;
2238  $this->{file2safefile}{$newzipname} = $safezipname;
2239  $this->{safefile2file}{$safezipname} = $newzipname;
2240
2241  # Delete the old attachments' entities
2242  my($attachfile, $attachentity);
2243  foreach $entity (@entitiestodelete) {
2244    $attachfile = $this->{entity2safefile}{$entity};
2245    #$attachentity = $this->{file2entity}{$attachfile};
2246    $this->DeleteEntity($entity, $this->{entity}, $this->{tnefentity});
2247    # Thought this was right: $this->DeleteEntity($entity, $this->{tnefentity});
2248    # And the files themselves
2249    unlink("$explodeinto/$attachfile");
2250    #print STDERR "Deleted file $attachfile\n";
2251  }
2252
2253}
2254
2255# Explode a message into its MIME structure and attachments.
2256# Pass in the workarea where it should go.
2257sub Explode {
2258  my $this = shift;
2259
2260  # $handle is Sendmail only
2261  my($entity, $pipe, $handle, $pid, $workarea, $mailscannername);
2262
2263  return if $this->{deleted};
2264
2265  # Get the translation of MailScanner, we use it a lot
2266  $mailscannername = MailScanner::Config::LanguageValue($this, 'mailscanner');
2267
2268  # Set up something so that the hash exists
2269  $this->{file2parent}{""} = "";
2270
2271  # df file is already locked
2272  $workarea = $global::MS->{work};
2273  my $explodeinto = $workarea->{dir} . "/" . $this->{id};
2274  #print STDERR "Going to explode message " . $this->{id} .
2275  #             " into $explodeinto\n";
2276
2277  # Setup everything for the MIME parser
2278  my $parser = MIME::Parser->new;
2279  my $filer  = MIME::Parser::MailScanner->new($explodeinto);
2280
2281  # Over-ride the default default character set handler so it does it
2282  # much better than the MIME-tools default handling.
2283  MIME::WordDecoder->default->handler('*' => \&WordDecoderKeep7Bit);
2284
2285  #print STDERR "Exploding message " . $this->{id} . " into " .
2286  #             $explodeinto . "\n";
2287  $parser->filer($filer);
2288  $parser->extract_uuencode(1); # uue is off by default
2289  $parser->output_to_core('NONE'); # everything into files
2290  # 101318 Bug workaround in MIME::Parser not writing UTF8 encoded MIME Parts
2291  # MIME attachments not parsing with certain unicode characters
2292  # See https://github.com/MailScanner/v5/issues/233
2293  # Observed on MIME:Tools 5.505/Resolved in MIME::Tools 5.509
2294  # $parser->decode_headers(1);
2295
2296  # The whole parsing thing is totally different for sendmail & Exim for speed.
2297  # Many thanks for those who know themselves for this great improvement!
2298  #20090327 if (MailScanner::Config::Value('mta') =~ /sendmail|exim|postfix|zmailer/i) {
2299
2300    #
2301    # This is for sendmail and Exim systems
2302    # -- CORRECTION: Now *all* systems. The "else" block is never used.
2303    #
2304
2305    $handle = IO::File->new_tmpfile or die "Your /tmp needs to be set to \"chmod 1777 /tmp\"";
2306    binmode($handle);
2307    $this->{store}->ReadMessageHandle($this, $handle) or return;
2308
2309    ## Do the actual parsing
2310    my $maxparts = MailScanner::Config::Value('maxparts', $this) || 200;
2311    MIME::Entity::ResetMailScannerCounter($maxparts);
2312
2313    # Inform MIME::Parser about our maximum
2314    $parser->max_parts($maxparts * 3);
2315    $entity = eval { $parser->parse($handle) };
2316
2317    # close and delete tmpfile
2318    close($handle);
2319
2320    if (!$entity && !MIME::Entity::MailScannerCounter()>=$maxparts) {
2321      unless ($this->{dpath}) {
2322        # It probably ran out of disk space, drop this message from the batch
2323        MailScanner::Log::WarnLog("Failed to create message structures for %s" .
2324          ", dropping it from the batch", $this->{id});
2325        my @toclear = ( $this->{id} );
2326        $workarea->ClearIds(\@toclear); # Delete attachments we might have made
2327        $this->DropFromBatch();
2328        return;
2329      }
2330
2331      MailScanner::Log::WarnLog("Cannot parse " . $this->{headerspath} . " and " .
2332                   $this->{dpath} . ", $@");
2333      $this->{entity} = $entity; # In case it failed due to too many attachments
2334      $this->{cantparse} = 1;
2335      $this->{otherinfected} = 1;
2336      return;
2337    }
2338
2339    # Too many attachments in the message?
2340    if ($maxparts>0 && MIME::Entity::MailScannerCounter()>=$maxparts) {
2341      #print STDERR "Found an error!\n";
2342      #Not with sendmail: $pipe->close();
2343      #Not with sendmail: kill 9, $pid; # Make sure we are reaping a dead'un
2344      #Not with sendmail: waitpid $pid, 0;
2345      MailScanner::Log::WarnLog("Too many attachments (%d) in %s",
2346                              MIME::Entity::MailScannerCounter(), $this->{id});
2347      $this->{entity} = $entity; # In case it failed due to too many attachments
2348      $this->{toomanyattach} = 1;
2349      $this->{otherinfected} = 1;
2350      return;
2351    }
2352
2353    # Closing the pipe this way will reap the child, apparently!
2354    #Not with sendmail: $pipe->close;
2355    #Not with sendmail: kill 9, $pid; # Make sure we are reaping a dead'un
2356    $this->{entity} = $entity;
2357
2358  # 20090327 } else {
2359
2360  # 20090327   #
2361  # 20090327   # This is for non-sendmail/Postfix systems
2362  # 20090327   #
2363
2364  # 20090327   # Create the message stream
2365  # 20090327   # NOTE: This still uses the real path of the message body file.
2366  # 20090327   ($pipe,$pid) = $this->{store}->ReadMessagePipe($this) or return;
2367
2368  # 20090327   # Do the actual parsing
2369  # 20090327   my $maxparts = MailScanner::Config::Value('maxparts', $this) || 200;
2370  # 20090327   MIME::Entity::ResetMailScannerCounter($maxparts);
2371
2372  # 20090327   # Inform MIME::Parser about our maximum
2373  # 20090327   $parser->max_parts($maxparts * 3);
2374
2375  # 20090327   $entity = eval { $parser->parse($pipe) };
2376
2377  # 20090327   if (!$entity && !MIME::Entity::MailScannerCounter()>=$maxparts) {
2378  # 20090327     #print STDERR "Found an error!\n";
2379  # 20090327     $pipe->close() if $pipe; # Don't close a pipe that failed to exist
2380  # 20090327     waitpid $pid, 0;
2381  # 20090327     unless ($this->{dpath}) {
2382  # 20090327       # It probably ran out of disk space, drop this message from the batch
2383  # 20090327       MailScanner::Log::WarnLog("Failed to create message structures for %s" .
2384  # 20090327         ", dropping it from the batch", $this->{id});
2385  # 20090327       my @toclear = ( $this->{id} );
2386  # 20090327       $workarea->ClearIds(\@toclear); # Delete attachments we might have made
2387  # 20090327       $this->DropFromBatch();
2388  # 20090327       return;
2389  # 20090327     }
2390
2391  # 20090327      MailScanner::Log::WarnLog("Cannot parse " . $this->{headerspath} .
2392  # 20090327                                " and " .  $this->{dpath} . ", $@");
2393  # 20090327      $this->{entity} = $entity;# In case it failed due to too many attachments
2394  # 20090327      $this->{cantparse} = 1;
2395  # 20090327      $this->{otherinfected} = 1;
2396  # 20090327      return;
2397  # 20090327   }
2398  # 20090327   # Too many attachments in the message?
2399  # 20090327   if ($maxparts>0 && MIME::Entity::MailScannerCounter()>=$maxparts) {
2400  # 20090327     #print STDERR "Found an error!\n";
2401  # 20090327     $pipe->close();
2402  # 20090327     kill 9, $pid; # Make sure we are reaping a dead'un
2403  # 20090327     waitpid $pid, 0;
2404  # 20090327     MailScanner::Log::WarnLog("Too many attachments (%d) in %s",
2405  # 20090327                             MIME::Entity::MailScannerCounter(), $this->{id});
2406  # 20090327     $this->{entity} = $entity; # In case it failed due to too many attachments
2407  # 20090327     $this->{toomanyattach} = 1;
2408  # 20090327     $this->{otherinfected} = 1;
2409  # 20090327     return;
2410  # 20090327   }
2411
2412  # 20090327   # Closing the pipe this way will reap the child, apparently!
2413  # 20090327   $pipe->close;
2414  # 20090327   kill 9, $pid; # Make sure we are reaping a dead'un
2415  # 20090327   $this->{entity} = $entity;
2416  # 20090327 }
2417
2418  # Now handle TNEF files. They should be the only attachment to the message.
2419  ($this->{tnefentity},$this->{tnefname}) =
2420    MailScanner::TNEF::FindTNEFFile($entity)
2421    if MailScanner::Config::Value('expandtnef');
2422
2423  # Look for winmail.dat files in each attachment directory $path.
2424  # When we find one explode it into its files and store the root MIME
2425  # entity into $IsTNEF{$id} so we can handle it separately later.
2426  # Pattern to match is actually winmail(digits).dat(digits) as that copes
2427  # with forwarded or bounced messages from mail packages that download
2428  # all attachments into 1 directory, adding numbers to their filenames.
2429
2430  # Only delete original tnef if no-one wants to not replace it nor use it
2431  my $DeleteTNEF = 0;
2432  $DeleteTNEF = 1
2433    if MailScanner::Config::Value('replacetnef', $this) !~ /[01]/;
2434    #print STDERR "ReplaceTNEF = " . MailScanner::Config::Value('replacetnef', $this) . "\n";
2435
2436  if (MailScanner::Config::Value('tnefexpander') && $this->{tnefentity}) {
2437    my($tneffile, @tneffiles);
2438    # Find all the TNEF files called winmail.dat
2439    my $outputdir = new DirHandle;
2440    $outputdir->open($explodeinto)
2441      or MailScanner::Log::WarnLog("Failed to open dir " . $explodeinto .
2442                      " while scanning for TNEF files, %s", $!);
2443    # This regexp must *not* be anchored to the start of the filename as
2444    # there should be a prefix type indicator character in the filename.
2445    @tneffiles = map { /(.winmail\d*\.dat\d*)/i } $outputdir->read();
2446    $outputdir->close();
2447
2448    #print STDERR "TNEF Entity is " . $this->{tnefentity} . "\n";
2449    #print STDERR "TNEF files are " . join(',',@tneffiles) . "\n";
2450    #print STDERR "Tree is \n" . $this->{entity}->dump_skeleton;
2451
2452    foreach $tneffile (@tneffiles) {
2453      my $result;
2454      # Remove the type indicator character for logging.
2455      my $tnefnotype = substr($tneffile,1);
2456      MailScanner::Log::InfoLog("Expanding TNEF archive at %s/%s",
2457                                $explodeinto, $tnefnotype);
2458      $result = MailScanner::TNEF::Decoder($explodeinto, $tneffile, $this);
2459      if ($result) {
2460        # If they want to replace the TNEF rather than add to it,
2461        # then delete the original winmail.dat-style attachment
2462        # and remove the flag saying it is a TNEF message at all.
2463        #print STDERR "***** Found TNEF Attachments = " . $this->{foundtnefattachments} . "\n";
2464        #print STDERR "*** DeleteTNEF = $DeleteTNEF and foundtnefatt = " . $this->{foundtnefattachments} . "\n";
2465        if ($DeleteTNEF && $this->{foundtnefattachments}) {
2466          $this->DeleteEntity($this->{tnefentity}, $this->{entity},
2467                              $this->{tnefentity});
2468          unlink "$explodeinto/$tneffile";
2469          #print STDERR "*** Deleted $explodeinto/$tneffile\n";
2470          delete $this->{tnefentity};
2471          MailScanner::Log::InfoLog("Message %s has had TNEF %s removed",
2472                                    $this->{id}, $tnefnotype);
2473        }
2474      } else {
2475        MailScanner::Log::WarnLog("Corrupt TNEF %s that cannot be " .
2476                                  "analysed in message %s", $tnefnotype,
2477                                  $this->{id});
2478        $this->{badtnef} = 1;
2479        $this->{otherinfected} = 1;
2480      }
2481    }
2482  }
2483
2484  $explodeinto =~ /^(.*)$/;
2485  $explodeinto = $1;
2486  unless(chdir $explodeinto) { # TAINT
2487    MailScanner::Log::WarnLog("Could not chdir to %s just before unpacking " .
2488                              "extra message parts", $explodeinto);
2489    return;
2490  }
2491
2492  # -------------------------------
2493  # If the MIME boundary exists and is "" then remove the entire message.
2494  # The top level must be multipart/mixed
2495  if (defined($entity) && $entity->head) {
2496    if ($entity->is_multipart || $entity->head->mime_type =~ /^multipart/i) {
2497      my $boundary = $entity->head->multipart_boundary;
2498      #print STDERR "Boundary is \"$boundary\"\n";
2499      if ($boundary eq "" || $boundary eq "\"\"" || $boundary =~ /\s$/) {
2500        my $cantparse = MailScanner::Config::LanguageValue($this,
2501                                                           'cantanalyze');
2502        $this->{allreports}{""} .= "$mailscannername: $cantparse\n";
2503        $this->{alltypes}{""} .= 'c';
2504        $this->{otherinfected}++;
2505        #print STDERR "Found error\n";
2506      }
2507    }
2508  }
2509
2510
2511  # -------------------------------
2512
2513  # Now try to extract messages from text files as they might be things
2514  # we didn't manage to extract first time around.
2515  # And try to expand .tar.gz .tar.z .tgz .zip files.
2516  # We will then scan everything from inside them.
2517  my($allowpasswords, $couldnotreadmesg, $passwordedmesg, $toodeepmesg);
2518  my($insistpasswords, $nonpasswordedmesg);
2519  $allowpasswords = MailScanner::Config::Value('allowpasszips', $this);
2520  $allowpasswords = ($allowpasswords !~ /0/)?1:0;
2521  $insistpasswords = MailScanner::Config::Value('insistpasszips', $this);
2522  $insistpasswords = ($insistpasswords !~ /0/)?1:0;
2523  $couldnotreadmesg = MailScanner::Config::LanguageValue($this,
2524                                                         'unreadablearchive');
2525  $passwordedmesg = MailScanner::Config::LanguageValue($this,
2526                                                       'passwordedarchive');
2527  $nonpasswordedmesg = MailScanner::Config::LanguageValue($this,
2528                                                       'nonpasswordedarchive');
2529  $toodeepmesg    = MailScanner::Config::LanguageValue($this,
2530                                                       'archivetoodeep');
2531  #print STDERR "About to unpack parts and archives\n";
2532  $this->ExplodePartAndArchives($explodeinto,
2533                                MailScanner::Config::Value('maxzipdepth', $this),
2534                                $allowpasswords, $insistpasswords,
2535                                $couldnotreadmesg, $passwordedmesg,
2536                                $nonpasswordedmesg,  $toodeepmesg,
2537                                $mailscannername);
2538
2539  # Now unpack all the *.doc Word files if they want me to
2540  if (MailScanner::Config::Value('addtextofdoc', $this) =~ /1/) {
2541    # Find all the *.doc files in the attachments we now have
2542    my %nullhash = ();
2543    my $docfiles = MailScanner::Antiword::FindDocFiles($this->{entity},$this->{entity},\%nullhash);
2544
2545    # For each one, create the *.txt file using antiword.
2546    #foreach my $docfile (@docfiles) {
2547    my($docfile,$parent);
2548    while(($docfile,$parent) = each %$docfiles) {
2549    #foreach my $docfile (@docfiles) {
2550      #print STDERR "Antiwording $docfile,$parent into $explodeinto\n";
2551      #MailScanner::Antiword::RunAntiword($explodeinto, $docfile, $this);
2552      MailScanner::Antiword::RunAntiword($explodeinto,$docfile,$parent,$this);
2553    }
2554  }
2555
2556  ## Now unpack all the *.html HTML files if they want me to
2557  #if (MailScanner::Config::Value('addtextofhtml', $this) =~ /1/) {
2558  #  # Find all the *.doc files in the attachments we now have
2559  #  my %nullhash = ();
2560  #  my $docfiles = MailScanner::LinksDump::FindHTMLFiles($this->{entity},$this->{entity},\%nullhash);
2561  #
2562  #  # For each one, create the *.txt file using Links/eLinks/Lynx.
2563  #  #foreach my $docfile (@docfiles) {
2564  #  my($docfile,$parent);
2565  #  while(($docfile,$parent) = each %$docfiles) {
2566  #  #foreach my $docfile (@docfiles) {
2567  #    #print STDERR "Antiwording $docfile,$parent into $explodeinto\n";
2568  #    #MailScanner::Antiword::RunAntiword($explodeinto, $docfile, $this);
2569  #    MailScanner::LinksDump::RunLinks($explodeinto,$docfile,$parent,$this);
2570  #  }
2571  #}
2572
2573
2574  # Unpack zip and other archives if they are very small and they want me to
2575  if (MailScanner::Config::Value('unzipmaxmembers', $this) > 0) {
2576    MailScanner::Unzip::UnpackZipMembers($this, $explodeinto);
2577  }
2578
2579  # Check we haven't filled the disk. Remove this message if we have, so
2580  # that we can continue processing the other messages.
2581  my $dir = MailScanner::Config::Value("incomingworkdir");
2582  my $df  = df($dir, 1024);
2583  if ($df) {
2584    my $freek = $df->{bavail};
2585    if (defined($freek) && $freek<10000 && $freek>=0) {
2586      MailScanner::Log::WarnLog("Message %s is too big for available disk space in %s, skipping it", $this->{id}, $dir);
2587      my @toclear = ( $this->{id} );
2588      $workarea->ClearIds(\@toclear); # Delete attachments we might have made
2589      $this->DropFromBatch();
2590      return;
2591    }
2592  }
2593
2594  # Set the owner and group on all the extracted files
2595  # JKF 20100211 chown $workarea->{uid}, $workarea->{gid}, map { m/(.*)/ } grep { -f } glob "$explodeinto/* $explodeinto/.*"
2596  # JKF 20100211  if $workarea->{changeowner};
2597  my($tmplist1,@tmplist);
2598  if ($workarea->{changeowner}) {
2599    foreach $tmplist1 (glob "$explodeinto/* $explodeinto/.*") {
2600      $tmplist1 =~ /(.*)/;
2601      $tmplist1 = $1;
2602      push @tmplist, $tmplist1 unless -d $tmplist1;
2603    }
2604    chown $workarea->{uid}, $workarea->{gid}, @tmplist if @tmplist;
2605  }
2606  # JKF 20100528 Now set the perms on all the extracted files
2607  my $workperms = MailScanner::Config::Value('workperms') || '0660';
2608  # Make it octal with a leading zero if necessary
2609  $workperms = sprintf "0%lo", $workperms unless $workperms =~ /^0/;
2610  $workperms = oct($workperms); # and back to decimal for chmod
2611  chmod $workperms, @tmplist if @tmplist;
2612}
2613
2614sub ListLeafEntities {
2615  my($message, $entity, $entitylist) = @_;
2616
2617  my(@parts, $part);
2618
2619  # Fallen off the tree?
2620  return unless $entity && defined($entity->head);
2621
2622  # Found a leaf node
2623  if ($entity && !$entity->parts) {
2624    push @$entitylist, $entity;
2625    return;
2626  }
2627
2628  # Walk down each sub-tree
2629  @parts = $entity->parts;
2630  foreach $part (@parts) {
2631    ListLeafEntities($message, $part, $entitylist);
2632  }
2633}
2634
2635# Delete a given entity from the MIME entity tree.
2636# Have to walk the entire tree to do this.
2637# Bail out as soon as we've found it.
2638# Return 0 if DeleteEntity fell off a leaf node.
2639# Return 1 if DeleteEntity hit the TNEF node.
2640# Return 2 if DeleteEntity is just walking back up the tree.
2641sub DeleteEntity {
2642  my($message, $entitytodelete, $subtree, $tnef) = @_;
2643
2644  my(@parts, $part, @keep);
2645
2646  #print STDERR "In DeleteEntity\n";
2647
2648  # If we have a no-body message then replace the TNEF entity with an
2649  # empty attachment. Special case.
2650  if (scalar($message->{entity}) eq $tnef) {
2651    #print STDERR "Found message with no body but a TNEF attachment.\n";
2652    $part = MIME::Entity->build(Type => "text/plain",
2653                                Encoding => "quoted-printable",
2654                                Data => ["\n"]);
2655    push @keep, $part;
2656    $message->{entity}->parts(\@keep);
2657    $message->{bodymodified} = 1;
2658    #print STDERR "Replaced single part with empty text/plain attachment\n";
2659    return 2;
2660  }
2661
2662  # Fallen off a leaf node?
2663  #print STDERR "Returning 0\n" unless $subtree && defined($subtree->head);
2664  #return 0 unless $entity && defined($entity->head);
2665  return 0 unless $subtree && defined($subtree->head);
2666
2667  return 1 if $subtree eq $entitytodelete;
2668
2669  if ($subtree && !$subtree->parts) { # FIX FIX FIX !$entity->is_multipart) {
2670    # Found the TNEF entity at a leaf node?
2671    #(print STDERR "Found TNEF entity at a leaf node $entity\n"),return 1 if scalar($entity) eq $tnef;
2672    #(print STDERR "Not found TNEF entity at a leaf node $entity\n"),return 2;
2673    return 1 if scalar($subtree) eq $tnef;
2674    #print STDERR "Returning 2\n";
2675    return 2;
2676  }
2677
2678  @parts = $subtree->parts;
2679  #print STDERR "Parts are " . join(',',@parts) . "\n";
2680  foreach $part (@parts) {
2681    my $foundit =  DeleteEntity($message, $entitytodelete, $part, $tnef);
2682    #print STDERR "DeleteEntity = $foundit\n";
2683    push @keep, $part unless $foundit == 1;
2684  }
2685  # Make sure there is always at least 1 part.
2686  #print STDERR "Keep is " . join(',',@keep) . "\n";
2687  unless (@keep) {
2688    #print STDERR "Adding an empty text/plain\n";
2689    $part = MIME::Entity->build(Type => "text/plain",
2690                                Encoding => "quoted-printable",
2691                                Data => ["\n"]);
2692    push @keep, $part;
2693  }
2694  $subtree->parts(\@keep);
2695  $message->{bodymodified} = 1;
2696
2697  # If there are no parts left, make this entity a singlepart entity
2698  $subtree->make_singlepart unless scalar(@keep);
2699
2700  return 2;
2701}
2702
2703# Quietly drop a message from the batch. Used when we run out of disk
2704# space.
2705sub DropFromBatch {
2706  my($message) = @_;
2707  $message->{deleted} = 1;
2708  $message->{gonefromdisk} = 1; # Don't try to delete the original
2709  $message->{store}->Unlock(); # Unlock it so other processes can pick it up
2710  $message->{abandoned} = 1; # This message was abandoned, re-try it n times
2711}
2712
2713# Try to recursively unpack tar (with or without gzip) files and zip files.
2714# Extracts to a given maximum unpacking depth.
2715sub ExplodePartAndArchives {
2716  my($this, $explodeinto, $maxlevels, $allowpasswords, $insistpasswords,
2717     $couldnotreadmesg, $passwordedmesg,
2718     $nonpasswordedmesg, $toodeepmesg, $msname) = @_;
2719
2720  my($dir, $file, $part, @parts, $buffer);
2721  my(%seenbefore, %seenbeforesize, $foundnewfiles);
2722  my($size, $level, $ziperror, $tarerror, $silentviruses, $noisyviruses);
2723  my($allziperrors, $alltarerrors, $textlevel, $failisokay);
2724  my($linenum, $foundheader, $prevline, $line, $position, $prevpos, $nextpos);
2725  my($cyclecounter, $rarerror, $create0files, $oleerror, $sevenzerror);
2726  my($filecommand, $PipeTimeOut, $memb, $use_unpacker);
2727
2728  $dir = new DirHandle;
2729  $file = new FileHandle;
2730  $level = 0; #-1;
2731  $textlevel = 0;
2732  $cyclecounter = 0;
2733  $ziperror = 0;
2734  $tarerror = 0;
2735  $sevenzerror = 0;
2736
2737  # Do they only want encryption checking and nothing else?
2738  my $onlycheckencryption;
2739  $onlycheckencryption = 0;
2740  # More robust way of saying maxlevels==0 && allowpasswords==0;
2741  $onlycheckencryption = 1 if !$maxlevels && !$allowpasswords;
2742  $onlycheckencryption = 1 if !$maxlevels && $insistpasswords;
2743  $create0files = 0;
2744  $create0files = 1 if MailScanner::Config::Value('checkppafilenames', $this) =~ /1/;
2745
2746  $silentviruses = ' '. MailScanner::Config::Value('silentviruses', $this) .' ';
2747  $noisyviruses = ' ' . MailScanner::Config::Value('noisyviruses', $this) .' ';
2748
2749  $dir->open($explodeinto);
2750
2751  # $cyclecounter is a sanity check to ensure we don't loop forever
2752  OUTER: while($cyclecounter<30) {
2753    $cyclecounter++;
2754    $textlevel++;
2755    last if $level>$maxlevels; # && $textlevel>1;
2756    $foundnewfiles = 0;
2757    $dir->rewind();
2758    @parts = $dir->read();
2759    #print STDERR "Level = $level\n";
2760    foreach $part (@parts) {
2761      next if $part eq '.' || $part eq '..';
2762      # Skip the entire loop if it's not what we are looking for
2763      # JKF I really haven't the faintest idea why I wrote the next line :-)
2764      #next unless $part =~
2765      #  /(^msg.*txt$)|(\.(tar\.g?z|taz|tgz|tz|zip|exe|rar)$)/i;
2766
2767      $size = -s "$explodeinto/$part";
2768      next if $seenbefore{$part} &&
2769              $seenbeforesize{$part} == $size;
2770      $seenbefore{$part} = 1;
2771      $seenbeforesize{$part} = $size;
2772      #print STDERR "$level/$maxlevels Found new file $part\n";
2773
2774      #print STDERR "Reading $part\n";
2775      # Added a . on the front to handle the type indicator character
2776      if ($part =~ /^.msg.*txt/ && $textlevel<=2) {
2777        # Try and find hidden messages in the text files
2778        #print STDERR "About to read $explodeinto/$part\n";
2779        $file->open("$explodeinto/$part") or next;
2780
2781        # Try reading the first few lines to see if they look like mail headers
2782        $linenum = 0;
2783        $foundheader = 0;
2784        $prevline = "";
2785        $prevpos = 0;
2786        $nextpos = 0;
2787        $line = undef;
2788
2789        for ($linenum=0; $linenum<30; $linenum++) {
2790          #$position = $file->getpos();
2791          $line = <$file>;
2792          last unless defined $line;
2793          $nextpos += length $line;
2794          # Must have 2 lines of header
2795          # prevline looks like Header:
2796          # line     looks like       setting
2797          #          or         Header:
2798          if ($prevline =~ /^[^:\s]+: / && $line =~ /(^\s+\S)|(^[^:\s]+: )/) { #|(^\s+.*=)/) {
2799            #print STDERR "Found header start at \"$prevline\"\n and \"$line\"\n";
2800            $foundheader = 1;
2801            last;
2802          }
2803          $prevline = $line;
2804          $prevpos  = $position;
2805          $position = $nextpos;
2806        }
2807
2808        if ($foundheader) {
2809          # Check all lines are header lines up to next blank line
2810          my($num, $reallyfoundheader);
2811          $reallyfoundheader = 0;
2812          # Check for a maximum of 30 lines of headers
2813          foreach $num (0..30) {
2814            $line = <$file>;
2815            last unless defined $line;
2816            # Must have a valid header line
2817            #print STDERR "Examining: \"$line\"\n";
2818            next if $line =~ /(^\s+\S)|(^[^:\s]+: )/;
2819            #print STDERR "Not a header line\n";
2820            # Or a blank line
2821            if ($line =~ /^[\r\n]*$/) {
2822              $reallyfoundheader = 1;
2823              last;
2824            }
2825            #print STDERR "Not a blank line\n";
2826            # Non-header line, so it isn't a valid message part
2827            $reallyfoundheader = 0;
2828            last;
2829          }
2830          #print STDERR "Really found header = $reallyfoundheader\n";
2831          if ($reallyfoundheader) {
2832            # Rewind to the start of the header
2833            #$file->setpos($prevpos);
2834            seek $file, $prevpos, 0;
2835            #print STDERR "First line is \"" . <$file> . "\"\n";
2836
2837            # Setup everything for the MIME parser
2838            my $parser = MIME::Parser->new;
2839            my $filer  = MIME::Parser::MailScanner->new($explodeinto);
2840
2841            # Over-ride the default default character set handler so it does it
2842            # much better than the MIME-tools default handling.
2843            MIME::WordDecoder->default->handler('*' => \&WordDecoderKeep7Bit);
2844
2845            #print STDERR "Exploding message " . $this->{id} . " into " .
2846            #             $explodeinto . "\n";
2847            $parser->filer($filer);
2848            $parser->extract_uuencode(1); # uue is off by default
2849            $parser->output_to_core('NONE'); # everything into files
2850
2851            # Do the actual parsing
2852            #print STDERR "About to parse\n";
2853            my $entity = eval { $parser->parse($file) };
2854            #print STDERR "Done the parse\n";
2855
2856            # We might have created new files that need parsing
2857            $foundnewfiles = 1;
2858            next OUTER;
2859          }
2860        }
2861        $file->close;
2862      }
2863
2864      # Not got anything to do?
2865      next if !$maxlevels && $allowpasswords;
2866
2867      #$level++;
2868      next if $level > $maxlevels;
2869
2870      # Do a file (magic) on every file we run into and appoint an unpacker
2871      $PipeTimeOut = MailScanner::Config::Value('filetimeout');
2872      $filecommand = MailScanner::Config::Value('filecommand');
2873      $use_unpacker = '';
2874      if ($filecommand && -x $filecommand) { # check if we got support
2875        $memb = SafePipe("$filecommand -b '$explodeinto/$part' 2>&1",
2876                   $PipeTimeOut);
2877
2878        if ($memb =~ /ERROR/) {
2879          MailScanner::Log::WarnLog("File magic error (%s)", $memb);
2880        } elsif ( ($memb =~ /^Zip archive/i) ) { # use zip unpacker
2881          $use_unpacker = "zip";
2882        } elsif ( ($memb =~ /^RAR archive/i) ) { # use (official) rar unpacker
2883          $use_unpacker = "rar";
2884        } elsif ( ($memb =~ /^(7-zip|arj|cpio|lha(.*)|xar|GNU tar|POSIX tar|ASCII cpio) archive/i) || # use 7zip unpacker
2885          ($memb =~ /^(lzh|lzma|bzip2|gzip|xz) compressed/i) ||
2886          ($memb =~ /^(RPM|Delta-RPM|Windows imaging|ISO)/i) ) {
2887          $use_unpacker = "7z";
2888        }
2889      }
2890      #MailScanner::Log::WarnLog("Unpack-engine (%s) file (%s)", $use_unpacker, $part);
2891
2892      # Find all the zip files
2893      #print STDERR "Looking at $explodeinto/$part\n";
2894      #next if MailScanner::Config::Value('filecommand', $this) eq "";
2895      next unless $file->open("$explodeinto/$part");
2896      #print STDERR "About to read 4 bytes\n";
2897      unless (read($file, $buffer, 4) == 4) {
2898        #print STDERR "Very short file $part\n";
2899        $file->close;
2900        next;
2901      }
2902      my $uudfilename = "";
2903      $uudfilename = FindUUEncodedFile($file)
2904        if MailScanner::Config::Value('lookforuu', $this) =~ /1/;
2905      #$file->close;
2906      $failisokay = 0;
2907      if ($buffer =~ /^MZ/) {
2908        $failisokay = 1;
2909      }
2910      $file->close, next unless $buffer eq "PK\003\004" ||
2911                  $buffer eq "Rar!"       ||
2912                  $part =~ /\.rar$/       ||
2913                  defined($uudfilename) ||
2914                  $failisokay;
2915      #print STDERR "Found a zip or rar file\n" ;
2916      $file->close, next unless MailScanner::Config::Value('findarchivesbycontent', $this) ||
2917                  $part =~ /\.(001|7z|arj|bz2|bzip2|cab|cpio|deb|dmg|fat|gz|gzip|hfs|iso|img|jar|lha|lzh|lzma|ntfs|rpm|squashfs|swm|tar|taz|tbz2?|tgz|tar\.g?z|tz|gz|zip|exe|rar|uue?|docx?|xlsx?|pptx?|dotx?|xltx?|ppsx?|tpz|txz|vhd|wim|xar|x?z)$/i;
2918      $foundnewfiles = 1;
2919      #print STDERR "Unpacking $part at level $level\n";
2920
2921      if ($uudfilename ne "") {
2922        # It cannot be a zip or a rar, so skip the checks for them.
2923        # Oh yes it can! Do all the checks.
2924        # Ignore the return value, don't care if uudecode fails, it was
2925        # probably just a false positive on the uuencoded-data locator.
2926        #print STDERR "About to unpackuue $part into $uudfilename\n";
2927        # uudfilename does not have the type indicator character on the front.
2928        $this->UnpackUUE($part, $explodeinto, $file, $uudfilename);
2929      }
2930      $file->close;
2931      # Is it a zip file, in which case unpack the zip
2932      $ziperror = "";
2933      #print STDERR "About to unpackzip $part\n";
2934      # Skip unzip processing when "Unpack Microsoft Documents = no" and
2935      # the `file -b' result is "Microsoft Word|Excel|PowerPoint 2007+".
2936      if (MailScanner::Config::Value('unpackole', $this) ||
2937          $memb !~ /^Microsoft (Word|Excel|PowerPoint) 2007\+/) {
2938        $ziperror = $this->UnpackZip($part, $explodeinto, $allowpasswords,
2939                                   $insistpasswords,
2940                                   $onlycheckencryption, $create0files);
2941      }
2942      #MailScanner::Log::WarnLog("UnpackZip (%s) file (%s)", $ziperror, $part);
2943      #print STDERR "* * * * * * * Unpackzip $part returned $ziperror\n";
2944      # If unpacking as a zip failed, try it as a rar
2945      $rarerror = "";
2946      if ($part =~ /\.rar$/i || $use_unpacker eq "rar" || $buffer eq "Rar!" or $buffer =~ /^MZ[P]?/) {
2947        $rarerror = $this->UnpackRar($part, $explodeinto, $allowpasswords,
2948                                     $insistpasswords,
2949                                     $onlycheckencryption, $create0files);
2950      }
2951      # And if that failed, try it as a Microsoft OLE
2952      $oleerror = "";
2953      if (MailScanner::Config::Value('unpackole', $this) &&
2954          ($buffer eq "\320\317\021\340" || $buffer eq "\376\067\0\043" ||
2955           $buffer eq "\x31\xbe\0\0"     || $buffer eq "\0\0\xbc\x31"   ||
2956           $buffer eq "PO^Q"             || $buffer eq "\333\245-\0")
2957         ) {
2958        $oleerror = $this->UnpackOle($part, $explodeinto, $allowpasswords,
2959                                     $insistpasswords,
2960                                     $onlycheckencryption, $create0files);
2961      }
2962
2963      # $tarerror = "";
2964      # $tarerror = 0 # $this->UnpackTar($part, $explodeinto, $allowpasswords)
2965      #  if $ziperror || $part =~ /(tar\.g?z|tgz|gz)$/i;
2966      #print STDERR "In inner: \"$part\"\n";
2967
2968      if (
2969        ($use_unpacker eq "7z") ||
2970        ($part =~ /\.(001|7z|arj|bz2|bzip2|cab|cpio|deb|dmg|fat|gz|gzip|hfs|iso|img|jar|lha|lzh|lzma)$/) ||
2971        ($part =~ /\.(ntfs|rpm|squashfs|swm|tar|taz|tbz|tbz2|tgz|tpz|txz|vhd|wim|xar|xz|z)$/) ) {
2972          $sevenzerror = $this->Unpack7zip($part, $explodeinto, $allowpasswords,
2973                                     $insistpasswords,
2974                                     $onlycheckencryption, $create0files);
2975      }
2976
2977      if ($ziperror eq "nonpassword" || $rarerror eq "nonpassword" || $sevenzerror eq "nonpassword") {
2978        # Trim off leading type indicator character for logging.
2979        my $f = substr($part,1);
2980        MailScanner::Log::WarnLog("Non-password-protected archive (%s) in %s",
2981                                  $f, $this->{id});
2982        $this->{allreports}{$part} .= "$msname: $nonpasswordedmesg\n";
2983        $this->{alltypes}{$part} .= 'c'; # JKF 19/12/2007 'p'
2984        $this->{nonpasswordprotected} = 1;
2985        $this->{otherinfected} = 1;
2986        # JKF 19/12/2007 $this->{passwordinfected} = 1;
2987        # JKF 19/12/2007 and comment out the previous line about otherinfected.
2988        $this->{cantdisinfect} = 1; # Don't even think about disinfecting this!
2989        $this->{silent}=1 if $silentviruses =~ / Zip-NonPassword | All-Viruses /i;
2990        $this->{noisy} =1 if $noisyviruses  =~ / Zip-NonPassword /i;
2991      } elsif ($ziperror eq "password" || $rarerror eq "password" || $sevenzerror eq "password") {
2992        # Trim off leading type indicator character for logging.
2993        my $f = substr($part,1);
2994        MailScanner::Log::WarnLog("Password-protected archive (%s) in %s",
2995                                  $f, $this->{id});
2996        $this->{allreports}{$part} .= "$msname: $passwordedmesg\n";
2997        $this->{alltypes}{$part} .= 'c'; # JKF 19/12/2007 'p'
2998        $this->{passwordprotected} = 1;
2999        $this->{otherinfected} = 1;
3000        # JKF 19/12/2007 $this->{passwordinfected} = 1;
3001        # JKF 19/12/2007 and comment out the previous line about otherinfected.
3002        $this->{cantdisinfect} = 1; # Don't even think about disinfecting this!
3003        $this->{silent}=1 if $silentviruses =~ / Zip-Password | All-Viruses /i;
3004        $this->{noisy} =1 if $noisyviruses  =~ / Zip-Password /i;
3005      } elsif ($ziperror && $rarerror && $sevenzerror && !$failisokay) {
3006        # Trim off leading type indicator character for logging.
3007        my $f = substr($part,1);
3008        MailScanner::Log::WarnLog("Unreadable archive (%s) in %s",
3009                                  $f, $this->{id});
3010        $this->{allreports}{$part} .= "$msname: $couldnotreadmesg\n";
3011        $this->{alltypes}{$part} .= 'c';
3012        $this->{otherinfected} = 1;
3013      }
3014    }
3015    #print STDERR "In outer: \"$part\"\n";
3016    last if !$foundnewfiles || $level>$maxlevels;
3017    $dir->rewind;
3018    #print STDERR "Rewinding, Incrementing level from $level to " . ($level+1) . "\n";
3019    $level++;
3020  }
3021
3022  #print STDERR "Level=$level($maxlevels)\n";
3023  #print STDERR "Onlycheckencryption=$onlycheckencryption\n";
3024  if ($level>$maxlevels && !$onlycheckencryption && $maxlevels) {
3025    MailScanner::Log::WarnLog("Files hidden in very deeply nested archive " .
3026                              "in %s", $this->{id});
3027    $this->{allreports}{""} .= "$msname: $toodeepmesg\n";
3028    $this->{alltypes}{""} .= 'c';
3029    $this->{otherinfected}++;
3030  }
3031}
3032
3033# Search the given filehandle for signs that this could contain uu-encoded data
3034# Return the filename if found, undef otherwise. Also return the open file
3035# handle.
3036sub FindUUEncodedFile {
3037  my $fh = shift;
3038
3039  my($mode, $file);
3040  my $linecounter = 0;
3041
3042  seek $fh, 0, 0; # Rewind the file to the start
3043  while (<$fh>) {
3044    if (/^begin(.*)/) {
3045      my $modefile = $1;
3046      if ($modefile =~ /^(\s+(\d+))?(\s+(.*?\S))?\s*\Z/) {
3047        ($mode, $file) = ($2, $4);
3048      }
3049      MailScanner::Log::InfoLog("Found uu-encoded file %s", $file);
3050      last;
3051    }
3052    $linecounter++;
3053    seek($fh, 0, 0), return undef if $linecounter>50;
3054  }
3055  return $file;
3056}
3057
3058
3059# We now have a uuencoded file to decode. We have a target filename we have
3060# read from the uuencode header.
3061# uudecoded does *not* have the type indicator character. Add a 'u' to get
3062# the output filename.
3063sub UnpackUUE {
3064  my $this = shift;
3065  my($uuencoded, $explodeinto, $uuehandle, $uudecoded) = @_;
3066
3067  # Trim off leading type indicator for logging
3068  my $attachmentname = substr($uuencoded,1);
3069
3070  # Set up all the tree structures for cross-referencing
3071  my $safename = $this->MakeNameSafe('u'.$uudecoded,$explodeinto);
3072  MailScanner::Log::InfoLog("Unpacking UU-encoded file %s to %s in message %s",
3073                            $attachmentname, substr($safename,1), $this->{id});
3074  $this->{file2parent}{$uudecoded} = $uuencoded;
3075  $this->{file2parent}{$safename} = $uuencoded;
3076  $this->{file2safefile}{$uudecoded} = $safename;
3077  $this->{safefile2file}{$safename} = $uudecoded;
3078
3079  $safename = "$explodeinto/$safename";
3080
3081  my $out = new FileHandle;
3082  unless ($out->open("> $safename")) {
3083    MailScanner::Log::WarnLog("Unpacking UU-encoded file %s, could not create target file %s in message %s", $this->MakeNameSafe($uuencoded,$explodeinto), $safename, $this->{id});
3084    return;
3085  }
3086
3087  while  (<$uuehandle>) {
3088    last if /^end/;
3089    next if /[a-z]/;
3090    next unless int((((ord() - 32) & 077) + 2) / 3) == int(length() / 4);
3091    $out->print(unpack('u', $_));
3092  }
3093  $out->close;
3094}
3095
3096
3097# Unpack a 7za file into the named directory.
3098# Return 1 if an error occurred, else 0.
3099# Return 0 on success.
3100# Return "password" if a member was password-protected.
3101# Very much like UnpackZip except it uses the external "7z" command.
3102sub Unpack7zip {
3103  my($this, $zipname, $explodeinto, $allowpasswords, $insistpasswords, $onlycheckencryption, $touchfiles) = @_;
3104
3105  my($zip, @members, $member, $name, $fh, $safename, $memb, $check, $junk,
3106     $unzip, $unrar, $IsEncrypted, $PipeTimeOut, $PipeReturn,$NameTwo, $HasErrors,
3107     $member2, $Stuff, $BeginInfo, $EndInfo, $ParseLine, $what, $nopathname);
3108
3109  # Timeout value for unrar is currently the same as that of the file
3110  # command + 20. Julian, when you add the filetimeout to the config file
3111  # perhaps you should think about adding a maxcommandexecutetime setting
3112  # as well
3113  $PipeTimeOut = MailScanner::Config::Value('un7ziptimeout');
3114  $unzip = MailScanner::Config::Value('un7zipcommand');
3115  return 1 unless $unzip && -x $unzip;
3116
3117  MailScanner::Log::NoticeLog("Unpacking 7zip archive: %s", $zipname);
3118
3119  # This part lists the archive contents and makes the list of
3120  # file names within. "This is a list verbose option"
3121  #$memb = SafePipe("$unrar v -p- '$explodeinto/$zipname' 2>&1",
3122  #                   $PipeTimeOut);
3123  $memb = SafePipe("$unzip l '$explodeinto/$zipname' 2>&1",
3124                   $PipeTimeOut);
3125
3126  if ($memb =~ /^error/i) {
3127     MailScanner::Log::WarnLog("7ZipUnpacker: (%s)", $memb);
3128     $HasErrors = 1;
3129  }
3130  #MailScanner::Log::WarnLog("7z output: %s", $memb);
3131
3132  $junk = "";
3133  $Stuff = "";
3134  $BeginInfo = 0;
3135  $EndInfo = 0;
3136  $ParseLine = 1;
3137  $memb =~ s/\r//gs;
3138  my @test = split /\n/, $memb;
3139  $memb = '';
3140
3141  # Have to parse the output from the 'v' command and parse the information
3142  # between the ----------------------------- lines
3143  foreach $what (@test) {
3144    #print STDERR "Processing \"$what\"\n";
3145    #MailScanner::Log::WarnLog("7z what: %s", $what);
3146
3147    # Have we already hit the beginng and now find another ------ string?
3148    # If so then we are at the end
3149    $EndInfo = 1 if $what =~ /-{18,}$/ && $BeginInfo;
3150
3151    # if we are after the begning but haven't reached the end,
3152    # then process this line
3153    if ($BeginInfo && !$EndInfo) {
3154      # MailScanner::Log::WarnLog("7z what: %s", $what);
3155      # If we are on line one then it's the file name with full path
3156      # otherwise we are on the info line containing the attributes
3157      ##$what =~ s/ +/ /g;
3158      ##my (@Zarray ) = split /\s/, $what;
3159
3160      # Add support to filenames with spaces.
3161      my @Zarray;
3162      for (my $i=0; $i <= 4; $i++) {
3163        $what =~ / +/;
3164        $Zarray[$i]= substr($what, 0,     $-[0]),
3165        $what=substr($what, $+[0]);
3166      }
3167      $Zarray[5]=$what;
3168
3169      # my $Zname = pop @Zarray; # this is the most important value, other values are nice to have but this one we must have
3170      my $Zname = $Zarray[5]; # this is the most important value, other values are nice to have but this one we must have
3171      my $Zdate = $Zarray[0];
3172      my $Ztime = $Zarray[1];
3173      my $Zattr = $Zarray[2];
3174      #my $Zsize = $Zarray[3];
3175      #my $ZCsize = $Zarray[4];
3176      #MailScanner::Log::WarnLog("7z-members: [%s] [%s] [%s] [%s] [%s] [%s]", $Zdate, $Ztime, $Zattr, $Zsize, $ZCsize, $Zname);
3177
3178      $memb .= "$Zname\n" if $Zattr !~ /^d|^D/;
3179    }
3180
3181    # If we have a line full of ---- and $BeginInfo is not set then
3182    # we are at the first and we need to set $BeginInfo so next pass
3183    # begins processing file information
3184    if ($what =~ /-{18,}$/ && ! $BeginInfo) {
3185      $BeginInfo = 1;
3186    }
3187  }
3188
3189
3190  # Remove returns from the output string, exit if the archive is empty
3191  # or the output is empty
3192
3193  $memb =~ s/\r//gs;
3194  return 1 if $memb ne '' &&
3195              $memb =~ /(No files to extract|^COMMAND_TIMED_OUT$)/si;
3196
3197  return 0 if $memb eq '';
3198  #MailScanner::Log::DebugLog("Unrar : Archive Testing Completed On : %s",
3199  #                           $memb);
3200
3201  @members = split /\n/, $memb;
3202  $fh = new FileHandle;
3203
3204  foreach $member2 (@members) {
3205    $IsEncrypted = 0;
3206    $HasErrors = 0;
3207    #MailScanner::Log::InfoLog("Checking member %s",$member2);
3208    # Test the current file name to see if it's password protected
3209    # and capture the output. If the command times out, then return
3210
3211    next if $member2 eq "";
3212    $member = quotemeta $member2;
3213    #print STDERR "Member is ***$member***\n";
3214    #MailScanner::Log::WarnLog("Un7zip: member %s",$member );
3215
3216    $check = SafePipe(
3217      "$unzip -y t '$explodeinto/$zipname' $member 2>&1",
3218      $PipeTimeOut);
3219    #print STDERR "Point 1\n";
3220    return 1 if $check =~ /^COMMAND_TIMED_OUT$/;
3221
3222    # Check for any error with this file. Format is FileName - Error string
3223    if ($check =~ /$member\s+-\s/i){
3224      MailScanner::Log::WarnLog("Un7zip Error in file: %s -> %s",
3225                                $zipname,$member);
3226      $HasErrors = 1;
3227    }
3228
3229    $check =~ s/\n/:/gsi;
3230    #MailScanner::Log::WarnLog("Got : %s", $check);
3231
3232    # If we get the string Encrypted then we have found a password
3233    # protected archive and we handle it the same as zips are handled
3234
3235    if ($check =~ /\bEnter password(.*)\bWrong password/s) {
3236      $IsEncrypted = 1;
3237      MailScanner::Log::WarnLog("Password Protected archive Found");
3238      #print STDERR "Checking member " . $member . "\n";
3239      #print STDERR "******** Encryption = " . $IsEncrypted . "\n";
3240      return "password" if !$allowpasswords && $IsEncrypted;
3241    } else {
3242      if ($insistpasswords) {
3243        MailScanner::Log::WarnLog("Non-Password Protected archive Found");
3244        return "nonpassword";
3245      }
3246    }
3247
3248
3249    # If they don't want to extract, but only check for encryption,
3250    # then skip the rest of this as we don't actually want the files
3251    # checked against the file name/type rules
3252
3253    next if $onlycheckencryption;
3254
3255    $name = $member2;
3256    #print STDERR "UnPackRar : Making Safe Name from $name\n";
3257
3258    # There is no facility to change the output name for a rar file
3259    # but we can rename rename the files inside the archive
3260    # prefer to use $NameTwo because there is no path attached
3261    # $safename is guaranteed not to exist, but NameTwo gives us the
3262    # filename without any directory information, which we use later.
3263    $nopathname = $name;
3264    $nopathname =~ s/^.*\///;
3265    $safename = $this->MakeNameSafe('r'.$nopathname,$explodeinto);
3266    $safename =~ m|(.*)|;
3267    $safename = $1;
3268    $NameTwo = $safename;
3269    $NameTwo = $1 if $NameTwo =~ /([^\/]+)$/;
3270    #MailScanner::Log::InfoLog("UnPackRar: Member : %s", $member);
3271    #print STDERR "UnPackRar : Safe Name is $safename\n";
3272
3273    #MailScanner::Log::InfoLog("UnPackRar: SafeName : %s", $safename);
3274    $this->{file2parent}{$name} = $zipname;
3275    $this->{file2parent}{$safename} = $zipname;
3276    $this->{file2safefile}{$name} = $safename;
3277    $this->{safefile2file}{$safename} = $name;
3278    #print STDERR "Archive member \"$name\" is now \"$safename\"\n";
3279
3280    #$this->{file2entity}{$name} = $this->{entity};
3281    # JKF 20090505 Don't do this: $this->{file2safefile}{$name} = $zipname;
3282    #$this->{safefile2file}{$safename} = $zipname;
3283
3284    $safename = "$explodeinto/$safename";
3285
3286    $PipeReturn = '';
3287    $? = 0;
3288    if (!$IsEncrypted && !$HasErrors) {
3289      #print STDERR "Expanding ***$member***\ninto ***$NameTwo***\n";
3290      $PipeReturn = SafePipe(
3291                   "$unzip e -y -so '$explodeinto/$zipname' $member > \"$NameTwo\"",
3292                   $PipeTimeOut);
3293      unless ("$?" == 0 && $PipeReturn ne 'COMMAND_TIMED_OUT'){
3294        # The rename operation failed!, so skip the extraction of a
3295        # potentially bad file name.
3296        # JKF Temporary testing code
3297        #MailScanner::Log::WarnLog("UnPackRar: RC: %s PipeReturn : ",$?,$PipeReturn);
3298        MailScanner::Log::WarnLog("7zipUnpacker: Could not rename or use " .
3299            "safe name in Extract, NOT Unpacking file %s", $safename);
3300        next;
3301      }
3302      #MailScanner::Log::InfoLog("7zipUnacker: Done...., got %d and %s for %s", $?, $PipeReturn, $safename);
3303    }
3304    #MailScanner::Log::WarnLog("RC = %s : Encrypt = %s : PipeReturn = %s",
3305    #                          $?,$IsEncrypted,$PipeReturn );
3306    unless ("$?" == 0 && !$HasErrors && !$IsEncrypted &&
3307            $PipeReturn ne 'COMMAND_TIMED_OUT') {
3308
3309      # If we got an error, or this file is encrypted create a zero-length
3310      # file so the filename tests will still work.
3311      MailScanner::Log::WarnLog("7zipUnpacker : Encrypted Or Extract Error Creating" .
3312                                " 0 length %s",$NameTwo);
3313      $touchfiles && $fh->open(">$safename") && $fh->close();
3314    }
3315  }
3316  return 0;
3317}
3318
3319# Unpack a rar file into the named directory.
3320# Return 1 if an error occurred, else 0.
3321# Return 0 on success.
3322# Return "password" if a member was password-protected.
3323# Very much like UnpackZip except it uses the external "unrar" command.
3324sub UnpackRar {
3325  my($this, $zipname, $explodeinto, $allowpasswords, $insistpasswords, $onlycheckencryption, $touchfiles) = @_;
3326
3327  my($zip, @members, $member, $name, $fh, $safename, $memb, $check, $junk,
3328     $unrar,$IsEncrypted, $PipeTimeOut, $PipeReturn,$NameTwo, $HasErrors,
3329     $member2, $Stuff, $BeginInfo, $EndInfo, $ParseLine, $what, $nopathname, $UnrarVersion);
3330
3331  # Timeout value for unrar is currently the same as that of the file
3332  # command + 20. Julian, when you add the filetimeout to the config file
3333  # perhaps you should think about adding a maxcommandexecutetime setting
3334  # as well
3335  $PipeTimeOut = MailScanner::Config::Value('unrartimeout');
3336  $unrar = MailScanner::Config::Value('unrarcommand');
3337  return 1 unless $unrar && -x $unrar;
3338
3339  MailScanner::Log::NoticeLog("Unpacking RAR archive: %s", $zipname);
3340
3341  # Get unrar version
3342  # Unrar Version 5.21 (and possibly others in the future do not use --help, grab without --help here)
3343  $UnrarVersion = (split /\ /, (split /\n/, SafePipe("$unrar 2>&1",$PipeTimeOut))[1])[1];
3344
3345  # Check for version 4 or 5 of unrar.
3346  # Future versions of unrar will need tested
3347  # If unrar itself does not output version, grab again using --help as a fail safe
3348  $UnrarVersion = (split /\ /, (split /\n/, SafePipe("$unrar --help 2>&1",$PipeTimeOut))[1])[1] unless  $UnrarVersion =~ /^\d+\.\d*$/;
3349
3350  # Check version
3351  return 1 unless $UnrarVersion =~ /^\d+\.\d*$/ && ( $UnrarVersion >= 4.0 && $UnrarVersion < 6.0 );
3352
3353  # Escape spaces in filename
3354  # $zipname =~ s/\ /\\\ /g;
3355
3356  #MailScanner::Log::WarnLog("UnPackRar Testing : %s", $zipname);
3357  # Unrar Version 4x file parse
3358  if ($UnrarVersion >= 4.0 && $UnrarVersion < 5.0) {
3359
3360    # This part lists the archive contents and makes the list of
3361    # file names within. "This is a list verbose option"
3362    $memb = SafePipe("$unrar v -p- '$explodeinto/$zipname' 2>&1",
3363                   $PipeTimeOut);
3364
3365    $junk = "";
3366    $Stuff = "";
3367    $BeginInfo = 0;
3368    $EndInfo = 0;
3369    $ParseLine = 1;
3370    $memb =~ s/\r//gs;
3371    my @test = split /\n/, $memb;
3372    $memb = '';
3373
3374    # Have to parse the output from the 'v' command and parse the information
3375    # between the ----------------------------- lines
3376    foreach $what (@test) {
3377      #print STDERR "Processing \"$what\"\n";
3378      # If we haven't hit any ------- lines at all, and we are prompted for
3379      # a password, then the whole archive is password-protected.
3380      unless ($BeginInfo || $EndInfo) {
3381        if ($what =~ /^Encrypted file:/i && !$allowpasswords) {
3382          MailScanner::Log::WarnLog("Password Protected RAR Found");
3383          return "password";
3384        }
3385      }
3386
3387      # Have we already hit the beginng and now find another ------ string?
3388      # If so then we are at the end
3389      $EndInfo = 1 if $what =~ /-{40,}$/ && $BeginInfo;
3390
3391      # if we are after the begning but haven't reached the end,
3392      # then process this line
3393      if ($BeginInfo && !$EndInfo) {
3394        # If we are on line one then it's the file name with full path
3395        # otherwise we are on the info line containing the attributes
3396        if ($ParseLine eq 1) {
3397          $junk = $what;
3398          $junk =~ s/^\s+|\s+$//g;
3399          chomp($junk);
3400          $ParseLine = 2;
3401        } else {
3402          $Stuff = $what;
3403          $Stuff =~ s/^\s+|\s+$//g;
3404          # Need to remove redundant spaces from our info line and
3405          # split it into it's components
3406          chomp($Stuff);
3407          $Stuff =~ s/\s{2,}/ /g;
3408          my ($RSize,$RPacked,$RRatio,$RDate,$RTime,$RAttrib,$RCrc,$RMeth,$RVer)
3409             = split /\s/, $Stuff;
3410          # If RAttrib doesn't begin with d then it's a file and we
3411          # add it to our $memb string, otherwise we ignore the directory
3412          # only entries
3413          #MailScanner::Log::WarnLog("UnPackRar InfoLine :%s:", $Stuff);
3414          #MailScanner::Log::WarnLog("UnPackRar Looking at ATTRIB :->%s<-:",
3415          #                          $RAttrib);
3416          $memb .= "$junk\n" if $RAttrib !~ /^d|^.D/;
3417          $junk = '';
3418          $Stuff = '';
3419          $ParseLine = 1;
3420        }
3421      }
3422      # If we have a line full of ---- and $BeginInfo is not set then
3423      # we are at the first and we need to set $BeginInfo so next pass
3424      # begins processing file information
3425      if ($what =~ /-{40,}$/ && ! $BeginInfo) {
3426        $BeginInfo = 1;
3427      }
3428    }
3429
3430    # Remove returns from the output string, exit if the archive is empty
3431    # or the output is empty
3432
3433    $memb =~ s/\r//gs;
3434    return 1 if $memb ne '' &&
3435                $memb =~ /(No files to extract|^COMMAND_TIMED_OUT$)/si;
3436
3437    return 0 if $memb eq ''; # JKF If no members it probably wasn't a Rar self-ext
3438    MailScanner::Log::DebugLog("Unrar : Archive Testing Completed On : %s",
3439                               $memb);
3440
3441    @members = split /\n/, $memb;
3442
3443  # Unrar Version 5x file parse
3444  } elsif ($UnrarVersion >= 5.0 && $UnrarVersion < 6.0) {
3445    # This part lists the archive contents and makes the list of
3446    # file names within. "This is a list verbose option"
3447    $memb = SafePipe("$unrar vt -idcp -p- '$explodeinto/$zipname' 2>&1",
3448                      $PipeTimeOut);
3449
3450    # SafePipe timed out
3451    return 1 if $memb =~ /COMMAND_TIMED_OUT$/si;
3452
3453    # clean spaces
3454    $memb =~ s/^\s+$//mg;
3455    $memb =~ s/^\s+|\s+$//g;
3456    $memb =~ s/\r//gs;
3457
3458    # in vt mode, files are separated by empty lines
3459    my @test = split /\n\n/, $memb;
3460
3461    my $ArchiveInfo = shift @test;
3462
3463    # not a RAR file, fail to extract
3464    return 1 if $ArchiveInfo =~ /^\s*No files to extract\s*$/mi;
3465
3466    # Test whole RAR encryption
3467    if ($ArchiveInfo =~ /encrypted headers|password is incorrect/i) {
3468      MailScanner::Log::WarnLog("Password Protected RAR Found");
3469      return "password" if !$allowpasswords;
3470    } elsif ($insistpasswords) {
3471      MailScanner::Log::WarnLog("Non-Password Protected RAR Found");
3472      return "nonpassword";
3473    };
3474
3475    # Have to parse the output from the 'vt' command
3476    foreach $what (@test) {
3477      #print STDERR "Processing \"$what\"\n";
3478      #MailScanner::Log::WarnLog("UnPackRar Processing : %s", $what);
3479      $what =~ s/^\s+|\s+$//mg;
3480
3481      # compatibility with "unrar vta"
3482      last if $what =~ /^Service: EOF$/i;
3483
3484      # Test single RAR encryption
3485      if ($what =~ /^Flags:.*encrypted/m) {
3486        MailScanner::Log::WarnLog("Password Protected RAR Found");
3487        return "password" if !$allowpasswords;
3488      } elsif ($insistpasswords) {
3489        MailScanner::Log::WarnLog("Non-Password Protected RAR Found");
3490        return "nonpassword";
3491      }
3492
3493      $what =~ /^Name\: *(.*)\s*$/mi;
3494      push @members, $1 if defined($1)
3495    }
3496    #MailScanner::Log::DebugLog("Unrar : Archive Testing Completed On : %s",
3497    #                           join(', ', @members));
3498  }
3499
3500  $fh = new FileHandle;
3501
3502  foreach $member2 (@members) {
3503    $IsEncrypted = 0;
3504    $HasErrors = 0;
3505    #MailScanner::Log::InfoLog("Checking member %s",$member2);
3506    # Test the current file name to see if it's password protected
3507    # and capture the output. If the command times out, then return
3508
3509    next if $member2 eq "";
3510    $member = quotemeta $member2;
3511    #print STDERR "Member is ***$member***\n";
3512    $check = SafePipe(
3513      "$unrar  t -p- -idp '$explodeinto/$zipname' $member 2>&1",
3514      $PipeTimeOut);
3515    #print STDERR "Point 1\n";
3516    return 1 if $check =~ /^COMMAND_TIMED_OUT$/;
3517
3518    # Check for any error with this file. Format is FileName - Error string
3519    if ($check =~ /$member\s+-\s/i){
3520      MailScanner::Log::WarnLog("Unrar: Error in file: %s -> %s",
3521                                $zipname,$member);
3522      $HasErrors = 1;
3523    }
3524
3525    $check =~ s/\n/:/gsi;
3526    #MailScanner::Log::WarnLog("Got : %s", $check);
3527
3528    # If we get the string Encrypted then we have found a password
3529    # protected archive and we handle it the same as zips are handled
3530
3531    if ($check =~ /\bEncrypted file:\s.+\(password incorrect/si) {
3532      $IsEncrypted = 1;
3533      MailScanner::Log::WarnLog("Password Protected RAR Found");
3534      #print STDERR "Checking member " . $member . "\n";
3535      #print STDERR "******** Encryption = " . $IsEncrypted . "\n";
3536      return "password" if !$allowpasswords && $IsEncrypted;
3537    } else {
3538      if ($insistpasswords) {
3539        MailScanner::Log::WarnLog("Non-Password Protected RAR Found");
3540        return "nonpassword";
3541      }
3542    }
3543
3544
3545    # If they don't want to extract, but only check for encryption,
3546    # then skip the rest of this as we don't actually want the files
3547    # checked against the file name/type rules
3548    next if $onlycheckencryption;
3549
3550    $name = $member2;
3551    #print STDERR "UnPackRar : Making Safe Name from $name\n";
3552
3553    # There is no facility to change the output name for a rar file
3554    # but we can rename rename the files inside the archive
3555    # prefer to use $NameTwo because there is no path attached
3556    # $safename is guaranteed not to exist, but NameTwo gives us the
3557    # filename without any directory information, which we use later.
3558    $nopathname = $name;
3559    $nopathname =~ s/^.*\///;
3560    $safename = $this->MakeNameSafe('r'.$nopathname,$explodeinto);
3561    $NameTwo = $safename;
3562    $NameTwo = $1 if $NameTwo =~ /([^\/]+)$/;
3563    #MailScanner::Log::InfoLog("UnPackRar: Member : %s", $member);
3564    #print STDERR "UnPackRar : Safe Name is $safename\n";
3565
3566    #MailScanner::Log::InfoLog("UnPackRar: SafeName : %s", $safename);
3567    $this->{file2parent}{$name} = $zipname;
3568    $this->{file2parent}{$safename} = $zipname;
3569    $this->{file2safefile}{$name} = $safename;
3570    $this->{safefile2file}{$safename} = $name;
3571    #print STDERR "Archive member \"$name\" is now \"$safename\"\n";
3572
3573    #$this->{file2entity}{$name} = $this->{entity};
3574    # JKF 20090505 Don't do this: $this->{file2safefile}{$name} = $zipname;
3575    #$this->{safefile2file}{$safename} = $zipname;
3576
3577    $safename = "$explodeinto/$safename";
3578
3579    $PipeReturn = '';
3580    $? = 0;
3581    if (!$IsEncrypted && !$HasErrors) {
3582      #print STDERR "Expanding ***$member***\ninto ***$NameTwo***\n";
3583      $PipeReturn = SafePipe(
3584                   "$unrar p -y -inul -p- -idp '$explodeinto/$zipname' $member > \"$NameTwo\"",
3585                   $PipeTimeOut);
3586      unless ("$?" == 0 && $PipeReturn ne 'COMMAND_TIMED_OUT'){
3587        # The rename operation failed!, so skip the extraction of a
3588        # potentially bad file name.
3589        # JKF Temporary testing code
3590        #MailScanner::Log::WarnLog("UnPackRar: RC: %s PipeReturn : ",$?,$PipeReturn);
3591        MailScanner::Log::WarnLog("UnPackRar: Could not rename or use " .
3592            "safe name in Extract, NOT Unpacking file %s", $safename);
3593        next;
3594      }
3595      #MailScanner::Log::InfoLog("UnPackRar: Done...., got %d and %s", $?, $PipeReturn);
3596    }
3597    #MailScanner::Log::WarnLog("RC = %s : Encrypt = %s : PipeReturn = %s",
3598    #                          $?,$IsEncrypted,$PipeReturn );
3599    unless ("$?" == 0 && !$HasErrors && !$IsEncrypted &&
3600            $PipeReturn ne 'COMMAND_TIMED_OUT') {
3601
3602      # If we got an error, or this file is encrypted create a zero-length
3603      # file so the filename tests will still work.
3604      MailScanner::Log::WarnLog("Unrar : Encrypted Or Extract Error Creating" .
3605                                " 0 length %s",$NameTwo);
3606      $touchfiles && $fh->open(">$safename") && $fh->close();
3607    }
3608  }
3609  return 0;
3610}
3611
3612# Modified Julian's code from SweepOther.pm
3613# Changed to allow execution of any given command line with a time
3614# control. This could replace any call to system or use of backticks
3615#
3616# $Cmd         = command line to execute
3617# $timeout     = max time in seconds to allow execution
3618#
3619sub SafePipe {
3620  my ($Cmd, $TimeOut) = @_;
3621
3622  my($Kid, $pid, $TimedOut, $Str);
3623  $Kid  = new FileHandle;
3624  $TimedOut = 0;
3625
3626  #print STDERR "SafePipe : Command : $Cmd\n";
3627  #print STDERR "SafePipe : TimeOut : $TimeOut\n";
3628
3629  $? = 0; # Make sure there's no junk left in here
3630
3631  eval {
3632    die "Can't fork: $!" unless defined($pid = open($Kid, '-|'));
3633    if ($pid) {
3634      # In the parent
3635
3636      # Set up a signal handler and set the alarm time to the timeout
3637      # value passed to the function
3638
3639      local $SIG{ALRM} = sub { $TimedOut = 1; die "Command Timed Out" };
3640      alarm $TimeOut;
3641
3642      # while the command is running we will collect it's output
3643      # in the $Str variable. We don't process it in any way here so
3644      # whatever called us will get back exactly what they would have
3645      # gotten with a system() or backtick call
3646
3647      #MailScanner::Log::DebugLog("SafePipe : Processing %s", $Cmd);
3648
3649      while(<$Kid>) {
3650        $Str .= $_;
3651        #print STDERR "SafePipe : Processing line \"$_\"\n";
3652      }
3653      close $Kid;
3654
3655      #MailScanner::Log::DebugLog("SafePipe : Completed $Cmd");
3656      #print STDERR "SafePipe : Returned $PipeReturnCode\n";
3657
3658      $pid = 0; # 2.54
3659      alarm 0;
3660      # Workaround for bug in perl shipped with Solaris 9,
3661      # it doesn't unblock the SIGALRM after handling it.
3662      eval {
3663        my $unblockset = POSIX::SigSet->new(SIGALRM);
3664        sigprocmask(SIG_UNBLOCK, $unblockset)
3665          or die "Could not unblock alarm: $!\n";
3666      };
3667    } else {
3668      # In the child
3669      POSIX::setsid();
3670
3671      # Execute the command via an exec call, bear in mind this will only
3672      # capture STDIN so if you need STDERR, or both you have to handle, for
3673      # example, 2>&1 as part of the command line just as you would with
3674      # system() or backticks
3675      #
3676      #the line following the
3677      # call should *never* be reached unless the call it's self fails
3678      #print STDERR "SafePipe in child exec $Cmd\n";
3679
3680      my @args = ( "$Cmd" );
3681      #exec $Cmd or print STDERR "SafePipe :  failed to execute $Cmd\n";
3682
3683      open STDIN, "< /dev/null";
3684
3685      exec map { m/(.*)/ } @args
3686        or MailScanner::Log::WarnLog("SafePipe :  failed to execute %s", $Cmd);
3687      #MailScanner::Log::DebugLog("SafePipe in Message.pm : exec failed " .
3688      #                           "for $Cmd");
3689      exit 1;
3690    }
3691  };
3692  alarm 0; # 2.53
3693
3694  #MailScanner::Log::DebugLog("SafePipe in Message.pm : Completed $Cmd");
3695  #MailScanner::Log::WarnLog("Returned Code : %d", $?);
3696  # Catch failures other than the alarm
3697  MailScanner::Log::WarnLog("SafePipe in Message.pm : $Cmd failed with real error: $@")
3698    if $@ and $@ !~ /Command Timed Out/;
3699
3700  #print STDERR "SafePipe : pid = $pid and \@ = $@\n";
3701
3702  # In which case any failures must be the alarm
3703  if ($@ or $pid>0) {
3704    # Kill the running child process
3705    my($i);
3706    kill -15, $pid;
3707    # Wait for up to 5 seconds for it to die
3708    for ($i=0; $i<5; $i++) {
3709      sleep 1;
3710      waitpid($pid, &POSIX::WNOHANG);
3711      ($pid=0),last unless kill(0, $pid);
3712      kill -15, $pid;
3713    }
3714    # And if it didn't respond to 11 nice kills, we kill -9 it
3715    if ($pid) {
3716      kill -9, $pid;
3717      waitpid $pid, 0; # 2.53
3718    }
3719  }
3720
3721  # If the command timed out return the string below, otherwise
3722  # return the command output in $Str
3723  return $Str unless $TimedOut;
3724
3725  MailScanner::Log::WarnLog("Safepipe in Message.pm : %s timed out!", $Cmd);
3726  return "COMMAND_TIMED_OUT";
3727}
3728
3729
3730# Unpack a zip file into the named directory.
3731# Return 1 if an error occurred, else 0.
3732# Return 0 on success.
3733# Return "password" if a member was password-protected.
3734my $zipadd = 0;
3735sub UnpackZip {
3736  my($this, $zipname, $explodeinto, $allowpasswords, $insistpasswords, $onlycheckencryption, $touchfiles) = @_;
3737
3738  my($zip, @members, $member, $name, $fh, $safename);
3739
3740  #print STDERR "Unpacking $zipname\n";
3741  my $tmpname = "$explodeinto/$zipname";
3742  $tmpname =~ /^(.*)$/;
3743  $tmpname = $1;
3744  return 1 if -s $tmpname == 4_237_4; # zip of death?
3745  Archive::Zip::setErrorHandler( sub {} ); # Stop error messages
3746  return 1 unless $zip = Archive::Zip->new("$explodeinto/$zipname");
3747  return 1 unless @members = $zip->members();
3748  #print STDERR "Members are " . join(',',@members) . "\n";
3749
3750  MailScanner::Log::NoticeLog("Unpacked Zip archive: %s", $zipname);
3751
3752  $fh = new FileHandle;
3753
3754  foreach $member (@members) {
3755    #print STDERR "Checking member " . $member->fileName() . "\n";
3756    #print STDERR "******** Encryption = " . $member->isEncrypted() . "\n";
3757    return "password" if !$allowpasswords && $member->isEncrypted();
3758    return "nonpassword" if $insistpasswords && !($member->isEncrypted());
3759
3760    # If they don't want to extract, but only check for encryption,
3761    # then skip the rest of this as we don't actually want the files.
3762    next if $onlycheckencryption;
3763
3764    # Untaint member's attributes.
3765    # Fix to use workperms in preference by Rick Cooper rcooper@dwford.com
3766    my $workperms = MailScanner::Config::Value('workperms') || '0660';
3767    # Make it octal with a leading zero if necessary by Curu Wong prinbra@gmail.com
3768    $workperms = sprintf("0%lo", $workperms) unless $workperms =~ /^0/;
3769    $workperms = oct($workperms); # and back to decimal for chmod
3770    $member->unixFileAttributes($workperms);
3771
3772    $name = $member->fileName();
3773    # Trim off any leading directory path
3774    $name =~ s#^.*/##;
3775    $zipadd = ($zipadd + 1) % 100;
3776    $safename = $this->MakeNameSafe('z'.$zipadd.$name, $explodeinto);
3777    #print STDERR "MakeNameSafe(z + $zipadd + $name) = $safename\n";
3778    $this->{file2parent}{$name} = $zipname;
3779    $this->{file2parent}{$safename} = $zipname;
3780    $this->{file2safefile}{$name} = $safename;
3781    $this->{safefile2file}{$safename} = $name;
3782    #print STDERR "Archive member \"$name\" is now \"$safename\"\n";
3783
3784    #$this->{file2entity}{$name} = $this->{entity};
3785    # JKF 20090505 Don't do this: $this->{file2safefile}{$name} = $zipname;
3786    #$this->{safefile2file}{$safename} = $zipname;
3787
3788    # Useless: $safename = "$explodeinto/$safename";
3789    # Untaint output filename
3790    $safename =~ /^(.*)$/;
3791    $safename = $1;
3792
3793    #print STDERR "About to extract $member to $safename\n";
3794    unless ($zip->extractMemberWithoutPaths($member, $safename) == AZ_OK) {
3795      # Create a zero-length file if extraction failed
3796      # so the filename tests will still work.
3797      #print STDERR "Done passworded extraction of $member to $safename\n";
3798      $touchfiles && $fh->open(">$safename") && $fh->close();
3799    }
3800    #print STDERR "Done extraction of $member to $safename\n";
3801  }
3802  return 0;
3803}
3804
3805# Unpack an ole file into the named directory.
3806# Return 1 if an error occurred, else 0.
3807# Return 0 on success.
3808# Return "password" if a member was password-protected.
3809# Currently does not support password-encryption, will merely not create
3810# any members.
3811sub UnpackOle {
3812  my($this, $olename, $explodeinto, $allowpasswords, $insistpasswords, $onlycheckencryption, $touchfiles) = @_;
3813
3814  my($ole, $tree, @NativeFilenames);
3815
3816  #print STDERR "Unpacking $explodeinto/$olename\n";
3817  eval {
3818    #return 1 unless $ole = OLE::Storage_Lite::PPS->new(1,2,3,4,5,6,7,8,
3819    #                                                   9,10,11,12,13);
3820    my $tmpnam = "$explodeinto/$olename";
3821    $tmpnam =~ /^(.*)$/;
3822    $tmpnam = $1;
3823    return 1 unless $ole = OLE::Storage_Lite->new($tmpnam);
3824    return 1 unless $tree = $ole->getPpsTree(1); # (1) => Get Data too
3825
3826    my $level = 0;
3827    @NativeFilenames = $this->OleUnpackTree($tree, 0, \$level, $explodeinto, $olename);
3828  };
3829
3830  if ($@) {
3831    #print STDERR "Skipping OLE document unpacking due to analysis failure\n";
3832    MailScanner::Log::WarnLog("Skipping OLE document unpacking due to OLE analysis failure");
3833  } else {
3834    $this->OleUnpackPackages($explodeinto, $olename, @NativeFilenames);
3835  }
3836
3837  return 0;
3838}
3839
3840# Each embedded object in an OLE tree is packages in a special format.
3841# This converts a list of named filenames into their original data.
3842sub OleUnpackPackages {
3843  my($this, $explodeinto, $parentname, @NativeFilenames) = @_;
3844
3845  my($infh, $byte, $number, $buffer, $outname);
3846  my($finished, $length, $size);
3847
3848  OLEFILE: foreach my $inname (@NativeFilenames) {
3849    $size = -s "$explodeinto/$inname";
3850
3851    # Start with the simple version of the format which is just 4 bytes of junk
3852    close $infh if $infh;
3853    $infh = new FileHandle;
3854    sysopen $infh, "$explodeinto/$inname", O_RDONLY;
3855    sysseek $infh, 4, SEEK_SET; # Skip 1st 4 bytes
3856    sysread($infh, $buffer, $size);
3857    my $outfh = new FileHandle;
3858    $outname = $inname . "_tmp";
3859    my $outsafe = $this->MakeNameSafe('o'.$outname, $explodeinto);
3860    sysopen $outfh, "$explodeinto/$outsafe", (O_CREAT | O_WRONLY);
3861    syswrite $outfh, $buffer, $size if $outfh;
3862    close $outfh if $outfh;
3863    # Set up MailScanner data structures
3864    $this->{file2parent}{$outname} = $parentname;
3865    $this->{file2parent}{$outsafe} = $parentname;
3866    $this->{file2parent}{substr($outsafe,1)} = $parentname; # Why not? :-)
3867    $this->{file2safefile}{$outname} = $outsafe;
3868    $this->{safefile2file}{$outsafe} = $outname;
3869
3870    # Now do the version which uses and analyses the full header.
3871    $byte = "";
3872    $buffer = "";
3873    #close $infh if $infh;
3874    #$infh = new FileHandle;
3875    #sysopen $infh, "$explodeinto/$inname", O_RDONLY;
3876    sysseek $infh, 6, SEEK_SET; # Skip 1st 6 bytes
3877    $outname = "";
3878    $finished = 0;
3879    $length = 0;
3880    until ($byte eq "\0" || $finished || $length>1000) {
3881      # Read a C-string into $outname
3882      sysread($infh, $byte, 1) or $finished = 1;
3883      $outname .= $byte;
3884      $length++;
3885    }
3886    next OLEFILE if $length>1000; # Bail out if it went wrong
3887    $finished = 0;
3888    $byte = 1;
3889    $length = 0;
3890    until ($byte eq "\0" || $finished || $length>1000) { # Throw away a C-string
3891      sysread($infh, $byte, 1) or $finished = 1;
3892      $length++;
3893    }
3894    next OLEFILE if $length>1000; # Bail out if it went wrong
3895    sysseek $infh, 4, Fcntl::SEEK_CUR or next OLEFILE; # Skip next 4 bytes
3896    sysread $infh, $number, 4 or next OLEFILE;
3897    $number = unpack 'V', $number;
3898    #print STDERR "Skipping $number bytes of header filename\n";
3899    if ($number>0 && $number<1_000_000) {
3900      sysseek $infh, $number, Fcntl::SEEK_CUR; # Skip the next bit of header (C-string)
3901    } else {
3902      next OLEFILE;
3903    }
3904    sysread $infh, $number, 4 or next OLEFILE;
3905    $number = unpack 'V', $number;
3906    #print STDERR "Reading $number bytes of file data\n";
3907    sysread $infh, $buffer, $number
3908      if $number>0 && $number < $size; # Sanity check
3909    $outfh = new FileHandle;
3910    $outsafe = $this->MakeNameSafe('o'.$outname, $explodeinto);
3911    sysopen $outfh, "$explodeinto/$outsafe", (O_CREAT | O_WRONLY)
3912      or next OLEFILE;
3913    if ($number>0 && $number<1_000_000_000) { # Number must be reasonable!
3914      syswrite $outfh, $buffer, $number or next OLEFILE;
3915    }
3916    close $outfh;
3917
3918    # Set up MailScanner data structures
3919    $this->{file2parent}{$outname} = $parentname;
3920    $this->{file2parent}{$outsafe} = $parentname;
3921    $this->{file2safefile}{$outname} = $outsafe;
3922    $this->{safefile2file}{$outsafe} = $outname;
3923  }
3924  close $infh if $infh;
3925}
3926
3927
3928# Unpack the tree of OLE objects in this Office Document
3929my %OleNum2Type = (1=>'DIR', 2=>'FILE', 5=>'ROOT');
3930sub OleUnpackTree {
3931  my($this, $tree, $level, $Ttl, $explodeinto, $parentname) = @_;
3932
3933  my(@OleNative);
3934
3935  my $olename = OLE::Storage_Lite::Ucs2Asc($tree->{Name});
3936  my $safename = $this->MakeNameSafe('o'.$olename, $explodeinto);
3937  #print STDERR "Unpacking OLE file to $safename\n";
3938
3939  # Save the data out to a new file. Probably not as fast as possible.
3940  if ($OleNum2Type{$tree->{Type}} eq 'FILE') {
3941    # Added leading . to account for type indicator character
3942    if ($safename =~ /^.Ole.*Native/i) {
3943      my $fh = new FileHandle;
3944      sysopen $fh, "$explodeinto/$safename", (O_CREAT | O_WRONLY);
3945      syswrite $fh, $tree->{Data};
3946      close $fh;
3947
3948      # Find all the embedded objects
3949      push @OleNative, $safename if $safename =~ /^.Ole.*Native/;
3950
3951      # Set up MailScanner data structures
3952      $this->{file2parent}{$olename} = $parentname;
3953      $this->{file2parent}{$safename} = $parentname;
3954      $this->{file2safefile}{$olename} = $safename;
3955      $this->{safefile2file}{$safename} = $olename;
3956    }
3957  }
3958
3959  ${$Ttl}++;
3960  foreach my $child (@{$tree->{Child}}) {
3961    push @OleNative,
3962         $this->OleUnpackTree($child, $level+1, $Ttl, $explodeinto, $parentname)
3963      if $child && $level<50; # Simple DoS prevention measure
3964  }
3965  return @OleNative;
3966}
3967
3968
3969# Is this filename evil?
3970sub IsNameEvil {
3971  my($this, $name, $dir) = @_;
3972
3973  #print STDERR "Testing \"$name\" to see if it is evil\n";
3974  return 1 if (!defined($name) or ($name eq ''));   ### empty
3975  #JKF 20080307 return 1 if ($name =~ m{(^\s)|(\s+\Z)});  ### leading/trailing whitespace
3976  return 1 if ($name =~ m{\s});  ### whitespace
3977  return 1 if ($name =~ m{^\.+\Z});         ### dots
3978  # JKF 20080307 return 1 if ($name =~ tr{ \%\(\)\+\,\-\.0-9\=A-Z_a-z\{\}\x80-\xFF}{}c);
3979  return 1 if ($name =~ tr{\%\(\)\+\,\-\.0-9\=A-Z_a-z\{\}\x80-\xFF}{}c);
3980  return 1 if (length($name) > 50);
3981  return 'exists' if (-e "$dir/$name");
3982
3983  #print STDERR "It is okay\n";
3984  #$self->debug("it's ok");
3985  0;
3986}
3987
3988# Make this filename safe and return the safe version
3989sub MakeNameSafe {
3990  my($self, $fname, $dir) = @_;
3991
3992    ### Isolate to last path element:
3993    # JKF Drop Vax support my $last = $fname; $last =~ s{^.*[/\\\[\]:]}{};
3994    my $firstchar = substr($fname,0,1);
3995    $fname = substr($fname,1);
3996    my $last = $fname; $last =~ s{^.*[/\\:]}{};
3997    if ($last and !$self->IsNameEvil($last, $dir)) {
3998        #$self->debug("looks like I can use the last path element");
3999        #print STDERR "MakeNameSafe: 1 $fname,$last\n";
4000        return $firstchar . $last;
4001    }
4002
4003    # Try removing leading whitespace, trailing whitespace and all
4004    # dangerous characters to start with.
4005    $last =~ s/^\s+//;
4006    $last =~ s/\s+\Z//;
4007    # JKF 20080307 $last =~ tr/ \%\(\)\+\,\-\.0-9\=A-Z_a-z\{\}\x80-\xFF//cd;
4008    #print STDERR "MakeNameSafe: 2before = $last\n";
4009    $last =~ tr/\%\(\)\+\,\-\.0-9\=A-Z_a-z\{\}\x80-\xFF//cd;
4010    #print STDERR "MakeNameSafe: 2 $fname,$last\n";
4011    return $firstchar . $last unless $self->IsNameEvil($last, $dir);
4012
4013    ### Break last element into root and extension, and truncate:
4014    my ($root, $ext) = (($last =~ /^(.*)\.([^\.]+)\Z/)
4015                        ? ($1, $2)
4016                        : ($last, ''));
4017    # JKF Delete leading and trailing whitespace
4018    # JKF 20080307 $root =~ s/^\s+//;
4019    # JKF 20080307 $ext  =~ s/\s+$//;
4020    $root =~ s/\s+//g;
4021    $ext  =~ s/\s+//g;
4022    $root = substr($root, 0, ($self->{MPF_TrimRoot} || 14));
4023    $ext  = substr($ext,  0, ($self->{MPF_TrimExt}  ||  3));
4024    $ext =~ /^\w+$|^$/ or $ext = "dat";
4025    my $trunc = $root . ($ext ? ".$ext" : '');
4026    if (!$self->IsNameEvil($trunc, $dir)) {
4027        #$self->debug("looks like I can use the truncated last path element");
4028        #print STDERR "MakeNameSafe: 3 $fname,$trunc\n";
4029        return $firstchar . $trunc;
4030    }
4031
4032    # It is still evil, but probably just because it exists
4033    if ($self->IsNameEvil($trunc, $dir) eq 'exists') {
4034      my $counter = 0;
4035      $trunc = $trunc . '0';
4036      do {
4037        $counter++;
4038        $trunc = $root . $counter . ($ext ? ".$ext" : '');
4039      } while $self->IsNameEvil($trunc, $dir) eq 'exists';
4040      return $firstchar . $trunc;
4041    }
4042    ### Hope that works:
4043    #print STDERR "MakeNameSafe: 4 $fname,:-(\n";
4044    #undef;
4045    # Return a new filename that doesn't exist.
4046    return File::Temp::tempnam($dir, $firstchar . "MStemp");
4047}
4048
4049# Unpack a tar file into the named directory.
4050# Return 1 if an error occurred, else 0.
4051sub UnpackTar {
4052  my($this, $tarname, $explodeinto) = @_;
4053
4054  return 1; # Not yet implemented
4055}
4056
4057
4058# Try to parse all the text bits of each message, looking to see if they
4059# can be parsed into files which might be infected.
4060# I then throw these sections back to the MIME parser.
4061sub ExplodePart {
4062  my($this, $explodeinto) = @_;
4063
4064  my($dir, $file, $part, @parts);
4065
4066  $dir = new DirHandle;
4067  $file = new FileHandle;
4068
4069  $dir->open($explodeinto);
4070  @parts = $dir->read();
4071  $dir->close();
4072
4073  my($linenum, $foundheader, $prevline, $line, $position, $prevpos, $nextpos);
4074  foreach $part (@parts) {
4075    #print STDERR "Reading $part\n";
4076    # Allow for leading type indicator character.
4077    next unless $part =~ /^.msg.*txt/;
4078
4079    # Try and find hidden messages in the text files
4080    #print STDERR "About to read $explodeinto/$part\n";
4081    $file->open("$explodeinto/$part") or next;
4082
4083    # Try reading the first few lines to see if they look like mail headers
4084    $linenum = 0;
4085    $foundheader = 0;
4086    $prevline = "";
4087    $prevpos = 0;
4088    $nextpos = 0;
4089    $line = undef;
4090
4091    for ($linenum=0; $linenum<30; $linenum++) {
4092      #$position = $file->getpos();
4093      $line = <$file>;
4094      last unless defined $line;
4095      $nextpos += length $line;
4096      # Must have 2 lines of header
4097      if ($prevline =~ /^[^:\s]+: / && $line =~ /(^\s+)|(^[^:]+ )|(^\s+.*=)/) {
4098        #print STDERR "Found header start at \"$prevline\"\n and \"$line\"\n";
4099        $foundheader = 1;
4100        last;
4101      }
4102      $prevline = $line;
4103      $prevpos  = $position;
4104      $position = $nextpos;
4105    }
4106
4107    unless ($foundheader) {
4108      $file->close();
4109      next;
4110    }
4111
4112    # Rewind to the start of the header
4113    #$file->setpos($prevpos);
4114    seek $file, $prevpos, 0;
4115    #print STDERR "First line is \"" . <$file> . "\"\n";
4116
4117    # Setup everything for the MIME parser
4118    my $parser = MIME::Parser->new;
4119    my $filer  = MIME::Parser::MailScanner->new($explodeinto);
4120
4121    # Over-ride the default default character set handler so it does it
4122    # much better than the MIME-tools default handling.
4123    MIME::WordDecoder->default->handler('*' => \&WordDecoderKeep7Bit);
4124
4125    #print STDERR "Exploding message " . $this->{id} . " into " .
4126    #             $explodeinto . "\n";
4127    $parser->filer($filer);
4128    $parser->extract_uuencode(1); # uue is off by default
4129    $parser->output_to_core('NONE'); # everything into files
4130
4131    # Do the actual parsing
4132    my $entity = eval { $parser->parse($file) };
4133
4134    $file->close;
4135  }
4136}
4137
4138
4139# Print the infection reports for this message
4140sub PrintInfections {
4141  my $this = shift;
4142
4143  my($filename, $report, $type);
4144
4145  print STDERR "Virus reports for " . $this->{id} . ":\n";
4146  foreach $filename (keys %{$this->{virusreports}}) {
4147    print STDERR "    ";
4148    print STDERR $filename . "\t" . $this->{virusreports}{$filename} . "\n";
4149    print STDERR "    " . $this->{virustypes}{$filename} . "\n";
4150  }
4151
4152  print STDERR "Name reports for " . $this->{id} . ":\n";
4153  foreach $filename (keys %{$this->{namereports}}) {
4154    print STDERR "    ";
4155    print STDERR $filename . "\t" . $this->{namereports}{$filename} . "\n";
4156    print STDERR "    " . $this->{nametypes}{$filename} . "\n";
4157  }
4158
4159  print STDERR "Other reports for " . $this->{id} . ":\n";
4160  foreach $filename (keys %{$this->{otherreports}}) {
4161    print STDERR "    ";
4162    print STDERR $filename . "\t" . $this->{otherreports}{$filename} . "\n";
4163    print STDERR "    " . $this->{othertypes}{$filename} . "\n";
4164  }
4165
4166  print STDERR "Entity reports for " . $this->{id} . ":\n";
4167  foreach $filename (keys %{$this->{entityreports}}) {
4168    print STDERR "    ";
4169    print STDERR $filename . "\t" . $this->{entityreports}{$filename} . "\n";
4170  }
4171
4172  print STDERR "All reports for " . $this->{id} . ":\n";
4173  foreach $filename (keys %{$this->{allreports}}) {
4174    print STDERR "    ";
4175    print STDERR $filename . "\t" . $this->{allreports}{$filename} . "\n";
4176  }
4177
4178  print STDERR "Message is TNEF? " . ($this->{tnefentity}?"Yes":"No") . "\n";
4179  print STDERR "Message is bad TNEF? " . ($this->{badtnef}?"Yes":"No") . "\n";
4180  print STDERR "Message has " . $this->{virusinfected} . " virus infections\n";
4181  print STDERR "Message has " . $this->{sizeinfected} . " size problems\n";
4182  # JKF 19/12/2007 print STDERR "Message has " . $this->{passwordinfected} . " passworded archive problems\n";
4183  print STDERR "Message has " . $this->{otherinfected} . " other problems\n";
4184
4185  print STDERR "\n";
4186}
4187
4188
4189# Create the Entity2Parent and Entity2File hashes for a message
4190#    $message->CreateEntitiesHelpers($this->{entity2parent},
4191#                                    $this->{entity2file});
4192
4193sub CreateEntitiesHelpers {
4194  my $this = shift;
4195  #my($Entity2Parent, $Entity2File) = @_;
4196
4197  return undef unless $this->{entity};
4198
4199  # Set this up so it's ready for de-miming filenames in odd charsets.
4200  MIME::WordDecoder->default->handler('*' => \&MailScanner::Message::WordDecoderKeep7Bit);
4201
4202  $this->{numberparts} = CountParts($this->{entity}) || 1;
4203
4204  # Put something useless in the 2 hashes so that they exist.
4205  $this->{entity2file}{""} = 0;
4206  $this->{entity2safefile}{""} = 0;
4207  $this->{entity2parent}{""} = 0;
4208  $this->{file2entity}{""} = $this->{entity}; # Root of this message
4209  $this->{name2entity}{""} = 0;
4210  $this->{file2safefile}{""} = "";
4211  $this->{safefile2file}{""} = "";
4212  BuildFile2EntityAndEntity2File($this->{entity},
4213                                 $this->{file2entity},
4214                                 $this->{file2safefile},
4215                                 $this->{safefile2file},
4216                                 $this->{entity2file},
4217                                 $this->{entity2safefile},
4218                                 $this->{name2entity});
4219  #print STDERR "In CreateEntitiesHelpers, this = $this\n";
4220  #print STDERR "In CreateEntitiesHelpers, this entity = " .
4221  #             $this->{entity} . "\n";
4222  #print STDERR "In CreateEntitiesHelpers, parameters are " .
4223  #             scalar($this->{entity2file}) . " and " .
4224  #             scalar($this->{entity2parent}) . "\n";
4225  BuildEntity2Parent($this->{entity}, $this->{entity2parent}, undef);
4226}
4227
4228
4229# For the MIME entity given, work out the number of message parts.
4230# Recursive. This is a class function, not a normal method.
4231sub CountParts {
4232  my($entity) = @_;
4233  my(@parts, $total, $part);
4234
4235  return 0 unless $entity;
4236  @parts = $entity->parts;
4237  $total += int(@parts);
4238  foreach $part (@parts) {
4239    $total += CountParts($part);
4240  }
4241  return $total;
4242}
4243
4244
4245# Build the file-->entity and entity-->file mappings for a message.
4246# This will let us replace infected entities later. Key is the filename,
4247# value is the entity.
4248# This is recursive. This is a class function, not a normal method.
4249sub BuildFile2EntityAndEntity2File {
4250  my($entity, $file2entity, $file2safefile, $safefile2file, $entity2file,
4251     $entity2safefile, $name2entity) = @_;
4252
4253  # Build the conversion hash from scalar(entity) --> real entity object
4254  # Need to do this as objects cannot be hash keys.
4255  $name2entity->{scalar($entity)} = $entity;
4256
4257  my(@parts, $body, $headfile, $part, $path, $namewithouttype);
4258
4259  # Find the body for this entity
4260  $body = $entity->bodyhandle;
4261  if (defined($body) && defined($body->path)) {   # data is on disk:
4262    $path = $body->path;
4263    $path =~ s#^.*/([^/]*)$#$1#;
4264    # At this point $path will contain the filename with the leading type char
4265    $namewithouttype = substr($path,1);
4266    #$file2entity->{$path} = $entity;
4267    #$entity2file->{$entity} = $path;
4268    $file2entity->{$namewithouttype} = $entity;
4269    $entity2file->{$entity} = $namewithouttype;
4270    #print STDERR "Path is $path\n";
4271  }
4272  # And the head, which is where the recommended filename is stored
4273  # This is so we can report infections in the filenames which are
4274  # recommended, even if they are evil and we hence haven't used them.
4275  # JKF 20090327 Safefile always has the leading type indicator character.
4276  # JKF 20090327 None of the others do, they represent the real attach name.
4277  $headfile = $entity->head->recommended_filename || $namewithouttype; # $path;
4278  #print STDERR "rec filename for \"$headfile\" is \"" . $entity->head->recommended_filename . "\"\n";
4279
4280  # Remove any wide characters so that WordDecoder can parse
4281  # mime_to_perl_string is ignoring the built-in handler that was set earlier
4282  # https://github.com/MailScanner/v5/issues/253
4283  # Also enforce 7 bit characters for filenames
4284  $headfile =~  tr/\x00-\x7F/#/c;
4285
4286  $headfile = MIME::WordDecoder::mime_to_perl_string($headfile);
4287  #print STDERR "headfile is $headfile\n";
4288  if ($headfile) {
4289    # headfile does *NOT* have the type indicator character on it.
4290    $file2entity->{$headfile} = $entity if !$file2entity->{$headfile};
4291    $file2safefile->{$headfile} = $path;
4292    $entity2safefile->{$entity} = $path;
4293    $safefile2file->{$path}     = $headfile;
4294    #print STDERR "File2SafeFile (\"$headfile\") = \"$path\"\n";
4295  }
4296
4297  # And for all its children
4298  @parts = $entity->parts;
4299  foreach $part (@parts) {
4300    BuildFile2EntityAndEntity2File($part, $file2entity, $file2safefile,
4301                                   $safefile2file, $entity2file,
4302                                   $entity2safefile, $name2entity);
4303  }
4304}
4305
4306
4307# Build a hash that gives the parent of any entity
4308# (except for root ones which will be undef).
4309# This is recursive.
4310sub BuildEntity2Parent {
4311  my($entity, $Entity2Parent, $parent) = @_;
4312
4313  my(@parts, $part);
4314
4315  $Entity2Parent->{$entity} = $parent;
4316  @parts = $entity->parts;
4317  foreach $part (@parts) {
4318    #print STDERR "BuildEntity2Parent: Doing part $part\n";
4319    $Entity2Parent->{$part} = $entity;
4320    BuildEntity2Parent($part, $Entity2Parent, $entity);
4321  }
4322}
4323
4324
4325# Combine the virus reports and the other reports, as otherwise the
4326# cleaning code is really messy. I might combine them when I create
4327# them some time later, but I wanted to keep them separate if possible
4328# in case anyone wanted a feature in the future which would be easier
4329# with separate reports.
4330# If safefile2file does not map for a filename, ban the whole message
4331# to be on the safe side.
4332# No text in reports will contain any file type indicators.
4333# But the data structures will, as they must be accurate filenames (safefiles).
4334sub CombineReports {
4335  my $this = shift;
4336
4337  my($file, $text, $Name);
4338  my(%reports, %types);
4339  #print STDERR "Combining reports for " . $this->{id} . "\n";
4340
4341  # If they want to include the scanner name in the reports, then also
4342  # include the translation of "MailScanner" in the filename/type/content
4343  # reports.
4344  # If they set "MailScanner = " in languages.conf then this string will
4345  # *not* be inserted at the start of the reports.
4346  $Name = MailScanner::Config::LanguageValue($this, 'mailscanner')
4347    if MailScanner::Config::Value('showscanner', $this);
4348  $Name .= ': ' if $Name ne "" && $Name !~ /:/;
4349
4350  # Or the flags together
4351  $this->{infected} = $this->{virusinfected} |
4352                      $this->{nameinfected}  |
4353                      $this->{sizeinfected}  |
4354                      # JKF 19/12/2007 $this->{passwordinfected} |
4355                      $this->{otherinfected} ;
4356
4357  # Combine all the reports and report-types
4358  while (($file, $text) = each %{$this->{virusreports}}) {
4359    #print STDERR "Adding file $file report $text\n";
4360    $this->{allreports}{$file} .= $text;
4361    $reports{$file} .= $text;
4362  }
4363  while (($file, $text) = each %{$this->{virustypes}}) {
4364    #print STDERR "Adding file $file type $text\n";
4365    $this->{alltypes}{$file} .= $text;
4366    $types{$file} .= $text;
4367  }
4368  while (($file, $text) = each %{$this->{namereports}}) {
4369    #print STDERR "Adding file \"$file\" report \"$text\"\n";
4370    # Next line not needed as we prepend the $Name anyway
4371    #$text =~ s/\n(.)/\n$Name: NEWSTABLE $1/g if $Name; # Make sure name is at the front of this
4372    #print STDERR "report is now \"$text\"\n";
4373    $this->{allreports}{$file} .= $Name . $text;
4374    $reports{$file} .= $Name . $text;
4375  }
4376  while (($file, $text) = each %{$this->{nametypes}}) {
4377    #print STDERR "Adding file $file type $text\n";
4378    $this->{alltypes}{$file} .= $text;
4379    $types{$file} .= $text;
4380  }
4381  while (($file, $text) = each %{$this->{otherreports}}) {
4382    #print STDERR "Adding file $file report $text\n";
4383    $this->{allreports}{$file} .= $Name . $text;
4384    $reports{$file} .= $Name . $text;
4385  }
4386  while (($file, $text) = each %{$this->{othertypes}}) {
4387    #print STDERR "Adding file $file type $text\n";
4388    $this->{alltypes}{$file} .= $text;
4389    $types{$file} .= $text;
4390  }
4391
4392  # Now try to map all the reports onto their parents as far as possible
4393  #print STDERR "About to combine reports\n";
4394  my($key, $value, $parentwithtype);
4395  while(($key, $value) = each %reports) {
4396    $parentwithtype = $this->{file2parent}{$key};
4397    if ($parentwithtype ne "" &&
4398        exists($this->{safefile2file}{$parentwithtype})) {
4399      #print STDERR "Found parent of $key is $parentwithtype\n";
4400      $this->{allreports}{$parentwithtype} .= $value;
4401      $this->{alltypes}{$parentwithtype}   .= $types{$key};
4402    } else {
4403      #print STDERR "Promoting report for $key\n";
4404      if($parentwithtype eq "" and exists($this->{safefile2file}{$key})) {
4405        delete $this->{allreports}{$key};
4406        delete $this->{alltypes}{$key};
4407        $this->{allreports}{$key} .= $value;
4408        $this->{alltypes}{$key}   .= $types{$key};
4409      } else {
4410        delete $this->{allreports}{$key};
4411        delete $this->{alltypes}{$key};
4412        $this->{allreports}{""} .= $value;
4413        $this->{alltypes}{""} .= $types{$key};
4414      }
4415    }
4416  }
4417
4418  #print STDERR "Finished combining reports\n";
4419  #$this->PrintInfections();
4420}
4421
4422# Clean the message. This involves removing all the infected or
4423# troublesome sections of the message and replacing them with
4424# nice little text files explaining what happened.
4425# We do not do true macro-virus disinfection here.
4426# Also mark the message as having had its body modified.
4427sub Clean {
4428  my $this = shift;
4429
4430  #print STDERR "\n\n\nStart Of Clean\n\n";
4431  #$this->PrintInfections();
4432  # Get out if nothing to do
4433  #print STDERR "Have we got anything to do?\n";
4434  return unless ($this->{allreports} && %{$this->{allreports}}) ||
4435                ($this->{entityreports} && %{$this->{entityreports}});
4436  #print STDERR "Yes we have\n";
4437
4438  my($file, $text, $entity, $parent, $filename, $everyreport, %AlreadyCleaned);
4439  my($untypedfile);
4440
4441  # Work out whether infected bits of this message should be stored
4442  my $storeme = 0;
4443  $storeme = 1
4444    if MailScanner::Config::Value('quarantineinfections', $this) =~ /1/;
4445  #print STDERR "StoreMe = $storeme\n";
4446  # Cancel the storage if it is silent and no-one wants it quarantined
4447  $storeme = 0 if $this->{silent} && !$this->{noisy} &&
4448                  MailScanner::Config::Value('quarantinesilent', $this) !~ /1/;
4449
4450  # Construct a string of all the reports, which is used if there is
4451  # cleaning needing doing on the whole message
4452  $everyreport = join("\n", values %{$this->{allreports}});
4453
4454  # Construct a hash of all the entities we will clean,
4455  # so we clean parents in preference to their children.
4456  my(%EntitiesWeClean);
4457  $EntitiesWeClean{scalar($this->{tnefentity})} = 1 if $this->{tnefentity};
4458
4459  # Work through each filename-based report in turn, 1 per attachment
4460  while(($file, $text) = each %{$this->{allreports}}) {
4461
4462    #print STDERR "Cleaning $file\n";
4463    $this->{bodymodified} = 1; # This message body has been changed in memory
4464
4465    # If it's a TNEF message, then use the entity of the winmail.dat
4466    # file, else use the entity of the infected file.
4467    my $tnefentity = $this->{tnefentity};
4468    #print STDERR "It's a TNEF message\n" if $tnefentity;
4469    if ($file eq "") {
4470      #print STDERR "It's a whole body infection, entity = ".$this->{entity}."\n";
4471      $entity = $this->{entity};
4472    } else {
4473      #print STDERR "It's just 1 file, which is $file\n";
4474      if ($tnefentity) {
4475        $entity = $tnefentity;
4476      } else {
4477	# Find the top-level parent's entity
4478        my %visited = (); # This makes sure we can't loop forever (typed files)
4479        my @entities; # Entities we hit on the way to the top, delete 'em all!
4480        my $parententity = $this->{file2entity}{substr($file,1)};
4481	while ($this->{file2parent}{$file} ne "" &&
4482               !defined($visited{$this->{file2parent}{$file}})) {
4483          #print STDERR "Traversing to top-level via $file, $parententity\n";
4484	  $file = $this->{file2parent}{$file};
4485          $visited{$file} = 1;
4486          push @entities, $parententity;
4487          $parententity = $this->{entity2parent}{$parententity};
4488	}
4489        # Delete all the entities on the way so we don't have any strays.
4490        #print STDERR "Must also delete entities " . join(',',@entities) . "\n";
4491        foreach (@entities) {
4492          #print STDERR "Deleting entity $_, file = " . $this->{entity2file}{$_} . "\n";
4493          $this->DeleteEntity($_, $this->{entity}, $tnefentity) if $_;
4494        }
4495        $untypedfile = substr($file,1);
4496        $entity = scalar($this->{file2entity}{$untypedfile})
4497          if $untypedfile ne "";
4498        #print STDERR "Found entity $entity for untypedfile $untypedfile\n";
4499        next if $entity && $EntitiesWeClean{$entity};
4500        #print STDERR "Survived the cut\n";
4501
4502        # Could not find parent, give up and zap whole message
4503        if (!$entity) {
4504          $entity = $this->{entity};
4505        }
4506        #print STDERR "Top-level parent's entity is $entity, file $file\n";
4507      }
4508    }
4509
4510    # Avoid cleaning the same entity twice as it will clean the wrong thing!
4511    next if $AlreadyCleaned{$entity};
4512    $AlreadyCleaned{$entity} = 1;
4513
4514    # Work out which message to replace the attachment with.
4515    # As there may be multiple types for 1 file, find them in
4516    # in decreasing order of importance.
4517    my $ModificationOnly = 0; # Is this just an "m" modification?
4518    my $type = $this->{alltypes}{"$file"};
4519    #print STDERR "In Clean message, type = $type and quar? = $storeme\n";
4520    if ($type =~ /v/i) {
4521      # It's a virus. Either delete or store it.
4522      if ($storeme) {
4523        $filename = MailScanner::Config::Value('storedvirusmessage',
4524                                               $this);
4525      } else {
4526        $filename = MailScanner::Config::Value('deletedvirusmessage',
4527                                               $this);
4528      }
4529    } elsif ($type =~ /f/i) {
4530      # It's a filename trap. Either delete or store it.
4531      if ($storeme) {
4532        $filename = MailScanner::Config::Value('storedfilenamemessage',
4533                                               $this);
4534      } else {
4535        $filename = MailScanner::Config::Value('deletedfilenamemessage',
4536                                               $this);
4537      }
4538    } elsif ($type =~ /c/i) {
4539      # It's dangerous content, either delete or store it.
4540      if ($storeme) {
4541        $filename = MailScanner::Config::Value('storedcontentmessage',
4542                                               $this);
4543      } else {
4544        $filename = MailScanner::Config::Value('deletedcontentmessage',
4545                                               $this);
4546      }
4547    } elsif ($type =~ /s/i) {
4548      # It's dangerous content, either delete or store it.
4549      if ($storeme) {
4550        $filename = MailScanner::Config::Value('storedsizemessage',
4551                                               $this);
4552      } else {
4553        $filename = MailScanner::Config::Value('deletedsizemessage',
4554                                               $this);
4555      }
4556    } elsif ($type eq 'm') {
4557      # The only thing wrong here is that the MIME structure has been
4558      # modified, so the message must be re-built. Nothing needs to
4559      # be removed from the message.
4560      $ModificationOnly = 1;
4561    } else {
4562      # Treat it like a virus anyway, to be on the safe side.
4563      if ($storeme) {
4564        $filename = MailScanner::Config::Value('storedvirusmessage',
4565                                               $this);
4566      } else {
4567        $filename = MailScanner::Config::Value('deletedvirusmessage',
4568                                               $this);
4569      }
4570    }
4571
4572    # If entity is null then there was a parsing problem with the message,
4573    # so don't try to walk its tree as it will fail.
4574    next unless $entity;
4575
4576    # MIME structure has been modified, so the message must be rebuilt.
4577    # Nothing needs to be cleaned though.
4578    next if $ModificationOnly;
4579
4580    # If it's a silent virus, then only generate the report if anyone
4581    # wants a copy of it in the quarantine. Or else it won't be quarantined
4582    # but they will still get a copy of the report.
4583    #print STDERR "\n\nSilent = " . $this->{silent} . " and Noisy = " . $this->{noisy} . "\n";
4584    $filename = "" if $this->{silent} && !$this->{noisy} &&
4585                      !MailScanner::Config::Value('deliversilent', $this); # &&
4586    #             MailScanner::Config::Value('quarantinesilent', $this) !~ /1/;
4587
4588    # Deliver silent message unmodified after spam scanning depite warnings? :/
4589    # https://github.com/MailScanner/v5/issues/384
4590    # Skips cleaning an attachment if conditions are met (old behavior)
4591    next if $this->{silent} && !$this->{noisy} &&
4592            MailScanner::Config::Value('deliversilent', $this) &&
4593            MailScanner::Config::Value('deliversilentunmodified', $this);
4594
4595    MailScanner::Log::DebugLog("Debug: Message %s needs an attachment replaced", $this->{id});
4596    # Do the actual attachment replacement
4597    #print STDERR "File = \"$file\"\nthis = \"$this\"\n";
4598    #print STDERR "Entity to clean is $entity\n" .
4599    #             "root entity is " . $this->{entity} . "\n";
4600    #print STDERR "About to try to clean $entity, $text, $filename\n";
4601    if ($file eq "") {
4602      # It's a report on the whole message, so use all the reports
4603      # This is a virus disinfection on the *whole* message, so the
4604      # cleaner needs to know not to generate any mime parts.
4605      $this->CleanEntity($entity, $everyreport, $filename);
4606    } else {
4607      # It's a report on 1 section, so just use the report for that
4608      $this->CleanEntity($entity, $text, $filename);
4609    }
4610  }
4611
4612  # Now do the entity reports. These are for things like unparsable tnef
4613  # files, partial messages, external-body messages, things like that
4614  # which are always just errors.
4615  # Work through each report in turn, 1 per attachment
4616  #print STDERR "Entity reports are " . $this->{entityreports} . "\n";
4617  while(($entity, $text) = each %{$this->{entityreports}}) {
4618    #print STDERR "Cleaning $entity which had a report of $text\n";
4619
4620    # Find rogue entity reports that should point to tnefentity but don't
4621    $entity = $this->{tnefentity} if $this->{badtnef} && !$entity;
4622    next unless $entity; # Skip rubbish in the reports
4623
4624    # Turn the text name of the entity into the object itself
4625    $entity = $this->{name2entity}{scalar($entity)};
4626
4627    $this->{bodymodified} = 1; # This message body has been changed in memory
4628
4629    #print STDERR "In Clean message, quar? = $storeme and entity = $entity\n";
4630    # It's always an error, so handle it like a virus.
4631    # Either delete or store it.
4632    if ($storeme) {
4633      $filename = MailScanner::Config::Value('storedvirusmessage', $this);
4634    } else {
4635      $filename = MailScanner::Config::Value('deletedvirusmessage', $this);
4636    }
4637
4638    # Do the actual attachment replacement
4639    #print STDERR "About to try to clean $entity, $text, $filename\n";
4640    $this->CleanEntity($entity, $text, $filename);
4641  }
4642
4643  # Sign the top of the message body with a text/html warning if they want.
4644  # https://github.com/MailScanner/v5/issues/384
4645  # Skip warning if deliversilentunmodified is true (old behavior)
4646  if (MailScanner::Config::Value('markinfectedmessages',$this) =~ /1/ &&
4647      !$this->{signed} &&
4648      !MailScanner::Config::Value('deliversilentunmodified', $this)) {
4649    #print STDERR "In Clean message, about to sign message " . $this->{id} .
4650    #             "\n";
4651    MailScanner::Log::DebugLog("Debug: Adding warning to message %s body", $this->{id});
4652    $this->SignWarningMessage($this->{entity});
4653    $this->{signed} = 1;
4654  }
4655
4656  #print STDERR "\n\n\nAfter Clean()\n";
4657  #$this->PrintInfections();
4658}
4659
4660
4661# Do the actual attachment replacing
4662sub CleanEntity {
4663  my $this = shift;
4664  my($entity, $report, $reportname) = @_;
4665
4666  my(@parts, $Warning, $Disposition, $warningfile, $charset, $i);
4667
4668  # Knock out the helper's list of entity to filename mapping,
4669  # so auto-zip won't find the attachment
4670  delete $this->{entity2safefile}{$entity};
4671
4672  # Find the parent as that's what you have to change
4673  #print STDERR "CleanEntity: In ".$this->{id}." entity is $entity and " .
4674  #             "its parent is " . $this->{entity2parent}{$entity} . "\n";
4675  #print STDERR "Reportname is $reportname\n";
4676  my $parent = $this->{entity2parent}{$entity};
4677  $warningfile = MailScanner::Config::Value('attachmentwarningfilename', $this);
4678  $charset = MailScanner::Config::Value('attachmentcharset', $this);
4679
4680  #print STDERR "Cleaning entity whose report is $report\n";
4681
4682  # Infections applying to the entire message cannot be simply disinfected.
4683  # Have to replace the entire message with a text/plain error.
4684  unless ($parent) {
4685    #print STDERR "Doing the whole message\n";
4686    #print STDERR "ConstructingWarning for $report, " . $this->{id} . ", $reportname\n";
4687    $Warning = $this->ConstructWarning(
4688                 MailScanner::Config::LanguageValue($this, 'theentiremessage'),
4689                 $report, $this->{id}, $reportname);
4690    #print STDERR "Warning message is $Warning\n";
4691    #031118 if ($this->{entity} eq $entity) {
4692    if ($entity->bodyhandle) {
4693      #print STDERR "Really doing the whole message\n";
4694      #print STDERR "Really doing Whole message\n";
4695      # Replacing the whole message as the main body text of the message
4696      # contained a virus (e.g. the text of EICAR) without any proper
4697      # MIME structure at all.
4698
4699      #print STDERR "Entity in CleanEntity is $entity\n";
4700      #print STDERR "Bodyhandle is " . $entity->bodyhandle . "\n";
4701      #031118 $entity->bodyhandle or return undef;
4702
4703      # Output message back into body
4704      my($io, $filename, $temp);
4705      $io = $entity->open("w");
4706      $io->print($Warning . "\n");
4707      $io->close;
4708      # Set the MIME type if it was wrong
4709      $filename = MailScanner::Config::Value('attachmentwarningfilename',
4710                                             $this);
4711      $temp = $entity->head->mime_attr('content-type');
4712      $entity->head->mime_attr('Content-Type', 'text/plain') if
4713        $temp && $temp ne 'text/plain';
4714      # Set the charset if there was already a Content-type: header
4715      $entity->head->mime_attr('Content-type.charset', $charset) if $temp;
4716      $temp = $entity->head->mime_attr('content-type.name');
4717      $entity->head->mime_attr('Content-type.name', $filename) if $temp;
4718      $temp = $entity->head->mime_attr('content-disposition');
4719      $entity->head->mime_attr('content-disposition', 'inline') if $temp;
4720      $temp = $entity->head->mime_attr('content-disposition.filename');
4721      $entity->head->mime_attr('content-disposition.filename', $filename)
4722        if $temp;
4723      return;
4724    } else {
4725      # If the message is multipart but the boundary is "" then it won't
4726      # have any parts() which makes it impossible to overwrite without
4727      # first forcing it to throw away all the structure by becoming
4728      # single-part.
4729      $entity->make_singlepart
4730        if $entity->is_multipart && $entity->head &&
4731           $entity->head->multipart_boundary eq "";
4732
4733      $parts[0] = MIME::Entity->build(
4734                        Type => 'text/plain',
4735                        Filename => $warningfile,
4736                        Disposition => 'inline',
4737                        Data => $Warning,
4738                        Encoding => 'quoted-printable',
4739                        Charset => $charset,
4740                        Top => 0);
4741      $entity->make_multipart()
4742        if $entity->head && $entity->head->mime_attr('content-type') eq "";
4743      $entity->parts(\@parts);
4744      return;
4745    }
4746  }
4747
4748  # Now know that the infection only applies to one part of the message,
4749  # so replace that part with an error message.
4750  @parts = $parent->parts;
4751  # Find the infected part
4752  my $tnef = $this->{tnefentity};
4753  #print STDERR "TNEF entity is " . scalar($tnef) . "\n";
4754  my $infectednum = -1;
4755  #print STDERR "CleanEntity: Looking for entity $entity\n";
4756  for ($i=0; $i<@parts; $i++) {
4757    #print STDERR "CleanEntity: Comparing " . scalar($parts[$i]) .
4758    #             " with $entity\n";
4759    if (scalar($parts[$i]) eq scalar($entity)) {
4760      #print STDERR "Found it in part $i\n";
4761      $infectednum = $i;
4762      last;
4763    }
4764    if ($tnef && (scalar($parts[$i]) eq scalar($tnef))) {
4765      #print STDERR "Found winmail.dat in part $i\n";
4766      $infectednum = $i;
4767      last;
4768    }
4769  }
4770
4771  #MailScanner::Log::WarnLog(
4772  #  "Oh bother, missed infected entity in message %s :-(", $this->{id}), return
4773  #  if $infectednum<0;
4774
4775  # Now to actually do something about it...
4776  #print STDERR "About to constructwarning from $report\n";
4777  $Warning = $this->ConstructWarning($this->{entity2file}{$entity},
4778                                     $report, $this->{id}, $reportname);
4779  #print STDERR "Reportname is \"$reportname\"\n";
4780  #print STDERR "Warning is \"$Warning\"\n";
4781  # If the warning is now 0 bytes, don't add it, just remove the virus
4782  if ($Warning ne "") {
4783    $Disposition = MailScanner::Config::Value('warningisattachment',$this)
4784                   ?'attachment':'inline';
4785    $parts[$infectednum] = build MIME::Entity
4786                             Type => 'text/plain',
4787                             Filename => $warningfile,
4788                             Disposition => $Disposition,
4789                             Data => $Warning,
4790                             Encoding => 'quoted-printable',
4791                             Charset => $charset,
4792                             Top => 0;
4793  } else {
4794    # We are just deleting the part, not replacing it
4795    # @parts = splice @parts, $infectednum, 1;
4796    $parts[$infectednum] = undef; # We prune the tree just during delivery
4797  }
4798  $parent->parts(\@parts);
4799
4800  # And make the parent a multipart/mixed if it's a multipart/alternative
4801  # or multipart/related or message/partial
4802  $parent->head->mime_attr("Content-type" => "multipart/mixed")
4803    if ($parent->is_multipart) &&
4804       ($parent->head->mime_attr("content-type") =~
4805                                   /multipart\/(alternative|related)/i);
4806  if ($parent->head->mime_attr("content-type") =~ /message\/partial/i) {
4807    $parent->head->mime_attr("Content-type" => "multipart/mixed");
4808  #  $parent->make_singlepart();
4809  }
4810  #print STDERR "Finished CleanEntity\n";
4811}
4812
4813
4814# Construct a warning message given an attachment filename, a copy of
4815# what the virus scanner said, the message id and a message filename to parse.
4816# The id is passed in purely for substituting into the warning message file.
4817sub ConstructWarning {
4818  my $this = shift;
4819  my($attachmententity, $scannersaid, $id, $reportname) = @_;
4820
4821  # If there is no report file then we create no warning
4822  return "" unless $reportname;
4823
4824  my $date = $this->{datestring}; # scalar localtime;
4825  my $textfh = new FileHandle;
4826  my $dir = $global::MS->{work}{dir}; # Get the working directory
4827  my $localpostmaster = MailScanner::Config::Value('localpostmaster', $this);
4828  my $postmastername  = MailScanner::Config::LanguageValue($this, 'mailscanner');
4829
4830  #print STDERR "ConstructWarning for $attachmententity. Scanner said \"" .
4831  #             "$scannersaid\", message id $id, file = $reportname\n";
4832
4833  # Reformat the virus scanner report a bit, and optionally remove dirs
4834  $scannersaid =~ s/^/   /gm;
4835  if (MailScanner::Config::Value('hideworkdir',$this)) {
4836    my $pattern = '(' . quotemeta($global::MS->{work}->{dir}) . "|\\\.)/";
4837    #print STDERR "In replacement, regexp is \"$pattern\"\n";
4838    $scannersaid =~ s/$pattern//g; #m # Remove the work dir
4839    $scannersaid =~ s/\/?$id\/?//g; # Remove the message id
4840  }
4841  #print STDERR "After replacement, scanner said \"$scannersaid\"\n";
4842
4843  my $output = "";
4844  my $result = "";
4845  # These are all the variables that are allowed to appear
4846  # in the report template.
4847  my $filename = ($attachmententity ||
4848                  MailScanner::Config::LanguageValue($this, 'notnamed'));
4849  #my $date = scalar localtime; Already defined above
4850  my $report = $scannersaid;
4851  my $hostname = MailScanner::Config::Value('hostname',$this);
4852  my $linkhostname = lc($hostname);
4853  $linkhostname =~ tr/a-z0-9_-//dc;
4854  my $quarantinedir = MailScanner::Config::Value('quarantinedir', $this);
4855
4856  # And let them put the date number in there too
4857  my($day, $month, $year);
4858  #($day, $month, $year) = (localtime)[3,4,5];
4859  #$month++;
4860  #$year += 1900;
4861  #my $datenumber = sprintf("%04d%02d%02d", $year, $month, $day);
4862  my $datenumber = $this->{datenumber};
4863
4864#  # Do we want to hide the directory and message id from the report path?
4865#  if (MailScanner::Config::Value('hideworkdir', $this)) {
4866#    my $pattern = "(" . quotemeta($global::MS->{work}->{dir}) . "|\.)/$id/";
4867#    $report =~ s/$pattern//gm;
4868#  }
4869
4870  # add recipients
4871  my $to = join ', ', @{$this->{to}};
4872
4873  open($textfh, $reportname)
4874    or MailScanner::Log::WarnLog("Cannot open message file %s, %s",
4875                                 $reportname, $!);
4876  my $line;
4877  while(defined ($line = <$textfh>)) {
4878    chomp $line;
4879    #$line =~ s/"/\\"/g; # Escape any " characters
4880    #$line =~ s/@/\\@/g; # Escape any @ characters
4881    $line =~ s/([\(\)\[\]\.\?\*\+\^"'@])/\\$1/g; # Escape any regex characters
4882    # Untainting joy...
4883    $line = $1 if $line =~ /(.*)/;
4884    $result = eval "\"$line\"";
4885    $output .= MailScanner::Config::DoPercentVars($result) . "\n";
4886  }
4887  $output;
4888}
4889
4890
4891# Sign the body of the message with a text or html warning message
4892# directing users to read the VirusWarning.txt attachment.
4893# Return 0 if nothing was signed, true if it signed something.
4894sub SignWarningMessage {
4895  my $this = shift;
4896  my $top = shift;
4897
4898  #print STDERR "Top is $top\n";
4899  return 0 unless $top;
4900
4901  # If multipart, try to sign our first part
4902  if ($top->is_multipart) {
4903    my $sigcounter = 0;
4904    #print STDERR "It's a multipart message\n";
4905    $sigcounter += $this->SignWarningMessage($top->parts(0));
4906    $sigcounter += $this->SignWarningMessage($top->parts(1))
4907      if $top->head and $top->effective_type =~ /multipart\/alternative/i;
4908
4909    # JKF 20090424 Commented out this whole chunk, as we don't want this
4910    # JKF 20090424 to happen anyway really. If we can't sign the message,
4911    # JKF 20090424 we can't sign the message, tough s***.
4912    #if ($sigcounter == 0) {
4913    #  # If we haven't signed anything by now, it must be a multipart
4914    #  # message containing only things we can't sign. So add a text/plain
4915    #  # section on the front and sign that.
4916    #  my $text = $this->ReadVirusWarning('inlinetextwarning') . "\n\n";
4917    #  my $newpart = build MIME::Entity
4918    #                      Type => 'text/plain',
4919    #                      Disposition => 'inline',
4920    #                      Data => $text,
4921    #                      Encoding => 'quoted-printable',
4922    #                      Top => 0;
4923    #  $top->add_part($newpart, 0);
4924    #  $sigcounter = 1;
4925    #}
4926    return $sigcounter;
4927  }
4928
4929  my $MimeType = $top->head->mime_type if $top->head;
4930  #print STDERR "MimeType is $MimeType\n";
4931  return 0 unless $MimeType =~ m{text/}i; # Won't sign non-text message.
4932  # Won't sign attachments.
4933  return 0 if $top->head->mime_attr('content-disposition') =~ /attachment/i;
4934
4935  # Get body data as array of newline-terminated lines
4936  #print STDERR "Bodyhandle is " . $top->bodyhandle . "\n";
4937  $top->bodyhandle or return undef;
4938  my @body = $top->bodyhandle->as_lines;
4939
4940  #print STDERR "Signing message part\n";
4941
4942  # Output message back into body, followed by original data
4943  my($line, $io, $warning);
4944  $io = $top->open("w");
4945  if ($MimeType =~ /text\/html/i) {
4946    $warning = $this->ReadVirusWarning('inlinehtmlwarning');
4947    #$warning = quotemeta $warning; # Must leave HTML tags alone!
4948    foreach $line (@body) {
4949      # html tags can have extra attributes.  In a case where the <html> tag
4950      # has attributes and is closed on a subsequent line, the warning will
4951      # actually be in the tag, but it's malformed in any case because it
4952      # precedes any <head> and <body> tags and clients seem to render it OK.
4953      $line =~ s/\<html( [^>]*)?(\>|$)/$&$warning/i;
4954      $io->print($line);
4955    }
4956  } else {
4957    $warning = $this->ReadVirusWarning('inlinetextwarning');
4958    $io->print($warning . "\n");
4959    foreach $line (@body) { $io->print($line) }; # Original body data
4960  }
4961  (($body[-1]||'') =~ /\n\Z/) or $io->print("\n"); # Ensure final newline
4962  $io->close;
4963
4964  # We signed something
4965  return 1;
4966}
4967
4968# https://github.com/MailScanner/v5/issues/375
4969# Sign the body of the message with a text or html warning message
4970# alerting users that message was from an external source
4971# Set bodymodifed and externalsigned upon signing
4972sub SignExternalMessage {
4973  my $this = shift;
4974  my $top = shift;
4975
4976  MailScanner::Log::DebugLog("Debug: Entered SignExternalMessage for message %s", $this->{id});
4977
4978  return 0 unless $top;
4979
4980  # If multipart, try to sign our first part
4981  if ($top->is_multipart) {
4982    MailScanner::Log::DebugLog("Debug: Detected multipart mime for message %s", $this->{id});
4983
4984    my $sigcounter = 0;
4985    $sigcounter += $this->SignExternalMessage($top->parts(0));
4986    $sigcounter += $this->SignExternalMessage($top->parts(1))
4987      if $top->head and $top->effective_type =~ /multipart\/alternative/i;
4988
4989    return $sigcounter;
4990  }
4991
4992  my $MimeType = $top->head->mime_type if $top->head;
4993  return 0 unless $MimeType =~ m{text/}i; # Won't sign non-text message.
4994  # Won't sign attachments.
4995  return 0 if $top->head->mime_attr('content-disposition') =~ /attachment/i;
4996
4997  # Get body data as array of newline-terminated lines
4998  $top->bodyhandle or return undef;
4999  my @body = $top->bodyhandle->as_lines;
5000
5001  # Output message back into body, followed by original data
5002  my($line, $io, $warning);
5003  $io = $top->open("w");
5004  if ($MimeType =~ /text\/html/i) {
5005    MailScanner::Log::DebugLog("Debug: Adding external html for message %s", $this->{id});
5006    $warning = $this->ReadExternalWarning('inlineexternalhtml');
5007    #$warning = quotemeta $warning; # Must leave HTML tags alone!
5008    my $htmltagfound = 0;
5009    foreach $line (@body) {
5010      # html tags can have extra attributes.  In a case where the <html> tag
5011      # has attributes and is closed on a subsequent line, the warning will
5012      # actually be in the tag, but it's malformed in any case because it
5013      # precedes any <head> and <body> tags and clients seem to render it OK.
5014      if ( $line =~ /\<html( [^>]*)?(\>|$)/ ) {
5015        $htmltagfound = 1;
5016      }
5017    }
5018
5019    # Just sign if no html tag present
5020    $io->print($warning)
5021      unless $htmltagfound == 1;
5022    foreach $line (@body) {
5023      # if at <html> tag, sign here
5024      $line =~ s/<html( [^>]*)?(\>|$)/$&$warning/i
5025        unless $htmltagfound == 0;
5026      $io->print($line);
5027    }
5028  } else {
5029    MailScanner::Log::DebugLog("Debug: Adding external text for message %s", $this->{id});
5030    $warning = $this->ReadExternalWarning('inlineexternaltext');
5031    $io->print($warning . "\n");
5032    foreach $line (@body) { $io->print($line) }; # Original body data
5033  }
5034  (($body[-1]||'') =~ /\n\Z/) or $io->print("\n"); # Ensure final newline
5035  $io->close;
5036
5037  MailScanner::Log::DebugLog("Debug: Exiting SignExternalMessage for message %s", $this->{id});
5038
5039  # We signed something
5040  $this->{bodymodified} = 1;
5041  $this->{externalsigned} = 1;
5042}
5043
5044# Read the appropriate warning message to sign the top of cleaned messages.
5045# Passed in the name of the config variable that points to the filename.
5046# This is also used to read the inline signature added to the bottom of
5047# clean messages.
5048# Substitutions allowed in the message are
5049#     $viruswarningfilename -- by default VirusWarning.txt
5050#     $id
5051#     $from
5052#     $to -- comma-separated list of to address
5053#     $subject
5054# and $filename -- comma-separated list of infected attachments
5055sub ReadVirusWarning {
5056  my $this = shift;
5057  my($option) = @_;
5058
5059  my $file = MailScanner::Config::Value($option, $this);
5060  my $viruswarningname = MailScanner::Config::Value('attachmentwarningfilename',
5061                                                    $this);
5062  my($line);
5063
5064  #print STDERR "Reading virus warning message from $filename\n";
5065  my $fh = new FileHandle;
5066  $fh->open($file)
5067    or (MailScanner::Log::WarnLog("Could not open inline file %s, %s",
5068                                  $file, $!),
5069        return undef);
5070
5071  # Work out the list of all the infected attachments, including
5072  # reports applying to the whole message
5073  my($typedattach, $attach, $text, %infected, $filename, $from, $subject, $id);
5074  while (($typedattach, $text) = each %{$this->{allreports}}) {
5075    # It affects the entire message if the entity of this file matches
5076    # the entity of the entire message.
5077    $attach = substr($typedattach,1);
5078    my $entity = $this->{file2entity}{"$attach"};
5079    #if ($attach eq "") {
5080    if ($this->{entity} eq $entity) {
5081      $infected{MailScanner::Config::LanguageValue($this, "theentiremessage")}
5082        = 1;
5083    } else {
5084      $infected{"$attach"} = 1;
5085    }
5086  }
5087  # And don't forget the external bodies which are just entity reports
5088  while (($typedattach, $text) = each %{$this->{entityreports}}) {
5089    $infected{MailScanner::Config::LanguageValue($this, 'notnamed')} = 1;
5090  }
5091  $attach = substr($typedattach,1);
5092  $filename = join(', ', keys %infected);
5093  $id = $this->{id};
5094  $from = $this->{from};
5095  $subject = $this->{subject};
5096
5097  my $result = "";
5098  while (<$fh>) {
5099    chomp;
5100    s#"#\\"#g;
5101    s#@#\\@#g;
5102    # Boring untainting again...
5103    /(.*)/;
5104    $line = eval "\"$1\"";
5105    $result .= MailScanner::Config::DoPercentVars($line) . "\n";
5106  }
5107  $fh->close();
5108  $result;
5109}
5110
5111# https://github.com/MailScanner/v5/issues/375
5112# Read the appropriate warning message to sign the top of messages.
5113# Passed in the name of the config variable that points to the filename.
5114# Substitutions allowed in the message are
5115#     $id
5116#     $from
5117#     $to -- comma-separated list of to address
5118#     $subject
5119sub ReadExternalWarning {
5120  my $this = shift;
5121  my($option) = @_;
5122
5123  my $file = MailScanner::Config::Value($option, $this);
5124  my($line);
5125
5126  MailScanner::Log::DebugLog("Debug: Entered ReadExternalWarning for message %s", $this->{id});
5127
5128  my $fh = new FileHandle;
5129  $fh->open($file)
5130    or (MailScanner::Log::WarnLog("Could not open inline file %s, %s",
5131                                  $file, $!),
5132        return undef);
5133
5134  MailScanner::Log::DebugLog("Debug: External warning file opened for reading for message %s", $this->{id});
5135
5136  my($from, $subject, $id);
5137  $id = $this->{id};
5138  $from = $this->{from};
5139  $subject = $this->{subject};
5140
5141  my($to, %tolist);
5142  foreach $to (@{$this->{to}}) {
5143    $tolist{$to} = 1;
5144  }
5145  $to = join(', ', sort keys %tolist);
5146
5147  my $result = "";
5148  while (<$fh>) {
5149    chomp;
5150    s#"#\\"#g;
5151    s#@#\\@#g;
5152    # Boring untainting again...
5153    /(.*)/;
5154    $line = eval "\"$1\"";
5155    $result .= $line . "\n";
5156  }
5157  $fh->close();
5158
5159  MailScanner::Log::DebugLog("Debug: Exiting ReadExternalWarning for message %s", $this->{id});
5160
5161  $result;
5162}
5163
5164# Work out if the message is a reply or an original posting
5165sub IsAReply {
5166  my $this = shift;
5167
5168  # Are we a reply or an original message?
5169
5170  # The old favourite subject line check... :-(
5171  #if ($this->{subject} =~ /^re:/i) {
5172  #  $this->{isreply} = 1;
5173  #  return;
5174  #}
5175
5176  # Bail out very quickly if the list of header names is empty.
5177  my $lookfor = MailScanner::Config::Value('isareply', $this);
5178  return 0 unless $lookfor;
5179
5180  # Find the list of all the names of all the headers
5181  my @headers;
5182  foreach my $line (@{$this->{headers}}) {
5183    #print STDERR "Looking at $line\n";
5184    next if     $line =~ /^\s/;
5185    next unless $line =~ /^([^:]+):/i;
5186    push @headers, $1;
5187  }
5188  #@headers = map { s/://; $_; } @headers; # Strip out all ':' characters
5189
5190  my $headernames = join (',', @headers);
5191  # Must test the next line to make sure it does what I intend!
5192  $headernames = ',' . $headernames . ',';
5193  $headernames =~ s/,{2,}/,/g;
5194  $headernames =~ s/^,*/,/; # Make sure line starts and
5195  $headernames =~ s/,*$/,/; # ends with exactly 1 ','
5196  # $headernames now contains a comma-separated list of the msg's headers
5197
5198  #print STDERR "Headers to look for are $lookfor\n";
5199  $lookfor =~ s/://g;
5200  $lookfor =~ s/[\s,]+/\|/g; # Turn comma/space-separated list into
5201  $lookfor =~ s/^\|//; # regexp matching ',(alternatives-list),'
5202  $lookfor =~ s/\|$//;
5203  $lookfor = ',(' . $lookfor . '),';
5204  # $lookfor is now a regexp which will match if any isareply are present
5205
5206  # Are there any "lookfor" headers in the "headernames"?
5207  $this->{isreply} = 0;
5208  $this->{isreply} = 1 if $headernames =~ /$lookfor/i;
5209}
5210
5211
5212# Sign the bottom of the message with a tag-line saying it is clean
5213# and MailScanner is wonderful :-)
5214# Have already checked that message is not infected, and that they want
5215# clean signatures adding to messages.
5216sub SignUninfected {
5217  my $this = shift;
5218
5219  return if $this->{infected}; # Double-check!
5220
5221  my($entity, $scannerheader);
5222
5223  # Use the presence of an X-MailScanner: header to decide if the
5224  # message will have already been signed by another MailScanner server.
5225  $scannerheader = MailScanner::Config::Value('mailheader', $this);
5226  $scannerheader =~ tr/://d;
5227
5228  #print STDERR "Signing uninfected message " . $this->{id} . "\n";
5229
5230  # Want to sign the bottom of the highest-level MIME entity
5231  $entity = $this->{entity};
5232  if (MailScanner::Config::Value('signalreadyscanned', $this) ||
5233      (defined($entity) && !$entity->head->count($scannerheader))) {
5234    $this->AppendSignCleanEntity($entity, 0);
5235    #$this->PrependSignCleanEntity($entity)
5236    #  if MailScanner::Config::Value('signtopaswell', $this);
5237    if ($entity && $entity->head) {
5238      $entity->head->add('MIME-Version', '1.0')
5239        unless $entity->head->get('mime-version');
5240    }
5241    $this->{bodymodified} = 1;
5242  }
5243}
5244
5245
5246# Sign the end of a message (which is an entity) with the given tag-line
5247sub PrependSignCleanEntity {
5248  my $this = shift;
5249  my($top) = @_;
5250
5251  my($MimeType, $signature, @signature);
5252
5253  return unless $top;
5254
5255  #print STDERR "In PrependSignCleanEntity, signing $top\n";
5256
5257  # If multipart, try to sign our first part
5258  if ($top->is_multipart) {
5259    my $sigcounter = 0;
5260    # JKF Signed and encrypted multiparts must not be touched.
5261    # JKF Instead put the sig in the epilogue. Breaks the RFC
5262    # JKF but in a harmless way.
5263    if ($top->effective_type =~ /multipart\/(signed|encrypted)/i) {
5264      # Read the sig and put it in the epilogue, which may be ignored
5265      $signature = $this->ReadVirusWarning('inlinetextpresig');
5266      @signature = map { "$_\n" } split(/\n/, $signature);
5267      unshift @signature, "\n";
5268      $top->preamble(\@signature);
5269      return 1;
5270    }
5271    # If any of the PSCE() calls said they didn't sign anything then return
5272    # a marker saying we didn't sign anything, and DON'T sign anything!
5273    my $result0 = $this->PrependSignCleanEntity($top->parts(0));
5274    if ($result0 >= 0) {
5275      $sigcounter += $result0;
5276    } else {
5277      $sigcounter = -1;
5278    }
5279    if ($top->head and $top->effective_type =~ /multipart\/alternative/i) {
5280      my $result1 = $this->PrependSignCleanEntity($top->parts(1));
5281      if ($result1 >= 0) {
5282        $sigcounter += $result1;
5283      } else {
5284        $sigcounter = -1;
5285      }
5286    }
5287
5288    if ($sigcounter == 0) {
5289      # If we haven't signed anything by now, it must be a multipart
5290      # message containing only things we can't sign. So add a text/plain
5291      # section on the front and sign that.
5292      my $text = $this->ReadVirusWarning('inlinetextpresig') . "\n\n";
5293      my $newpart = build MIME::Entity
5294                          Type => 'text/plain',
5295                          Charset =>
5296                    MailScanner::Config::Value('attachmentcharset', $this),
5297                          Disposition => 'inline',
5298                          Data => $text,
5299                          Encoding => 'quoted-printable',
5300                          Top => 0;
5301      $top->add_part($newpart, 0);
5302      $sigcounter = 1;
5303    }
5304    return $sigcounter;
5305  }
5306
5307  $MimeType = $top->head->mime_type if $top->head;
5308  return 0 unless $MimeType =~ m{text/}i; # Won't sign non-text message.
5309  # Won't sign attachments.
5310  return 0 if $top->head->mime_attr('content-disposition') =~ /attachment/i;
5311
5312  # Get body data as array of newline-terminated lines
5313  $top->bodyhandle or return undef;
5314  my @body = $top->bodyhandle->as_lines;
5315
5316  # Output original data back into body, followed by message
5317  my($line, $io);
5318  $io = $top->open("w");
5319  if ($MimeType =~ /text\/html/i) {
5320    if (($this->{sigimagepresent} &&
5321        MailScanner::Config::Value('allowmultsigs', $this) !~ /1/) ||
5322        (MailScanner::Config::Value('isareply', $this) &&
5323        $this->{isreply})) {
5324      # Either: there is an image already and we don't want multiples,
5325      # Or    : it's a reply and we don't sign replies,
5326      # Then  : We don't want an image, so do nothing
5327      $io->close;
5328      return -1; # Send back a token saying we found one and didn't sign it
5329    } else {
5330      $signature = $this->ReadVirusWarning('inlinehtmlpresig');
5331      foreach $line (@body) {
5332        $line =~ s/\<x?html\>/$&$signature/i;
5333        $io->print($line);
5334      }
5335      #(($body[-1]||'') =~ /\n\Z/) or $io->print("\n"); # Ensure final newline
5336    }
5337  } else {
5338    $signature = $this->ReadVirusWarning('inlinetextpresig');
5339    $io->print("$signature\n");
5340    foreach $line (@body) { $io->print($line) }; # Original body data
5341  }
5342  $io->close;
5343
5344  # We signed something
5345  return 1;
5346}
5347
5348# Sign the end of a message (which is an entity) with the given tag-line
5349sub AppendSignCleanEntity {
5350  my $this = shift;
5351  my($top, $parent) = @_;
5352
5353  my($MimeType, $signature, @signature);
5354
5355  return unless $top;
5356
5357  #print STDERR "In AppendSignCleanEntity, signing $top\n";
5358
5359  # If multipart, try to sign our first part
5360  if ($top->is_multipart) {
5361    my $sigcounter = 0;
5362    # JKF Signed and encrypted multiparts must not be touched.
5363    # JKF Instead put the sig in the epilogue. Breaks the RFC
5364    # JKF but in a harmless way.
5365    if ($top->effective_type =~ /multipart\/(signed|encrypted)/i) {
5366      # Read the sig and put it in the epilogue, which may be ignored
5367      $signature = $this->ReadVirusWarning('inlinetextsig');
5368      @signature = map { "$_\n" } split(/\n/, $signature);
5369      unshift @signature, "\n";
5370      $top->epilogue(\@signature);
5371      return 1;
5372    }
5373    # If the ASCE(0) returned -1 then we found something we could sign but
5374    # chose not to, so set $sigcounter so we won't try to sign anything else.
5375    my $result0 = $this->AppendSignCleanEntity($top->parts(0), $top);
5376    if ($result0 >= 0) {
5377      $sigcounter += $result0;
5378    } else {
5379      $sigcounter = -1;
5380    }
5381    # If the ASCE(1) returned -1 then we found something we could sign but
5382    # chose not to, so set $sigcounter so we won't try to sign anything else.
5383    if ($top->head and $top->effective_type =~ /multipart\/alternative/i) {
5384      my $result1 = $this->AppendSignCleanEntity($top->parts(1), $top);
5385      if ($result1 >= 0) {
5386        $sigcounter += $result1;
5387      } else {
5388        $sigcounter = -1;
5389      }
5390    }
5391
5392    if ($sigcounter == 0) {
5393      # If we haven't signed anything by now, it must be a multipart
5394      # message containing only things we can't sign. So add a text/plain
5395      # section on the front and sign that.
5396      my $text = $this->ReadVirusWarning('inlinetextsig') . "\n\n";
5397      my $newpart = build MIME::Entity
5398                          Type => 'text/plain',
5399                          Charset =>
5400                    MailScanner::Config::Value('attachmentcharset', $this),
5401                          Disposition => 'inline',
5402                          Data => $text,
5403                          Encoding => 'quoted-printable',
5404                          Top => 0;
5405      $top->add_part($newpart, 0);
5406      $sigcounter = 1;
5407    }
5408    return $sigcounter;
5409  }
5410
5411  $MimeType = $top->head->mime_type if $top->head;
5412  return 0 unless $MimeType =~ m{text/(html|plain)}i; # Won't sign non-text message.
5413  # Won't sign attachments.
5414  return 0 if $top->head->mime_attr('content-disposition') =~ /attachment/i;
5415  # Won't sign HTML parts when we already have a sig and don't allow duplicates
5416  # Or we are a reply and we don't sign replies.
5417  # We return -1 as a special token indicating that there was something we
5418  # could sign but chose not to. If I pick up a -1 when called then don't
5419  # try to sign anything else.
5420  return -1 if ($this->{sigimagepresent} &&
5421                $MimeType =~ /text\/html/i &&
5422                MailScanner::Config::Value('allowmultsigs', $this) !~ /1/) ||
5423               ($this->{isreply} &&
5424                MailScanner::Config::Value('isareply', $this));
5425
5426  # Get body data as array of newline-terminated lines
5427  $top->bodyhandle or return undef;
5428  my @body = $top->bodyhandle->as_lines;
5429
5430  # Output original data back into body, followed by message
5431  my($line, $io, $FoundHTMLEnd, $FoundBodyEnd, $FoundSigMark, $html);
5432  $FoundHTMLEnd = 0; # If there is no </html> tag, still append the signature
5433  $FoundBodyEnd = 0; # If there is no </body> tag, still append the signature
5434  $FoundSigMark = 0; # Try to replace _SIGNATURE_ with the sig if it's there
5435  $html = 0;
5436  $io = $top->open("w");
5437  if ($MimeType =~ /text\/html/i) {
5438    $signature = $this->ReadVirusWarning('inlinehtmlsig');
5439    foreach $line (@body) {
5440      # Try to insert the signature where they want it.
5441      $FoundSigMark = 1 if $line =~ s/_SIGNATURE_/$signature/;
5442      $FoundBodyEnd = 1 if !$FoundSigMark && $line =~ s/\<\/body\>/$signature$&/i;
5443      $FoundHTMLEnd = 1 if !$FoundSigMark && !$FoundBodyEnd && $line =~ s/\<\/x?html\>/$signature$&/i;
5444      $io->print($line);
5445    }
5446    $io->print($signature . "\n")
5447      unless $FoundBodyEnd || $FoundHTMLEnd || $FoundSigMark;
5448    (($body[-1]||'') =~ /\n\Z/) or $io->print("\n"); # Ensure final newline
5449    $html = 1;
5450  } else {
5451    $signature = $this->ReadVirusWarning('inlinetextsig');
5452    foreach $line (@body) {
5453      # Replace _SIGNATURE_ with the inline sig, if it's present.
5454      $FoundSigMark = 1 if $line =~ s/_SIGNATURE_/$signature/;
5455      $io->print($line); # Original body data
5456    }
5457    # Else just tack the sig on the end.
5458    $io->print("\n$signature\n") unless $FoundSigMark;
5459  }
5460  $io->close;
5461
5462  # Add Image Attachment from Mail Scanner, unless there already is one
5463  if (MailScanner::Config::Value('attachimage', $this) =~ /1/ && !$this->{sigimagepresent}) {
5464    #print STDERR "Adding image signature\n";
5465    my $attach = MailScanner::Config::Value('attachimagetohtmlonly', $this);
5466    if (($html && $attach =~ /1/) || $attach =~ /0/) {
5467      my $filename = MailScanner::Config::Value('attachimagename', $this);
5468      my $ext = 'unknown';
5469      $ext = $1 if $filename =~ /\.([a-z]{3,4})$/;
5470      $ext = 'jpeg' if $ext =~ /jpg/i;
5471      my $internalname =  MailScanner::Config::Value('attachimageinternalname', $this);
5472      if (length($filename) && -f $filename) {
5473        my $newentity = MIME::Entity->build(Path => $filename,
5474                                            Top => 0,
5475                                            Type => "image/$ext",
5476                                            Encoding => "base64",
5477                                            Filename => $internalname,
5478                                            Disposition => "inline",
5479                                            'Content-Id:' => '<' . $internalname . '>');
5480        if ($parent && $parent->effective_type =~ /multipart\/related/i) {
5481          # It's already been signed once, so don't nest the MIME structure more
5482          $parent->add_part($newentity);
5483        } else {
5484          # It's a first-time sig, so next it into a multipart/related
5485          $top->make_multipart('related');
5486          $top->add_part($newentity);
5487        }
5488      }
5489    }
5490  }
5491
5492  # We signed something
5493  return 1;
5494}
5495
5496
5497# Deliver an uninfected message. It is already signed as necessary.
5498# If the body has been modified then we need to reconstruct it from
5499# the MIME structure. If not modified, then just link it across to
5500# the outgoing queue.
5501sub DeliverUninfected {
5502  my $this = shift;
5503  if ($this->{bodymodified}) {
5504    # The body of this message has been modified, so reconstruct
5505    # it from the MIME structure and deliver that.
5506    #print STDERR "Body modified\n";
5507    $this->DeliverModifiedBody('cleanheader');
5508  } else {
5509    #print STDERR "Body not modified\n";
5510    if (MailScanner::Config::Value('virusscan', $this) =~ /1/) {
5511      #print STDERR "Message is scanned and clean\n";
5512      $this->DeliverUnmodifiedBody('cleanheader');
5513    } else {
5514      #print STDERR "Message is unscanned\n";
5515      $this->DeliverUnmodifiedBody('unscannedheader');
5516    }
5517  }
5518}
5519
5520my($DisarmFormTag, $DisarmScriptTag, $DisarmCodebaseTag, $DisarmIframeTag,
5521   $DisarmWebBug, $DisarmPhishing, $DisarmHidden, $DisarmNumbers, $DisarmHTMLChangedMessage,
5522   $DisarmWebBugFound, $DisarmPhishingFound, $PhishingSubjectTag,
5523   $PhishingHighlight, $StrictPhishing, $WebBugWhitelist, $WebBugReplacement,
5524   $WebBugBlacklist, $SigImageFound);
5525
5526# Deliver a message which has not had its body modified in any way.
5527# This is a lot faster as it doesn't involve reconstructing the message
5528# body at all, it is just copied from the inqueue to the outqueue.
5529sub DeliverUnmodifiedBody {
5530  my $this = shift;
5531  my($headervalue) = @_;
5532  #print STDERR "DisarmPhishingFound = " . $DisarmPhishingFound . " for message " . $this->{id} . "\n";
5533
5534  return if $this->{deleted}; # This should never happen
5535
5536  # Prune the entity tree to remove all undef values
5537  PruneEntityTree($this->{entity},$this->{entity2file},$this->{file2entity});
5538
5539  #print STDERR "Delivering Unmodified Body message\n";
5540
5541  my $OutQ = MailScanner::Config::Value('outqueuedir', $this);
5542  my $store = $this->{store};
5543
5544  # Link the queue data file from in to out
5545  $store->LinkData($OutQ);
5546
5547  # Set up the output envelope with its (possibly modified) headers
5548  # Used to do next line but it breaks text-only messages with no MIME
5549  # structure as the MIME explosion will have created a MIME structure.
5550  #$global::MS->{mta}->AddHeadersToQf($this, $this->{entity}->stringify_header);
5551  $global::MS->{mta}->AddHeadersToQf($this);
5552
5553  # Remove duplicate subject: lines
5554  $global::MS->{mta}->UniqHeader($this, 'Subject:');
5555
5556  # Add the information/help X- header
5557  my $infoheader = MailScanner::Config::Value('infoheader', $this);
5558  if ($infoheader) {
5559    my $infovalue = MailScanner::Config::Value('infovalue', $this);
5560    $global::MS->{mta}->ReplaceHeader($this, $infoheader, $infovalue);
5561  }
5562  my $idheader = MailScanner::Config::Value('idheader', $this);
5563  if ($idheader) {
5564    $global::MS->{mta}->ReplaceHeader($this, $idheader, $this->{id});
5565  }
5566
5567  my $headervalue2 = MailScanner::Config::Value($headervalue, $this);
5568  $global::MS->{mta}->AddMultipleHeader($this, 'mailheader',
5569               $headervalue2, ', ') if $headervalue2 ne "";
5570  # Delete all content length headers anyway. They are unsafe.
5571  # No, leave them if nothing in the body has been modified.
5572  #$global::MS->{mta}->DeleteHeader($this, 'Content-length:');
5573
5574  # Add IPv6 or IPv4 protocol version header
5575  my $ipverheader = MailScanner::Config::Value('ipverheader', $this);
5576  $global::MS->{mta}->ReplaceHeader($this, $ipverheader,
5577                             ( ($this->{clientip} =~ /:/)?'IPv6':'IPv4' ))
5578    if $ipverheader;
5579
5580  # Add the spamvirusreport to the input to SA.
5581  # The header name should be documented in the MailScanner.conf docs.
5582  # 20090730
5583  my $svheader = MailScanner::Config::Value('spamvirusheader', $this);
5584  if ($svheader && $this->{spamvirusreport}) {
5585    $svheader .= ':' unless $svheader =~ /:$/;
5586    $global::MS->{mta}->AppendHeader($this, $svheader, $this->{spamvirusreport}, ' ');
5587  }
5588
5589  # Add the MCP header if necessary
5590  $global::MS->{mta}->AddMultipleHeader($this, 'mcpheader',
5591                                        $this->{mcpreport}, ', ')
5592    if $this->{ismcp} ||
5593       MailScanner::Config::Value('includemcpheader', $this);
5594
5595  # Add the spam header if they want that
5596  #$global::MS->{mta}->AddHeader($this,
5597  #                              MailScanner::Config::Value('spamheader',$this),
5598  #                              $this->{spamreport})
5599  # JKF 3/10/2005
5600  $global::MS->{mta}->AddMultipleHeader($this, 'spamheader',
5601                                        $this->{spamreport}, ', ')
5602    if MailScanner::Config::Value('includespamheader', $this) ||
5603       ($this->{spamreport} && $this->{isspam});
5604
5605  # Add the spam stars if they want that. Limit it to 60 characters to avoid
5606  # a potential denial-of-service attack.
5607  my($stars,$starcount,$scoretext,$minstars,$scorefmt);
5608  $starcount = int($this->{sascore}) + 0;
5609  $starcount = 0 if $this->{spamwhitelisted}; # 0 stars if white-listed
5610  $scorefmt = MailScanner::Config::Value('scoreformat', $this);
5611  $scorefmt = '%d' if $scorefmt eq '';
5612  $scoretext = sprintf($scorefmt, $this->{sascore}+0);
5613  $minstars = MailScanner::Config::Value('minstars', $this);
5614  $starcount = $minstars if $this->{isrblspam} && $minstars &&
5615                            $starcount<$minstars;
5616  if (MailScanner::Config::Value('spamscorenotstars', $this)) {
5617    $stars = $scoretext; # int($starcount);
5618  } else {
5619    $starcount = 60 if $starcount>60;
5620    $stars = MailScanner::Config::Value('spamstarscharacter') x $starcount;
5621  }
5622  if (MailScanner::Config::Value('spamstars', $this) =~ /1/ && $starcount>0) {
5623    $global::MS->{mta}->AddMultipleHeader($this, 'spamstarsheader',
5624                                          $stars, ', ');
5625  }
5626
5627  # Add the Envelope to and from headers
5628  AddFromAndTo($this);
5629
5630  # Repair the subject line
5631  $global::MS->{mta}->ReplaceHeader($this, 'Subject:', $this->{safesubject})
5632    if $this->{subjectwasunsafe};
5633
5634  # Modify the subject line for Disarming
5635  my $subjectchanged = 0;
5636  my $disarmtag = MailScanner::Config::Value('disarmsubjecttext',$this);
5637  my $phishingtag = MailScanner::Config::Value('phishingsubjecttag', $this);
5638  #if ($this->{messagedisarmed}) {
5639  #  #print STDERR "Found messagedisarmed = " . join(',',@{$this->{disarmedtags}}) . "\n";
5640  #  if(MailScanner::Config::Value('disarmprependsubject',$this) =~ /1/ &&
5641  #     !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $disarmtag)) {
5642  #   $global::MS->{mta}->PrependHeader($this, 'Subject:', $disarmtag, ' ');
5643  #   $subjectchanged = 1;
5644  #  }
5645  #  if (grep /phishing/i, @{$this->{disarmedtags}}) {
5646  #    #print STDERR "Found a phishing disarmedtags\n";
5647  #    # We found it had a phishing link in it. Are we tagging phishing Subject?
5648  #    if (MailScanner::Config::Value('tagphishingsubject',$this) =~ /1/ &&
5649  #        !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $phishingtag)
5650#) {
5651  #      $global::MS->{mta}->PrependHeader($this, 'Subject:', $phishingtag, ' ');
5652  #      $subjectchanged = 1;
5653  #    }
5654  #  }
5655  #}
5656  if ($this->{messagedisarmed}) {
5657    #print STDERR "MessageDisarmed is set at 3878\n";
5658    my $where = MailScanner::Config::Value('disarmmodifysubject',$this);
5659    if ($where =~ /end/ && !$global::MS->{mta}->TextEndsHeader($this, 'Subject:', $disarmtag)) {
5660      $global::MS->{mta}->AppendHeader($this, 'Subject:', $disarmtag, ' ');
5661      $subjectchanged = 1;
5662      #print STDERR "MessageDisarmed is set (end)\n";
5663    } elsif ($where =~ /start|1/ && !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $disarmtag)) {
5664      $global::MS->{mta}->PrependHeader($this, 'Subject:', $disarmtag, ' ');
5665      $subjectchanged = 1;
5666      #print STDERR "MessageDisarmed is set (start)\n";
5667    }
5668    #print STDERR "disarmedtags = " . join(',',@{$this->{disarmedtags}}) . "\n";
5669  }
5670
5671  #print STDERR "Hello from 3840\n";
5672  if ($this->{disarmphishingfound}) { # grep /phishing/i, @{$this->{disarmedtags}}) {
5673      # We found it had a phishing link in it. Are we tagging phishing Subject?
5674      #print STDERR "DisarmPhishingFound at 3896!\n";
5675      #print STDERR "ID = " . $this->{id} . "\n";
5676      my $where = MailScanner::Config::Value('tagphishingsubject', $this);
5677      if ($where =~ /end/ && !$global::MS->{mta}->TextEndsHeader($this, 'Subject:', $phishingtag)) {
5678        #print STDERR "end\n";
5679        $global::MS->{mta}->AppendHeader($this, 'Subject:', $phishingtag, ' ');
5680        $subjectchanged = 1;
5681      } elsif ($where =~ /start|1/ && !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $phishingtag)) {
5682        #print STDERR "start\n";
5683        $global::MS->{mta}->PrependHeader($this, 'Subject:', $phishingtag, ' ');
5684        $subjectchanged = 1;
5685      }
5686    #}
5687  }
5688
5689  # Add watermark header if chosen to do so.
5690  if ($this->{addmshmac}) {
5691    my $mshmacheader = MailScanner::Config::Value('mshmacheader', $this);
5692    my $mshmac = $this->{mshmac};
5693
5694    $global::MS->{mta}->ReplaceHeader($this, $mshmacheader, $mshmac);
5695  }
5696
5697  # Modify the subject line for spam
5698  # if it's spam AND they want to modify the subject line AND it's not
5699  # already been modified by another of your MailScanners.
5700  my $spamtag = MailScanner::Config::Value('spamsubjecttext', $this);
5701  $spamtag =~ s/_SCORE_/$scoretext/;
5702  $spamtag =~ s/_STARS_/$stars/i;
5703  #if ($this->{isspam} && !$this->{ishigh} &&
5704  #    MailScanner::Config::Value('spamprependsubject',$this) &&
5705  #    !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $spamtag)) {
5706  #  $global::MS->{mta}->PrependHeader($this, 'Subject:', $spamtag, ' ');
5707  #  $subjectchanged = 1;
5708  #}
5709  if ($this->{isspam} && !$this->{ishigh}) {
5710    my $where = MailScanner::Config::Value('spammodifysubject',$this);
5711    if ($where =~ /end/ && !$global::MS->{mta}->TextEndsHeader($this, 'Subject:', $spamtag)) {
5712      $global::MS->{mta}->AppendHeader($this, 'Subject:', $spamtag, ' ');
5713      $subjectchanged = 1;
5714    } elsif ($where =~ /start|1/ && !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $spamtag)) {
5715      $global::MS->{mta}->PrependHeader($this, 'Subject:', $spamtag, ' ');
5716      $subjectchanged = 1;
5717    }
5718  }
5719
5720
5721  # If it is high-scoring spam, then add a different bit of text
5722  $spamtag = MailScanner::Config::Value('highspamsubjecttext', $this);
5723  $spamtag =~ s/_SCORE_/$scoretext/;
5724  $spamtag =~ s/_STARS_/$stars/i;
5725  #if ($this->{isspam} && $this->{ishigh} &&
5726  #    MailScanner::Config::Value('highspamprependsubject',$this) &&
5727  #    !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $spamtag)) {
5728  #  $global::MS->{mta}->PrependHeader($this, 'Subject:', $spamtag, ' ');
5729  #  $subjectchanged = 1;
5730  #}
5731  if ($this->{isspam} && $this->{ishigh}) {
5732    my $where = MailScanner::Config::Value('highspammodifysubject',$this);
5733    if ($where =~ /end/ && !$global::MS->{mta}->TextEndsHeader($this, 'Subject:', $spamtag)) {
5734      $global::MS->{mta}->AppendHeader($this, 'Subject:', $spamtag, ' ');
5735      $subjectchanged = 1;
5736    } elsif ($where =~ /start|1/ && !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $spamtag)) {
5737      $global::MS->{mta}->PrependHeader($this, 'Subject:', $spamtag, ' ');
5738      $subjectchanged = 1;
5739    }
5740  }
5741
5742
5743  # Modify the subject line for MCP
5744  # if it's MCP AND they want to modify the subject line AND it's not
5745  # already been modified by another of your MailScanners.
5746  $starcount = int($this->{mcpsascore}) + 0;
5747  $starcount = 0 if $this->{mcpwhitelisted}; # 0 stars if white-listed
5748  $scorefmt = MailScanner::Config::Value('scoreformat', $this);
5749  $scorefmt = '%d' if $scorefmt eq '';
5750  $scoretext = sprintf($scorefmt, $this->{mcpsascore}+0);
5751  my $mcptag = MailScanner::Config::Value('mcpsubjecttext', $this);
5752  $mcptag =~ s/_SCORE_/$scoretext/;
5753  #if ($this->{ismcp} && !$this->{ishighmcp} &&
5754  #    MailScanner::Config::Value('mcpprependsubject',$this) &&
5755  #    !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $mcptag)) {
5756  #  $global::MS->{mta}->PrependHeader($this, 'Subject:', $mcptag, ' ');
5757  #  $subjectchanged = 1;
5758  #}
5759  if ($this->{ismcp} && !$this->{ishighmcp}) {
5760    my $where = MailScanner::Config::Value('mcpmodifysubject',$this);
5761    if ($where =~ /end/ && !$global::MS->{mta}->TextEndsHeader($this, 'Subject:', $mcptag)) {
5762      $global::MS->{mta}->AppendHeader($this, 'Subject:', $mcptag, ' ');
5763      $subjectchanged = 1;
5764    } elsif ($where =~ /start|1/ && !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $mcptag)) {
5765      $global::MS->{mta}->PrependHeader($this, 'Subject:', $mcptag, ' ');
5766      $subjectchanged = 1;
5767    }
5768  }
5769
5770  # If it is high-scoring MCP, then add a different bit of text
5771  $mcptag = MailScanner::Config::Value('highmcpsubjecttext', $this);
5772  $mcptag =~ s/_SCORE_/$scoretext/;
5773  #if ($this->{ismcp} && $this->{ishighmcp} &&
5774  #    MailScanner::Config::Value('highmcpprependsubject',$this) &&
5775  #    !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $mcptag)) {
5776  #  $global::MS->{mta}->PrependHeader($this, 'Subject:', $mcptag, ' ');
5777  #  $subjectchanged = 1;
5778  #}
5779  if ($this->{ismcp} && $this->{ishighmcp}) {
5780    my $where = MailScanner::Config::Value('highmcpmodifysubject',$this);
5781    if ($where =~ /end/ && !$global::MS->{mta}->TextEndsHeader($this, 'Subject:', $mcptag)) {
5782      $global::MS->{mta}->AppendHeader($this, 'Subject:', $mcptag, ' ');
5783      $subjectchanged = 1;
5784    } elsif ($where =~ /start|1/ && !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $mcptag)) {
5785      $global::MS->{mta}->PrependHeader($this, 'Subject:', $mcptag, ' ');
5786      $subjectchanged = 1;
5787    }
5788  }
5789
5790
5791  # Modify the subject line for scanning -- but only do it if the
5792  # subject hasn't already been modified by MailScanner for another reason.
5793  my $modifscan = MailScanner::Config::Value('scannedmodifysubject', $this);
5794  my $scantag   = MailScanner::Config::Value('scannedsubjecttext', $this);
5795  if ($modifscan =~ /start/ && !$subjectchanged &&
5796      !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $scantag)) {
5797    $global::MS->{mta}->PrependHeader($this, 'Subject:', $scantag, ' ');
5798    $subjectchanged = 1;
5799  } elsif ($modifscan =~ /end|1/ && !$subjectchanged &&
5800      !$global::MS->{mta}->TextEndsHeader($this, 'Subject:', $scantag)) {
5801    $global::MS->{mta}->AppendHeader($this, 'Subject:', $scantag, ' ');
5802    $subjectchanged = 1;
5803  }
5804
5805  # Remove any headers we don't want in the message
5806  my(@removeme, $remove);
5807  @removeme = split(/[,\s]+/, MailScanner::Config::Value('removeheaders', $this));
5808  foreach $remove (@removeme) {
5809    # Add a : if there isn't one already, it's needed for DeleteHeader()
5810    # 20090312 Done in DeleteHeader: $remove .= ':' unless $remove =~ /:$/;
5811    $global::MS->{mta}->DeleteHeader($this, $remove);
5812  }
5813
5814  # Add the extra headers they want for MCP and spam messages
5815  my(@extraheaders, $extraheader);
5816  my($key, $value);
5817  @extraheaders = @{$this->{extramcpheaders}} if $this->{extramcpheaders};
5818  push @extraheaders, @{$this->{extraspamheaders}} if $this->{extraspamheaders};
5819  foreach $extraheader (@extraheaders) {
5820    #print STDERR "Unmod Adding extra header $extraheader\n";
5821    next unless $extraheader =~ /:/;
5822    ($key, $value) = split(/:\s*/, $extraheader, 2);
5823    $key =~ s/\s+/-/g; # Replace spaces in header name with dashes
5824
5825    # Replace _TO_ in the header value with a comma-separated list of recips
5826    if ($value =~ /_TO_/) {
5827      # Get the actual text for the header value
5828      my($recipient, %tolist);
5829      foreach $recipient (@{$this->{to}}) {
5830        $tolist{$recipient} = 1;
5831      }
5832      $recipient = join(', ', sort keys %tolist);
5833      # Now reflow the To list in case it is very long
5834      $recipient = $this->ReflowHeader($key . ':', $recipient);
5835      $value =~ s/_TO_/$recipient/g;
5836    }
5837
5838    $global::MS->{mta}->AddMultipleHeaderName($this, $key . ':', $value, ', ');
5839  }
5840
5841  # Add the secret archive recipients
5842  my($extra, @extras, %alreadydone);
5843  foreach $extra (@{$this->{archiveplaces}}) {
5844    # Email archive recipients include a '@'
5845    next if $extra =~ /^\//;
5846    next unless $extra =~ /@/;
5847    $extra =~ s/_HOUR_/$this->{hournumber}/g;
5848    $extra =~ s/_DATE_/$this->{datenumber}/g;
5849    $extra =~ s/_FROMUSER_/$this->{fromuser}/g;
5850    $extra =~ s/_FROMDOMAIN_/$this->{fromdomain}/g;
5851    if ($extra !~ /_TOUSER_|_TODOMAIN_/) {
5852      # It's a simple email address
5853      push @extras, $extra unless $alreadydone{$extra};
5854      $alreadydone{$extra} = 1;
5855    } else {
5856      # It contains a substitution so we need to loop through all the recips
5857      my $numrecips = scalar (@{$this->{to}});
5858      foreach my $recip (0..$numrecips-1) {
5859        my $extracopy = $extra;
5860        my $u = $this->{touser}[$recip];
5861        my $d = $this->{todomain}[$recip];
5862        $extracopy =~ s/_TOUSER_/$u/g;
5863        $extracopy =~ s/_TODOMAIN_/$d/g;
5864        push @extras, $extracopy unless $alreadydone{$extracopy};
5865        $alreadydone{$extracopy} = 1; # Dont add the same address twice
5866      }
5867    }
5868  }
5869  $global::MS->{mta}->AddRecipients($this, @extras) if @extras;
5870
5871  # Write the new qf file, delete originals and unlock the message
5872  $store->WriteHeader($this, $OutQ);
5873  unless ($this->{gonefromdisk}) {
5874    $store->DeleteUnlock();
5875    $this->{gonefromdisk} = 1;
5876  }
5877
5878  # Note this does not kick the MTA into life here any more
5879}
5880
5881
5882# Deliver a message which has had its body modified.
5883# This is slower as the message has to be reconstructed from all its
5884# MIME entities.
5885sub DeliverModifiedBody {
5886  my $this = shift;
5887  my($headervalue) = @_;
5888
5889  return if $this->{deleted}; # This should never happen
5890
5891  #print STDERR "Delivering Modified Body message with header \"$headervalue\"\n";
5892
5893  my $store = $this->{store};
5894
5895  # If there is no data structure at all for this message, then we
5896  # can't sensibly deliver anything, so just delete it.
5897  # The parsing must have failed completely.
5898  my $entity = $this->{entity};
5899  unless ($entity) {
5900    #print STDERR "Deleting duff message\n";
5901    unless ($this->{gonefromdisk}) {
5902      $store->DeleteUnlock();
5903      $this->{gonefromdisk} = 1;
5904    }
5905    return;
5906  }
5907
5908  # Prune the entity tree to remove all undef values
5909  #PruneEntityTree($this->{entity},$this->{entity2file},$this->{file2entity});
5910  PruneEntityTree($entity,$this->{entity2file},$this->{file2entity});
5911
5912  my $OutQ = MailScanner::Config::Value('outqueuedir', $this);
5913
5914  # Write the new body file
5915  #print STDERR "Writing the MIME body of $this, " . $this->{id} . "\n";
5916  $store->WriteMIMEBody($this->{id}, $entity, $OutQ);
5917  #print STDERR "Written the MIME body\n";
5918
5919  # Set up the output envelope with its (possibly modified) headers
5920  # Leave utf-8 encodings in place
5921  # https://github.com/MailScanner/v5/issues/287
5922  #$global::MS->{mta}->AddHeadersToQf($this, $this->{entity}->stringify_header);
5923  $global::MS->{mta}->AddHeadersToQf($this);
5924
5925  # Remove duplicate subject: lines
5926  $global::MS->{mta}->UniqHeader($this, 'Subject:');
5927
5928  # Add the information/help X- header
5929  my $infoheader = MailScanner::Config::Value('infoheader', $this);
5930  if ($infoheader) {
5931    my $infovalue = MailScanner::Config::Value('infovalue', $this);
5932    $global::MS->{mta}->ReplaceHeader($this, $infoheader, $infovalue);
5933  }
5934  my $idheader = MailScanner::Config::Value('idheader', $this);
5935  if ($idheader) {
5936    $global::MS->{mta}->ReplaceHeader($this, $idheader, $this->{id});
5937  }
5938
5939  # Add the clean/dirty header
5940  #print STDERR "Adding clean/dirty header $headervalue\n";
5941  my $headervalue2 = MailScanner::Config::Value($headervalue, $this);
5942  $global::MS->{mta}->AddMultipleHeader($this, 'mailheader',
5943	       $headervalue2, ', ') if $headervalue2 ne "";
5944
5945  # Delete all content length headers as the body has been modified.
5946  $global::MS->{mta}->DeleteHeader($this, 'Content-length:');
5947
5948  # Add IPv6 or IPv4 protocol version header
5949  my $ipverheader = MailScanner::Config::Value('ipverheader', $this);
5950  $global::MS->{mta}->ReplaceHeader($this, $ipverheader,
5951                             ( ($this->{clientip} =~ /:/)?'IPv6':'IPv4' ))
5952    if $ipverheader;
5953
5954  # Add the spamvirusreport to the input to SA.
5955  # The header name should be documented in the MailScanner.conf docs.
5956  # 20090730
5957  my $svheader = MailScanner::Config::Value('spamvirusheader', $this);
5958  if ($svheader && $this->{spamvirusreport}) {
5959    $svheader .= ':' unless $svheader =~ /:$/;
5960    $global::MS->{mta}->AppendHeader($this, $svheader, $this->{spamvirusreport}, ' ');
5961  }
5962
5963  # Add the MCP header if necessary
5964  $global::MS->{mta}->AddMultipleHeader($this, 'mcpheader',
5965					$this->{mcpreport}, ', ')
5966    if $this->{ismcp} ||
5967       MailScanner::Config::Value('includemcpheader', $this);
5968
5969  # Add the spam header if they want that
5970  #$global::MS->{mta}->AddHeader($this,
5971  #                              MailScanner::Config::Value('spamheader',$this),
5972  #                              $this->{spamreport})
5973  # JKF 3/10/2005
5974  $global::MS->{mta}->AddMultipleHeader($this, 'spamheader',
5975					$this->{spamreport}, ', ')
5976    if MailScanner::Config::Value('includespamheader', $this) ||
5977       ($this->{spamreport} && $this->{isspam});
5978
5979  # Add the spam stars if they want that. Limit it to 60 characters to avoid
5980  # a potential denial-of-service attack.
5981  my($stars,$starcount,$scoretext,$minstars,$scorefmt);
5982  $starcount = int($this->{sascore}) + 0;
5983  $starcount = 0 if $this->{spamwhitelisted}; # 0 stars if white-listed
5984  $scorefmt = MailScanner::Config::Value('scoreformat', $this);
5985  $scorefmt = '%d' if $scorefmt eq '';
5986  $scoretext = sprintf($scorefmt, $this->{sascore}+0);
5987  $minstars = MailScanner::Config::Value('minstars', $this);
5988  $starcount = $minstars if $this->{isrblspam} && $minstars &&
5989			    $starcount<$minstars;
5990  if (MailScanner::Config::Value('spamscorenotstars', $this)) {
5991    $stars = $scoretext; # int($starcount);
5992  } else {
5993    $starcount = 60 if $starcount>60;
5994    $stars = MailScanner::Config::Value('spamstarscharacter') x $starcount;
5995  }
5996  if (MailScanner::Config::Value('spamstars', $this) =~ /1/ && $starcount>0) {
5997    $global::MS->{mta}->AddMultipleHeader($this, 'spamstarsheader',
5998					  $stars, ', ');
5999  }
6000
6001  # Add the Envelope to and from headers
6002  AddFromAndTo($this);
6003
6004  # Repair the subject line
6005  #print STDERR "Metadata is " . join("\n", @{$this->{metadata}}) . "\n";
6006  $global::MS->{mta}->ReplaceHeader($this, 'Subject:', $this->{safesubject})
6007    if $this->{subjectwasunsafe};
6008
6009  my $subjectchanged = 0;
6010
6011  # Modify the subject line for viruses or filename traps.
6012  # Only use the filename trap test if it isn't infected by anything else.
6013  my $nametag = MailScanner::Config::Value('namesubjecttext', $this);
6014  my $contenttag = MailScanner::Config::Value('contentsubjecttext', $this);
6015  my $sizetag = MailScanner::Config::Value('sizesubjecttext', $this);
6016  #print STDERR "I have triggered a size trap\n" if $this->{sizeinfected};
6017  if ($this->{nameinfected} &&   # Triggered a filename trap
6018      !$this->{virusinfected} && # No other reports about it
6019      !$this->{otherinfected} && # They want the tagging & not already tagged
6020      !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $nametag)) {
6021    #if (MailScanner::Config::Value('nameprependsubject',$this)) {
6022    #  $global::MS->{mta}->PrependHeader($this, 'Subject:', $nametag, ' ');
6023    #  $subjectchanged = 1;
6024    #}
6025    my $where = MailScanner::Config::Value('namemodifysubject',$this);
6026    if ($where =~ /end/ && !$global::MS->{mta}->TextEndsHeader($this, 'Subject:', $nametag)) {
6027      $global::MS->{mta}->AppendHeader($this, 'Subject:', $nametag, ' ');
6028      $subjectchanged = 1;
6029    } elsif ($where =~ /start|1/ && !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $nametag)) {
6030      $global::MS->{mta}->PrependHeader($this, 'Subject:', $nametag, ' ');
6031      $subjectchanged = 1;
6032    }
6033
6034  } elsif ($this->{sizeinfected} &&   # Triggered a size trap
6035      !$this->{virusinfected} &&
6036      !$this->{nameinfected}) { # &&
6037      #!$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $sizetag)) {
6038    #if (MailScanner::Config::Value('sizeprependsubject',$this)) {
6039    #  $global::MS->{mta}->PrependHeader($this, 'Subject:', $sizetag, ' ');
6040    #  $subjectchanged = 1;
6041    #}
6042    my $where = MailScanner::Config::Value('sizemodifysubject',$this);
6043    if ($where =~ /end/ && !$global::MS->{mta}->TextEndsHeader($this, 'Subject:', $sizetag)) {
6044      $global::MS->{mta}->AppendHeader($this, 'Subject:', $sizetag, ' ');
6045      $subjectchanged = 1;
6046    } elsif ($where =~ /start|1/ && !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $sizetag)) {
6047      $global::MS->{mta}->PrependHeader($this, 'Subject:', $sizetag, ' ');
6048      $subjectchanged = 1;
6049    }
6050
6051  } elsif ($this->{otherinfected} &&   # Triggered a content trap
6052      !$this->{virusinfected} && # No other reports about it
6053      !$this->{nameinfected}) { #&& # They want the tagging & not already tagged
6054      #!$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $contenttag)) {
6055    #if (MailScanner::Config::Value('contentprependsubject',$this)) {
6056    #  $global::MS->{mta}->PrependHeader($this, 'Subject:', $contenttag, ' ');
6057    #  $subjectchanged = 1;
6058    #}
6059    my $where = MailScanner::Config::Value('contentmodifysubject',$this);
6060    if ($where =~ /end/ && !$global::MS->{mta}->TextEndsHeader($this, 'Subject:', $contenttag)) {
6061      $global::MS->{mta}->AppendHeader($this, 'Subject:', $contenttag, ' ');
6062      $subjectchanged = 1;
6063    } elsif ($where =~ /start|1/ && !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $contenttag)) {
6064      $global::MS->{mta}->PrependHeader($this, 'Subject:', $contenttag, ' ');
6065      $subjectchanged = 1;
6066    }
6067
6068  } else {
6069    # It may be really virus infected.
6070    # Modify the subject line for viruses
6071    # if it's infected AND they want to modify the subject line AND it's not
6072    # already been modified by another of your MailScanners.
6073    my $virustag = MailScanner::Config::Value('virussubjecttext', $this);
6074    #print STDERR "I am infected\n" if $this->{infected};
6075    #if ($this->{infected} &&
6076    #  MailScanner::Config::Value('virusprependsubject',$this) &&
6077    #  !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $virustag)) {
6078    #  $global::MS->{mta}->PrependHeader($this, 'Subject:', $virustag, ' ');
6079    #  $subjectchanged = 1;
6080    #}
6081    if ($this->{infected}) {
6082      my $where = MailScanner::Config::Value('virusmodifysubject',$this);
6083      if ($where =~ /end/ && !$global::MS->{mta}->TextEndsHeader($this, 'Subject:', $virustag)) {
6084        $global::MS->{mta}->AppendHeader($this, 'Subject:', $virustag, ' ');
6085        $subjectchanged = 1;
6086      } elsif ($where =~ /start|1/ && !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $virustag)) {
6087        $global::MS->{mta}->PrependHeader($this, 'Subject:', $virustag, ' ');
6088        $subjectchanged = 1;
6089      }
6090    }
6091
6092  }
6093
6094  # Modify the subject line for Disarming
6095  my $disarmtag = MailScanner::Config::Value('disarmsubjecttext',$this);
6096  my $phishingtag = MailScanner::Config::Value('phishingsubjecttag', $this);
6097  #print STDERR "phishingtag = $phishingtag\n";
6098  if ($this->{messagedisarmed}) {
6099    #print STDERR "DisarmPhishingFound is set at 4200\n";
6100    #print STDERR "Message id = " . $this->{id} . "\n";
6101    #print STDERR "Found messagedisarmed = " . join(',',@{$this->{disarmedtags}}) . "\n";
6102    #if(MailScanner::Config::Value('disarmprependsubject',$this) =~ /1/ &&
6103    #   !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $disarmtag)) {
6104    # $global::MS->{mta}->PrependHeader($this, 'Subject:', $disarmtag, ' ');
6105    # $subjectchanged = 1;
6106    #}
6107    my $where = MailScanner::Config::Value('disarmmodifysubject',$this);
6108    if ($where =~ /end/ && !$global::MS->{mta}->TextEndsHeader($this, 'Subject:', $disarmtag)) {
6109      $global::MS->{mta}->AppendHeader($this, 'Subject:', $disarmtag, ' ');
6110      $subjectchanged = 1;
6111      #print STDERR "MessageDisarmed is set (end)\n";
6112    } elsif ($where =~ /start|1/ && !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $disarmtag)) {
6113      $global::MS->{mta}->PrependHeader($this, 'Subject:', $disarmtag, ' ');
6114      $subjectchanged = 1;
6115      #print STDERR "MessageDisarmed is set (start)\n";
6116    }
6117  }
6118
6119  if ($this->{disarmphishingfound}) {
6120    #print STDERR "disarmedtags = " . join(',',@{$this->{disarmedtags}}) . "\n";
6121    #if (grep /phishing/i, @{$this->{disarmedtags}}) {
6122      #print STDERR "Found phishing disarmedtags2\n";
6123      # We found it had a phishing link in it. Are we tagging phishing Subject?
6124      #if (MailScanner::Config::Value('tagphishingsubject',$this) =~ /1/ &&
6125      #    !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $phishingtag)) {
6126      #  $global::MS->{mta}->PrependHeader($this, 'Subject:', $phishingtag, ' ');
6127      #  $subjectchanged = 1;
6128      #}
6129      # We found it had a phishing link in it. Are we tagging phishing Subject?
6130      my $where = MailScanner::Config::Value('tagphishingsubject', $this);
6131      #print STDERR "Where is $where\n";
6132      #print STDERR "Subject tag check = " . $global::MS->{mta}->TextStartsHeader($this, 'Subject:', $phishingtag) . "***\n";
6133      if ($where =~ /end/ && !$global::MS->{mta}->TextEndsHeader($this, 'Subject:', $phishingtag)) {
6134        $global::MS->{mta}->AppendHeader($this, 'Subject:', $phishingtag, ' ');
6135        $subjectchanged = 1;
6136        #print STDERR "end\n";
6137      } elsif ($where =~ /start|1/ && !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $phishingtag)) {
6138        $global::MS->{mta}->PrependHeader($this, 'Subject:', $phishingtag, ' ');
6139        $subjectchanged = 1;
6140        #print STDERR "start\n";
6141      }
6142    #}
6143  }
6144
6145  # Add watermark header if chosen to do so.
6146  if ($this->{addmshmac}) {
6147    my $mshmacheader = MailScanner::Config::Value('mshmacheader', $this);
6148    my $mshmac = $this->{mshmac};
6149
6150    $global::MS->{mta}->ReplaceHeader($this, $mshmacheader, $mshmac);
6151  }
6152
6153  # Modify the subject line for spam
6154  # if it's spam AND they want to modify the subject line AND it's not
6155  # already been modified by another of your MailScanners.
6156  my $spamtag = MailScanner::Config::Value('spamsubjecttext', $this);
6157  $spamtag =~ s/_SCORE_/$scoretext/;
6158  $spamtag =~ s/_STARS_/$stars/i;
6159  #if ($this->{isspam} && !$this->{ishigh} &&
6160#	      MailScanner::Config::Value('spamprependsubject',$this) &&
6161#	      !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $spamtag)) {
6162  #  $global::MS->{mta}->PrependHeader($this, 'Subject:', $spamtag, ' ');
6163  #  $subjectchanged = 1;
6164  #}
6165  if ($this->{isspam} && !$this->{ishigh}) {
6166    my $where = MailScanner::Config::Value('spammodifysubject',$this);
6167    if ($where =~ /end/ && !$global::MS->{mta}->TextEndsHeader($this, 'Subject:', $spamtag)) {
6168      $global::MS->{mta}->AppendHeader($this, 'Subject:', $spamtag, ' ');
6169      $subjectchanged = 1;
6170    } elsif ($where =~ /start|1/ && !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $spamtag)) {
6171      $global::MS->{mta}->PrependHeader($this, 'Subject:', $spamtag, ' ');
6172      $subjectchanged = 1;
6173    }
6174  }
6175
6176  # If it is high-scoring spam, then add a different bit of text
6177  $spamtag = MailScanner::Config::Value('highspamsubjecttext', $this);
6178  $spamtag =~ s/_SCORE_/$scoretext/;
6179  $spamtag =~ s/_STARS_/$stars/i;
6180  #if ($this->{isspam} && $this->{ishigh} &&
6181  #    MailScanner::Config::Value('highspamprependsubject',$this) &&
6182  #    !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $spamtag)) {
6183  #  $global::MS->{mta}->PrependHeader($this, 'Subject:', $spamtag, ' ');
6184  #  $subjectchanged = 1;
6185  #}
6186  if ($this->{isspam} && $this->{ishigh}) {
6187    my $where = MailScanner::Config::Value('highspammodifysubject',$this);
6188    if ($where =~ /end/ && !$global::MS->{mta}->TextEndsHeader($this, 'Subject:', $spamtag)) {
6189      $global::MS->{mta}->AppendHeader($this, 'Subject:', $spamtag, ' ');
6190      $subjectchanged = 1;
6191    } elsif ($where =~ /start|1/ && !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $spamtag)) {
6192      $global::MS->{mta}->PrependHeader($this, 'Subject:', $spamtag, ' ');
6193      $subjectchanged = 1;
6194    }
6195  }
6196
6197
6198  # Modify the subject line for MCP
6199  # if it's MCP AND they want to modify the subject line AND it's not
6200  # already been modified by another of your MailScanners.
6201  $starcount = int($this->{mcpsascore}) + 0;
6202  $starcount = 0 if $this->{mcpwhitelisted}; # 0 stars if white-listed
6203  $scorefmt = MailScanner::Config::Value('scoreformat', $this);
6204  $scorefmt = '%d' if $scorefmt eq '';
6205  $scoretext = sprintf($scorefmt, $this->{mcpsascore}+0);
6206  my $mcptag = MailScanner::Config::Value('mcpsubjecttext', $this);
6207  $mcptag =~ s/_SCORE_/$scoretext/;
6208  #if ($this->{ismcp} && !$this->{ishighmcp} &&
6209  #    MailScanner::Config::Value('mcpprependsubject',$this) &&
6210  #    !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $mcptag)) {
6211  #  $global::MS->{mta}->PrependHeader($this, 'Subject:', $mcptag, ' ');
6212  #  $subjectchanged = 1;
6213  #}
6214  if ($this->{ismcp} && !$this->{ishighmcp}) {
6215    my $where = MailScanner::Config::Value('mcpmodifysubject',$this);
6216    if ($where =~ /end/ && !$global::MS->{mta}->TextEndsHeader($this, 'Subject:', $mcptag)) {
6217      $global::MS->{mta}->AppendHeader($this, 'Subject:', $mcptag, ' ');
6218      $subjectchanged = 1;
6219    } elsif ($where =~ /start|1/ && !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $mcptag)) {
6220      $global::MS->{mta}->PrependHeader($this, 'Subject:', $mcptag, ' ');
6221      $subjectchanged = 1;
6222    }
6223  }
6224
6225
6226  # If it is high-scoring MCP, then add a different bit of text
6227  $mcptag = MailScanner::Config::Value('highmcpsubjecttext', $this);
6228  $mcptag =~ s/_SCORE_/$scoretext/;
6229  #if ($this->{ismcp} && $this->{ishighmcp} &&
6230  #    MailScanner::Config::Value('highmcpprependsubject',$this) &&
6231  #    !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $mcptag)) {
6232  #  $global::MS->{mta}->PrependHeader($this, 'Subject:', $mcptag, ' ');
6233  #  $subjectchanged = 1;
6234  #}
6235  if ($this->{ismcp} && $this->{ishighmcp}) {
6236    my $where = MailScanner::Config::Value('highmcpmodifysubject',$this);
6237    if ($where =~ /end/ && !$global::MS->{mta}->TextEndsHeader($this, 'Subject:', $mcptag)) {
6238      $global::MS->{mta}->AppendHeader($this, 'Subject:', $mcptag, ' ');
6239      $subjectchanged = 1;
6240    } elsif ($where =~ /start|1/ && !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $mcptag)) {
6241      $global::MS->{mta}->PrependHeader($this, 'Subject:', $mcptag, ' ');
6242      $subjectchanged = 1;
6243    }
6244  }
6245
6246
6247  # Modify the subject line for scanning -- but only do it if the
6248  # subject hasn't already been modified by MailScanner for another reason.
6249  my $modifscan = MailScanner::Config::Value('scannedmodifysubject', $this);
6250  my $scantag   = MailScanner::Config::Value('scannedsubjecttext', $this);
6251  if ($modifscan =~ /start/ && !$subjectchanged &&
6252      !$global::MS->{mta}->TextStartsHeader($this, 'Subject:', $scantag)) {
6253    $global::MS->{mta}->PrependHeader($this, 'Subject:', $scantag, ' ');
6254  } elsif ($modifscan =~ /end|1/ && !$subjectchanged &&
6255      !$global::MS->{mta}->TextEndsHeader($this, 'Subject:', $scantag)) {
6256    $global::MS->{mta}->AppendHeader($this, 'Subject:', $scantag, ' ');
6257  }
6258
6259  # Remove any headers we don't want in the message
6260  my(@removeme, $remove);
6261  @removeme = split(/[,\s]+/, MailScanner::Config::Value('removeheaders', $this));
6262  foreach $remove (@removeme) {
6263    # Add a : if there isn't one already, it's needed for DeleteHeader()
6264    # 20090312 Done in DeleteHeader: $remove .= ':' unless $remove =~ /:$/;
6265    $global::MS->{mta}->DeleteHeader($this, $remove);
6266  }
6267
6268  # Add the extra headers they want for MCP and spam messages
6269  my(@extraheaders, $extraheader);
6270  my($key, $value);
6271  @extraheaders = @{$this->{extramcpheaders}} if $this->{extramcpheaders};
6272  push @extraheaders, @{$this->{extraspamheaders}} if $this->{extraspamheaders};
6273  foreach $extraheader (@extraheaders) {
6274    #print STDERR "Mod Adding extra header $extraheader\n";
6275    next unless $extraheader =~ /:/;
6276    ($key, $value) = split(/:\s*/, $extraheader, 2);
6277    $key =~ s/\s+/-/g; # Replace spaces in header name with dashes
6278
6279    # Replace _TO_ in the header value with a comma-separated list of recips
6280    if ($value =~ /_TO_/) {
6281      # Get the actual text for the header value
6282      my($recipient, %tolist);
6283      foreach $recipient (@{$this->{to}}) {
6284        $tolist{$recipient} = 1;
6285      }
6286      $recipient = join(', ', sort keys %tolist);
6287      # Now reflow the To list in case it is very long
6288      $recipient = $this->ReflowHeader($key . ':', $recipient);
6289      $value =~ s/_TO_/$recipient/g;
6290    }
6291
6292    $global::MS->{mta}->AddMultipleHeaderName($this, $key . ':', $value, ', ');
6293  }
6294
6295  # Add the secret archive recipients
6296  my($extra, @extras, %alreadydone);
6297  foreach $extra (@{$this->{archiveplaces}}) {
6298    # Email archive recipients include a '@'
6299    next if $extra =~ /^\//;
6300    next unless $extra =~ /@/;
6301    $extra =~ s/_HOUR_/$this->{hournumber}/g;
6302    $extra =~ s/_DATE_/$this->{datenumber}/g;
6303    $extra =~ s/_FROMUSER_/$this->{fromuser}/g;
6304    $extra =~ s/_FROMDOMAIN_/$this->{fromdomain}/g;
6305    if ($extra !~ /_TOUSER_|_TODOMAIN_/) {
6306      # It's a simple email address
6307      push @extras, $extra unless $alreadydone{$extra};
6308      $alreadydone{$extra} = 1;
6309    } else {
6310      # It contains a substitution so we need to loop through all the recips
6311      my $numrecips = scalar (@{$this->{to}});
6312      foreach my $recip (0..$numrecips-1) {
6313        my $extracopy = $extra;
6314        my $u = $this->{touser}[$recip];
6315        my $d = $this->{todomain}[$recip];
6316        $extracopy =~ s/_TOUSER_/$u/g;
6317        $extracopy =~ s/_TODOMAIN_/$d/g;
6318        push @extras, $extracopy unless $alreadydone{$extracopy};
6319        $alreadydone{$extracopy} = 1; # Dont add the same address twice
6320      }
6321    }
6322  }
6323  $global::MS->{mta}->AddRecipients($this, @extras) if @extras;
6324
6325  # Write the new qf file, delete originals and unlock the message
6326  #print STDERR "Writing the new qf file\n";
6327  $store->WriteHeader($this, $OutQ);
6328  unless ($this->{gonefromdisk}) {
6329    $store->DeleteUnlock();
6330    $this->{gonefromdisk} = 1;
6331  }
6332
6333  # Note this does not kick the MTA into life here any more
6334}
6335
6336
6337# Prune all the undef branches out of an entity tree
6338sub PruneEntityTree {
6339  my ($entity,$entity2file,$file2entity) = @_;
6340
6341  return undef unless $entity;
6342  return $entity unless $entity->parts;
6343
6344  my(@newparts, $part, $newpart, $counter);
6345
6346  # Do a pre-traversal depth-first search of the tree
6347  #print STDERR "Looking at $entity, has " . scalar($entity->parts) . " parts\n";
6348  foreach $part ($entity->parts) {
6349    #print STDERR "$counter Going down to $part\n";
6350    next unless $part;
6351    #print STDERR "Non null $part\n";
6352    $newpart = PruneEntityTree($part,$entity2file,$file2entity);
6353    #print STDERR "Replacement is $newpart\n";
6354    if ($newpart) {
6355      #print STDERR "Adding replacement $newpart\n";
6356      push @newparts, $newpart;
6357    }
6358    #print STDERR "Coming up, added $newpart\n";
6359  }
6360
6361  #print STDERR "About to return\n";
6362  # Keep all the parts we found, prune as much as we can
6363  if (@newparts) {
6364    #print STDERR "Returning entity $entity with " . join(',',@newparts) . "\n";
6365    $entity->parts(\@newparts);
6366    return $entity;
6367  } else {
6368    #print STDERR "Returning undef\n";
6369    return undef;
6370  }
6371}
6372
6373
6374# Delete a message from the incoming queue
6375sub DeleteMessage {
6376  my $this = shift;
6377
6378  #print STDERR "DeletingMessage " . $this->{id} . "\n";
6379
6380  unless ($this->{gonefromdisk}) {
6381    $this->{store}->DeleteUnlock();
6382    $this->{gonefromdisk} = 1;
6383  }
6384  $this->{deleted} = 1;
6385  $this->{abandoned} = 0; # It was intentionally deleted
6386}
6387
6388
6389# Work out if the message is infected with a "silent" virus such as Klez.
6390# Set the "silent" flag on all such messages.
6391# At the same time, find the "noisy" non-spoofing infections such as
6392# document macro viruses.
6393sub FindSilentAndNoisyInfections {
6394  my $this = shift;
6395
6396  my(@silentin) = split(" ",MailScanner::Config::Value('silentviruses', $this));
6397  my($silent, $silentin, @silent, $regexp, $allreports, $logstring, $allsilent);
6398  my($virusreports);
6399
6400  my(@noisyin) = split(" ",MailScanner::Config::Value('noisyviruses', $this));
6401  my($noisy, $noisyin, @noisy, $nregexp);
6402
6403  #print "-1 Silentin = \"" . join(',',@silentin) . "\"\n";
6404  #print "-1 Noisy in = \"" . join(',',@noisyin) . "\"\n";
6405
6406  # Get out quickly if there's nothing to do
6407  return unless @silentin || @noisyin;
6408
6409  # Turn each silent and noisy report into a regexp
6410  $allsilent = 0;
6411  foreach $silent (@silentin) {
6412    if (lc($silent) eq 'all-viruses') {
6413      $allsilent = 1;
6414      next;
6415    }
6416    $silentin = quotemeta $silent;
6417    push @silent, $silentin;
6418  }
6419  foreach $noisy (@noisyin) {
6420    next if lc($noisy) eq 'all-viruses';
6421    $noisyin = quotemeta $noisy;
6422    push @noisy, $noisyin;
6423  }
6424  # Make 2 big regexps from them all
6425  $regexp = "";
6426  $nregexp = "";
6427  $regexp = '(?:' . join(')|(?:', @silent) . ')' if @silent;
6428  $nregexp = '(?:' . join(')|(?:', @noisy) . ')' if @noisy;
6429
6430  # Make 1 big string from all the reports
6431  $allreports = join('', values %{$this->{allreports}});
6432  $virusreports = join(' ', values %{$this->{virusreports}});
6433
6434  #print STDERR "FindSilentInfection: Looking for \"$regexp\" in \"" .
6435  #             $allreports . "\"\n";
6436  #print STDERR "FindNoisyInfection: Looking for \"$nregexp\" in \"" .
6437  #             $allreports . "\"\n";
6438
6439  #$this->{silent} = 1 if @silentin && $allreports =~ /$regexp/i;
6440  #$this->{noisy}  = 1 if @noisyin  && $allreports =~ /$nregexp/i;
6441
6442  # Do this with grep so I can extract the matching line.
6443  $this->{silent} = 1 if $regexp && grep {$logstring .= "$_ " if /$regexp/i;}
6444                                         values %{$this->{allreports}};
6445  if ($allsilent && $virusreports) {
6446    $this->{silent} = 1;
6447    $logstring .= $virusreports;
6448  }
6449  $this->{noisy}  = 1 if $nregexp && grep /$nregexp/i,
6450                                          values %{$this->{allreports}};
6451  #print STDERR "0 regexp = $nregexp and search = \"" . join('","',values %{$this->{allreports}}) . "\"\n";
6452
6453  #print STDERR "1 FindSilentInfection: Found it!\n" if $this->{silent};
6454  #print STDERR "1 FindNoisyInfection: Found it!\n" if $this->{noisy};
6455
6456  return unless MailScanner::Config::Value('logsilentviruses', $this);
6457
6458  $logstring = join(',', values %{$this->{allreports}})
6459    if !$logstring && $allsilent && $this->{silent} == 1;
6460  $logstring =~ s/[\n,]+(.)/,$1/g;
6461  MailScanner::Log::NoticeLog("Viruses marked as silent: %s", $logstring)
6462    if $logstring;
6463
6464  #print STDERR "2 FindSilentInfection: Found it!\n" if $this->{silent};
6465  #print STDERR "2 FindNoisyInfection: Found it!\n" if $this->{noisy};
6466}
6467
6468
6469# Deliver a cleaned message and remove it from the incoming queue
6470sub DeliverCleaned {
6471  my $this = shift;
6472
6473  # The body of this message has been modified, so reconstruct
6474  # it from the MIME structure and deliver that.
6475  #print STDERR "Delivering cleaned up message " . $this->{id} . "\n";
6476  $this->DeliverModifiedBody('dirtyheader');
6477}
6478
6479
6480# Send a warning message to the person who sent this message.
6481# Need to create variables for from, to, subject, date and report
6482# for use within the message.
6483sub WarnSender {
6484  my $this = shift;
6485
6486  my($from,$to,$subject,$date,$allreports,$alltypes,$report,$type);
6487  my($entityreports, @everyreportin, $entitytypes, @everytype);
6488  my($emailmsg, $line, $messagefh, $msgname, $localpostmaster, $id);
6489  my($hostname, $postmastername, $messagesize, $maxmessagesize);
6490
6491  # Do we want to send the sender a warning at all?
6492  # If nosenderprecedence is set to non-blank and contains this
6493  # message precedence header, then just return.
6494  my(@preclist, $prec, $precedence, $header);
6495  @preclist = split(" ",
6496                  lc(MailScanner::Config::Value('nosenderprecedence', $this)));
6497  $precedence = "";
6498  foreach $header (@{$this->{headers}}) {
6499    $precedence = lc($1) if $header =~ /^precedence:\s+(\S+)/i;
6500  }
6501  if (@preclist && $precedence ne "") {
6502    foreach $prec (@preclist) {
6503      if ($precedence eq $prec) {
6504        MailScanner::Log::InfoLog("Skipping sender of precedence %s",
6505                                  $precedence);
6506        return;
6507      }
6508    }
6509  }
6510
6511  # Now we know we want to send the message, it's not a bulk mail
6512  $from = $this->{from};
6513
6514  # Don't ever send a message to "" or "<>"
6515  return if $from eq "" || $from eq "<>";
6516
6517  # Setup other variables they can use in the message template
6518  $id = $this->{id};
6519  #$to = join(', ', @{$this->{to}});
6520  $localpostmaster = MailScanner::Config::Value('localpostmaster', $this);
6521  $postmastername  = MailScanner::Config::LanguageValue($this, 'mailscanner');
6522  $hostname = MailScanner::Config::Value('hostname', $this);
6523  $subject = $this->{subject};
6524  $date = $this->{datestring}; # scalar localtime;
6525  # Some more for the size reports
6526  $messagesize = $this->{size};
6527  $maxmessagesize = $this->{maxmessagesize};
6528
6529  my($to, %tolist);
6530  foreach $to (@{$this->{to}}) {
6531    $tolist{$to} = 1;
6532  }
6533  $to = join(', ', sort keys %tolist);
6534
6535  $allreports    = $this->{allreports};
6536  $entityreports = $this->{entityreports};
6537  push @everyreportin, values %$allreports;
6538  push @everyreportin, values %$entityreports;
6539  my $reportword = MailScanner::Config::LanguageValue($this, "report");
6540  my($reportline, @everyreport);
6541  foreach $reportline (@everyreportin) {
6542    push @everyreport, map { ((/^$reportword: /m)?$_:"$reportword: $_") . "\n" }
6543                           split(/\n/, $reportline);
6544  }
6545  #print STDERR "Reports are \"" . join('", "', @everyreport) . "\"\n";
6546  #$report = join('', @everyreport);
6547  my %seen = ();
6548  $report = join('', grep { ! $seen{$_} ++ } @everyreport);
6549  #print STDERR "***Report to sender is***\n$report***END***\n";
6550
6551  $alltypes    = $this->{alltypes};
6552  $entitytypes = $this->{entitytypes};
6553  push @everytype, values %$alltypes;
6554  push @everytype, values %$entitytypes;
6555  $type  = join('', @everytype);
6556
6557  # Do we want to hide the directory and message id from the report path?
6558  if (MailScanner::Config::Value('hideworkdir', $this)) {
6559    my $pattern = "(" . quotemeta($global::MS->{work}->{dir}) . "|\\\.)/";
6560    $report =~ s/$pattern//g; # m # Remove the work dir
6561    $report =~ s/\/?$id\/?//g; # Remove the message id
6562  }
6563
6564  # Set the report filename dependent on what triggered MailScanner, be it
6565  # a virus, a filename trap, a Denial Of Service attack, or an parsing error.
6566  if ($type =~ /v/i) {
6567    $msgname = MailScanner::Config::Value('sendervirusreport', $this);
6568  } elsif ($type =~ /f/i) {
6569    $msgname = MailScanner::Config::Value('senderfilenamereport', $this);
6570  } elsif ($type =~ /e/i) {
6571    $msgname = MailScanner::Config::Value('sendererrorreport', $this);
6572  } elsif ($type =~ /c/i) {
6573    $msgname = MailScanner::Config::Value('sendercontentreport', $this);
6574  } elsif ($type =~ /s/i) {
6575    $msgname = MailScanner::Config::Value('sendersizereport', $this);
6576  # JKF 19/12/2007 } elsif ($type =~ /p/i) {
6577  # JKF 19/12/2007   $msgname = MailScanner::Config::Value('senderpasswordreport', $this);
6578  } else {
6579    $msgname = MailScanner::Config::Value('sendervirusreport', $this);
6580  }
6581  #print STDERR "Report is $msgname\n";
6582
6583  # Work out the list of all the infected attachments, including
6584  # reports applying to the whole message
6585  my($attach, $text, %infected, $filename);
6586  while (($attach, $text) = each %$allreports) {
6587    if ($attach eq "") {
6588      $infected{MailScanner::Config::LanguageValue($this, "theentiremessage")}
6589        = 1;
6590    } else {
6591      $infected{substr($attach, 1)} = 1; # Remove the type identifier
6592    }
6593  }
6594  # And don't forget the external bodies which are just entity reports
6595  while (($attach, $text) = each %$entityreports) {
6596    $infected{MailScanner::Config::LanguageValue($this, 'notnamed')} = 1;
6597  }
6598  $filename = join(', ', keys %infected);
6599
6600  $messagefh = new FileHandle;
6601  $messagefh->open($msgname)
6602    or MailScanner::Log::WarnLog("Cannot open message file %s, %s",
6603                                 $msgname, $!);
6604  $emailmsg = "";
6605  while(<$messagefh>) {
6606    chomp;
6607    s#"#\\"#g;
6608    s#@#\\@#g;
6609    # Boring untainting again...
6610    /(.*)/;
6611    $line = eval "\"$1\"";
6612    $emailmsg .= MailScanner::Config::DoPercentVars($line) . "\n";
6613  }
6614  $messagefh->close();
6615
6616  # This did say $localpostmaster in the last parameter, but I changed
6617  # it to '<>' so that the sender warnings couldn't bounce.
6618  $global::MS->{mta}->SendMessageString($this, $emailmsg, '<>')
6619    or MailScanner::Log::WarnLog("Could not send sender warning, %s", $!);
6620}
6621
6622
6623# Create the headers for a postmaster notification message.
6624# This is expensive so don't do it much!
6625sub CreatePostmasterHeaders {
6626  my $this = shift;
6627  my($to)  = @_;
6628
6629  my($result, $charset);
6630
6631  # Make sure the Postmaster notice is in the right character set
6632  $charset = MailScanner::Config::Value('attachmentcharset',$this);
6633
6634  $result = "From: \"" .
6635            MailScanner::Config::Value('noticesfrom', $this) . "\" <" .
6636            MailScanner::Config::Value('localpostmaster',$this) . ">\nTo: ";
6637  #$to = MailScanner::Config::Value('noticerecipient',$this);
6638  #$to =~ s/ +/, /g;
6639  $result .= $to . "\nSubject: " .
6640             MailScanner::Config::LanguageValue($this, 'noticesubject') . "\n";
6641  $result .= "Content-type: text/plain; charset=$charset\n" if $charset;
6642
6643  return $result;
6644}
6645
6646
6647# Create the notification text for 1 email message.
6648sub CreatePostmasterNotice {
6649  my $this = shift;
6650
6651  my(@everyrept);
6652  push @everyrept, values %{$this->{allreports}};
6653  push @everyrept, values %{$this->{entityreports}};
6654
6655  foreach (@everyrept) {
6656      chomp;
6657      s/\n/\n            /g;
6658      $_ .= "\n";
6659  }
6660
6661  my $reportword = MailScanner::Config::LanguageValue($this, "report");
6662  my $id   = $this->{id};
6663  my $from = $this->{from};
6664  #my $to   = join(', ', @{$this->{to}});
6665  my $subj = $this->{subject};
6666  my $ip   = $this->{clientip};
6667  my $rept = join("    $reportword: ", @everyrept);
6668  #print STDERR "Rept is\n$rept\n";
6669
6670  # Build list of unique archive and quarantine storage locations
6671  my @quarantines = grep /\//, @{$this->{archiveplaces}};
6672  push @quarantines, grep /\//, @{$this->{quarantineplaces}};
6673  my($quarantine, %quarantinelist);
6674  foreach $quarantine (@quarantines) {
6675    $quarantinelist{$quarantine} = 1;
6676  }
6677  $quarantine = join(', ', sort keys %quarantinelist);
6678
6679  # Build unique list of recipients. Avoids Postfix problem which has
6680  # separate lists of real recipients and original recipients.
6681  my($to, %tolist);
6682  foreach $to (@{$this->{to}}) {
6683    $tolist{$to} = 1;
6684  }
6685  $to = join(', ', sort keys %tolist);
6686
6687  my($result, $headers);
6688
6689  if (MailScanner::Config::Value('hideworkdirinnotice',$this)) {
6690    my $pattern = '(' . quotemeta($global::MS->{work}->{dir}) . "|\\\.)/";
6691    #print STDERR "In replacement, regexp is \"$pattern\"\n";
6692    $rept =~ s/$pattern//g; #m # Remove the work dir
6693    $rept =~ s/\/?$id\/?//g; # Remove the message id
6694  }
6695
6696  my $reportspaces = 10 - length($reportword);
6697  $reportword = ' ' x $reportspaces . $reportword if $reportspaces>0;
6698  $result = "\n" .
6699            "    Sender: $from\n" .
6700            "IP Address: $ip\n" .
6701            " Recipient: $to\n" .
6702            "   Subject: $subj\n" .
6703            " MessageID: $id\n" .
6704            "Quarantine: $quarantine\n" .
6705            "$reportword: $rept\n";
6706
6707  if (MailScanner::Config::Value('noticefullheaders', $this)) {
6708    $headers = join("\n ", $global::MS->{mta}->OriginalMsgHeaders($this));
6709    $result .= MailScanner::Config::LanguageValue($this, 'fullheadersare') .
6710               ":\n\n $headers\n\n";
6711  }
6712
6713  $result;
6714}
6715
6716
6717# Find the attachments that have been disinfected and deliver them all
6718# in a new MIME message.
6719sub DeliverDisinfectedAttachments {
6720  my $this = shift;
6721
6722  my(@list, $reports, $attachment);
6723
6724  $reports = $this->{oldviruses};
6725
6726  # Loop through every attachment in the original list.
6727  # $attachment will contain the type indicator.
6728  foreach $attachment (keys %$reports) {
6729    #print STDERR "Looking to see if \"$attachment\" has been disinfected\n";
6730    # Never attempt "whole body" disinfections
6731    next if $attachment eq "";
6732    # Skip messages that are in the new report list
6733    next if defined $this->{virusreports}{"$attachment"};
6734    # Don't disinfect files the disinfector renamed
6735    if (!$global::MS->{work}->FileExists($this, $attachment)) {
6736      #print STDERR "Skipping deleted/renamed attachment $attachment\n";
6737      next;
6738    }
6739    # Add it to the list
6740    #print STDERR "Adding $attachment to list of disinfected files\n";
6741    push @list, $attachment;
6742  }
6743
6744  # Is there nothing to do?
6745  return unless @list;
6746
6747  #print STDERR "Have disinfected attachments " . join(',',@list) . "\n";
6748  # Deliver a message to the original recipients containing the
6749  # disinfected attachments. This is really a Sendmail-specific thing.
6750  $global::MS->{work}->ChangeToMessage($this);
6751  $this->DeliverFiles(@list);
6752}
6753
6754
6755# Create and deliver a new message from MailScanner about the
6756# disinfected files passed in @list.
6757sub DeliverFiles {
6758  my $this = shift;
6759  my(@files) = @_;
6760
6761  my($MaxSubjectLength, $from, $to, $subject, $newsubject, $top);
6762  my($localpostmaster, $postmastername);
6763  $MaxSubjectLength  = 25;
6764  $from = $this->{from};
6765  #$to   = join(', ', @{$this->{to}});
6766  my($to, %tolist);
6767  foreach $to (@{$this->{to}}) {
6768    $tolist{$to} = 1;
6769  }
6770  $to = join(', ', sort keys %tolist);
6771
6772  $subject = $this->{subject};
6773  $localpostmaster = MailScanner::Config::Value('localpostmaster', $this);
6774  $postmastername  = MailScanner::Config::LanguageValue($this, 'mailscanner');
6775
6776  $newsubject = MailScanner::Config::LanguageValue($this, 'disinfected') .
6777                ": " . substr($subject, 0, $MaxSubjectLength);
6778  $newsubject .= '...' if length($subject)>$MaxSubjectLength;
6779
6780  #print STDERR "About to deliver " . join(',',@files) . " to original " .
6781  #             "recipients after disinfection\n";
6782
6783  # Create the top-level MIME entity, just the headers
6784  $top = MIME::Entity->build(Type       => 'multipart/mixed',
6785                             From       => "$postmastername <$localpostmaster>",
6786                             To         => $to,
6787                             Subject    => $newsubject,
6788                             'X-Mailer' => 'MailScanner',
6789                       MailScanner::Config::Value('mailheader', $this) =>
6790                       MailScanner::Config::Value('disinfectedheader', $this));
6791
6792  # Construct the text of the message body
6793  my($textfh, $textfile, $output, $result, $attachment);
6794  $textfh = new FileHandle;
6795  $textfile = MailScanner::Config::Value('disinfectedreporttext', $this);
6796  $textfh->open($textfile)
6797    or MailScanner::Log::WarnLog("Cannot open disinfected report message " .
6798                                 "file %s, %s", $textfile, $!);
6799  $output = "";
6800  my $line;
6801  my $ea = qr/([\(\)\[\]\.\?\*\+\^"'@<>:])/;
6802  while(<$textfh>) {
6803    $line = chomp;
6804    #s#"#\\"#g; # Escape any " characters
6805    #s#@#\\@#g; # Escape any @ characters
6806    $line =~ s/$ea/\\$1/g; # Escape any regex characters
6807    # Untainting joy...
6808    $line =~ /(.*)/;
6809    $result = eval "\"$1\"";
6810    $output .= MailScanner::Config::DoPercentVars($result) . "\n";
6811  }
6812  $textfh->close();
6813  $top->attach(Data => $output);
6814
6815  # Construct all the attachments
6816  my($notype);
6817  foreach $attachment (@files) {
6818    # As each $attachment will contain the type indicator, we need to
6819    # create one which doesn't to name it with in the resulting message.
6820    $notype = substr($attachment,1);
6821    # Added "./" to start of next line to avoid potential DoS attack
6822    $top->attach(Filename    => "$notype",
6823                 Path        => "./$attachment",
6824                 Type        => "application/octet-stream",
6825                 Encoding    => "base64",
6826                 Disposition => "attachment");
6827  }
6828
6829  # Now send the message
6830  $global::MS->{mta}->SendMessageEntity($this, $top, $localpostmaster)
6831    or MailScanner::Log::WarnLog("Could not send disinfected message, %s",$!);
6832}
6833
6834
6835# Archive this message to any directories in its archiveplaces attribute
6836sub ArchiveToFilesystem {
6837  my $this = shift;
6838
6839  my($dir, $todaydir, $target, $didanything, %alreadydone);
6840  $didanything = 0;
6841  my $numrecips = scalar (@{$this->{to}});
6842
6843  # Assume it's a filename or a directory name. d=>directory, f=>file.
6844  my $assumeisdir = (MailScanner::Config::Value("assumeisdir", $this)
6845                     =~ /1/)?1:0;
6846  $todaydir = $this->{datenumber}; #MailScanner::Quarantine::TodayDir();
6847
6848  foreach $dir (@{$this->{archiveplaces}}) {
6849    #print STDERR "Archive to $dir\n";
6850    next unless $dir =~ /^\//; # Must be a pathname
6851    $dir =~ s/_HOUR_/$this->{hournumber}/g;
6852    $dir =~ s/_DATE_/$this->{datenumber}/g;
6853    $dir =~ s/_FROMUSER_/$this->{fromuser}/g;
6854    $dir =~ s/_FROMDOMAIN_/$this->{fromdomain}/g;
6855    foreach my $recip (0..$numrecips-1) {
6856      my $dircopy = $dir;
6857      my $u = $this->{touser}[$recip];
6858      my $d = $this->{todomain}[$recip];
6859      $dircopy =~ s/_TOUSER_/$u/g;
6860      $dircopy =~ s/_TODOMAIN_/$d/g;
6861
6862      # Don't archive to the same place twice
6863      next if $alreadydone{$dircopy};
6864      $alreadydone{$dircopy} = 1;
6865
6866      # If it exists, and it's a file, then append the message to it
6867      # in mbox format.
6868      if (-f $dircopy || !$assumeisdir) {
6869        #print STDERR "It is a file\n";
6870        $this->AppendToMbox($dircopy);
6871        $didanything = 1;
6872        next;
6873      }
6874      $target = "$dircopy/$todaydir";
6875      unless (-d "$target") {
6876        umask $global::MS->{quar}->{dirumask};
6877        mkpath "$target" or
6878          MailScanner::Log::WarnLog("Cannot create directory %s", $target);
6879        umask 0077;
6880      }
6881      #print STDERR "It is a dir\n";
6882      umask $global::MS->{quar}->{fileumask};
6883      $this->{store}->CopyToDir($target, $this->{id});
6884      #print STDERR "Stored " . $this->{id} . " to $target\n";
6885      umask 0077;
6886      $didanything = 1;
6887    }
6888  }
6889  return $didanything;
6890}
6891
6892
6893# Append a message to an mbox file.
6894# The mbox file may not exist, nor may its directory.
6895sub AppendToMbox {
6896  my($this, $mbox) = @_;
6897
6898  #untaint
6899  $mbox =~ m|(.*)|;
6900  $mbox = $1;
6901
6902  # Find the complete directory name.
6903  my $dir = $mbox;
6904  $dir =~ s#^(.*)/[^/]+$#$1#;
6905  # Create the directory (and its tree) if it doesn't exist.
6906  unless (-d $dir) {
6907    umask $global::MS->{quar}->{dirumask};
6908    mkpath $dir;
6909    umask 0077;
6910  }
6911
6912  my $fh = new IO::File "$mbox", "a";
6913  if ($fh) {
6914    # Print the mbox message header starting with a blank line and "From"
6915    # From $from `date "+%a %b %d %T %Y"`
6916    my($now, $recip);
6917    $now = ctime();
6918    $now =~ s/  (\d)/ 0$1/g; # Insert leading zeros where needed
6919
6920    print $fh "From " . $this->{from} . ' ' . $now . "\n";
6921    foreach $recip (@{$this->{to}}) {
6922      print $fh "X-MailScanner-Recipient: $recip\n";
6923    }
6924    $fh->flush;
6925
6926    # Write the entire message to this handle, then close.
6927    $this->{store}->WriteEntireMessage($this, $fh);
6928    print $fh "\n"; # Blank line at end of message to separate messages
6929    $fh->close;
6930    MailScanner::Log::InfoLog("Archived message %s to mbox file %s",
6931                              $this->{id}, $mbox);
6932  } else {
6933    MailScanner::Log::WarnLog("Failed to append message to pre-existing " .
6934                              "mbox file %s", $mbox);
6935  }
6936}
6937
6938
6939sub ReflowHeader {
6940  my($this, $key, $input) = @_;
6941  my($output, $pos, $len, $firstline, @words, $word);
6942  $output = "";
6943  $pos = 0;
6944  $firstline = 1;
6945
6946  @words = split(/,\s*/, $input);
6947  foreach $word (@words) {
6948    $len = length($word);
6949    if ($firstline) {
6950      $output = "$word";
6951      $pos = $len + length($key)+1; # 1 = space between key and input
6952      $firstline = 0;
6953      next;
6954    }
6955
6956    # Wrap at column 75 (pretty arbitrary number just less than 80)
6957    if ($pos+$len < 75) {
6958      $output .= ", $word";
6959      $pos += 2 + $len;
6960    } else {
6961      $output .= ",\n\t$word";
6962      $pos = 8 + $len;
6963    }
6964  }
6965
6966  return $output;
6967}
6968
6969
6970# Strip the HTML out of this message. All the checks have already
6971# been done, so just get on with it.
6972sub StripHTML {
6973  my $this = shift;
6974
6975  #print STDERR "Stripping HTML from message " . $this->{id} . "\n";
6976  $this->HTMLToText($this->{entity});
6977}
6978
6979
6980# Disarm some of the HTML tags in this message.
6981sub DisarmHTML {
6982  my $this = shift;
6983
6984  #print STDERR "Tags to convert are " . $this->{tagstoconvert} . " on message " . $this->{id} . "\n";
6985
6986  # Set the disarm booleans for this message
6987  $DisarmFormTag     = 0;
6988  $DisarmScriptTag   = 0;
6989  $DisarmCodebaseTag = 0;
6990  $DisarmCodebaseTag = 0;
6991  $DisarmIframeTag   = 0;
6992  $DisarmWebBug      = 0;
6993  $DisarmPhishing    = 0;
6994  $DisarmHidden      = 0;
6995  $DisarmNumbers     = 0;
6996  $StrictPhishing    = 0;
6997  $DisarmWebBugFound = 0;
6998  $PhishingSubjectTag= 0;
6999  $PhishingHighlight = 0;
7000  $SigImageFound     = 0;
7001  $DisarmFormTag     = 1 if $this->{tagstoconvert} =~ /form/i;
7002  $DisarmScriptTag   = 1 if $this->{tagstoconvert} =~ /script/i;
7003  $DisarmCodebaseTag = 1 if $this->{tagstoconvert} =~ /codebase/i;
7004  $DisarmCodebaseTag = 1 if $this->{tagstoconvert} =~ /data/i;
7005  $DisarmIframeTag   = 1 if $this->{tagstoconvert} =~ /iframe/i;
7006  $DisarmWebBug      = 1 if $this->{tagstoconvert} =~ /webbug/i;
7007  $PhishingSubjectTag= 1
7008    if MailScanner::Config::Value('tagphishingsubject', $this) =~ /1/;
7009  #print STDERR "PhishingSubjectTag = $PhishingSubjectTag\n";
7010  $PhishingHighlight = 1
7011    if MailScanner::Config::Value('phishinghighlight', $this) =~ /1/;
7012  #print STDERR "PhishingHighlight = $PhishingHighlight\n";
7013  $DisarmPhishingFound = 0;
7014  $this->{disarmphishingfound} = 0;
7015  $DisarmHTMLChangedMessage = 0;
7016  if (MailScanner::Config::Value('findphishing', $this) =~ /1/) {
7017    $DisarmPhishing = 1;
7018    $DisarmNumbers = 1
7019      if MailScanner::Config::Value('phishingnumbers', $this) =~ /1/;
7020    $StrictPhishing = 1
7021      if MailScanner::Config::Value('strictphishing', $this) =~ /1/;
7022  }
7023  $DisarmHidden = 1
7024    if MailScanner::Config::Value('highlighthiddenurls', $this) =~ /1/;
7025  # Construct the WebBugWhitelist - space and comma-separated list of words
7026  $WebBugWhitelist = MailScanner::Config::Value('webbugwhitelist', $this);
7027  $WebBugWhitelist =~ s/^\s+//;
7028  $WebBugWhitelist =~ s/\s+$//;
7029  $WebBugWhitelist =~ s/[\s,]+/|/g;
7030  $WebBugReplacement = MailScanner::Config::Value('webbugurl', $this);
7031  $WebBugBlacklist = MailScanner::Config::Value('webbugblacklist', $this);
7032  $WebBugBlacklist =~ s/^\s+//;
7033  $WebBugBlacklist =~ s/\s+$//;
7034  $WebBugBlacklist =~ s/[\s,]+/|/g;
7035
7036  my($counter, @disarmedtags);
7037  ($counter, @disarmedtags) = $this->DisarmHTMLTree($this->{entity});
7038  #print STDERR "disarmedtags = ". join(', ', @disarmedtags) . "\n";
7039
7040  # If the HTML checks found a real problem or there really was a phishing
7041  # attack, only then should we log anything.
7042  #print "DisarmPhishingFound = $DisarmPhishingFound on message " . $this->{id} . "\n";
7043  $this->{disarmphishingfound} = 1 if $DisarmPhishingFound;
7044  @disarmedtags = ('phishing') if $DisarmPhishingFound && $PhishingHighlight && !@disarmedtags; #JKF1 && $PhishingHighlight && !@disarmedtags;
7045  #print STDERR "Found DisarmPhishingFound\n" if $DisarmPhishingFound;
7046  MailScanner::Log::InfoLog('Content Checks: Detected and have disarmed ' .
7047                            join(', ', @disarmedtags) . ' tags in ' .
7048                            'HTML message in %s from %s',
7049                            $this->{id}, $this->{from})
7050    if $DisarmHTMLChangedMessage || $DisarmPhishingFound;
7051
7052  # And save the results from the phishing trip
7053  if ($DisarmPhishingFound) {
7054    # Do we want this or not? I say no. $this->{otherinfected} = 1;
7055    $this->{bodymodified} = 1;
7056    #print STDERR "DisarmPhishingFound = $DisarmPhishingFound\n";
7057  }
7058  if ($DisarmHTMLChangedMessage) {
7059    #print STDERR "Disarm Changed the message at 5132.\n";
7060    $this->{bodymodified} = 1;
7061    $this->{messagedisarmed} = 1;
7062  } else {
7063    $this->{messagedisarmed} = 0;
7064  }
7065
7066  # Did we find signs of a MailScanner signature image?
7067  $this->{sigimagepresent} = $SigImageFound;
7068
7069  # Store all the tags we disarmed
7070  #print STDERR "Storing " . join(',', @disarmedtags) . "\n";
7071  @{$this->{disarmedtags}} = @disarmedtags;
7072}
7073
7074
7075# Search for a multipart/alternative.
7076# If found, change it to multipart/mixed and make all its members into
7077# suitable named attachments.
7078sub EncapsulateAttachments {
7079  my($message, $searchtype, $entity, $filename) = @_;
7080
7081  # Reached a leaf node?
7082  return 0 unless $entity && defined($entity->head);
7083
7084  my(@parts, $part, $type, $extension, $newname);
7085  my $counter = 0;
7086
7087  $type = $entity->head->mime_attr('content-type');
7088  if (!$searchtype || ($type && $type =~ /$searchtype/i)) {
7089    #print STDERR "Found alternative message at entity $entity\n";
7090
7091    # Turn it into a multipart/mixed
7092    $entity->head->mime_attr('content-type' => 'multipart/mixed')
7093      if $searchtype;
7094
7095    # Change the parts into attachments
7096    @parts = $entity->parts;
7097    foreach $part (@parts) {
7098      my $head = $part->head;
7099      $type = $head->mime_attr('content-type') || 'text/plain';
7100      $extension = '.dat';
7101      $type =~ /\/([a-z0-9-]+)$/i and $extension = '.' . lc($1);
7102      $extension = '.txt'  if $type =~ /text\/plain/i;
7103      $extension = '.html' if $type =~ /text\/html/i;
7104
7105      $newname = $filename . $extension;
7106
7107      $head->mime_attr('Content-Type'                 => $type);
7108      $head->mime_attr('Content-Disposition'          => 'attachment');
7109      $head->mime_attr('Content-Disposition.filename' => $newname)
7110        unless $head->mime_attr('Content-Disposition.filename');
7111      $head->mime_attr('Content-Type.name'            => $newname)
7112        unless $head->mime_attr('Content-Type.name');
7113
7114      $counter++;
7115    }
7116  } else {
7117    # Now try the same on all the parts
7118    foreach $part (@parts) {
7119      $counter += $message->EncapsulateAttachments($searchtype, $part,
7120                                                   $filename);
7121    }
7122  }
7123
7124  return $counter;
7125}
7126
7127
7128sub EncapsulateMessageHTML {
7129  my $this = shift;
7130
7131  my($entity, $filename, $newpart);
7132
7133  $entity = $this->{entity};
7134
7135  $filename = MailScanner::Config::Value('originalmessage', $this);
7136
7137  $entity->make_multipart('mixed');
7138  $this->EncapsulateAttachments('multipart/alternative', $entity, $filename)
7139    or $this->EncapsulateAttachments(undef, $entity, $filename);
7140
7141  # Insert the new message part
7142  $newpart = MIME::Entity->build(Type => "text/plain",
7143                                 Disposition => undef,
7144                                 Data => [ "Hello\n","There\n","Last line\n" ],
7145                                 Filename => undef,
7146                                 Top  => 0,
7147                                 'X-Mailer' => undef
7148                                );
7149  $entity->add_part($newpart, 0); # Insert at the start of the message
7150
7151  # Clean up the message so spammers can't pollute me
7152  $this->{entity}->preamble(undef);
7153  $this->{entity}->epilogue(undef);
7154  $this->{entity}->head->add('MIME-Version', '1.0')
7155    unless $this->{entity}->head->get('mime-version');
7156  $this->{bodymodified} = 1;
7157  return;
7158}
7159
7160
7161# Encapsulate the message in an RFC822 structure so that it becomes a
7162# single atachment of the message. Need to build the spam report to put
7163# in as the text/plain body of the main message.
7164sub EncapsulateMessage {
7165  my $this = shift;
7166
7167  my($entity, $rfc822, $mimeversion, $mimeboundary, @newparts);
7168  my($messagefh, $filename, $emailmsg, $line, $charset, $datenumber);
7169  my($id, $to, $from, $localpostmaster, $hostname, $subject, $date);
7170  my($fullspamreport, $briefspamreport, $longspamreport, $sascore);
7171  my($postmastername);
7172
7173  # For now, if there is no entity structure at all then just return,
7174  # we cannot encapsulate a message without it.
7175  # Unfortunately that means we can't encapsulate messages that are
7176  # Virus Scanning = no ("yes" but also having "Virus Scanners=none" is
7177  # fine, and works). The encapsulation will merely fail to do anything.
7178  # Hopefully this will only be used by corporates who are virus scanning
7179  # everything anyway.
7180  # Workaround: Instead of using "Virus Scanning = no", use
7181  # "Virus Scanners = none" and a set of filename rules that pass all files.
7182  return unless $this->{entity};
7183
7184  # Construct the RFC822 attachment
7185  $mimeversion = $this->{entity}->head->get('mime-version');
7186  # Prune all the dead branches off the tree
7187  my $Pruned = PruneEntityTree($this->{entity},$this->{entity2file},$this->{file2entity});
7188  #print STDERR "Pruned tree = $Pruned\n";
7189  return unless $Pruned; # Bail out if the tree has no leaves
7190
7191  $entity = $this->{entity};
7192  $rfc822 = $entity->stringify;
7193
7194  # Setup variables they can use in the spam report that is inserted at
7195  # the top of the message.
7196  $id = $this->{id};
7197  #$to = join(', ', @{$this->{to}});
7198  my($to, %tolist);
7199  foreach $to (@{$this->{to}}) {
7200    $tolist{$to} = 1;
7201  }
7202  $to = join(', ', sort keys %tolist);
7203
7204  $from = $this->{from};
7205  $localpostmaster = MailScanner::Config::Value('localpostmaster', $this);
7206  $postmastername  = MailScanner::Config::LanguageValue($this, 'mailscanner');
7207  $hostname = MailScanner::Config::Value('hostname', $this);
7208  $subject = $this->{subject};
7209  $date = $this->{datestring}; # scalar localtime;
7210  $fullspamreport = $this->{spamreport};
7211  $longspamreport = $this->{salongreport};
7212  $sascore = $this->{sascore};
7213  #$this->{salongreport} = ""; # Reset it so we don't ever insert it twice
7214
7215  # Delete everything in brackets after the SA report, if it exists
7216  $briefspamreport = $fullspamreport;
7217  $briefspamreport =~ s/(spamassassin)[^(]*\([^)]*\)/$1/i;
7218  $charset = MailScanner::Config::Value('attachmentcharset', $this);
7219  $datenumber = $this->{datenumber};
7220
7221  # Construct the spam report at the top of the message
7222  $messagefh = new FileHandle;
7223  $filename  = MailScanner::Config::Value('inlinespamwarning', $this);
7224  $messagefh->open($filename)
7225    or MailScanner::Log::WarnLog("Cannot open inline spam warning file %s, %s",
7226                                 $filename, $!);
7227  $emailmsg = "";
7228  while(<$messagefh>) {
7229    chomp;
7230    s#"#\\"#g;
7231    s#@#\\@#g;
7232    # Boring untainting again...
7233    /(.*)/;
7234    $line = eval "\"$1\"";
7235    $emailmsg .= MailScanner::Config::DoPercentVars($line) . "\n";
7236  }
7237  $messagefh->close();
7238
7239  $newparts[0] = MIME::Entity->build(Type     => 'text/plain',
7240                                     Disposition => 'inline',
7241                                     Encoding => 'quoted-printable',
7242                                     Top      => 0,
7243                                     'X-Mailer' => undef,
7244                                     Charset => $charset,
7245                                     Data     => $emailmsg);
7246
7247  $newparts[1] = MIME::Entity->build(Type     => 'message/rfc822',
7248                                     Disposition => 'attachment',
7249                                     Top      => 0,
7250                                     'X-Mailer' => undef,
7251                                     Data     => $rfc822);
7252
7253  # If there was a multipart boundary, then create a new one so that
7254  # the main message has a different boundary from the RFC822 attachment.
7255  # Leave the RFC822 one alone, so we don't corrupt the original message,
7256  # but make sure we create a new one instead.
7257  # Keep generating random boundaries until we have definitely got a new one.
7258  my $oldboundary = $entity->head->multipart_boundary;
7259  do {
7260    $mimeboundary = '======' . $$ . '==' . int(rand(100000)) . '======';
7261  } while $mimeboundary eq $oldboundary;
7262
7263  # Put the new parts in place, hopefully it will correct all the multipart
7264  # headers for me. Wipe the preamble and epilogue or else someone will use
7265  # them to bypass the encapsulation process.
7266  # Make it a report if it wasn't multipart already.
7267  $entity->make_multipart("report"); # Used to be digest
7268  # Try *real* hard to make it a digest.
7269  $entity->head->mime_attr("Content-type" => "multipart/report"); # Used to be digest
7270  $entity->head->mime_attr("Content-type.boundary" => $mimeboundary);
7271  # Delete the "type" subfield which I don't think should be there
7272  $entity->head->mime_attr("Content-type.type" => undef);
7273  # JKF 09/11/2005 Added after bug report from Georg@hackt.net
7274  $entity->head->mime_attr("Content-type.report-type" => 'spam-notification');
7275  $entity->parts(\@newparts);
7276  $entity->preamble(undef);
7277  $entity->epilogue(undef);
7278  $entity->head->add('MIME-Version', '1.0') unless $mimeversion;
7279  $this->{bodymodified} = 1; # No infection but we changed the MIIME tree
7280}
7281
7282sub DisarmHTMLTree {
7283  MailScanner::Log::DebugLog("Debug: Entering DisarmHTMLTree");
7284  my($this, $entity) = @_;
7285
7286  my $counter = 0; # Have we modified this message at all?
7287  my @disarmed; # List of tags we have disarmed
7288
7289  #print STDERR "Disarming HTML Tree\n";
7290
7291  # Reached a leaf node?
7292  return 0 unless $entity && defined($entity->head);
7293
7294  if ($entity->head->mime_attr('content-disposition') !~ /attachment/i &&
7295      $entity->head->mime_attr('content-type')        =~ /text\/html/i) {
7296    #print STDERR "Found text/html message at entity $entity\n";
7297    @disarmed = $this->DisarmHTMLEntity($entity);
7298    #print STDERR "Disarmed = " . join(', ',@disarmed) . "\n";
7299    if (@disarmed) {
7300      $this->{bodymodified} = 1;
7301      $this->{denialofservice} = 1 if grep(/^denialofservice$/, @disarmed);
7302      $DisarmHTMLChangedMessage = 1;
7303      $counter++;
7304    }
7305  }
7306
7307  # Now try the same on all the parts
7308  my(@parts, $part, $newcounter, @newtags);
7309  @parts = $entity->parts;
7310  foreach $part (@parts) {
7311    ($newcounter, @newtags) = $this->DisarmHTMLTree($part);
7312    $counter += $newcounter;
7313    @disarmed = (@disarmed, @newtags);
7314  }
7315
7316  #print STDERR "Returning " . join(', ', @disarmed) . " from DisarmHTMLTree\n";
7317  return ($counter, @disarmed);
7318}
7319
7320
7321# Walk the MIME tree, looking for text/html entities. Whenever we find
7322# one, create a new filename for a text/plain entity, and replace the
7323# part that pointed to the filename with a replacement that points to
7324# the new txt filename.
7325# Only replace inline sections, don't replace attachments, so that your
7326# users can still mail HTML attachments to each other.
7327# Then tag the message to say it has been modified, so that it is
7328# rebuilt from the MIME tree when it is delivered.
7329sub HTMLToText {
7330  my($this, $entity) = @_;
7331
7332  my $counter; # Have we modified this message at all?
7333
7334  # Reached a leaf node?
7335  return 0 unless $entity && defined($entity->head);
7336
7337  if ($entity->head->mime_attr('content-disposition') !~ /attachment/i &&
7338      $entity->head->mime_attr('content-type')        =~ /text\/html/i) {
7339    #print STDERR "Found text/html message at entity $entity\n";
7340    $this->HTMLEntityToText($entity);
7341    MailScanner::Log::InfoLog('Content Checks: Detected and will convert ' .
7342                              'HTML message to plain text in %s',
7343                              $this->{id});
7344    $this->{bodymodified} = 1; # No infection but we changed the MIIME tree
7345    #$this->{otherreports}{""} .= "Converted HTML to plain text\n";
7346    #$this->{othertypes}{""} .= "m"; # Modified body, but no infection
7347    #$this->{otherinfected}++;
7348    $counter++;
7349  }
7350
7351  # Now try the same on all the parts
7352  my(@parts, $part);
7353  @parts = $entity->parts;
7354  foreach $part (@parts) {
7355    $counter += $this->HTMLToText($part);
7356  }
7357
7358  return $counter;
7359}
7360
7361# HTML::Parset callback function for normal text
7362my(%DisarmDoneSomething, $DisarmLinkText, $DisarmLinkURL, $DisarmAreaURL,
7363   $DisarmInsideLink, $DisarmBaseURL);
7364
7365# Convert 1 MIME entity from html to dis-armed HTML using HTML::Parser.
7366sub DisarmHTMLEntity {
7367  MailScanner::Log::DebugLog("Debug: Entering DisarmHTMLEntity");
7368  my($this, $entity) = @_;
7369
7370  my($oldname, $newname, $oldfh, $outfh, $htmlparser);
7371
7372  #print STDERR "Disarming HTML $entity\n";
7373
7374  # Initialise all the variables we will use in the parsing, so nothing
7375  # is inherited from old messages
7376  $DisarmLinkText   = "";
7377  $DisarmLinkURL    = "";
7378  $DisarmInsideLink = 0;
7379  $DisarmBaseURL    = "";
7380  $DisarmAreaURL    = "";
7381  %DisarmDoneSomething = ();
7382
7383  # Replace the filename with a new one
7384  $oldname = $entity->bodyhandle->path();
7385  #print STDERR "Path is $oldname\n";
7386  $newname = $oldname;
7387  $newname =~ s/\..?html?$//i; # Remove .htm .html .shtml
7388  $newname .= '2.html'; # This should always pass the filename checks
7389  $entity->bodyhandle->path($newname);
7390
7391  # Forking now: $outfh = new FileHandle;
7392  # Forking now: unless ($outfh->open(">$newname")) {
7393  # Forking now:   MailScanner::Log::WarnLog('Could not create disarmed HTML file %s',
7394  # Forking now:                             $newname);
7395  # Forking now:   return keys %DisarmDoneSomething;
7396  # Forking now: }
7397
7398  # Forking now: # Set default output filehandle so we generate the new HTML
7399  # Forking now: $oldfh = select $outfh;
7400
7401  # Process the old HTML file into the new one
7402  my $pipe = new IO::Pipe
7403    or MailScanner::Log::DieLog('Failed to create pipe, %s, while parsing ' .
7404                  'HTML. Try reducing the maximum number of unscanned ' .
7405                  'messages per batch', $!);
7406  my $PipeReturn = 0;
7407  my $pid = fork();
7408  die "Can't fork: $!" unless defined($pid);
7409  if ($pid == 0) {
7410    # In the child
7411    $pipe->writer();
7412    $pipe->autoflush();
7413    $outfh = new FileHandle;
7414    unless ($outfh->open(">$newname")) {
7415      MailScanner::Log::WarnLog('Could not create disarmed HTML file %s',
7416                                $newname);
7417      exit 1;
7418    }
7419
7420    select $outfh;
7421    if ($DisarmPhishing) {
7422      HTML::Parser->new(api_version => 3,
7423        start_h     => [\&DisarmTagCallback,    "tagname, text, attr, attrseq"],
7424        end_h       => [\&DisarmEndtagCallback, "tagname, text, '" . $this->{id} . "'"],
7425        text_h      => [\&DisarmTextCallback,   "text"],
7426        default_h   => [ sub { print @_; },     "text"],
7427                     )
7428        ->parse_file($oldname)
7429        or MailScanner::Log::WarnLog("HTML disarming, can't open file %s: %s",
7430                                     $oldname, $!);
7431      # JKF 20101107 Try to fix unterminated links
7432      if ($DisarmInsideLink) {
7433        DisarmEndtagCallback('a', " ", $this->{id});
7434        print $outfh "\n";
7435      }
7436    } else {
7437      HTML::Parser->new(api_version => 3,
7438        start_h     => [\&DisarmTagCallback,    "tagname, text, attr, attrseq"],
7439        end_h       => [\&DisarmEndtagCallback, "tagname, text, '" . $this->{id} . "'"],
7440        default_h   => [ sub { print @_; },     "text"],
7441                     )
7442        ->parse_file($oldname)
7443        or MailScanner::Log::WarnLog("HTML disarming, can't open file %s: %s",
7444                                     $oldname, $!);
7445    }
7446    # Dump the contents of %DisarmDoneSomething down the pipe
7447    foreach my $ddskey (keys %DisarmDoneSomething) {
7448      print $pipe "$ddskey\n";
7449    }
7450    print $pipe "ENDENDEND\n";
7451    $pipe->close;
7452    $pipe = undef;
7453    exit 0;
7454    # The child will never get here.
7455  }
7456
7457  # In the parent.
7458  my @DisarmDoneSomething;
7459  eval {
7460    $pipe->reader();
7461    local $SIG{ALRM} = sub { die "Command Timed Out" };
7462    alarm MailScanner::Config::Value('spamassassintimeout');
7463    # Read the contents of %DisarmDoneSomething from the pipe
7464    my($pipedata);
7465    while (defined($pipedata = <$pipe>)) {
7466      last if $pipedata eq "ENDENDEND\n";
7467      chomp $pipedata;
7468      push @DisarmDoneSomething, $pipedata;
7469      #print STDERR "DisarmDoneSomething $pipedata\n";
7470    }
7471    waitpid $pid, 0;
7472    $pipe->close;
7473    $PipeReturn = $?;
7474    alarm 0;
7475    $pid = 0;
7476  };
7477  alarm 0;
7478  # Workaround for bug in perl shipped with Solaris 9,
7479  # it doesn't unblock the SIGALRM after handling it.
7480  eval {
7481    my $unblockset = POSIX::SigSet->new(SIGALRM);
7482    sigprocmask(SIG_UNBLOCK, $unblockset)
7483      or die "Could not unblock alarm: $!\n";
7484  };
7485
7486  # If pid != 0 then it failed so we have to kill the child and mark it somehow
7487  #print STDERR "pid==$pid\n";
7488  #print STDERR "PipeReturn==$PipeReturn\n";
7489  if ($pid>0) {
7490    kill 15, $pid; # Was -15
7491    # Wait for up to 10 seconds for it to die
7492    for (my $i=0; $i<5; $i++) {
7493      sleep 1;
7494      waitpid($pid, &POSIX::WNOHANG);
7495      ($pid=0),last unless kill(0, $pid);
7496      kill 15, $pid; # Was -15
7497    }
7498    # And if it didn't respond to 11 nice kills, we kill -9 it
7499    if ($pid) {
7500      kill 9, $pid; # Was -9
7501      waitpid $pid, 0; # 2.53
7502    }
7503  }
7504
7505  # Forking now: select $oldfh;
7506  # Forking now: $outfh->close();
7507
7508  # Tell the caller if we did anything
7509  #print STDERR "Keys are " . join(', ', keys %DisarmDoneSomething) . "\n";
7510  #return keys %DisarmDoneSomething;
7511
7512  if ($PipeReturn) {
7513    if ( MailScanner::Config::Value("ignoredenialofservice", $this) =~ /0/ ) {
7514        # It went badly wrong!
7515        # Overwrite the output file to kill it, and return the error.
7516        # Log the fact and the exit status.
7517        MailScanner::Log::WarnLog("HTML disarming died, status = $PipeReturn");
7518        $outfh = new FileHandle;
7519        unless ($outfh->open(">$newname")) {
7520          MailScanner::Log::WarnLog('Could not wipe deadly HTML file %s',
7521                                    $newname);
7522          exit 1;
7523        }
7524        my $report = "MailScanner was attacked by a Denial Of Service attack, and has therefore \ndeleted this part of the message. Please contact your e-mail providers \nfor more information if you need it, giving them the whole of this report.\n";
7525        my $report2 = MailScanner::Config::LanguageValue(0, 'htmlparserattack');
7526        $report = $report2 if $report2 && $report2 ne 'htmlparserattack';
7527        print $outfh $report . "\n\nAttack in: $oldname\n";
7528        $outfh->close;
7529
7530        push @DisarmDoneSomething, 'denialofservice';
7531     } else {
7532       # Ignore the denial of service per configuration
7533       # This does not solve the root causes of the DOS/fork pipe failure message
7534       # Use with caution
7535       MailScanner::Log::WarnLog("HTML disarming died, status = $PipeReturn");
7536       MailScanner::Log::WarnLog("Ignore Denial of Service is enabled, proceeding");
7537     }
7538  }
7539
7540  #print STDERR "Results of HTML::Parser are " . join(',',@DisarmDoneSomething) . "\n";
7541  return @DisarmDoneSomething;
7542}
7543
7544# HTML::Parser callback for text so we can collect the contents of links
7545sub DisarmTextCallback {
7546  my($text) = @_;
7547
7548  unless ($DisarmInsideLink) {
7549    print $text;
7550    #print STDERR "DisarmText just printed \"$text\"\n";
7551    return;
7552  }
7553
7554  # We are inside a link.
7555  # Save the original text, we well might need it.
7556  $DisarmLinkText .= $text;
7557  #print STDERR "DisarmText just added \"$text\"\n";
7558}
7559
7560# HTML::Parser callback function for start tags
7561sub DisarmTagCallback {
7562  MailScanner::Log::DebugLog("Debug: Entering DisarmTagCallback");
7563  my($tagname, $text, $attr, $attrseq) = @_;
7564
7565  #print STDERR "Disarming $tagname\n";
7566
7567  my $output = "";
7568  my $webbugfilename;
7569
7570  if ($tagname eq 'form' && $DisarmFormTag) {
7571    #print "It's a form\n";
7572    $text = substr $text, 1;
7573    $output .= "<BR><MailScannerForm$$ " . $text;
7574    $DisarmDoneSomething{'form'} = 1;
7575  } elsif ($tagname eq 'input' && $DisarmFormTag) {
7576    #print "It's an input button\n";
7577    $attr->{'type'} = "reset";
7578    $output .= '<' . $tagname;
7579    foreach (@$attrseq) {
7580      next if /^on/;
7581      $output .= ' ' . $_ . '="' . $attr->{$_} . '"';
7582    }
7583    $output .= '>';
7584    $DisarmDoneSomething{'form input'} = 1;
7585  } elsif ($tagname eq 'button' && $DisarmFormTag) {
7586    #print "It's a button\n";
7587    $attr->{'type'} = "reset";
7588    $output .= '<' . $tagname;
7589    foreach (@$attrseq) {
7590      next if /^on/;
7591      $output .= ' ' . $_ . '="' . $attr->{$_} . '"';
7592    }
7593    $output .= '>';
7594    $DisarmDoneSomething{'form button'} = 1;
7595  } elsif ($tagname eq 'object' && $DisarmCodebaseTag) {
7596    #print "It's an object\n";
7597    if (exists $attr->{'codebase'}) {
7598      $text = substr $text, 1;
7599      $output .= "<MailScannerObject$$ " . $text;
7600      $DisarmDoneSomething{'object codebase'} = 1;
7601    } elsif (exists $attr->{'data'}) {
7602      $text = substr $text, 1;
7603      $output .= "<MailScannerObject$$ " . $text;
7604      $DisarmDoneSomething{'object data'} = 1;
7605    } else {
7606      $output .= $text;
7607    }
7608  } elsif ($tagname eq 'iframe' && $DisarmIframeTag) {
7609    #print "It's an iframe\n";
7610    $text = substr $text, 1;
7611    $output .= "<MailScannerIFrame$$ " . $text;
7612    $DisarmDoneSomething{'iframe'} = 1;
7613  } elsif ($tagname eq 'script' && $DisarmScriptTag) {
7614    #print "It's a script\n";
7615    $text = substr $text, 1;
7616    $output .= "<MailScannerScript$$ " . $text;
7617    $DisarmDoneSomething{'script'} = 1;
7618  } elsif ($tagname eq 'a' && $DisarmPhishing) {
7619    #print STDERR "It's a link\n";
7620    $output .= $text;
7621    $DisarmLinkText = ''; # Reset state of automaton
7622    $DisarmLinkURL = '';
7623    $DisarmLinkURL = $attr->{'href'} if exists $attr->{'href'};
7624    $DisarmInsideLink = 1;
7625    $DisarmInsideLink = 0 if $DisarmLinkURL eq ''; # JPSB empty A tags. Was:
7626    #Old: $DisarmInsideLink = 0 if $text =~ /\/\>$/; # JKF Catch /> empty A tags
7627    #print STDERR "DisarmInsideLink = $DisarmInsideLink\n";
7628  } elsif ($tagname eq 'img') {
7629    #print STDERR "It's an image\n";
7630    #print STDERR "The src is \"" . $attr->{'src'} . "\"\n";
7631    # If the alt text has the required magic text in it then it's a sig image.
7632    # Look for "MailScanner" and "Signature" and "%org-name%" (if %org-name% is defined)
7633    my $orgname = MailScanner::Config::DoPercentVars('%org-name%');
7634    $SigImageFound = 1
7635      if exists $attr->{'alt'} && $attr->{'alt'} =~ /MailScanner/i
7636                               && $attr->{'alt'} =~ /Signature/i
7637                               && ($orgname eq '' ||
7638                                   ($orgname && $attr->{'alt'} =~ /$orgname/i)
7639                                  );
7640    #print STDERR "Found a signature image\n"
7641    #  if exists $attr->{'alt'} && $attr->{'alt'} =~ /MailScanner.*Signature/i;
7642    if ($DisarmWebBug) {
7643      my $server = $attr->{'src'};
7644      $server =~ s#^[^:]+:/+([^/]+)/.*$#$1#;
7645      if (($server && $WebBugBlacklist && $server =~ /$WebBugBlacklist/i) ||
7646          (exists $attr->{'width'}  && $attr->{'width'}<=2 &&
7647           exists $attr->{'height'} && $attr->{'height'}<=2 &&
7648           exists $attr->{'src'}    && $attr->{'src'} !~ /^cid:|^$WebBugReplacement/i)) {
7649        # Is the filename in the WebBug whitelist?
7650        $webbugfilename = $attr->{'src'};
7651        $webbugfilename = $1 if $webbugfilename =~ /\/([^\/]+)$/;
7652        if ($webbugfilename &&  $WebBugWhitelist &&
7653            $webbugfilename =~ /$WebBugWhitelist/i) {
7654          # It's in the whitelist, so ignore it
7655          $output .= $text;
7656        } else {
7657          # It's not in the whitelist, so zap it with insecticide!
7658          $output .= '<img src="' . $WebBugReplacement . '" width="' .
7659                     $attr->{'width'} .  '" height="' . $attr->{'height'} .
7660                     '" alt="';
7661          $output .= 'Web Bug from ' . $attr->{'src'} if $attr->{'src'};
7662          $output .= '" />';
7663          $DisarmWebBugFound = 1;
7664          $DisarmDoneSomething{'web bug'} = 1;
7665        }
7666      } else {
7667        $output .= $text;
7668      }
7669    } else {
7670      $output .= $text;
7671    }
7672  } elsif ($tagname eq 'base') {
7673    #print STDERR "It's a Base URL\n";
7674    $output .= $text;
7675    #print STDERR "Base URL = " . $attr->{'href'} . "\n";
7676    $DisarmBaseURL = $attr->{'href'} if exists $attr->{'href'};
7677  } elsif ($tagname eq 'area' && $DisarmInsideLink && $DisarmPhishing) {
7678    #print STDERR "It's an imagemap area\n";
7679    $output .= $text;
7680    #print STDERR "Area URL = " . $attr->{'href'} . "\n";
7681    $DisarmAreaURL = $attr->{'href'};
7682  } else {
7683    #print STDERR "The tag was a \"$tagname\"\n";
7684    $output .= $text;
7685    #print STDERR "output text is now \"$output\"\n";
7686  }
7687  # tagname DisarmPhishing
7688  #    a     0               0 1
7689  #    a     1               0 0 tagname=a && Disarm=1
7690  #    b     0               1 1
7691  #    b     1               1 0
7692  #if ($DisarmInsideLink && !($tagname eq 'a' && $DisarmPhishing)) {
7693  if ($DisarmInsideLink && ($tagname ne 'a' || !$DisarmPhishing)) {
7694    $DisarmLinkText .= $output;
7695    #print STDERR "StartCallback: DisarmLinkText now equals \"$DisarmLinkText\"\n";
7696  } else {
7697    print $output;
7698    #print STDERR "StartCallback: Printed2 \"$output\"\n";
7699  }
7700}
7701
7702# HTML::Parser callback function for end tags
7703sub DisarmEndtagCallback {
7704  my($tagname, $text, $id) = @_;
7705
7706  if ($tagname eq 'iframe' && $DisarmIframeTag) {
7707    print "</MailScannerIFrame$$>";
7708    $DisarmDoneSomething{'iframe'} = 1;
7709  } elsif ($tagname eq 'form' && $DisarmFormTag) {
7710    print "</MailScannerForm$$>";
7711    $DisarmDoneSomething{'form'} = 1;
7712  } elsif ($tagname eq 'script' && $DisarmScriptTag) {
7713    print "</MailScannerScript$$>";
7714    $DisarmDoneSomething{'script'} = 1;
7715  } elsif ($tagname eq 'map' && $DisarmAreaURL) {
7716    # We are inside an imagemap that is part of a phishing imagemap
7717    $DisarmLinkText .= '</map>';
7718  } elsif ($tagname eq 'a' && $DisarmPhishing) {
7719    MailScanner::Log::DebugLog("Debug: Entering Disarm <a> tag");
7720    #print STDERR "---------------------------\n";
7721    #print STDERR "Endtag Callback found link, " .
7722    #             "disarmlinktext = \"$DisarmLinkText\"\n";
7723    my($squashedtext,$linkurl,$alarm,$numbertrap,$emailuser,$disarmedflag);
7724    # Local flag used for hidden link detection to suppress multiple detections
7725    $disarmedflag = 0;
7726    $DisarmInsideLink = 0;
7727    $squashedtext = lc($DisarmLinkText);
7728    if ($DisarmAreaURL) {
7729      $squashedtext = $DisarmLinkURL;
7730      $DisarmLinkURL = lc($DisarmAreaURL);
7731      $DisarmAreaURL = ""; # End of a link, so reset this
7732      } else {
7733      $squashedtext = lc($DisarmLinkText);
7734    }
7735
7736    # Try to filter out mentions of Microsoft's .NET system
7737    $squashedtext = "" if $squashedtext eq ".net";
7738    $squashedtext = "" if $squashedtext =~ /(^|\b)(ado|asp)\.net($|\b)/;
7739
7740    $squashedtext =~ s/\%a0//g;
7741    $squashedtext =~ s#%([0-9a-f][0-9a-f])#chr(hex('0x' . $1))#gei; # Unescape
7742    #Moved below tag removal, as required by new 'Remove tags' re.
7743    #$squashedtext =~ s/\s+//g; # Remove any whitespace
7744    $squashedtext =~ s/\\/\//g; # Change \ to / as many browsers do this
7745    $squashedtext =~ s/^\[\d*\]//; # Removing leading [numbers]
7746    #$squashedtext =~ s/(\<\/?[^>]*\>)*//ig; # Remove tags
7747    $squashedtext =~ tr/\n/ /; # Join multiple lines onto 1 line
7748    $squashedtext =~ s/(\<\/?[a-z][a-z0-9:._-]*((\s+[a-z][a-z0-9:._-]*(\s*=\s*(?:\".*?\"|\'.*?\'|[^\'\">\s]+))?)+\s*|\s*)\/?\>)*//ig; # Remove tags, better re from snifer_@hotmail.com
7749    # Remove extraneous text around urls
7750    # https://github.com/MailScanner/v5/issues/401
7751    # Throw out objects that have no .'s or :'s in them surrounded by whitespace, unless they are fax/tel
7752    if ( $DisarmLinkURL !~ /^(fax|tel):/ ) {
7753      # Look ahead to any something.something or something:something and throw out preceding text
7754      $squashedtext =~ s/^.*\s+(?=(.+[.:])+.+)//;
7755      # Throw out any :'s surrounded by whitespace
7756      $squashedtext =~ s/\s:+\s//g;
7757      # Remove any trailing text with whitespace
7758      $squashedtext =~ s/\s.*$//;
7759    } else {
7760      $squashedtext =~ s/\s+//g; # Remove any whitespace
7761    }
7762    if ( $DisarmLinkURL =~ m/^mailto:/i ) {
7763      # Convert HTML entities, if present
7764      # https://github.com/MailScanner/v5/issues/335
7765      $squashedtext = decode_entities($squashedtext);
7766      if ( $squashedtext =~ /@/ ) {
7767        $squashedtext =~ s/^.*\s+(?=.*\@)//;
7768        $squashedtext =~ s/\s+.*$//;
7769        # Remove any leading or trailing text
7770        # Remove < and > tags, if present
7771        # https://github.com/MailScanner/v5/issues/320
7772        $squashedtext =~ s/(?:\<|\>)//g;
7773        my @list = split(/@/, $squashedtext);
7774        $emailuser = $list[0];
7775        $squashedtext = $list[1]; # Remove username of email addresses
7776      }
7777    }
7778    #$squashedtext =~ s/\&\w*\;//g; # Remove things like &lt; and &gt;
7779    $squashedtext =~ s/^.*(\&lt\;|\<)((https?|ftp|mailto|webcal):.+?)(\&gt\;|\>).*$/$2/i; # Turn blah-blah <http://link.here> blah-blah into "http://link.here"
7780    $squashedtext =~ s/^\&lt\;//g; # Remove leading &lt;
7781    $squashedtext =~ s/\&gt\;$//g; # Remove trailing &gt;
7782    $squashedtext =~ s/\&lt\;/\</g; # Remove things like &lt; and &gt;
7783    $squashedtext =~ s/\&gt\;/\>/g; # rEmove things like &lt; and &gt;
7784    $squashedtext =~ s/\&nbsp\;//g; # Remove fixed spaces
7785    $squashedtext =~ s/^(http:\/\/[^:]+):80(\D|$)/$1$2/i; # Remove http:...:80
7786    $squashedtext =~ s/^(https:\/\/[^:]+):443(\D|$)/$1$2/i; # Remove https:...:443
7787    #$squashedtext =~ s/./CharToIntnl("$&")/ge;
7788    $squashedtext =  StringToIntnl($squashedtext); # s/./CharToIntnl("$&")/ge;
7789    #print STDERR "Text = \"$text\"\n";
7790    #print STDERR "1SquashedText = \"$squashedtext\"\n";
7791    #print STDERR "1LinkURL      = \"$DisarmLinkURL\"\n";
7792    # If it looks like a link, remove any leading https:// or ftp://
7793    ($linkurl,$alarm) = CleanLinkURL($DisarmLinkURL);
7794    #print STDERR "linkurl = $linkurl\nBefore If statement\n";
7795    #print STDERR "squashedtext = $squashedtext\nBefore If statement\n";
7796
7797    # Has it fallen foul of the numeric-ip phishing net? Must treat x
7798    # like a digit so it catches 0x41 (= 'A')
7799    $numbertrap = ($DisarmNumbers && $linkurl !~ /[<>g-wyz]+/)?1:0;
7800
7801    #
7802    # Known Dangerous Sites List code here
7803    #
7804    my $AlreadyReported = 0;
7805    if (InPhishingBlacklist($linkurl) and not InPhishingWhitelist($linkurl)) {
7806      use bytes;
7807      print MailScanner::Config::LanguageValue(0, 'definitefraudstart') .
7808            ' "' . $linkurl . '"' .
7809            MailScanner::Config::LanguageValue(0, 'definitefraudend') .
7810            ' ' if $PhishingHighlight;
7811      $DisarmPhishingFound = 1;
7812      $disarmedflag = 1;
7813      $linkurl = substr $linkurl, 0, 80;
7814      $squashedtext = substr $squashedtext, 0, 80;
7815      $DisarmDoneSomething{'phishing'} = 1 if $PhishingHighlight;
7816      use bytes; # Don't send UTF16 to syslog, it breaks!
7817      MailScanner::Log::NoticeLog('Found definite phishing fraud from %s ' .
7818                                  'in %s', $DisarmLinkURL, $id);
7819                                  #'in %s', $linkurl, $id);
7820      no bytes;
7821      $AlreadyReported = 1;
7822    } elsif ($alarm ||
7823      $squashedtext =~ /^(w+|ft+p|fpt+|ma[il]+to)([.,]|\%2e)/i ||
7824      $squashedtext =~ /[.,](com|org|net|info|biz|ws)/i ||
7825      $squashedtext =~ /[.,]com?[.,][a-z][a-z]/i ||
7826      $squashedtext =~ /^(ht+ps?|ft+p|fpt+|mailto|webcal)[:;](\/\/)?(.*(\.|\%2e))/i ||
7827      $numbertrap) {
7828      $squashedtext =~  s/^(ht+ps?|ft+p|fpt+|mailto|webcal)[:;](\/\/)?(.*(\.|\%2e))/$3/i;
7829      $squashedtext =~ s/^.*?-http:\/\///; # 20080206 Delete common pre-pended text
7830      $squashedtext =~ s/\/.*$//; # Only compare the hostnames
7831      $squashedtext =~ s/[,.]+$//; # Allow trailing dots and commas
7832      $squashedtext = 'www.' . $squashedtext
7833        unless $squashedtext =~ /^ww+|ft+p|fpt+|mailto|webcal/ || $numbertrap;
7834      #print STDERR "2SquashedText = \"$squashedtext\"\n";
7835      # If we have already tagged this link as a phishing attack, spot the
7836      # warning text we inserted last time and don't tag it again.
7837      my $possiblefraudstart = MailScanner::Config::LanguageValue(0, 'possiblefraudstart');
7838      my $squashedpossible = lc($possiblefraudstart);
7839      my $squashedsearch   = lc($DisarmLinkText);
7840      $squashedpossible =~ s/\s//g;
7841      $squashedpossible =~ s/(\<\/?[^>]*\>)*//ig; # Remove tags
7842      $squashedsearch   =~ s/\s//g;
7843      $squashedsearch   =~ s/(\<\/?[^>]*\>)*//ig; # Remove tags
7844      #$squashedpossible = "www.$squashedpossible\"$linkurl\"";
7845      $squashedpossible = quotemeta($squashedpossible);
7846      #print STDERR "NEW CODE: SquashedText     = $squashedtext\n";
7847      #print STDERR "NEW CODE: DisarmLinkText   = $DisarmLinkText\n";
7848      #print STDERR "NEW CODE: Text             = $text\n";
7849      #print STDERR "NEW CODE: SquashedPossible = $squashedpossible\n";
7850      #print STDERR "NEW CODE: LinkURL          = $linkurl\n";
7851      if ($squashedtext =~ /$squashedpossible/) {
7852        #print STDERR "FOUND IT\n";
7853        #print STDERR "$DisarmLinkText$text\n";
7854        print "$DisarmLinkText$text";
7855        $DisarmLinkText = ""; # Reset state of automaton
7856        return;
7857      }
7858      #print STDERR "2LinkURL      = \"$linkurl\"\n";
7859      # If it is a phishing catch, or else it's not (numeric or IPv6 numeric)
7860      # then notify.
7861      #print STDERR "LinkURL is \"$linkurl\"\n";
7862      #print STDERR "Squashe is \"$squashedtext\"\n";
7863      #print STDERR "Phishing by numbers is $DisarmNumbers\n";
7864
7865      #
7866      # Less strict phishing net code is here
7867      #
7868
7869      if (!$StrictPhishing) {
7870        my $TheyMatch = 0;
7871
7872        # Is this an email?  Prepare it to compare domains.
7873        # https://github.com/MailScanner/v5/issues/229
7874        MailScanner::Log::DebugLog("DisarmLinkURL = $DisarmLinkURL");
7875        MailScanner::Log::DebugLog("linkurl = $linkurl");
7876        if ($DisarmLinkURL =~ m/^mailto:/i ) {
7877          # Convert HTML entities, if present
7878          # https://github.com/MailScanner/v5/issues/335
7879          $linkurl = decode_entities($linkurl);
7880          if ( $linkurl =~ /@/ ) {
7881            my @list = split(/@/, $linkurl);
7882            $linkurl = $list[1];
7883          }
7884        }
7885
7886        unless (InPhishingWhitelist($linkurl)) {
7887          #print STDERR "Not strict phishing\n";
7888          # We are just looking at the domain name and country code (more or less)
7889          # Find the end of the domain name so we know what to strip
7890          my $domain = $linkurl;
7891          $domain =~ s/\/*$//; # Take off trailing /
7892          $domain =~ s/\.([^.]{2,100})$//; # Take off .TLD
7893          my $tld = $1;
7894          $domain =~ s/([^.]{2,100})$//; # Take off SLD
7895          my $sld = $1;
7896          # Now do the same for the squashed text, i.e. where they claim it is
7897          my $text = $squashedtext;
7898          #print STDERR "Comparing $linkurl and $squashedtext\n";
7899          #print STDERR "tld = $tld and sld = $sld\n";
7900          $text =~ s/\/*$//; # Take off trailing /
7901          $text =~ s/\.([^.]{2,100})$//; # Take off .TLD
7902          my $ttld = $1;
7903          $text =~ s/([^.]{2,100})$//; # Take off SLD
7904          my $tsld = $1;
7905          #print STDERR "ttld = $ttld and tsld = $tsld\n";
7906          if ($tld && $ttld && $sld && $tsld && $tld eq $ttld && $sld eq $tsld) {
7907            #print STDERR "tld/sld test matched\n";
7908            # domain.org or domain.3rd.2nd.india
7909            # Last 2 words match (domain.org), should that be enough or do we
7910            # need to compare the next word too (domain.org.uk) ?
7911            # We need to check the next word too.
7912            $domain =~ s/([^.]{2,100})\.$//; # Take off 3LD.
7913            my $third = $1;
7914            $text   =~ s/([^.]{2,100})\.$//; # Take off 3LD.
7915            my $tthird = $1;
7916            #print STDERR "third = $third and tthird = $tthird\n";
7917            if ($MailScanner::Config::SecondLevelDomainExists{"$sld.$tld"}) {
7918              # domain.org.uk
7919              $TheyMatch = 1 if $third && $tthird && $third eq $tthird;
7920            } else {
7921              # Maybe we have a 3rd level domain base?
7922              if ($MailScanner::Config::SecondLevelDomainExists{"$third.$sld.$tld"}) {
7923                # We need to check the next (4th) word too.
7924                $domain =~ /([^.]{2,100})\.$/; # Store 4LD
7925                my $fourth = $1;
7926                $text   =~ /([^.]{2,100})\.$/; # Store 4LD
7927                my $tfourth = $1;
7928                $TheyMatch = 1 if $fourth && $tfourth && $fourth eq $tfourth &&
7929                                  $third  && $tthird  && $third  eq $tthird;
7930              } else {
7931                # We don't have a 3rd level, and we cannot have got here if
7932                # there was a 2nd level, so it must just look like domain.org,
7933                # so matches if tld and sld are the same. But we must have that
7934                # true or we would never have got here, so they must match.
7935                $TheyMatch = 1;
7936              }
7937            }
7938          }
7939          #
7940          # Put phishing reporting code in here too.
7941          #
7942          if ($linkurl ne "") {
7943            if ($TheyMatch) {
7944              # Even though they are the same, still squeal if it's a raw IP
7945              # Ignore fax: and tel: (not ip but numeric)
7946              # https://github.com/MailScanner/v5/issues/224
7947              if ($numbertrap && $DisarmLinkURL !~ m/^(fax|tel)[:;]/i) {
7948                print MailScanner::Config::LanguageValue(0, 'numericlinkwarning')
7949                      . ' '
7950                      if $PhishingHighlight && !$AlreadyReported; # && !InPhishingWhitelist($linkurl);
7951                $DisarmPhishingFound = 1;
7952                $disarmedflag = 1;
7953                $linkurl = substr $linkurl, 0, 80;
7954                $squashedtext = substr $squashedtext, 0, 80;
7955                $DisarmDoneSomething{'phishing'} = 1 if $PhishingHighlight; #JKF1 $PhishingSubjectTag;
7956                use bytes; # Don't send UTF16 to syslog, it breaks!
7957                MailScanner::Log::NoticeLog('Found ip-based phishing fraud from ' .
7958                                          '%s in %s', $DisarmLinkURL, $id);
7959                                          #'%s in %s', $linkurl, $id);
7960              }
7961              # If it wasn't a raw IP, then everything looks fine
7962            } else {
7963              # They didn't match so it's definitely an attack
7964              print $possiblefraudstart . ' "' . $linkurl . '" ' .
7965                    MailScanner::Config::LanguageValue(0, 'possiblefraudend') .
7966                    ' ' if $PhishingHighlight && !$AlreadyReported; # && !InPhishingWhitelist($linkurl);
7967              $DisarmPhishingFound = 1;
7968              $disarmedflag = 1;
7969              $linkurl = substr $linkurl, 0, 80;
7970              $squashedtext = substr $squashedtext, 0, 80;
7971              $DisarmDoneSomething{'phishing'} = 1 if $PhishingHighlight; #JKF1 $PhishingSubjectTag;
7972              use bytes; # Don't send UTF16 to syslog, it breaks!
7973              MailScanner::Log::NoticeLog('Found phishing fraud from %s ' .
7974                                        'claiming to be %s in %s',
7975                                        $DisarmLinkURL, $squashedtext, $id);
7976                                        #$linkurl, $squashedtext, $id);
7977            }
7978            # End of less strict reporting code.
7979            # But it probably was a phishing attack so print it all out
7980            no bytes;
7981            print "$DisarmLinkText"; # JKF 20060820 $text";
7982            $DisarmLinkText = ""; # Reset state of automaton
7983          }
7984        }
7985        # End of less strict phishing net.
7986      } else {
7987        #
7988        # Strict Phishing Net Goes Here
7989
7990        # Is this an email?
7991        # https://github.com/MailScanner/v5/issues/229
7992        MailScanner::Log::DebugLog("DisarmLinkURL = $DisarmLinkURL");
7993        MailScanner::Log::DebugLog("linkurl = $linkurl");
7994
7995        if ($DisarmLinkURL =~ m/^mailto:/i ) {
7996          # Convert HTML entities, if present
7997          # https://github.com/MailScanner/v5/issues/335
7998          $linkurl = decode_entities($linkurl);
7999          if ( $linkurl =~ /@/ ) {
8000            my @list = split(/@/, $linkurl);
8001            if ( $emailuser ne "" && $list[0] ne $emailuser) {
8002              $alarm = 1;
8003            }
8004            $linkurl = $list[1];
8005          }
8006        }
8007
8008        # Ignore fax: and tel: (not ip but numeric)
8009        # https://github.com/MailScanner/v5/issues/224
8010        if ($alarm ||
8011          ($linkurl ne "" && $squashedtext !~ /^(w+\.)?\Q$linkurl\E\/?$/)
8012          || ($linkurl ne "" && $numbertrap && $DisarmLinkURL !~ m/^(fax|tel)[:;]/i)) {
8013
8014          unless (InPhishingWhitelist($linkurl)) {
8015            use bytes; # Don't send UTF16 to syslog, it breaks!
8016            if ($linkurl ne "" && $numbertrap && $linkurl eq $squashedtext && $DisarmLinkURL !~ m/^(fax|tel)[:;]/i) {
8017              # It's not a real phishing trap, just a use of numberic IP links
8018              print MailScanner::Config::LanguageValue(0, 'numericlinkwarning') .
8019                    ' ' if $PhishingHighlight && !$AlreadyReported;
8020            } else {
8021              # It's a phishing attack.
8022              print $possiblefraudstart . ' "' . $linkurl . '" ' .
8023                    MailScanner::Config::LanguageValue(0, 'possiblefraudend') .
8024                    ' ' if $PhishingHighlight && !$AlreadyReported;
8025            }
8026            $DisarmPhishingFound = 1;
8027            $disarmedflag = 1;
8028            $linkurl = substr $linkurl, 0, 80;
8029            $squashedtext = substr $squashedtext, 0, 80;
8030            $DisarmDoneSomething{'phishing'} = 1 if $PhishingHighlight; #JKF1 $PhishingSubjectTag;
8031            # Ignore fax: and tel: (not ip but numeric)
8032            # https://github.com/MailScanner/v5/issues/224
8033            if ($numbertrap && $DisarmLinkURL !~ m/^(fax|tel)[:;]/i) {
8034              MailScanner::Log::InfoLog('Found ip-based phishing fraud from ' .
8035                                        '%s in %s', $DisarmLinkURL, $id);
8036                                        #'%s in %s', $linkurl, $id);
8037            } else {
8038              MailScanner::Log::InfoLog('Found phishing fraud from %s ' .
8039                                        'claiming to be %s in %s',
8040                                        $DisarmLinkURL, $squashedtext, $id);
8041                                        #$linkurl, $squashedtext, $id);
8042            }
8043            #print STDERR "Fake\n";
8044            no bytes;
8045          }
8046        }
8047      }
8048      #print STDERR "End tag printed \"$DisarmLinkText$text\"\n";
8049      #print "$DisarmLinkText$text";
8050      #$DisarmLinkText = ""; # Reset state of automaton
8051      #print STDERR "Reset disarmlinktext\n";
8052      #
8053      # End of all phishing code
8054      #
8055    }
8056
8057    # Highlight Hidden URL?
8058    if ( $DisarmHidden && $disarmedflag != 1) {
8059      MailScanner::Log::DebugLog("Debug: DisarmLinkURL = %s", $DisarmLinkURL);
8060      MailScanner::Log::DebugLog("Debug: DisarmLinkText = %s", $DisarmLinkText);
8061      MailScanner::Log::DebugLog("Debug: squashedtext = %s", $squashedtext);
8062      MailScanner::Log::DebugLog("Debug: linkurl = %s", $linkurl);
8063      if ($squashedtext ne $linkurl && $squashedtext ne "www.$linkurl" && $DisarmLinkURL !~ m/^(mailto|fax|tel):/) {
8064        MailScanner::Log::DebugLog("Debug: Modifying Hidden URL");
8065        print "$DisarmLinkText" . ' ' . MailScanner::Config::LanguageValue(0, 'hiddenlinkwarningstart') . ' ' . $DisarmLinkURL . MailScanner::Config::LanguageValue(0, 'hiddenlinkwarningend');
8066        $DisarmDoneSomething{'hidden'} = 1;
8067        $DisarmLinkText = "";
8068      }
8069    }
8070    print "$DisarmLinkText$text";
8071    $DisarmLinkText = "";
8072  } elsif ($DisarmInsideLink) {
8073    # If inside a link, add the text to the link text to allow tags in links
8074    $DisarmLinkText .= $text;
8075  } else {
8076    # It is not a tag we worry about, so just print the text and continue.
8077    print $text;
8078  }
8079}
8080
8081my %CharToInternational = (
8082160,'nbsp',
8083161,'iexcl',
8084162,'cent',
8085163,'pound',
8086164,'curren',
8087165,'yen',
8088166,'brvbar',
8089167,'sect',
8090168,'uml',
8091169,'copy',
8092170,'ordf',
8093171,'laquo',
8094172,'not',
8095173,'shy',
8096174,'reg',
8097175,'macr',
8098176,'deg',
8099177,'plusmn',
8100178,'sup2',
8101179,'sup3',
8102180,'acute',
8103181,'micro',
8104182,'para',
8105183,'middot',
8106184,'cedil',
8107185,'sup1',
8108186,'ordm',
8109187,'raquo',
8110188,'frac14',
8111189,'frac12',
8112190,'frac34',
8113191,'iquest',
8114192,'Agrave',
8115193,'Aacute',
8116194,'Acirc',
8117195,'Atilde',
8118196,'Auml',
8119197,'Aring',
8120198,'AElig',
8121199,'Ccedil',
8122200,'Egrave',
8123201,'Eacute',
8124202,'Ecirc',
8125203,'Euml',
8126204,'Igrave',
8127205,'Iacute',
8128206,'Icirc',
8129207,'Iuml',
8130208,'ETH',
8131209,'Ntilde',
8132210,'Ograve',
8133211,'Oacute',
8134212,'Ocirc',
8135213,'Otilde',
8136214,'Ouml',
8137215,'times',
8138216,'Oslash',
8139217,'Ugrave',
8140218,'Uacute',
8141219,'Ucirc',
8142220,'Uuml',
8143221,'Yacute',
8144222,'THORN',
8145223,'szlig',
8146224,'agrave',
8147225,'aacute',
8148226,'acirc',
8149227,'atilde',
8150228,'auml',
8151229,'aring',
8152230,'aelig',
8153231,'ccedil',
8154232,'egrave',
8155233,'eacute',
8156234,'ecirc',
8157235,'euml',
8158236,'igrave',
8159237,'iacute',
8160238,'icirc',
8161239,'iuml',
8162240,'eth',
8163241,'ntilde',
8164242,'ograve',
8165243,'oacute',
8166244,'ocirc',
8167245,'otilde',
8168246,'ouml',
8169247,'divide',
8170248,'oslash',
8171249,'ugrave',
8172250,'uacute',
8173251,'ucirc',
8174252,'uuml',
8175253,'yacute',
8176254,'thorn',
8177255,'yuml'
8178);
8179
8180# Turn any character into an international version of it if it is in the range
8181# 160 to 255.
8182sub CharToIntnl {
8183  my $p = shift @_;
8184  # Passed in an 8-bit character.
8185  #print STDERR "Char in is $p\n";
8186  ($a) = unpack 'C', $p;
8187
8188  #print STDERR "Char is $a, $p\n";
8189
8190  # Bash char 160 (space) to nothing
8191  return '' if $a == 160;
8192  my $char = $CharToInternational{$a};
8193  return '&' . $char . ';' if $char ne "";
8194  return $p;
8195}
8196
8197# Like CharToIntnl but does entire string
8198sub StringToIntnl {
8199  my $original = shift;
8200
8201  # Much faster char conversion for whole strings
8202  my(@newlinkurl, $newlinkurl, $char);
8203  @newlinkurl = unpack("C*", $original); # Get an array of characters
8204  foreach (@newlinkurl) {
8205    next if $_ == 160;
8206    $char = $CharToInternational{$_};
8207    if (defined $char) {
8208      $newlinkurl .= '&' . $char . ';';
8209    } else {
8210      $newlinkurl .= chr($_);
8211    }
8212  }
8213  return $newlinkurl;
8214  #$linkurl = $newlinkurl unless $newlinkurl eq "";
8215  #$linkurl =~ s/./CharToIntnl("$&")/ge; -- Old slow version
8216}
8217
8218
8219# Clean up a link URL so it is suitable for phishing detection
8220# Return (clean url, alarm trigger value). An alarm trigger value non-zero
8221# means this is definitely likely to be a phishing trap, no matter what
8222# anything else says.
8223sub CleanLinkURL {
8224  my($DisarmLinkURL) = @_;
8225
8226  use bytes;
8227
8228  my($linkurl,$alarm,$mailto);
8229  $mailto = MailScanner::Config::Value("highlightmailtophishing");
8230  $alarm = 0;
8231  $linkurl = $DisarmLinkURL;
8232  $linkurl = lc($linkurl);
8233  #print STDERR "Cleaning up $linkurl\n";
8234  #$linkurl =~ s/\%a0//ig;
8235  #$linkurl =~ s/\%e9/&eacute;/ig;
8236
8237  $linkurl =~ s#%([0-9a-f][0-9a-f])#chr(hex('0x' . $1))#gei; # Unescape
8238  #print STDERR "2Cleaning up $linkurl\n";
8239
8240  $linkurl = StringToIntnl($linkurl);
8241  #$linkurl =~ s/./CharToIntnl("$&")/ge; -- Old slow version
8242
8243  #print STDERR "Was $linkurl\n";
8244  return ("",0) unless $linkurl =~ /[.\/]/; # Ignore if it is not a website at all
8245  #$linkurl = "" unless $linkurl =~ /[.\/]/; # Ignore if it is not a website at all
8246  $linkurl =~ s/\s+//g; # Remove any whitespace
8247  $linkurl =~ s/\\/\//g; # Change \ to / as many browsers do this
8248  # Don't ignore emails
8249  # https://github.com/MailScanner/v5/issues/229
8250  if ( $mailto =~ /0/) {
8251    return ("",0) if $linkurl =~ /\@/ && $linkurl !~ /\//; # Ignore emails
8252  }
8253  #$linkurl = "" if $linkurl =~ /\@/ && $linkurl !~ /\//; # Ignore emails
8254  $linkurl =~ s/[,.]+$//; # Remove trailing dots, but also commas while at it
8255  $linkurl =~ s/^\[\d*\]//; # Remove leading [numbers]
8256  $linkurl =~ s/^blocked[:\/]+//i; # Remove "blocked::" labels
8257  $linkurl =~ s/^blocked[:\/]+//i; # And again, in case there are 2
8258  $linkurl =~ s/^blocked[:\/]+//i; # And again, in case there are 3
8259  $linkurl =~ s/^blocked[:\/]+//i; # And again, in case there are 4
8260  $linkurl =~ s/^outbind:\/\/\d+\//http:\/\//i; # Remove "outbind://22/" type labels
8261  #$linkurl =~ s/^.*\<((https?|ftp|mailto|webcal):[^>]+)\>.*$/$1/i; # Turn blah-blah <http://link.here> blah-blah into "http://link.here"
8262  $linkurl = $DisarmBaseURL . '/' . $linkurl
8263    if $linkurl ne "" && $DisarmBaseURL ne "" &&
8264       $linkurl !~ /^(https?|ftp|mailto|webcal):/i;
8265  $linkurl =~ s/^(https?:\/\/[^:]+):80($|\D)/$1$2/i; # Remove http://....:80
8266  $linkurl =~ s/^(https?|ftp|webcal)[:;]\/\///i;
8267  # Remove fax and tel prefixes
8268  # https://github.com/MailScanner/v5/issues/224
8269  $linkurl =~ s/^(fax|tel)[:;]//i;
8270  # Don't ignore emails
8271  # https://github.com/MailScanner/v5/issues/229
8272  if ( $mailto =~ /0/) {
8273    return ("",0) if $linkurl =~ /^mailto:/i;
8274  }
8275  $linkurl =~ s/^mailto://i;
8276  #return ("",0) if $linkurl =~ /^ma[il]+to[:;]/i;
8277  #$linkurl = "" if $linkurl =~ /^ma[il]+to[:;]/i;
8278  $linkurl =~ s/[?\/].*$//; # Only compare up to the first '/' or '?'
8279  $linkurl =~ s/(\<\/?(br|p|ul)\>)*$//ig; # Remove trailing br, p, ul tags
8280  return ("",0) if $linkurl =~ /^file:/i; # Ignore file: URLs completely
8281  #$linkurl = "" if $linkurl =~ /^file:/i; # Ignore file: URLs completely
8282  return ("",0) if $linkurl =~ /^#/; # Ignore internal links completely
8283  #$linkurl = "" if $linkurl =~ /^#/; # Ignore internal links completely
8284  $linkurl =~ s/\/$//; # LinkURL is trimmed -- note
8285  $alarm = 1 if $linkurl =~ s/[\x00-\x1f[:^ascii:]]/_BAD_/g; # /\&\#/;
8286  $linkurl = 'JavaScript' if $linkurl =~ /^javascript:/i;
8287  ($linkurl, $alarm);
8288}
8289
8290# Return 1 if the hostname in $linkurl is in the safe sites file.
8291# Return 0 otherwise.
8292sub InPhishingWhitelist {
8293  my($linkurl) = @_;
8294
8295  # Quick lookup
8296  return 1 if $MailScanner::Config::PhishingWhitelist{$linkurl};
8297
8298  # Trim host. off the front of the hostname
8299  # This is needed to process wildcards in the whitelist
8300  while ($linkurl ne "" && $linkurl =~ s/^[^.]+\.//) {
8301    # And replace it with *. then look it up
8302    #print STDERR "Looking up *.$linkurl\n";
8303    return 1 if $MailScanner::Config::PhishingWhitelist{'*.' . $linkurl};
8304  }
8305
8306  return 0;
8307}
8308
8309# Return 1 if the hostname in $linkurl is in the bad sites file.
8310sub InPhishingBlacklist {
8311  my($linkurl) = @_;
8312
8313  # Quick lookup
8314  return 1 if $MailScanner::Config::PhishingBlacklist{$linkurl};
8315
8316  # Trim host. off the front of the hostname
8317  # This is needed to process wildcards in the blacklist
8318  while ($linkurl ne "" && $linkurl =~ s/^[^.]+\.//) {
8319    # And replace it with *. then look it up
8320    #print STDERR "Looking up *.$linkurl\n";
8321    return 1 if $MailScanner::Config::PhishingBlacklist{'*.' . $linkurl};
8322  }
8323
8324  return 0;
8325}
8326
8327# Convert 1 MIME entity from html to text using HTML::Parser.
8328sub HTMLEntityToText {
8329  my($this, $entity) = @_;
8330
8331  my($htmlname, $textname, $textfh, $htmlparser);
8332
8333  # Replace the MIME Content-Type
8334  $entity->head->mime_attr('Content-type' => 'text/plain');
8335
8336  # Replace the filename with a new one
8337  $htmlname = $entity->bodyhandle->path();
8338  $textname = $htmlname;
8339  $textname =~ s/\..?html?$//i; # Remove .htm .html .shtml
8340  $textname .= '.txt'; # This should always pass the filename checks
8341  $entity->bodyhandle->path($textname);
8342
8343  # Create the new file with the plain text in it
8344  $textfh = new FileHandle;
8345  unless ($textfh->open(">$textname")) {
8346    MailScanner::Log::WarnLog('Could not create plain text file %s', $textname);
8347    return;
8348  }
8349  $htmlparser = HTML::TokeParser::MailScanner->new($htmlname);
8350  # Turn links into text containing the URL
8351  $htmlparser->{textify}{a} = 'href';
8352  $htmlparser->{textify}{img} = 'src';
8353
8354  my $inscript = 0;
8355  my $instyle  = 0;
8356  while (my $token = $htmlparser->get_token()) {
8357    next if $token->[0] eq 'C';
8358    # Don't output the contents of style or script sections
8359    if ($token->[1] =~ /style/i) {
8360      $instyle = 1 if $token->[0] eq 'S';
8361      $instyle = 0 if $token->[0] eq 'E';
8362      next if $instyle;
8363    }
8364    if ($token->[1] =~ /script/i) {
8365      $inscript = 1 if $token->[0] eq 'S';
8366      $inscript = 0 if $token->[0] eq 'E';
8367      next if $inscript;
8368    }
8369    my $text = $htmlparser->get_trimmed_text();
8370    print $textfh $text . "\n" if $text;
8371  }
8372  $textfh->close();
8373}
8374
8375#
8376# Delete all the recipients from a message, completely
8377# This is currently only used in the forwarding system in the filename
8378# and filetype checks in SweepOther.pm
8379#
8380sub DeleteAllRecipients {
8381  my($message) = @_;
8382
8383  $global::MS->{mta}->DeleteRecipients($message);
8384  my(@dummy);
8385  @{$message->{to}} = @dummy;
8386  @{$message->{touser}} = @dummy;
8387  @{$message->{todomain}} = @dummy;
8388}
8389
8390# Quarantine a DoS attack message which has successfully killed
8391# MailScanner several times in the past.
8392sub QuarantineDOS {
8393    my($message) = @_;
8394
8395    if (MailScanner::Config::Value ('quarantinedenialofservice', $message) !~ /1/) {
8396        MailScanner::Log::WarnLog('Dropping message %s as it caused MailScanner to crash several times', $message->{id});
8397        last;
8398    };
8399
8400    MailScanner::Log::WarnLog('Quarantined message %s as it caused MailScanner to crash several times', $message->{id});
8401
8402    $message->{quarantinedinfections} = 1; # Stop it quarantining it twice
8403    $message->{deleted} = 1;
8404    $message->{denialofservice} = 1;
8405    $message->{abandoned} = 1;
8406    $message->{stillwarn} = 1;
8407    $message->{infected} = 1;
8408    $message->{virusinfected} = 0;
8409    $message->{otherinfected} = 1;
8410    my $report = MailScanner::Config::LanguageValue($message, 'mailscanner') . ': ' . MailScanner::Config::LanguageValue($message, 'killedmailscanner');
8411    $message->{reports}{""} = $report;
8412    $message->{allreports}{""} = $report;
8413    $message->{types}{""} = 'e'; # Error processing
8414    $message->{alltypes}{""} = 'e'; # Error processing
8415
8416    $global::MS->{quar}->StoreInfections($message);
8417}
8418
8419#
8420# This is an improvement to the default HTML-Parser routine for getting
8421# the text out of an HTML message. The only difference to their one is
8422# that I join the array of items together with spaces rather than "".
8423#
8424package HTML::TokeParser::MailScanner;
8425
8426use HTML::Entities qw(decode_entities);
8427
8428use vars qw(@ISA);
8429@ISA = qw(HTML::TokeParser);
8430
8431sub get_text
8432{
8433    my $self = shift;
8434    my $endat = shift;
8435    my @text;
8436    while (my $token = $self->get_token) {
8437        my $type = $token->[0];
8438        if ($type eq "T") {
8439            my $text = $token->[1];
8440            decode_entities($text) unless $token->[2];
8441            push(@text, $text);
8442        } elsif ($type =~ /^[SE]$/) {
8443            my $tag = $token->[1];
8444            if ($type eq "S") {
8445                if (exists $self->{textify}{$tag}) {
8446                    my $alt = $self->{textify}{$tag};
8447                    my $text;
8448                    if (ref($alt)) {
8449                        $text = &$alt(@$token);
8450                    } else {
8451                        $text = $token->[2]{$alt || "alt"};
8452                        $text = "[\U$tag]" unless defined $text;
8453                    }
8454                    push(@text, $text);
8455                    next;
8456                }
8457            } else {
8458                $tag = "/$tag";
8459            }
8460            if (!defined($endat) || $endat eq $tag) {
8461                 $self->unget_token($token);
8462                 last;
8463            }
8464        }
8465    }
8466    # JKF join("", @text);
8467    join(" ", @text);
8468}
8469
8470# And switch back to the original package we were in
8471package MailScanner::Message;
8472
8473#
8474# This is an improvement to the default MIME character set decoding that
8475# is done on attachment filenames. It decodes all the character sets it
8476# knows about, just as before. But instead of warning about character sets
8477# it doesn't know about (and removing characters in them), it strips
8478# out all the 8-bit characters (rare) and leaves the 7-bit ones (common).
8479#
8480sub WordDecoderKeep7Bit {
8481    local $_ = shift;
8482    # JKF 19/8/05 Allow characters with the top bit set.
8483    # JKF 19/8/05 Still blocks 16-bit characters though, as it should.
8484    #tr/\x00-\x7F/#/c;
8485    tr/\x00-\xFF/#/c;
8486    $_;
8487}
8488
8489#
8490# Create a subclass of MIME::Parser:FileInto so that I can over-ride
8491# the "evil filename" code with a slightly better one that detects
8492# filenames made up solely of whitespace, which breaks the Perl open().
8493# I have also improved exorcise_filename to detect and remove any leading
8494# or trailing whitespace, which should make life a lot easier for the
8495# virus scanner output parsers.
8496#
8497# For the original version see .../MIME/Parser/Filer.pm
8498
8499#package MIME::Parser::MailScanner;
8500#
8501#use vars qw(@ISA);
8502#@ISA = qw(MIME::Parser::Filer);
8503#
8504## A filename is evil unless it only contains any of the following:
8505##  \%\(\)\+\,\-\.0-9\=A-Z_a-z\x80-\xFF
8506## To get the correct pattern match string, do this:
8507## print '\x00-\x1F\x7F' . quotemeta(' !"£$&') . quotemeta("'") .
8508##       quotemeta('*/:/<>?@[\]^`{|}~') . "\n";
8509## print ' ' . quotemeta('%()+,-.') . '0-9' . quotemeta('=') .
8510##       'A-Z' . quotemeta('_') . 'a-z' . quotemeta('{}') . '\x80-\xFF' . "\n";
8511##
8512#sub evil_filename {
8513#    my ($self, $name) = @_;
8514#
8515#    #$self->debug("is this evil? '$name'");
8516#
8517#    #print STDERR "Testing \"$name\" to see if it is evil\n";
8518#    return 1 if (!defined($name) or ($name eq ''));   ### empty
8519#    return 1 if ($name =~ m{(^\s)|(\s+\Z)});  ### leading/trailing whitespace
8520#    return 1 if ($name =~ m{^\.+\Z});         ### dots
8521#    return 1 if ($name =~ tr{ \%\(\)\+\,\-\.0-9\=A-Z_a-z\{\}\x80-\xFF}{}c);
8522#    return 1 if ($self->{MPF_MaxName} and
8523#                 (length($name) > $self->{MPF_MaxName}));
8524#
8525#    #print STDERR "It is okay\n";
8526#    #$self->debug("it's ok");
8527#    0;
8528#}
8529#
8530#sub exorcise_filename {
8531#    my ($self, $fname) = @_;
8532#
8533#    ### Isolate to last path element:
8534#    my $last = $fname; $last =~ s{^.*[/\\\[\]:]}{};
8535#    if ($last and !$self->evil_filename($last)) {
8536#        #$self->debug("looks like I can use the last path element");
8537#        return $last;
8538#    }
8539#
8540#    # Try removing leading whitespace, trailing whitespace and all
8541#    # dangerous characters to start with.
8542#    $last =~ s/^\s+//;
8543#    $last =~ s/\s+\Z//;
8544#    $last =~ tr/ \%\(\)\+\,\-\.0-9\=A-Z_a-z\{\}\x80-\xFF//cd;
8545#    return $last unless $self->evil_filename($last);
8546#
8547#    ### Break last element into root and extension, and truncate:
8548#    my ($root, $ext) = (($last =~ /^(.*)\.([^\.]+)\Z/)
8549#                        ? ($1, $2)
8550#                        : ($last, ''));
8551#    # JKF Delete leading and trailing whitespace
8552#    $root =~ s/^\s+//;
8553#    $ext  =~ s/\s+$//;
8554#    $root = substr($root, 0, ($self->{MPF_TrimRoot} || 14));
8555#    $ext  = substr($ext,  0, ($self->{MPF_TrimExt}  ||  3));
8556#    $ext =~ /^\w+$/ or $ext = "dat";
8557#    my $trunc = $root . ($ext ? ".$ext" : '');
8558#    if (!$self->evil_filename($trunc)) {
8559#        #$self->debug("looks like I can use the truncated last path element");
8560#        return $trunc;
8561#    }
8562#
8563#    ### Hope that works:
8564#    undef;
8565#}
8566
8567
8568#
8569# Over-ride a function in MIME::Entity that gets called every time a MIME
8570# part is added to a message. The new version bails out if there were too
8571# many parts in the message. The limit will be read from the config.
8572# It just sets the entity to undef and relies on the supporting code to
8573# actually generate the error.
8574#
8575
8576package MIME::Entity;
8577
8578use vars qw(@ISA $EntityPartCounter $EntityPartCounterMax);
8579@ISA = qw(Mail::Internet);
8580
8581# Reset the counter and the limit
8582sub ResetMailScannerCounter {
8583    my($number) = @_;
8584    $EntityPartCounter = 0;
8585    $EntityPartCounterMax = $number;
8586}
8587
8588# Read the Counter
8589sub MailScannerCounter {
8590    return $EntityPartCounter || 0;
8591}
8592
8593
8594# Over-rise their add_part function with my own with counting added
8595sub add_part {
8596    my ($self, $part, $index) = @_;
8597    defined($index) or $index = -1;
8598
8599    # Incrememt the part counter so I can detect messages with too many parts
8600    $EntityPartCounter++;
8601    #print STDERR "Added a part. Counter = $EntityPartCounter, Max = " .
8602    #             $EntityPartCounterMax\n";
8603    return undef
8604      if $EntityPartCounterMax>0 && $EntityPartCounter > $EntityPartCounterMax;
8605
8606    ### Make $index count from the end if negative:
8607    $index = $#{$self->{ME_Parts}} + 2 + $index if ($index < 0);
8608    splice(@{$self->{ME_Parts}}, $index, 0, $part);
8609    $part;
8610}
8611
8612
8613#
8614# Over-ride a function in Mail::Header that parses the block of headers
8615# at the top of each MIME section. My improvement allows the first line
8616# of the header block to be missing, which breaks the original parser
8617# though the filename is still there.
8618#
8619
8620package Mail::Header;
8621our $FIELD_NAME = '[^\x00-\x1f\x7f-\xff :]+:';
8622
8623sub extract
8624{
8625 my($self, $lines) = @_;
8626 $self->empty;
8627
8628 # JKF Make this more robust by allowing first line of header to be missing
8629 shift @{$lines} while scalar(@{$lines}) &&
8630                       $lines->[0] =~ /\A[ \t]+/o &&
8631                       $lines->[1] =~ /\A$FIELD_NAME/o;
8632 # JKF End mod here
8633
8634    while(@$lines)
8635    {
8636        unless ($lines->[0] =~ /^($FIELD_NAME|From )/o) {
8637            if ($lines->[0] =~ /^$/o){
8638                last;
8639            }
8640            shift @$lines;
8641            next;
8642        }
8643        my $tag  = $1;
8644        my $line = shift @$lines;
8645        $line   .= shift @$lines
8646            while @$lines && $lines->[0] =~ /^[ \t]+/o;
8647
8648        ($tag, $line) = _fmt_line $self, $tag, $line;
8649
8650        _insert $self, $tag, $line, -1 if defined $line;
8651    }
8652
8653    shift @$lines
8654        if @$lines && $lines->[0] =~ /^\s*$/o;
8655
8656    $self;
8657}
8658
8659#
8660# Over-ride the read function similar to extract but reads from file
8661# Only change is my comment below. MAS
8662#
8663
8664sub read
8665{   my ($self, $fd) = @_;
8666
8667    $self->empty;
8668
8669    my ($tag, $line);
8670    my $ln = '';
8671    while(1)
8672    {   $ln = <$fd>;
8673
8674        if(defined $ln && defined $line && $ln =~ /\A[ \t]+/o)
8675        {   $line .= $ln;
8676            next;
8677        }
8678
8679        if(defined $line)
8680        {   ($tag, $line) = _fmt_line $self, $tag, $line;
8681            _insert $self, $tag, $line, -1
8682                if defined $line;
8683        }
8684
8685        # MAS - Change begins here
8686        if ( defined $ln && $ln =~ /^($FIELD_NAME|From )/o ) {
8687
8688            ($tag, $line) = ($1, $ln);
8689        } elsif ($ln =~ /^$/) {
8690            # Only stop on empty line - just drop a non-header,
8691            # non continuation line
8692            last;
8693        } # MAS End of change
8694    }
8695
8696    $self;
8697}
8698
8699##
8700## Over-ride the MIME boundary extracting code so that we fail to parse
8701## messages with an empty MIME boundary. Best I can do for now.
8702##
8703#
8704#package MIME::Parser::Reader;
8705#
8706#sub add_boundary {
8707#    my ($self, $bound) = @_;
8708#    unshift @{$self->{Bounds}}, $bound;   ### now at index 0
8709#    # JKF Fix problem with the Britney virus
8710#    $bound = "" if $bound eq '""';
8711#    # JKF End
8712#    $self->{BH}{"--$bound"}   = "DELIM $bound";
8713#    $self->{BH}{"--$bound--"} = "CLOSE $bound";
8714#    $self;
8715#}
8716
8717#
8718# Over-ride the hunt-for-uuencoded file code as it now needs to hunt for
8719# binhex-encoded text as well.
8720#
8721
8722#package MIME::Parser;
8723#
8724##------------------------------
8725##
8726## hunt_for_uuencode ENCODED, ENTITY
8727##
8728## I<Instance method.>
8729## Try to detect and dispatch embedded uuencode as a fake multipart message.
8730## Returns new entity or undef.
8731##
8732#sub hunt_for_uuencode {
8733#    my ($self, $ENCODED, $ent) = @_;
8734#    my ($good, $jkfis);
8735#    local $_;
8736#    $self->debug("sniffing around for UUENCODE");
8737#
8738#    ### Heuristic:
8739#    $ENCODED->seek(0,0);
8740#    while (defined($_ = $ENCODED->getline)) {
8741#        if ($good = /^begin [0-7]{3}/) {
8742#          $jkfis = 'uu';
8743#          last;
8744#        }
8745#        if ($good = /^\(This file must be converted with/i) {
8746#          $jkfis = 'binhex';
8747#          last;
8748#        }
8749#    }
8750#    $good or do { $self->debug("no one made the cut"); return 0 };
8751#
8752#    ### New entity:
8753#    my $top_ent = $ent->dup;      ### no data yet
8754#    $top_ent->make_multipart;
8755#    my @parts;
8756#
8757#    ### Made the first cut; on to the real stuff:
8758#    $ENCODED->seek(0,0);
8759#    my $decoder = MIME::Decoder->new(($jkfis eq 'uu')?'x-uuencode'
8760#                                                     :'binhex');
8761#    $self->whine("Found a $jkfis attachment");
8762#    my $pre;
8763#    while (1) {
8764#        my @bin_data;
8765#
8766#        ### Try next part:
8767#        my $out = IO::ScalarArray->new(\@bin_data);
8768#        eval { $decoder->decode($ENCODED, $out) }; last if $@;
8769#        my $preamble = $decoder->last_preamble;
8770#        my $filename = $decoder->last_filename;
8771#        my $mode     = $decoder->last_mode;
8772#
8773#        ### Get probable type:
8774#        my $type = 'application/octet-stream';
8775#        my ($ext) = $filename =~ /\.(\w+)\Z/; $ext = lc($ext || '');
8776#        if ($ext =~ /^(gif|jpe?g|xbm|xpm|png)\Z/) { $type = "image/$1" }
8777#
8778#        ### If we got our first preamble, create the text portion:
8779#        if (@$preamble and
8780#            (grep /\S/, @$preamble) and
8781#            !@parts) {
8782#            my $txt_ent = $self->interface('ENTITY_CLASS')->new;
8783#
8784#            MIME::Entity->build(Type => "text/plain",
8785#                                Data => "");
8786#            $txt_ent->bodyhandle($self->new_body_for($txt_ent->head));
8787#            my $io = $txt_ent->bodyhandle->open("w");
8788#            $io->print(@$preamble);
8789#            $io->close;
8790#            push @parts, $txt_ent;
8791#        }
8792#
8793#        ### Create the attachment:
8794#        ### We use the x-unix-mode convention from "dtmail 1.2.1 SunOS 5.6".
8795#        if (1) {
8796#            my $bin_ent = MIME::Entity->build(Type=>$type,
8797#                                              Filename=>$filename,
8798#                                              Data=>"");
8799#            $bin_ent->head->mime_attr('Content-type.x-unix-mode' => "0$mode");
8800#            $bin_ent->bodyhandle($self->new_body_for($bin_ent->head));
8801#            $bin_ent->bodyhandle->binmode(1);
8802#            my $io = $bin_ent->bodyhandle->open("w");
8803#            $io->print(@bin_data);
8804#            $io->close;
8805#            push @parts, $bin_ent;
8806#        }
8807#    }
8808#
8809#    ### Did we get anything?
8810#    @parts or return undef;
8811#
8812#    #
8813