1# <@LICENSE>
2# Licensed to the Apache Software Foundation (ASF) under one or more
3# contributor license agreements.  See the NOTICE file distributed with
4# this work for additional information regarding copyright ownership.
5# The ASF licenses this file to you under the Apache License, Version 2.0
6# (the "License"); you may not use this file except in compliance with
7# the License.  You may obtain a copy of the License at:
8#
9#     http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16# </@LICENSE>
17
18=head1 NAME
19
20Mail::SpamAssassin::Plugin::Razor2 - perform Razor check of messages
21
22=head1 SYNOPSIS
23
24  loadplugin     Mail::SpamAssassin::Plugin::Razor2
25
26=head1 DESCRIPTION
27
28Vipul's Razor is a distributed, collaborative, spam detection and
29filtering network based on user submissions of spam.  Detection is done
30with signatures that efficiently spot mutating spam content and user
31input is validated through reputation assignments.
32
33See http://razor.sourceforge.net/ for more information about Razor.
34
35=head1 USER SETTINGS
36
37=over 4
38
39=cut
40
41package Mail::SpamAssassin::Plugin::Razor2;
42
43use Mail::SpamAssassin::Plugin;
44use Mail::SpamAssassin::Logger;
45use Mail::SpamAssassin::Timeout;
46use Mail::SpamAssassin::SubProcBackChannel;
47use strict;
48use warnings;
49# use bytes;
50use re 'taint';
51
52use Storable;
53use POSIX qw(PIPE_BUF WNOHANG _exit);
54
55our @ISA = qw(Mail::SpamAssassin::Plugin);
56
# Constructor.  Builds the plugin object through the parent class, records
# whether the Razor2 client library can be used, registers the two eval
# rules implemented in this file and registers the configuration settings.
#
#   $class        - class name, or an existing object of this class
#   $mailsaobject - main object; its {local_tests_only} and {conf} are read
#
# Returns the new plugin object.
sub new {
  my ($class, $mailsaobject) = @_;

  # allow invocation on an instance as well as on the class name
  $class = ref($class) || $class;
  my $self = $class->SUPER::new($mailsaobject);
  bless($self, $class);

  # Razor is a network test: skip it for local-only runs, otherwise see
  # whether the client module can actually be loaded.
  my $available = 0;
  if ($mailsaobject->{local_tests_only}) {
    dbg("razor2: local tests only, skipping Razor");
  }
  elsif (eval { require Razor2::Client::Agent; }) {
    $available = 1;
    dbg("razor2: razor2 is available, version " . $Razor2::Client::Version::VERSION . "\n");
  }
  else {
    dbg("razor2: razor2 is not available");
  }
  $self->{razor2_available} = $available;

  # both eval rules are registered with the same rule type
  for my $eval_name (qw(check_razor2 check_razor2_range)) {
    $self->register_eval_rule($eval_name, $Mail::SpamAssassin::Conf::TYPE_FULL_EVALS);
  }

  $self->set_config($mailsaobject->{conf});

  return $self;
}
87
# Register this plugin's configuration settings (use_razor2, razor_fork,
# razor_timeout and razor_config) by handing a command table to
# $conf->{parser}->register_commands().  The POD interleaved below documents
# each setting next to its table entry.
#
#   $conf - configuration object; only its {parser} member is used here
sub set_config {
  my ($self, $conf) = @_;
  my @cmds;

=item use_razor2 (0|1)		(default: 1)

Whether to use Razor2, if it is available.

=cut

  # NOTE(review): this setting is documented under USER SETTINGS but is
  # registered with is_admin => 1 -- confirm which of the two is intended.
  push(@cmds, {
    setting => 'use_razor2',
    is_admin => 1,
    default => 1,
    type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
  });

=item razor_fork (0|1)		(default: 0)

Instead of running Razor2 synchronously, fork separate process for it and
read the results in later (similar to async DNS lookups).  Increases
throughput.  Experimental.

=cut

  # NOTE(review): also listed under USER SETTINGS while carrying
  # is_admin => 1 -- confirm.
  push(@cmds, {
    setting => 'razor_fork',
    is_admin => 1,
    default => 0,
    type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
  });

=back

=head1 ADMINISTRATOR SETTINGS

=over 4

=item razor_timeout n		(default: 5)

How many seconds you wait for Razor to complete before you go on without
the results

=cut

  # read by razor2_access() as the 'secs' value of its timeout object
  push(@cmds, {
    setting => 'razor_timeout',
    is_admin => 1,
    default => 5,
    type => $Mail::SpamAssassin::Conf::CONF_TYPE_DURATION,
  });

=item razor_config filename

Define the filename used to store Razor's configuration settings.
Currently this is left to Razor to decide.

=cut

  # no default here: razor2_access() passes the value (possibly undef)
  # straight through as the Razor agent's 'config' option
  push(@cmds, {
    setting => 'razor_config',
    is_admin => 1,
    type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
  });

  $conf->{parser}->register_commands(\@cmds);
}
155
# Run one Razor2 client operation on a message, under a timeout.
#
# Arguments:
#   $fulltext - the message as supplied by the caller; it is wrapped in a
#               one-element array and given to Razor's prepare_objects()
#   $type     - operation name, used to build the agent name "razor-$type";
#               'check' performs a lookup, any other value (this file uses
#               'report' and 'revoke') requires an identity and goes through
#               authenticate() and report()
#   $deadline - passed to Mail::SpamAssassin::Timeout as 'deadline'
#
# Returns 1 when the Razor conversation ran to completion, 0 otherwise.  In
# list context the status is followed by result hashes (filled for 'check'
# only):
#   { result => ... }                           $objects->[0]->{spam}
#   { part, engine, contested, confidence }     one per server response
#   { part, noresponse => 1 }                   entry that has no {resp}
#
# Errors raised inside the timed section are caught; they are reported via
# dbg() or warn() below and are not rethrown.
sub razor2_access {
  my ($self, $fulltext, $type, $deadline) = @_;
  my $timeout = $self->{main}->{conf}->{razor_timeout};
  my $return = 0;
  my @results;

  # logging facility name: 'razor2' for checks, 'reporter' for everything else
  my $debug = $type eq 'check' ? 'razor2' : 'reporter';

  # razor also debugs to stdout. argh. fix it to stderr...
  # (OLDOUT keeps a duplicate of the real STDOUT; it is restored at the end
  # of this sub under the same would_log() condition)
  if (would_log('dbg', $debug)) {
    open(OLDOUT, ">&STDOUT");
    open(STDOUT, ">&STDERR");
  }

  Mail::SpamAssassin::PerMsgStatus::enter_helper_run_mode($self);

  my $rnd = rand(0x7fffffff);  # save entropy before Razor clobbers it

  my $timer = Mail::SpamAssassin::Timeout->new(
               { secs => $timeout, deadline => $deadline });
  my $err = $timer->run_and_catch(sub {

    local ($^W) = 0;    # argh, warnings in Razor

    # everything's in the module!
    my $rc = Razor2::Client::Agent->new("razor-$type");

    if ($rc) {
      $rc->{opt} = {
	debug => (would_log('dbg', $debug) > 1),
	foreground => 1,
	config => $self->{main}->{conf}->{razor_config}
      };
      # no facility prefix on this die
      $rc->do_conf() or die "$debug: " . $rc->errstr;

      # Razor2 requires authentication for reporting
      my $ident;
      if ($type ne 'check') {
	# no facility prefix on this die
	$ident = $rc->get_ident
	    or die("$type requires authentication");
      }

      my @msg = ($fulltext);
      # no facility prefix on this die
      my $objects = $rc->prepare_objects(\@msg)
	  or die "$debug: error in prepare_objects";
      unless ($rc->get_server_info()) {
	my $error = $rc->errprefix("$debug: spamassassin") || "$debug: razor2 had unknown error during get_server_info";
	die $error;
      }

      # let's reset the alarm since get_server_info() calls
      # nextserver() which calls discover() which very likely will
      # reset the alarm for us ... how polite.  :(
      $timer->reset();

      # no facility prefix on this die
      my $sigs = $rc->compute_sigs($objects)
	  or die "$debug: error in compute_sigs";

      # if mail isn't whitelisted, check it out
      # see 'man razor-whitelist'
      # (the local whitelist is only consulted for 'check'; other operation
      # types always talk to the server)
      if ($type ne 'check' || ! $rc->local_check($objects->[0])) {
	# provide a better error message when servers are unavailable,
	# than "Bad file descriptor Died".
	$rc->connect() or die "$debug: could not connect to any servers\n";

	# Talk to the Razor server and do work
	if ($type eq 'check') {
	  unless ($rc->check($objects)) {
	    my $error = $rc->errprefix("$debug: spamassassin") || "$debug: razor2 had unknown error during check";
	    die $error;
	  }
	}
	else {
	  unless ($rc->authenticate($ident)) {
	    my $error = $rc->errprefix("$debug: spamassassin") || "$debug: razor2 had unknown error during authenticate";
	    die $error;
	    }
	  unless ($rc->report($objects)) {
	    my $error = $rc->errprefix("$debug: spamassassin") || "$debug: razor2 had unknown error during report";
	    die $error;
	  }
	}

	unless ($rc->disconnect()) {
	  my $error = $rc->errprefix("$debug: spamassassin") || "$debug: razor2 had unknown error during disconnect";
	  die $error;
	}
      }

      # Razor 2.14 says that if we get here, we did ok.
      $return = 1;

      # figure out if we have a log file we need to close...
      if (ref($rc->{logref}) && exists $rc->{logref}->{fd}) {
        # the fd can be stdout or stderr, so we need to find out if it is
        # so we don't close them by accident.  Note: we can't just
        # undef the fd here (like the IO::Handle manpage says we can)
        # because it won't actually close, unfortunately. :(
        my $untie = 1;
        foreach my $log (*STDOUT{IO}, *STDERR{IO}) {
          if ($log == $rc->{logref}->{fd}) {
            $untie = 0;
            last;
          }
        }
        if ($untie) {
          close($rc->{logref}->{fd})  or die "error closing log: $!";
        }
      }

      if ($type eq 'check') {
        # so $objects->[0] is the first (only) message, and ->{spam} is a general yes/no
        push(@results, { result => $objects->[0]->{spam} });

        # great for debugging, but leave this off!
        #use Data::Dumper;
        #print Dumper($objects),"\n";

        # ->{p} is for each part of the message
        # so go through each part, taking the highest cf we find
        # of any part that isn't contested (ct).  This helps avoid false
        # positives.  equals logic_method 4.
        # (this loop only collects the per-response values; the
        # highest-uncontested selection is done by _check_result())
        #
        # razor-agents < 2.14 have a different object format, so we now support both.
        # $objects->[0]->{resp} vs $objects->[0]->{p}->[part #]->{resp}
        my $part = 0;
        my $arrayref = $objects->[0]->{p} || $objects;
        if (defined $arrayref) {
          foreach my $cf (@{$arrayref}) {
            if (exists $cf->{resp}) {
              # {resp} and {sent} are read with the same index, so the
              # engine is taken from the {sent} entry at the position of
              # each response
              for (my $response=0; $response<@{$cf->{resp}}; $response++) {
                my $tmp = $cf->{resp}->[$response];
                my $tmpcf = $tmp->{cf}; # Part confidence
                my $tmpct = $tmp->{ct}; # Part contested?
                my $engine = $cf->{sent}->[$response]->{e};

                # These should always be set, but just in case ...
                $tmpcf = 0 unless defined $tmpcf;
                $tmpct = 0 unless defined $tmpct;
                $engine = 0 unless defined $engine;

                push(@results,
                      { part => $part, engine => $engine, contested => $tmpct, confidence => $tmpcf });
              }
            }
            else {
              push(@results, { part => $part, noresponse => 1 });
            }
            $part++;
          }
        }
        else {
          # If we have some new $objects format that isn't close to
          # the current razor-agents 2.x version, we won't FP but we
          # should alert in debug.
          dbg("$debug: it looks like the internal Razor object has changed format!");
        }
      }
    }
    else {
      warn "$debug: undefined Razor2::Client::Agent\n";
    }

  });

  # OK, that's enough Razor stuff. now, reset all that global
  # state it futzes with :(
  # work around serious brain damage in Razor2 (constant seed)
  $rnd ^= int(rand(0xffffffff));  # mix old acc with whatever came out of razor
  srand;                          # let Perl give it a try ...
  $rnd ^= int(rand(0xffffffff));  # ... and mix-in that too
  srand($rnd & 0x7fffffff);  # reseed, keep it unsigned 32-bit just in case

  Mail::SpamAssassin::PerMsgStatus::leave_helper_run_mode($self);

  if ($timer->timed_out()) {
    dbg("$debug: razor2 $type timed out after $timeout seconds");
  }

  if ($err) {
    chomp $err;
    # NOTE(review): this match is case-sensitive; if the system error text is
    # capitalised ("Network is unreachable") it falls through to the warn()
    # branch instead -- confirm whether that is intended.
    if ($err =~ /(?:could not connect|network is unreachable)/) {
      # make this a dbg(); SpamAssassin will still continue,
      # but without Razor checking.  otherwise there may be
      # DSNs and errors in syslog etc., yuck
      dbg("$debug: razor2 $type could not connect to any servers");
    } elsif ($err =~ /timeout/i) {
      dbg("$debug: razor2 $type timed out connecting to servers");
    } else {
      warn("$debug: razor2 $type failed: $! $err");
    }
  }

  # razor also debugs to stdout. argh. fix it to stderr...
  # (undo the redirection set up at the top of this sub)
  if (would_log('dbg', $debug)) {
    open(STDOUT, ">&OLDOUT");
    close OLDOUT;
  }

  # scalar context: status only; list context: status followed by results
  return wantarray ? ($return, @results) : $return;
}
361
# Plugin hook: report a message to Razor as spam.
#
#   $options - hash with {text} (the message) and {report} (report state;
#              {report}->{options}->{dont_report_to_razor} suppresses the
#              report, {report_available} / {report_return} are set to 1
#              on success)
#
# Does nothing when Razor is unavailable, local-only mode is on, use_razor2
# is off, or the caller opted out.
sub plugin_report {
  my ($self, $options) = @_;

  # guards, evaluated in order with short-circuiting
  return if !$self->{razor2_available}
         || $self->{main}->{local_tests_only}
         || !$self->{main}->{conf}->{use_razor2}
         || $options->{report}->{options}->{dont_report_to_razor};

  my $timer = $self->{main}->time_method("razor2_report");

  # scalar context on purpose: only the success flag is wanted
  my $reported = $self->razor2_access($options->{text}, 'report', undef);
  return info('reporter: could not report spam to Razor') unless $reported;

  $options->{report}->{report_available} = 1;
  info('reporter: spam reported to Razor');
  $options->{report}->{report_return} = 1;
}
381
# Plugin hook: revoke a previous spam report from Razor.
#
#   $options - hash with {text} (the message) and {revoke} (revoke state;
#              {revoke}->{options}->{dont_report_to_razor} suppresses the
#              call, {revoke_available} / {revoke_return} are set to 1 on
#              success)
#
# Does nothing further when Razor is unavailable, local-only mode is on,
# use_razor2 is off, or the caller opted out.
sub plugin_revoke {
  my ($self, $options) = @_;

  # the timing handle is obtained before the guards, so time_method() is
  # called even when the revocation ends up being skipped
  my $timer = $self->{main}->time_method("razor2_revoke");

  # guards, evaluated in order with short-circuiting
  return if !$self->{razor2_available}
         || $self->{main}->{local_tests_only}
         || !$self->{main}->{conf}->{use_razor2}
         || $options->{revoke}->{options}->{dont_report_to_razor};

  # scalar context on purpose: only the success flag is wanted
  my $revoked = $self->razor2_access($options->{text}, 'revoke', undef);
  return info('reporter: could not revoke spam from Razor') unless $revoked;

  $options->{revoke}->{revoke_available} = 1;
  info('reporter: spam revoked from Razor');
  $options->{revoke}->{revoke_return} = 1;
}
401
# Plugin hook run when configuration parsing finishes.  When razor_fork is
# enabled, every rule mapped to the check_razor2 or check_razor2_range eval
# (looked up through the conf's eval_to_rule table) gets its priority forced
# to -100 so the forked lookup is launched early.
#
#   $opts - hash whose {conf} holds razor_fork, eval_to_rule and priority
sub finish_parsing_start {
  my ($self, $opts) = @_;

  if ($opts->{conf}->{razor_fork}) {
    # same order as the evals are listed: plain check first, then range
    for my $eval_name (qw(check_razor2 check_razor2_range)) {
      for my $rulename (@{$opts->{conf}->{eval_to_rule}->{$eval_name}}) {
        dbg("razor2: adjusting rule $rulename priority to -100");
        $opts->{conf}->{priority}->{$rulename} = -100;
      }
    }
  }
}
418
# Eval rule: run the Razor2 check for the current message.
#
#   $pms  - per-message status object; razor2_* state is cached on it
#   $full - message text handed on to razor2_access()
#
# Returns 0 when Razor is unavailable or disabled, the cached
# $pms->{razor2_result} when one is already defined, and 0 when a check has
# already been started for this message.
#
# Without razor_fork the check runs synchronously and the value of
# _check_result() is returned.  With razor_fork the rule is marked pending, a
# child process is forked to run the check, and 0 is returned; the child
# writes its Storable-frozen results to the backchannel, from where
# _check_forked_result() (called by check_tick / check_cleanup) collects them.
sub check_razor2 {
  my ($self, $pms, $full) = @_;

  return 0 unless $self->{razor2_available};
  return 0 unless $self->{main}->{conf}->{use_razor2};

  return $pms->{razor2_result} if (defined $pms->{razor2_result});

  # only ever start one check per message
  return 0 if $pms->{razor2_running};
  $pms->{razor2_running} = 1;

  my $timer = $self->{main}->time_method("check_razor2");

  ## non-forking method

  if (!$self->{main}->{conf}->{razor_fork}) {
    # TODO: check for cache header, set results appropriately
    # do it this way to make it easier to get out the results later from the
    # netcache plugin ... what netcache plugin?
    # (list context: the leading status value is discarded, only the result
    # hashes are kept)
    (undef, my @results) =
      $self->razor2_access($full, 'check', $pms->{master_deadline});
    return $self->_check_result($pms, \@results);
  }

  ## forking method

  $pms->{razor2_rulename} = $pms->get_current_eval_rule_name();
  $pms->rule_pending($pms->{razor2_rulename}); # mark async

  # create socketpair for communication
  $pms->{razor2_backchannel} = Mail::SpamAssassin::SubProcBackChannel->new();
  my $back_selector = '';
  $pms->{razor2_backchannel}->set_selector(\$back_selector);
  # NOTE(review): the "eval { ... } or do" form treats a false return value
  # from the setup call the same as an exception -- presumably the call
  # returns true on success; confirm against SubProcBackChannel.
  eval {
    $pms->{razor2_backchannel}->setup_backchannel_parent_pre_fork();
  } or do {
    dbg("razor2: backchannel pre-setup failed: $@");
    delete $pms->{razor2_backchannel};
    return 0;
  };

  my $pid = fork();
  if (!defined $pid) {
    info("razor2: child fork failed: $!");
    delete $pms->{razor2_backchannel};
    return 0;
  }
  if (!$pid) {
    # --- child process: run the check, send back the results, exit ---
    $0 = "$0 (razor2)";
    $SIG{CHLD} = 'DEFAULT';
    $SIG{PIPE} = 'IGNORE';
    # on any of the listed signals: log (best effort) and leave immediately
    $SIG{$_} = sub {
      eval { dbg("razor2: child process $$ caught signal $_[0]"); };
      _exit(6);  # avoid END and destructor processing
      kill('KILL',$$);  # still kicking? die!
      } foreach qw(INT HUP TERM TSTP QUIT USR1 USR2);
    dbg("razor2: child process $$ forked");
    $pms->{razor2_backchannel}->setup_backchannel_child_post_fork();
    (undef, my @results) =
      $self->razor2_access($full, 'check', $pms->{master_deadline});
    # serialize the result list so it can be sent over the backchannel
    my $backmsg;
    eval {
      $backmsg = Storable::freeze(\@results);
    };
    if ($@) {
      dbg("razor2: child return value freeze failed: $@");
      _exit(0); # avoid END and destructor processing
    }
    if (!syswrite($pms->{razor2_backchannel}->{parent}, $backmsg)) {
      dbg("razor2: child backchannel write failed: $!");
    }
    _exit(0); # avoid END and destructor processing
  }

  # --- parent process: remember the child and return without a result ---
  $pms->{razor2_pid} = $pid;

  eval {
    $pms->{razor2_backchannel}->setup_backchannel_parent_post_fork($pid);
  } or do {
    dbg("razor2: backchannel post-setup failed: $@");
    delete $pms->{razor2_backchannel};
    return 0;
  };

  # the hit, if any, is recorded later by _check_result() via got_hit()
  return 0;
}
505
# Plugin hook: poll the forked Razor2 child for results without forcing a
# wait ($finish = 0).  Returns whatever _check_forked_result() returns.
sub check_tick {
  my $self = shift;
  my $opts = shift;

  my $pms = $opts->{permsgstatus};
  return $self->_check_forked_result($pms, 0);
}
510
# Plugin hook: final collection of the forked Razor2 child's results
# ($finish = 1).  Returns whatever _check_forked_result() returns.
sub check_cleanup {
  my $self = shift;
  my $opts = shift;

  my $pms = $opts->{permsgstatus};
  return $self->_check_forked_result($pms, 1);
}
515
# Collect the results of a forked Razor2 check.
#
#   $pms    - per-message status object holding razor2_backchannel,
#             razor2_pid and razor2_rulename
#   $finish - true to block until the child has exited (unless the scan is
#             being aborted), false to only poll
#
# Sets $pms->{razor2_abort} when the message's deadline was exceeded or the
# scan was short-circuited; in that case a still-running child is terminated
# (TERM, then KILL after one second) and its state is dropped.
#
# Once the child has exited, the rule is marked ready, the frozen result list
# is read from the backchannel and passed to _check_result(), whose value is
# returned.  Every other path (nothing to collect, child still running, read
# or thaw failure) returns 0.
sub _check_forked_result {
  my ($self, $pms, $finish) = @_;

  return 0 if !$pms->{razor2_backchannel};
  return 0 if !$pms->{razor2_pid};

  my $timer = $self->{main}->time_method("check_razor2");

  $pms->{razor2_abort} = $pms->{deadline_exceeded} || $pms->{shortcircuited};

  my $kid_pid = $pms->{razor2_pid};
  # if $finish, force waiting for the child
  my $pid = waitpid($kid_pid, $finish && !$pms->{razor2_abort} ? 0 : WNOHANG);
  if ($pid == 0) {
    #dbg("razor2: child process $kid_pid not finished yet, trying later");
    if ($pms->{razor2_abort}) {
      dbg("razor2: bailing out due to deadline/shortcircuit");
      kill('TERM', $kid_pid);
      # give the child one second to go away before using KILL
      if (waitpid($kid_pid, WNOHANG) == 0) {
        sleep(1);
        if (waitpid($kid_pid, WNOHANG) == 0) {
          dbg("razor2: child process $kid_pid still alive, KILL");
          kill('KILL', $kid_pid);
          waitpid($kid_pid, 0);
        }
      }
      delete $pms->{razor2_pid};
      delete $pms->{razor2_backchannel};
    }
    return 0;
  } elsif ($pid == -1) {
    # child does not exist?
    dbg("razor2: child process $kid_pid already handled?");
    delete $pms->{razor2_backchannel};
    return 0;
  }

  $pms->rule_ready($pms->{razor2_rulename}); # mark rule ready for metas

  dbg("razor2: child process $kid_pid finished, reading results");

  my $backmsg;
  my $ret = sysread($pms->{razor2_backchannel}->{latest_kid_fh}, $backmsg, PIPE_BUF);
  if (!$ret) {
    # sysread() gives undef on error (reason in $!) and 0 at end of file.
    # Test definedness first: comparing an undef $ret numerically would warn
    # and would report "0" instead of the real error.  $! is captured before
    # anything else can overwrite it.
    my $reason = defined $ret ? 0 : "$!";
    dbg("razor2: could not read result from child: ".$reason);
    delete $pms->{razor2_backchannel};
    return 0;
  }

  delete $pms->{razor2_backchannel};

  my $results;
  eval {
    $results = Storable::thaw($backmsg);
    1;
  } or do {
    dbg("razor2: child return value thaw failed: $@");
    return 0;
  };
  # the child freezes an array reference; anything else (including undef)
  # cannot be iterated by _check_result()
  if (ref($results) ne 'ARRAY') {
    dbg("razor2: child return value thaw failed: unexpected data");
    return 0;
  }

  return $self->_check_result($pms, $results);
}
578
# Digest the result list produced by razor2_access() for a 'check'.
#
#   $pms     - per-message status object; razor2_result and razor2_cf_score
#              are stored on it
#   $results - array ref of result hashes ({result}, {noresponse}, or
#              {part, engine, contested, confidence})
#
# Announces the raw results to other plugins ('process_razor_result'), keeps
# the overall verdict and, per engine, the highest confidence among
# uncontested responses.  In forked mode it also records the rule hit and
# replays any check_razor2_range() calls that were deferred.
#
# Returns $pms->{razor2_result} (0 when no positive verdict was seen).
sub _check_result {
  my ($self, $pms, $results) = @_;

  $self->{main}->call_plugins('process_razor_result',
    { results => $results, permsgstatus => $pms });

  for my $entry (@{$results}) {
    # overall yes/no verdict: only a true value is ever stored
    if (exists $entry->{result}) {
      $pms->{razor2_result} = $entry->{result} if $entry->{result};
      next;
    }

    if ($entry->{noresponse}) {
      dbg("razor2: part=$entry->{part} noresponse");
      next;
    }

    my ($part, $engine, $contested, $confidence) =
      @{$entry}{qw(part engine contested confidence)};
    dbg("razor2: part=$part engine=$engine contested=$contested confidence=$confidence");

    # contested responses never contribute to the score
    next if $contested;

    my $best = $pms->{razor2_cf_score}->{$engine} || 0;
    $pms->{razor2_cf_score}->{$engine} = $confidence if $confidence > $best;
  }

  # make sure both fields exist even when nothing was learned
  $pms->{razor2_result} ||= 0;
  $pms->{razor2_cf_score} ||= {};

  dbg("razor2: results: spam? " . $pms->{razor2_result});
  for my $engine (keys %{$pms->{razor2_cf_score}}) {
    my $score = $pms->{razor2_cf_score}->{$engine};
    dbg("razor2: results: engine $engine, highest cf score: $score");
  }

  if ($self->{main}->{conf}->{razor_fork}) {
    # forked needs to run got_hit()
    $pms->got_hit($pms->{razor2_rulename}, "", ruletype => 'eval')
      if $pms->{razor2_rulename} && $pms->{razor2_result};

    # forked needs to run range callbacks
    if ($pms->{razor2_range_callbacks}) {
      for my $deferred (@{$pms->{razor2_range_callbacks}}) {
        $self->check_razor2_range($pms, '', @{$deferred});
      }
    }
  }

  return $pms->{razor2_result};
}
631
# Eval rule: test whether the Razor confidence (cf) value recorded for the
# message lies within [$min, $max].
#
#   $pms      - per-message status object
#   $body     - message text, passed on to check_razor2() if it has to run
#   $engine   - engine whose cf score is tested; when false, the highest
#               score over all engines is used
#   $min,$max - inclusive bounds
#   $rulename - optional; supplied when replayed as a deferred callback,
#               otherwise taken from the current eval rule
#
# Returns 1 on a match (after logging the cf value), 0 otherwise, and an
# empty/undef value when Razor is unavailable, disabled or the check was
# aborted.  In forked mode without results yet, the call is queued on
# $pms->{razor2_range_callbacks} and 0 is returned.
sub check_razor2_range {
  my ($self, $pms, $body, $engine, $min, $max, $rulename) = @_;

  # nothing to do if Razor2 cannot be used or has been switched off
  return unless $self->{razor2_available}
             && $self->{main}->{conf}->{use_razor2};

  # a deferred callback supplies the rule name itself
  $rulename = $pms->get_current_eval_rule_name() unless defined $rulename;

  if ($pms->{razor2_abort}) {
    $pms->rule_ready($rulename); # mark rule ready for metas
    return;
  }

  if ($self->{main}->{conf}->{razor_fork}) {
    # forked: results arrive asynchronously, so queue this call until then
    unless (defined $pms->{razor2_result}) {
      $pms->rule_pending($rulename); # mark async
      dbg("razor2: delaying check_razor2_range call for $rulename");
      # array matches check_razor2_range() argument order
      push @{$pms->{razor2_range_callbacks}},
        [$engine, $min, $max, $rulename];
      return 0;
    }
  }
  elsif (!$pms->{razor2_running}) {
    # non-forking: run the Razor2 check now if it has not been started yet
    $self->check_razor2($pms, $body);
  }

  $pms->rule_ready($rulename); # mark rule ready for metas

  my $cf = 0;
  if ($engine) {
    $cf = $pms->{razor2_cf_score}->{$engine};
    return 0 unless defined $cf;
  }
  else {
    # no engine requested: use the highest cf found for any engine
    for my $score (values %{$pms->{razor2_cf_score}}) {
      $cf = $score if $score > $cf;
    }
  }

  return 0 unless $cf >= $min && $cf <= $max;

  $pms->test_log(sprintf("cf: %3d", $cf), $rulename);
  # forked mode has to record the hit itself
  $pms->got_hit($rulename, "", ruletype => 'eval')
    if $self->{main}->{conf}->{razor_fork};
  return 1;
}
699
# Version feature flag: always returns 1.
# (presumably lets callers test whether this plugin version supports
# razor_fork -- confirm against the callers)
sub has_fork {
  return 1;
}
702
7031;
704
705=back
706
707=cut
708