# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>

=head1 NAME

Mail::SpamAssassin::Plugin::Razor2 - perform Razor check of messages

=head1 SYNOPSIS

  loadplugin     Mail::SpamAssassin::Plugin::Razor2

=head1 DESCRIPTION

Vipul's Razor is a distributed, collaborative, spam detection and
filtering network based on user submissions of spam.  Detection is done
with signatures that efficiently spot mutating spam content and user
input is validated through reputation assignments.

See http://razor.sourceforge.net/ for more information about Razor.

=head1 USER SETTINGS

=over 4

=cut

package Mail::SpamAssassin::Plugin::Razor2;

use Mail::SpamAssassin::Plugin;
use Mail::SpamAssassin::Logger;
use Mail::SpamAssassin::Timeout;
use Mail::SpamAssassin::SubProcBackChannel;
use strict;
use warnings;
# use bytes;
use re 'taint';

use Storable;
use POSIX qw(PIPE_BUF WNOHANG _exit);

our @ISA = qw(Mail::SpamAssassin::Plugin);

# Constructor.
#
# Records in $self->{razor2_available} whether Razor2::Client::Agent could
# be loaded (it is not even attempted when local_tests_only is set),
# registers the two eval rules implemented below and the plugin's
# configuration settings.
sub new {
  my $class = shift;
  my $mailsaobject = shift;

  $class = ref($class) || $class;
  my $self = $class->SUPER::new($mailsaobject);
  bless ($self, $class);

  # figure out if razor is even available or not ...
  $self->{razor2_available} = 0;
  if ($mailsaobject->{local_tests_only}) {
    dbg("razor2: local tests only, skipping Razor");
  }
  else {
    if (eval { require Razor2::Client::Agent; }) {
      $self->{razor2_available} = 1;
      dbg("razor2: razor2 is available, version " . $Razor2::Client::Version::VERSION . "\n");
    }
    else {
      dbg("razor2: razor2 is not available");
    }
  }

  $self->register_eval_rule("check_razor2", $Mail::SpamAssassin::Conf::TYPE_FULL_EVALS);
  $self->register_eval_rule("check_razor2_range", $Mail::SpamAssassin::Conf::TYPE_FULL_EVALS);

  $self->set_config($mailsaobject->{conf});

  return $self;
}

# Registers the configuration settings of this plugin (use_razor2,
# razor_fork, razor_timeout, razor_config) with the configuration parser.
# The POD interleaved below is the user-facing documentation of each one.
sub set_config {
  my ($self, $conf) = @_;
  my @cmds;

=item use_razor2 (0|1)		(default: 1)

Whether to use Razor2, if it is available.

=cut

  push(@cmds, {
    setting => 'use_razor2',
    is_admin => 1,
    default => 1,
    type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
  });

=item razor_fork (0|1)		(default: 0)

Instead of running Razor2 synchronously, fork separate process for it and
read the results in later (similar to async DNS lookups).  Increases
throughput.  Experimental.

=cut

  push(@cmds, {
    setting => 'razor_fork',
    is_admin => 1,
    default => 0,
    type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
  });

=back

=head1 ADMINISTRATOR SETTINGS

=over 4

=item razor_timeout n		(default: 5)

How many seconds you wait for Razor to complete before you go on without
the results.

=cut

  push(@cmds, {
    setting => 'razor_timeout',
    is_admin => 1,
    default => 5,
    type => $Mail::SpamAssassin::Conf::CONF_TYPE_DURATION,
  });

=item razor_config filename

Define the filename used to store Razor's configuration settings.
Currently this is left to Razor to decide.

=cut

  push(@cmds, {
    setting => 'razor_config',
    is_admin => 1,
    type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING,
  });

  $conf->{parser}->register_commands(\@cmds);
}

# Runs one Razor2 agent operation on a message, bounded by razor_timeout
# and the optional $deadline.
#
#   $fulltext - the message, handed to Razor's prepare_objects()
#   $type     - 'check', or a reporting operation ('report' / 'revoke' are
#               what the callers in this file pass); used to build the
#               agent name "razor-$type"
#   $deadline - passed through to Mail::SpamAssassin::Timeout; may be undef
#
# Returns $return (1 if the Razor conversation ran to completion, else 0).
# In list context the per-message/per-part result hashes collected for a
# 'check' are appended: ($return, @results).
#
# Failures inside the Razor code are caught by the timer and logged here;
# they are not propagated to the caller.
sub razor2_access {
  my ($self, $fulltext, $type, $deadline) = @_;
  my $timeout = $self->{main}->{conf}->{razor_timeout};
  my $return = 0;
  my @results;

  my $debug = $type eq 'check' ? 'razor2' : 'reporter';

  # razor also debugs to stdout. argh. fix it to stderr...
  # STDOUT is only redirected once it has been saved successfully, and it
  # is restored below if (and only if) it was saved, so a failed dup can
  # never leave us without a usable STDOUT.
  my $saved_stdout;
  if (would_log('dbg', $debug)) {
    if (open($saved_stdout, '>&', \*STDOUT)) {
      open(STDOUT, '>&', \*STDERR)
        or dbg("$debug: could not redirect STDOUT to STDERR: $!");
    }
    else {
      dbg("$debug: could not save STDOUT: $!");
      undef $saved_stdout;
    }
  }

  Mail::SpamAssassin::PerMsgStatus::enter_helper_run_mode($self);

  my $rnd = rand(0x7fffffff);  # save entropy before Razor clobbers it

  my $timer = Mail::SpamAssassin::Timeout->new(
               { secs => $timeout, deadline => $deadline });
  my $err = $timer->run_and_catch(sub {

    local ($^W) = 0;    # argh, warnings in Razor

    # everything's in the module!
    my $rc = Razor2::Client::Agent->new("razor-$type");

    if ($rc) {
      $rc->{opt} = {
        debug => (would_log('dbg', $debug) > 1),
        foreground => 1,
        config => $self->{main}->{conf}->{razor_config}
      };
      # no facility prefix on this die
      $rc->do_conf() or die "$debug: " . $rc->errstr;

      # Razor2 requires authentication for reporting
      my $ident;
      if ($type ne 'check') {
        # no facility prefix on this die
        $ident = $rc->get_ident
            or die("$type requires authentication");
      }

      my @msg = ($fulltext);
      # no facility prefix on this die
      my $objects = $rc->prepare_objects(\@msg)
          or die "$debug: error in prepare_objects";
      unless ($rc->get_server_info()) {
        my $error = $rc->errprefix("$debug: spamassassin") || "$debug: razor2 had unknown error during get_server_info";
        die $error;
      }

      # let's reset the alarm since get_server_info() calls
      # nextserver() which calls discover() which very likely will
      # reset the alarm for us ... how polite.  :(
      $timer->reset();

      # no facility prefix on this die
      my $sigs = $rc->compute_sigs($objects)
          or die "$debug: error in compute_sigs";

      # if mail isn't whitelisted, check it out
      # see 'man razor-whitelist'
      if ($type ne 'check' || ! $rc->local_check($objects->[0])) {
        # provide a better error message when servers are unavailable,
        # than "Bad file descriptor Died".
        $rc->connect() or die "$debug: could not connect to any servers\n";

        # Talk to the Razor server and do work
        if ($type eq 'check') {
          unless ($rc->check($objects)) {
            my $error = $rc->errprefix("$debug: spamassassin") || "$debug: razor2 had unknown error during check";
            die $error;
          }
        }
        else {
          unless ($rc->authenticate($ident)) {
            my $error = $rc->errprefix("$debug: spamassassin") || "$debug: razor2 had unknown error during authenticate";
            die $error;
          }
          unless ($rc->report($objects)) {
            my $error = $rc->errprefix("$debug: spamassassin") || "$debug: razor2 had unknown error during report";
            die $error;
          }
        }

        unless ($rc->disconnect()) {
          my $error = $rc->errprefix("$debug: spamassassin") || "$debug: razor2 had unknown error during disconnect";
          die $error;
        }
      }

      # Razor 2.14 says that if we get here, we did ok.
      $return = 1;

      # figure out if we have a log file we need to close...
      if (ref($rc->{logref}) && exists $rc->{logref}->{fd}) {
        # the fd can be stdout or stderr, so we need to find out if it is
        # so we don't close them by accident.  Note: we can't just
        # undef the fd here (like the IO::Handle manpage says we can)
        # because it won't actually close, unfortunately. :(
        my $untie = 1;
        foreach my $log (*STDOUT{IO}, *STDERR{IO}) {
          if ($log == $rc->{logref}->{fd}) {
            $untie = 0;
            last;
          }
        }
        if ($untie) {
          close($rc->{logref}->{fd})  or die "error closing log: $!";
        }
      }

      if ($type eq 'check') {
        # so $objects->[0] is the first (only) message, and ->{spam} is a general yes/no
        push(@results, { result => $objects->[0]->{spam} });

        # great for debugging, but leave this off!
        #use Data::Dumper;
        #print Dumper($objects),"\n";

        # ->{p} is for each part of the message
        # so go through each part, taking the highest cf we find
        # of any part that isn't contested (ct).  This helps avoid false
        # positives.  equals logic_method 4.
        #
        # razor-agents < 2.14 have a different object format, so we now support both.
        # $objects->[0]->{resp} vs $objects->[0]->{p}->[part #]->{resp}
        my $part = 0;
        my $arrayref = $objects->[0]->{p} || $objects;
        if (defined $arrayref) {
          foreach my $cf (@{$arrayref}) {
            if (exists $cf->{resp}) {
              for (my $response=0; $response<@{$cf->{resp}}; $response++) {
                my $tmp = $cf->{resp}->[$response];
                my $tmpcf = $tmp->{cf};   # Part confidence
                my $tmpct = $tmp->{ct};   # Part contested?
                my $engine = $cf->{sent}->[$response]->{e};

                # These should always be set, but just in case ...
                $tmpcf = 0 unless defined $tmpcf;
                $tmpct = 0 unless defined $tmpct;
                $engine = 0 unless defined $engine;

                push(@results,
                     { part => $part, engine => $engine, contested => $tmpct, confidence => $tmpcf });
              }
            }
            else {
              push(@results, { part => $part, noresponse => 1 });
            }
            $part++;
          }
        }
        else {
          # If we have some new $objects format that isn't close to
          # the current razor-agents 2.x version, we won't FP but we
          # should alert in debug.
          dbg("$debug: it looks like the internal Razor object has changed format!");
        }
      }
    }
    else {
      warn "$debug: undefined Razor2::Client::Agent\n";
    }

  });

  # OK, that's enough Razor stuff. now, reset all that global
  # state it futzes with :(
  # work around serious brain damage in Razor2 (constant seed)
  $rnd ^= int(rand(0xffffffff));  # mix old acc with whatever came out of razor
  srand;                          # let Perl give it a try ...
  $rnd ^= int(rand(0xffffffff));  # ... and mix-in that too
  srand($rnd & 0x7fffffff);  # reseed, keep it unsigned 32-bit just in case

  Mail::SpamAssassin::PerMsgStatus::leave_helper_run_mode($self);

  if ($timer->timed_out()) {
    dbg("$debug: razor2 $type timed out after $timeout seconds");
  }

  if ($err) {
    chomp $err;
    if ($err =~ /(?:could not connect|network is unreachable)/) {
      # make this a dbg(); SpamAssassin will still continue,
      # but without Razor checking.  otherwise there may be
      # DSNs and errors in syslog etc., yuck
      dbg("$debug: razor2 $type could not connect to any servers");
    } elsif ($err =~ /timeout/i) {
      dbg("$debug: razor2 $type timed out connecting to servers");
    } else {
      warn("$debug: razor2 $type failed: $! $err");
    }
  }

  # put STDOUT back where it was, if we moved it above
  if (defined $saved_stdout) {
    open(STDOUT, '>&', $saved_stdout)
      or dbg("$debug: could not restore STDOUT: $!");
    close $saved_stdout;
  }

  return wantarray ? ($return, @results) : $return;
}

# Plugin hook: report $options->{text} to Razor as spam.
#
# Does nothing when Razor is unavailable, network tests are off, use_razor2
# is disabled, or the reporter asked not to report to Razor.  On success
# sets report_available and report_return on $options->{report}.
sub plugin_report {
  my ($self, $options) = @_;

  return unless $self->{razor2_available};
  return if $self->{main}->{local_tests_only};
  return unless $self->{main}->{conf}->{use_razor2};
  return if $options->{report}->{options}->{dont_report_to_razor};

  # NOTE(review): $timer is never read; presumably the object returned by
  # time_method() records the elapsed time when it goes out of scope.
  my $timer = $self->{main}->time_method("razor2_report");

  if ($self->razor2_access($options->{text}, 'report', undef)) {
    $options->{report}->{report_available} = 1;
    info('reporter: spam reported to Razor');
    $options->{report}->{report_return} = 1;
  }
  else {
    info('reporter: could not report spam to Razor');
  }
}

# Plugin hook: revoke a previous Razor spam report for $options->{text}.
#
# Skipped under the same conditions as plugin_report().  On success sets
# revoke_available and revoke_return on $options->{revoke}.
sub plugin_revoke {
  my ($self, $options) = @_;

  my $timer = $self->{main}->time_method("razor2_revoke");

  return unless $self->{razor2_available};
  return if $self->{main}->{local_tests_only};
  return unless $self->{main}->{conf}->{use_razor2};
  return if $options->{revoke}->{options}->{dont_report_to_razor};

  if ($self->razor2_access($options->{text}, 'revoke', undef)) {
    $options->{revoke}->{revoke_available} = 1;
    info('reporter: spam revoked from Razor');
    $options->{revoke}->{revoke_return} = 1;
  }
  else {
    info('reporter: could not revoke spam from Razor');
  }
}

# Plugin hook: when razor_fork is enabled, force the priority of every
# rule that uses check_razor2 or check_razor2_range to -100 so that the
# forked lookup is launched early.
sub finish_parsing_start {
  my ($self, $opts) = @_;

  # If forking, hard adjust priority -100 to launch early
  # Find rulenames from eval_to_rule mappings
  if ($opts->{conf}->{razor_fork}) {
    foreach (@{$opts->{conf}->{eval_to_rule}->{check_razor2}}) {
      dbg("razor2: adjusting rule $_ priority to -100");
      $opts->{conf}->{priority}->{$_} = -100;
    }
    foreach (@{$opts->{conf}->{eval_to_rule}->{check_razor2_range}}) {
      dbg("razor2: adjusting rule $_ priority to -100");
      $opts->{conf}->{priority}->{$_} = -100;
    }
  }
}

# Eval rule: is the message listed as spam by Razor?
#
#   $pms  - per-message status object
#   $full - the message text handed to razor2_access()
#
# Non-forking mode: runs the check synchronously and returns the result of
# _check_result().
#
# Forking mode (razor_fork): marks the current rule as pending, forks a
# child that runs the check and writes the Storable-frozen results to a
# backchannel socket, and returns 0 immediately.  The hit is delivered
# later from _check_forked_result() / _check_result().
#
# Returns 0 whenever Razor is unavailable, disabled, or already running
# for this message; returns the cached result if one is already known.
sub check_razor2 {
  my ($self, $pms, $full) = @_;

  return 0 unless $self->{razor2_available};
  return 0 unless $self->{main}->{conf}->{use_razor2};

  return $pms->{razor2_result} if (defined $pms->{razor2_result});

  return 0 if $pms->{razor2_running};
  $pms->{razor2_running} = 1;

  my $timer = $self->{main}->time_method("check_razor2");

  ## non-forking method

  if (!$self->{main}->{conf}->{razor_fork}) {
    # TODO: check for cache header, set results appropriately
    # do it this way to make it easier to get out the results later from the
    # netcache plugin ... what netcache plugin?
    (undef, my @results) =
      $self->razor2_access($full, 'check', $pms->{master_deadline});
    return $self->_check_result($pms, \@results);
  }

  ## forking method

  $pms->{razor2_rulename} = $pms->get_current_eval_rule_name();
  $pms->rule_pending($pms->{razor2_rulename}); # mark async

  # create socketpair for communication
  $pms->{razor2_backchannel} = Mail::SpamAssassin::SubProcBackChannel->new();
  my $back_selector = '';
  $pms->{razor2_backchannel}->set_selector(\$back_selector);
  # NOTE(review): "eval { ... } or do" treats a false return value of the
  # setup call the same as an exception -- confirm it always returns true
  # on success.
  eval {
    $pms->{razor2_backchannel}->setup_backchannel_parent_pre_fork();
  } or do {
    dbg("razor2: backchannel pre-setup failed: $@");
    delete $pms->{razor2_backchannel};
    return 0;
  };

  my $pid = fork();
  if (!defined $pid) {
    info("razor2: child fork failed: $!");
    delete $pms->{razor2_backchannel};
    return 0;
  }
  if (!$pid) {
    # child: run the check, send the frozen results back, and leave via
    # _exit() so that no END blocks or destructors of the parent run here
    $0 = "$0 (razor2)";
    $SIG{CHLD} = 'DEFAULT';
    $SIG{PIPE} = 'IGNORE';
    $SIG{$_} = sub {
      eval { dbg("razor2: child process $$ caught signal $_[0]"); };
      _exit(6);  # avoid END and destructor processing
      kill('KILL',$$);  # still kicking? die!
    } foreach qw(INT HUP TERM TSTP QUIT USR1 USR2);
    dbg("razor2: child process $$ forked");
    $pms->{razor2_backchannel}->setup_backchannel_child_post_fork();
    (undef, my @results) =
      $self->razor2_access($full, 'check', $pms->{master_deadline});
    my $backmsg;
    eval {
      $backmsg = Storable::freeze(\@results);
    };
    if ($@) {
      dbg("razor2: child return value freeze failed: $@");
      _exit(0); # avoid END and destructor processing
    }
    if (!syswrite($pms->{razor2_backchannel}->{parent}, $backmsg)) {
      dbg("razor2: child backchannel write failed: $!");
    }
    _exit(0); # avoid END and destructor processing
  }

  $pms->{razor2_pid} = $pid;

  # NOTE(review): if this setup fails the child keeps running but
  # _check_forked_result() will never wait for it, because it returns
  # early once razor2_backchannel is gone -- confirm this is acceptable.
  eval {
    $pms->{razor2_backchannel}->setup_backchannel_parent_post_fork($pid);
  } or do {
    dbg("razor2: backchannel post-setup failed: $@");
    delete $pms->{razor2_backchannel};
    return 0;
  };

  return 0;
}

# Plugin hook: poll the forked child, without blocking.
sub check_tick {
  my ($self, $opts) = @_;
  $self->_check_forked_result($opts->{permsgstatus}, 0);
}

# Plugin hook: collect the forked child's result, waiting for it to finish
# unless the scan is being aborted.
sub check_cleanup {
  my ($self, $opts) = @_;
  $self->_check_forked_result($opts->{permsgstatus}, 1);
}

# Collects the result of the child forked by check_razor2().
#
#   $pms    - per-message status object
#   $finish - true: block until the child exits (unless aborting);
#             false: only poll with WNOHANG
#
# If the deadline was exceeded or the scan was shortcircuited, a child
# that is still running is sent TERM and, after a one second grace period,
# KILL.  Once the child has exited, its frozen results are read from the
# backchannel, thawed and passed to _check_result().
#
# Returns the value of _check_result() when results were processed, 0 when
# there is nothing to do (yet) or reading failed, and an empty list/undef
# when thawing failed.
sub _check_forked_result {
  my ($self, $pms, $finish) = @_;

  return 0 if !$pms->{razor2_backchannel};
  return 0 if !$pms->{razor2_pid};

  my $timer = $self->{main}->time_method("check_razor2");

  $pms->{razor2_abort} = $pms->{deadline_exceeded} || $pms->{shortcircuited};

  my $kid_pid = $pms->{razor2_pid};
  # if $finish, force waiting for the child
  my $pid = waitpid($kid_pid, $finish && !$pms->{razor2_abort} ? 0 : WNOHANG);
  if ($pid == 0) {
    #dbg("razor2: child process $kid_pid not finished yet, trying later");
    if ($pms->{razor2_abort}) {
      dbg("razor2: bailing out due to deadline/shortcircuit");
      kill('TERM', $kid_pid);
      if (waitpid($kid_pid, WNOHANG) == 0) {
        sleep(1);
        if (waitpid($kid_pid, WNOHANG) == 0) {
          dbg("razor2: child process $kid_pid still alive, KILL");
          kill('KILL', $kid_pid);
          waitpid($kid_pid, 0);
        }
      }
      delete $pms->{razor2_pid};
      delete $pms->{razor2_backchannel};
    }
    return 0;
  } elsif ($pid == -1) {
    # child does not exist?
    dbg("razor2: child process $kid_pid already handled?");
    delete $pms->{razor2_backchannel};
    return 0;
  }

  $pms->rule_ready($pms->{razor2_rulename}); # mark rule ready for metas

  dbg("razor2: child process $kid_pid finished, reading results");

  # NOTE(review): a single read of at most PIPE_BUF bytes; a larger frozen
  # result would presumably be truncated and fail to thaw below.
  my $backmsg;
  my $ret = sysread($pms->{razor2_backchannel}->{latest_kid_fh}, $backmsg, PIPE_BUF);
  if (!defined $ret || $ret == 0) {
    # sysread returns undef on error (reason in $!) and 0 at end of file;
    # test definedness so that the error case really reports $!
    dbg("razor2: could not read result from child: ".(defined $ret ? 0 : $!));
    delete $pms->{razor2_backchannel};
    return 0;
  }

  delete $pms->{razor2_backchannel};

  my $results;
  eval {
    $results = Storable::thaw($backmsg);
  };
  if ($@) {
    dbg("razor2: child return value thaw failed: $@");
    return;
  }

  $self->_check_result($pms, $results);
}

# Digests the result list produced by razor2_access() for a 'check'.
#
#   $pms     - per-message status object
#   $results - array ref of hashes: { result => ... } for the overall
#              verdict, { part, noresponse } or
#              { part, engine, contested, confidence } for message parts
#
# Calls the 'process_razor_result' plugin hook, stores the verdict in
# $pms->{razor2_result} and the highest uncontested confidence per engine
# in $pms->{razor2_cf_score}.  In forking mode it also fires got_hit() for
# the deferred check_razor2 rule and replays the check_razor2_range calls
# that were queued while the result was pending.
#
# Returns $pms->{razor2_result} (0 if Razor did not flag the message).
sub _check_result {
  my ($self, $pms, $results) = @_;

  $self->{main}->call_plugins ('process_razor_result',
    { results => $results, permsgstatus => $pms }
  );

  foreach my $result (@$results) {
    if (exists $result->{result}) {
      $pms->{razor2_result} = $result->{result} if $result->{result};
    }
    elsif ($result->{noresponse}) {
      dbg('razor2: part=' . $result->{part} . ' noresponse');
    }
    else {
      dbg('razor2: part=' . $result->{part} .
        ' engine=' .  $result->{engine} .
        ' contested=' . $result->{contested} .
        ' confidence=' . $result->{confidence});

      # contested parts never contribute to the confidence score
      next if $result->{contested};

      my $cf = $pms->{razor2_cf_score}->{$result->{engine}} || 0;
      if ($result->{confidence} > $cf) {
        $pms->{razor2_cf_score}->{$result->{engine}} = $result->{confidence};
      }
    }
  }

  $pms->{razor2_result} ||= 0;
  $pms->{razor2_cf_score} ||= {};

  dbg("razor2: results: spam? " . $pms->{razor2_result});
  while(my ($engine, $cf) = each %{$pms->{razor2_cf_score}}) {
    dbg("razor2: results: engine $engine, highest cf score: $cf");
  }

  if ($self->{main}->{conf}->{razor_fork}) {
    # forked needs to run got_hit()
    if ($pms->{razor2_rulename} && $pms->{razor2_result}) {
      $pms->got_hit($pms->{razor2_rulename}, "", ruletype => 'eval');
    }
    # forked needs to run range callbacks
    if ($pms->{razor2_range_callbacks}) {
      foreach (@{$pms->{razor2_range_callbacks}}) {
        $self->check_razor2_range($pms, '', @$_);
      }
    }
  }

  return $pms->{razor2_result};
}

# Check the cf value of a given message and return if it's within the
# given range
#
#   $pms      - per-message status object
#   $body     - message text, passed on to check_razor2() if the check
#               still has to be run (non-forking mode only)
#   $engine   - Razor engine whose score to test; a false value means
#               "highest score of any engine"
#   $min/$max - inclusive bounds of the confidence range
#   $rulename - only supplied when replayed as a deferred callback from
#               _check_result(); otherwise the current eval rule is used
#
# Returns 1 if the score is within [$min, $max], 0 if not (or if the
# result is still pending in forking mode), and an empty list/undef when
# Razor is unavailable, disabled or the lookup was aborted.
sub check_razor2_range {
  my ($self, $pms, $body, $engine, $min, $max, $rulename) = @_;

  # If Razor2 isn't available, or the general test is disabled, don't
  # continue.
  return unless $self->{razor2_available};
  return unless $self->{main}->{conf}->{use_razor2};

  # Check if callback overriding rulename
  if (!defined $rulename) {
    $rulename = $pms->get_current_eval_rule_name();
  }

  if ($pms->{razor2_abort}) {
    $pms->rule_ready($rulename); # mark rule ready for metas
    return;
  }

  # If forked, call back later unless results are in
  if ($self->{main}->{conf}->{razor_fork}) {
    if (!defined $pms->{razor2_result}) {
      $pms->rule_pending($rulename); # mark async
      dbg("razor2: delaying check_razor2_range call for $rulename");
      # array matches check_razor2_range() argument order
      push @{$pms->{razor2_range_callbacks}},
        [$engine, $min, $max, $rulename];
      return 0;
    }
  } else {
    # If Razor2 hasn't been checked yet, go ahead and run it.
    # (only if we are non-forking.. forking will handle these in
    # callbacks)
    if (!$pms->{razor2_running}) {
      $self->check_razor2($pms, $body);
    }
  }

  $pms->rule_ready($rulename); # mark rule ready for metas

  my $cf = 0;
  if ($engine) {
    $cf = $pms->{razor2_cf_score}->{$engine};
    return 0 unless defined $cf;
  }
  else {
    # If no specific engine was given to the rule, find the highest cf
    # determined and use that
    while(my (undef, $ecf) = each %{$pms->{razor2_cf_score}}) {
      if ($ecf > $cf) {
        $cf = $ecf;
      }
    }
  }

  if ($cf >= $min && $cf <= $max) {
    my $cf_str = sprintf("cf: %3d", $cf);
    $pms->test_log($cf_str, $rulename);
    # in forking mode the rule has already returned, so the hit has to be
    # registered explicitly
    if ($self->{main}->{conf}->{razor_fork}) {
      $pms->got_hit($rulename, "", ruletype => 'eval');
    }
    return 1;
  }

  return 0;
}

# Version features
# Always returns 1: this version of the plugin implements razor_fork.
sub has_fork { 1 }

1;

=back

=cut