1# <@LICENSE> 2# Licensed to the Apache Software Foundation (ASF) under one or more 3# contributor license agreements. See the NOTICE file distributed with 4# this work for additional information regarding copyright ownership. 5# The ASF licenses this file to you under the Apache License, Version 2.0 6# (the "License"); you may not use this file except in compliance with 7# the License. You may obtain a copy of the License at: 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16# </@LICENSE> 17 18=head1 NAME 19 20Mail::SpamAssassin::BayesStore::MySQL - MySQL Specific Bayesian Storage Module Implementation 21 22=head1 DESCRIPTION 23 24This module implements a MySQL specific based bayesian storage module. It 25requires that you are running at least version 4.1 of MySQL, if you are running 26a version of MySQL < 4.1 then several aspects of this module will fail and 27possibly corrupt your bayes database data. 28 29In addition, this module will support rollback on error, if you are 30using the InnoDB database table type in MySQL. For more information 31please review the instructions in sql/README.bayes. 32 33=cut 34 35package Mail::SpamAssassin::BayesStore::MySQL; 36 37use strict; 38use warnings; 39# use bytes; 40use re 'taint'; 41 42use Mail::SpamAssassin::BayesStore::SQL; 43use Mail::SpamAssassin::Logger; 44 45our @ISA = qw( Mail::SpamAssassin::BayesStore::SQL ); 46 47use constant HAS_DBI => eval { require DBI; }; 48 49=head1 METHODS 50 51=head2 token_expiration 52 53public instance (Integer, Integer, 54 Integer, Integer) token_expiration(\% $opts, 55 Integer $newdelta, 56 @ @vars) 57 58Description: 59This method performs the database specific expiration of tokens based on 60the passed in C<$newdelta> and C<@vars>. 61 62=cut 63 64sub token_expiration { 65 my ($self, $opts, $newdelta, @vars) = @_; 66 67 my $num_hapaxes; 68 my $num_lowfreq; 69 my $deleted; 70 71 # Figure out how old is too old... 72 my $too_old = $vars[10] - $newdelta; # tooold = newest - delta 73 74 # if token atime > newest, reset to newest ... 75 my $sql = "UPDATE bayes_token SET atime = ? 76 WHERE id = ? 77 AND atime > ?"; 78 79 my $rows = $self->{_dbh}->do($sql, undef, $vars[10], $self->{_userid}, $vars[10]); 80 81 unless (defined($rows)) { 82 dbg("bayes: token_expiration: SQL error: ".$self->{_dbh}->errstr()); 83 $deleted = 0; 84 $self->{_dbh}->rollback(); 85 goto token_expiration_final; 86 } 87 88 # Check to make sure the expire won't remove too many tokens 89 $sql = "SELECT count(token) FROM bayes_token 90 WHERE id = ? 91 AND atime < ?"; 92 93 my $sth = $self->{_dbh}->prepare_cached($sql); 94 95 unless (defined($sth)) { 96 dbg("bayes: token_expiration: SQL error: ".$self->{_dbh}->errstr()); 97 $deleted = 0; 98 $self->{_dbh}->rollback(); 99 goto token_expiration_final; 100 } 101 102 my $rc = $sth->execute($self->{_userid}, $too_old); 103 104 unless ($rc) { 105 dbg("bayes: token_expiration: SQL error: ".$self->{_dbh}->errstr()); 106 $deleted = 0; 107 $self->{_dbh}->rollback(); 108 goto token_expiration_final; 109 } 110 111 my ($count) = $sth->fetchrow_array(); 112 113 $sth->finish(); 114 115 # Sanity check: if we expired too many tokens, abort! 116 if ($vars[3] - $count < 100000) { 117 dbg("bayes: token expiration would expire too many tokens, aborting"); 118 # set these appropriately so the next expire pass does the first pass 119 $deleted = 0; 120 $newdelta = 0; 121 } 122 else { 123 # Do the expire 124 $sql = "DELETE from bayes_token 125 WHERE id = ? 126 AND atime < ?"; 127 128 $rows = $self->{_dbh}->do($sql, undef, $self->{_userid}, $too_old); 129 130 unless (defined($rows)) { 131 dbg("bayes: token_expiration: SQL error: ".$self->{_dbh}->errstr()); 132 $deleted = 0; 133 $self->{_dbh}->rollback(); 134 goto token_expiration_final; 135 } 136 137 $deleted = ($rows eq '0E0') ? 0 : $rows; 138 } 139 140 # Update the magic tokens as appropriate 141 $sql = "UPDATE bayes_vars SET token_count = token_count - ?, 142 last_expire = ?, 143 last_atime_delta = ?, 144 last_expire_reduce = ?, 145 oldest_token_age = (SELECT min(atime) 146 FROM bayes_token 147 WHERE id = ?) 148 WHERE id = ?"; 149 150 $rows = $self->{_dbh}->do($sql, undef, $deleted, time(), $newdelta, $deleted, $self->{_userid}, $self->{_userid}); 151 152 unless (defined($rows)) { 153 # Very bad, we actually deleted the tokens, but were unable to update 154 # bayes_vars with the new data. 155 dbg("bayes: token_expiration: SQL error: ".$self->{_dbh}->errstr()); 156 $self->{_dbh}->rollback(); 157 $deleted = 0; 158 goto token_expiration_final; 159 } 160 161 $self->{_dbh}->commit(); 162 163token_expiration_final: 164 my $kept = $vars[3] - $deleted; 165 166 $num_hapaxes = $self->_get_num_hapaxes() if ($opts->{verbose}); 167 $num_lowfreq = $self->_get_num_lowfreq() if ($opts->{verbose}); 168 169 # Call untie_db() first so we unlock correctly etc. first 170 $self->untie_db(); 171 172 return ($kept, $deleted, $num_hapaxes, $num_lowfreq); 173} 174 175=head2 seen_put 176 177public (Boolean) seen_put (string $msgid, char $flag) 178 179Description: 180This method records C<$msgid> as the type given by C<$flag>. C<$flag> is one of 181two values 's' for spam and 'h' for ham. 182 183=cut 184 185sub seen_put { 186 my ($self, $msgid, $flag) = @_; 187 188 return 0 if (!$msgid); 189 return 0 if (!$flag); 190 191 return 0 unless (defined($self->{_dbh})); 192 193 my $sql = "INSERT INTO bayes_seen (id, msgid, flag) 194 VALUES (?,?,?) 195 ON DUPLICATE KEY UPDATE flag=VALUES(flag)"; 196 197 #added ON DUPLICATE KEY UPDATE flag=VALUES(flag) per bug 5998 on 4/8/2015 198 199 my $rows = $self->{_dbh}->do($sql, 200 undef, 201 $self->{_userid}, $msgid, $flag); 202 203 unless (defined($rows)) { 204 dbg("bayes: seen_put: SQL error: ".$self->{_dbh}->errstr()); 205 $self->{_dbh}->rollback(); 206 return 0; 207 } 208 209 dbg("bayes: seen ($msgid) put"); 210 $self->{_dbh}->commit(); 211 return 1; 212} 213 214=head2 seen_delete 215 216public instance (Boolean) seen_delete (string $msgid) 217 218Description: 219This method removes C<$msgid> from the database. 220 221=cut 222 223sub seen_delete { 224 my ($self, $msgid) = @_; 225 226 return 0 if (!$msgid); 227 228 return 0 unless (defined($self->{_dbh})); 229 230 my $sql = "DELETE FROM bayes_seen 231 WHERE id = ? 232 AND msgid = ?"; 233 234 my $rows = $self->{_dbh}->do($sql, 235 undef, 236 $self->{_userid}, $msgid); 237 238 unless (defined($rows)) { 239 dbg("bayes: seen_delete: SQL error: ".$self->{_dbh}->errstr()); 240 $self->{_dbh}->rollback(); 241 return 0; 242 } 243 244 $self->{_dbh}->commit(); 245 return 1; 246} 247 248=head2 set_last_expire 249 250public instance (Boolean) set_last_expire (Integer $time) 251 252Description: 253This method sets the last expire time. 254 255=cut 256 257sub set_last_expire { 258 my ($self, $time) = @_; 259 260 return 0 unless (defined($time)); 261 262 return 0 unless (defined($self->{_dbh})); 263 264 my $sql = "UPDATE bayes_vars SET last_expire = ? WHERE id = ?"; 265 266 my $rows = $self->{_dbh}->do($sql, 267 undef, 268 $time, 269 $self->{_userid}); 270 271 unless (defined($rows)) { 272 dbg("bayes: set_last_expire: SQL error: ".$self->{_dbh}->errstr()); 273 $self->{_dbh}->rollback(); 274 return 0; 275 } 276 277 $self->{_dbh}->commit(); 278 return 1; 279} 280 281=head2 set_running_expire_tok 282 283public instance (String $time) set_running_expire_tok () 284 285Description: 286This method sets the time that an expire starts running. 287 288=cut 289 290sub set_running_expire_tok { 291 my ($self) = @_; 292 293 return 0 unless (defined($self->{_dbh})); 294 295 my $sql = "INSERT INTO bayes_expire (id,runtime) VALUES (?,?)"; 296 297 my $time = time(); 298 299 my $rows = $self->{_dbh}->do($sql, 300 undef, 301 $self->{_userid}, $time); 302 unless (defined($rows)) { 303 dbg("bayes: set_running_expire_tok: SQL error: ".$self->{_dbh}->errstr()); 304 $self->{_dbh}->rollback(); 305 return; 306 } 307 308 $self->{_dbh}->commit(); 309 return $time; 310} 311 312=head2 remove_running_expire_tok 313 314public instance (Boolean) remove_running_expire_tok () 315 316Description: 317This method removes the row in the database that indicates that 318and expire is currently running. 319 320=cut 321 322sub remove_running_expire_tok { 323 my ($self) = @_; 324 325 return 0 unless (defined($self->{_dbh})); 326 327 my $sql = "DELETE from bayes_expire 328 WHERE id = ?"; 329 330 my $rows = $self->{_dbh}->do($sql, undef, $self->{_userid}); 331 332 unless (defined($rows)) { 333 dbg("bayes: remove_running_expire_tok: SQL error: ".$self->{_dbh}->errstr()); 334 $self->{_dbh}->rollback(); 335 return 0; 336 } 337 338 $self->{_dbh}->commit(); 339 return 1; 340} 341 342=head2 nspam_nham_change 343 344public instance (Boolean) nspam_nham_change (Integer $num_spam, 345 Integer $num_ham) 346 347Description: 348This method updates the number of spam and the number of ham in the database. 349 350=cut 351 352sub nspam_nham_change { 353 my ($self, $num_spam, $num_ham) = @_; 354 355 return 0 unless (defined($self->{_dbh})); 356 357 my $sql; 358 my @bindings; 359 360 if ($num_spam != 0 && $num_ham != 0) { 361 $sql = "UPDATE bayes_vars 362 SET spam_count = spam_count + ?, 363 ham_count = ham_count + ? 364 WHERE id = ?"; 365 @bindings = ($num_spam, $num_ham, $self->{_userid}); 366 } 367 elsif ($num_spam != 0) { 368 $sql = "UPDATE bayes_vars 369 SET spam_count = spam_count + ? 370 WHERE id = ?"; 371 @bindings = ($num_spam, $self->{_userid}); 372 } 373 elsif ($num_ham != 0) { 374 $sql = "UPDATE bayes_vars 375 SET ham_count = ham_count + ? 376 WHERE id = ?"; 377 @bindings = ($num_ham, $self->{_userid}); 378 } 379 else { 380 # For some reason called with no delta, it's ok though so just return 381 dbg("bayes: nspam_nham_change: Called with no delta on spam or ham"); 382 return 1; 383 } 384 385 my $rows = $self->{_dbh}->do($sql, 386 undef, 387 @bindings); 388 389 unless (defined($rows)) { 390 dbg("bayes: nspam_nham_change: SQL error: ".$self->{_dbh}->errstr()); 391 $self->{_dbh}->rollback(); 392 return 0; 393 } 394 395 $self->{_dbh}->commit(); 396 return 1; 397} 398 399=head2 tok_touch 400 401public instance (Boolean) tok_touch (String $token, 402 String $atime) 403 404Description: 405This method updates the given tokens (C<$token>) atime. 406 407The assumption is that the token already exists in the database. 408 409=cut 410 411sub tok_touch { 412 my ($self, $token, $atime) = @_; 413 414 return 0 unless (defined($self->{_dbh})); 415 416 # shortcut, will only update atime for the token if the atime is less than 417 # what we are updating to 418 my $sql = "UPDATE bayes_token 419 SET atime = ? 420 WHERE id = ? 421 AND token = ? 422 AND atime < ?"; 423 424 my $rows = $self->{_dbh}->do($sql, undef, $atime, $self->{_userid}, 425 $token, $atime); 426 427 unless (defined($rows)) { 428 dbg("bayes: tok_touch: SQL error: ".$self->{_dbh}->errstr()); 429 $self->{_dbh}->rollback(); 430 return 0; 431 } 432 433 # if we didn't update a row then no need to update newest_token_age 434 return 1 if ($rows eq '0E0'); 435 436 # need to check newest_token_age 437 # no need to check oldest_token_age since we would only update if the 438 # atime was newer than what is in the database 439 $sql = "UPDATE bayes_vars 440 SET newest_token_age = ? 441 WHERE id = ? 442 AND newest_token_age < ?"; 443 444 $rows = $self->{_dbh}->do($sql, undef, $atime, $self->{_userid}, $atime); 445 446 unless (defined($rows)) { 447 dbg("bayes: tok_touch: SQL error: ".$self->{_dbh}->errstr()); 448 $self->{_dbh}->rollback(); 449 return 0; 450 } 451 452 $self->{_dbh}->commit(); 453 return 1; 454} 455 456=head2 tok_touch_all 457 458public instance (Boolean) tok_touch (\@ $tokens 459 String $atime) 460 461Description: 462This method does a mass update of the given list of tokens C<$tokens>, if the existing token 463atime is < C<$atime>. 464 465The assumption is that the tokens already exist in the database. 466 467We should never be touching more than N_SIGNIFICANT_TOKENS, so we can make 468some assumptions about how to handle the data (ie no need to batch like we 469do in tok_get_all) 470 471=cut 472 473sub tok_touch_all { 474 my ($self, $tokens, $atime) = @_; 475 476 return 0 unless (defined($self->{_dbh})); 477 478 return 1 unless (scalar(@{$tokens})); 479 480 my $sql = "UPDATE bayes_token SET atime = ? WHERE id = ? AND token IN ("; 481 482 my @bindings = ($atime, $self->{_userid}); 483 foreach my $token (@{$tokens}) { 484 $sql .= "?,"; 485 push(@bindings, $token); 486 } 487 chop($sql); # get rid of trailing , 488 489 $sql .= ") AND atime < ?"; 490 push(@bindings, $atime); 491 492 my $rows = $self->{_dbh}->do($sql, undef, @bindings); 493 494 unless (defined($rows)) { 495 dbg("bayes: tok_touch_all: SQL error: ".$self->{_dbh}->errstr()); 496 $self->{_dbh}->rollback(); 497 return 0; 498 } 499 500 # if we didn't update a row then no need to update newest_token_age 501 return 1 if ($rows eq '0E0'); 502 503 # need to check newest_token_age 504 # no need to check oldest_token_age since we would only update if the 505 # atime was newer than what is in the database 506 $sql = "UPDATE bayes_vars 507 SET newest_token_age = ? 508 WHERE id = ? 509 AND newest_token_age < ?"; 510 511 $rows = $self->{_dbh}->do($sql, undef, $atime, $self->{_userid}, $atime); 512 513 unless (defined($rows)) { 514 dbg("bayes: tok_touch_all: SQL error: ".$self->{_dbh}->errstr()); 515 $self->{_dbh}->rollback(); 516 return 0; 517 } 518 519 $self->{_dbh}->commit(); 520 return 1; 521} 522 523=head2 cleanup 524 525public instance (Boolean) cleanup () 526 527Description: 528This method performs any cleanup necessary before moving onto the next 529operation. 530 531=cut 532 533sub cleanup { 534 my ($self) = @_; 535 536 return 1 unless ($self->{needs_cleanup}); 537 538 # cleanup was needed, go ahead and clear the cleanup flag 539 $self->{needs_cleanup} = 0; 540 541 my $sql = "DELETE from bayes_token 542 WHERE id = ? 543 AND spam_count <= 0 544 AND ham_count <= 0"; 545 546 my $toks_deleted = $self->{_dbh}->do($sql, undef, $self->{_userid}); 547 548 unless (defined($toks_deleted)) { 549 dbg("bayes: cleanup: SQL error: ".$self->{_dbh}->errstr()); 550 $self->{_dbh}->rollback(); 551 return 0; 552 } 553 554 # check to see if any tokens where deleted 555 return 1 if ($toks_deleted eq '0E0'); 556 557 $sql = "UPDATE bayes_vars SET token_count = token_count - ? WHERE id = ?"; 558 559 my $rows = $self->{_dbh}->do($sql, undef, $toks_deleted, $self->{_userid}); 560 561 unless (defined($rows)) { 562 dbg("bayes: cleanup: SQL error: ".$self->{_dbh}->errstr()); 563 $self->{_dbh}->rollback(); 564 return 0; 565 } 566 567 $self->{_dbh}->commit(); 568 return 1; 569} 570 571=head2 clear_database 572 573public instance (Boolean) clear_database () 574 575Description: 576This method deletes all records for a particular user. 577 578Callers should be aware that any errors returned by this method 579could causes the database to be inconsistent for the given user. 580 581=cut 582 583sub clear_database { 584 my ($self) = @_; 585 586 # We want to open readonly first, because if they don't already have 587 # a db entry, we want to avoid creating one, just to delete it in a few secs 588 if ($self->tie_db_readonly()) { 589 # Ok, they must have had a db entry, so now make the connection writable 590 $self->tie_db_writable(); 591 } 592 else { 593 # If we were unable to create a readonly connection then they must 594 # not have a db entry, so no need to clear. 595 # But it should be considered a success. 596 return 1; 597 } 598 599 return 0 unless (defined($self->{_dbh})); 600 601 my $rows = $self->{_dbh}->do("DELETE FROM bayes_vars WHERE id = ?", 602 undef, 603 $self->{_userid}); 604 unless (defined($rows)) { 605 dbg("bayes: SQL error removing user (bayes_vars) data: ".$self->{_dbh}->errstr()); 606 $self->{_dbh}->rollback(); 607 return 0; 608 } 609 610 $rows = $self->{_dbh}->do("DELETE FROM bayes_seen WHERE id = ?", 611 undef, 612 $self->{_userid}); 613 unless (defined($rows)) { 614 dbg("bayes: SQL error removing seen data: ".$self->{_dbh}->errstr()); 615 $self->{_dbh}->rollback(); 616 return 0; 617 } 618 619 $rows = $self->{_dbh}->do("DELETE FROM bayes_token WHERE id = ?", 620 undef, 621 $self->{_userid}); 622 unless (defined($rows)) { 623 dbg("bayes: SQL error removing token data: ".$self->{_dbh}->errstr()); 624 $self->{_dbh}->rollback(); 625 return 0; 626 } 627 628 $self->{_dbh}->commit(); 629 return 1; 630} 631 632=head1 Private Methods 633 634=head2 _connect_db 635 636private instance (Boolean) _connect_db () 637 638Description: 639This method connects to the SQL database. 640 641=cut 642 643sub _connect_db { 644 my ($self) = @_; 645 646 $self->{_dbh} = undef; 647 648 # Turn off PrintError and explicitly set AutoCommit to off 649 my $dbh = DBI->connect($self->{_dsn}, $self->{_dbuser}, $self->{_dbpass}, 650 {'PrintError' => 0, 'AutoCommit' => 0}); 651 652 if (!$dbh) { 653 dbg("bayes: unable to connect to database: ".DBI->errstr()); 654 return 0; 655 } 656 else { 657 dbg("bayes: database connection established"); 658 } 659 660 $self->{_dbh} = $dbh; 661 662 return 1; 663} 664 665=head2 _initialize_db 666 667private instance (Boolean) _initialize_db () 668 669Description: 670This method will check to see if a user has had their bayes variables 671initialized. If not then it will perform this initialization. 672 673=cut 674 675sub _initialize_db { 676 my ($self, $create_entry_p) = @_; 677 678 return 0 if !defined $self->{_dbh}; 679 return 0 if !defined $self->{_username} || $self->{_username} eq ''; 680 681 # Check to see if we should call the services_authorized_for_username plugin 682 # hook to see if this user is allowed/able to use bayes. If not, do nothing 683 # and return 0. 684 if ($self->{bayes}->{conf}->{bayes_sql_username_authorized}) { 685 my $services = { 'bayessql' => 0 }; 686 $self->{bayes}->{main}->call_plugins("services_allowed_for_username", 687 { services => $services, 688 username => $self->{_username}, 689 conf => $self->{bayes}->{conf}, 690 }); 691 692 unless ($services->{bayessql}) { 693 dbg("bayes: username not allowed by services_allowed_for_username plugin call"); 694 return 0; 695 } 696 } 697 698 my $sqlselect = "SELECT id FROM bayes_vars WHERE username = ?"; 699 700 my $sthselect = $self->{_dbh}->prepare_cached($sqlselect); 701 702 unless (defined($sthselect)) { 703 dbg("bayes: _initialize_db: SQL error: ".$self->{_dbh}->errstr()); 704 return 0; 705 } 706 707 my $rc = $sthselect->execute($self->{_username}); 708 709 unless ($rc) { 710 dbg("bayes: _initialize_db: SQL error: ".$self->{_dbh}->errstr()); 711 return 0; 712 } 713 714 my ($id) = $sthselect->fetchrow_array(); 715 716 if ($id) { 717 $self->{_userid} = $id; 718 dbg("bayes: Using userid: ".$self->{_userid}); 719 $sthselect->finish(); 720 return 1; 721 } 722 723 # Do not create an entry for this user unless we were specifically asked to 724 return 0 unless ($create_entry_p); 725 726 # For now let the database setup the other variables as defaults 727 my $sqlinsert = "INSERT INTO bayes_vars (username) VALUES (?)"; 728 729 my $rows = $self->{_dbh}->do($sqlinsert, 730 undef, 731 $self->{_username}); 732 unless (defined($rows)) { 733 dbg("bayes: _initialize_db: SQL error: ".$self->{_dbh}->errstr()); 734 $self->{_dbh}->rollback(); 735 return 0; 736 } 737 738 $id = $self->{_dbh}->{'mysql_insertid'}; 739 740 $self->{_dbh}->commit(); 741 742 if ($id) { 743 $self->{_userid} = $id; 744 dbg("bayes: using userid: ".$self->{_userid}); 745 return 1; 746 } 747 748 return 1; 749} 750 751=head2 _put_token 752 753private instance (Boolean) _put_token (string $token, 754 integer $spam_count, 755 integer $ham_count, 756 string $atime) 757 758Description: 759This method performs the work of either inserting or updating a token in 760the database. 761 762=cut 763 764sub _put_token { 765 my ($self, $token, $spam_count, $ham_count, $atime) = @_; 766 767 return 0 unless (defined($self->{_dbh})); 768 769 $spam_count ||= 0; 770 $ham_count ||= 0; 771 772 if ($spam_count == 0 && $ham_count == 0) { 773 return 1; 774 } 775 776 # the case where spam_count of ham_count is < 0 is special, it assumes 777 # that there already exists a token (although there might actually not be 778 # be one) that will be updated. So we just do the update, being careful 779 # to not allow the spam_count or ham_count to not drop below 0 780 # In addition, when lowering the spam_count or ham_count we will not be 781 # updating the atime value 782 if ($spam_count < 0 || $ham_count < 0) { 783 # we only need to cleanup when we subtract counts for a token and the 784 # counts may have both reached 0 785 $self->{needs_cleanup} = 1; 786 787 my $sql = "UPDATE bayes_token SET spam_count = GREATEST(spam_count + ?, 0), 788 ham_count = GREATEST(ham_count + ?, 0) 789 WHERE id = ? 790 AND token = ?"; 791 792 my $sth = $self->{_dbh}->prepare_cached($sql); 793 794 unless (defined($sth)) { 795 dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr()); 796 $self->{_dbh}->rollback(); 797 return 0; 798 } 799 800 my $rc = $sth->execute($spam_count, 801 $ham_count, 802 $self->{_userid}, 803 $token); 804 805 unless ($rc) { 806 dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr()); 807 $self->{_dbh}->rollback(); 808 return 0; 809 } 810 } 811 else { 812 my $sql = "INSERT INTO bayes_token 813 (id, token, spam_count, ham_count, atime) 814 VALUES (?,?,?,?,?) 815 ON DUPLICATE KEY UPDATE spam_count = GREATEST(spam_count + ?, 0), 816 ham_count = GREATEST(ham_count + ?, 0), 817 atime = GREATEST(atime, ?)"; 818 819 my $sth = $self->{_dbh}->prepare_cached($sql); 820 821 unless (defined($sth)) { 822 dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr()); 823 $self->{_dbh}->rollback(); 824 return 0; 825 } 826 827 my $rc = $sth->execute($self->{_userid}, 828 $token, 829 $spam_count, 830 $ham_count, 831 $atime, 832 $spam_count, 833 $ham_count, 834 $atime); 835 836 unless ($rc) { 837 dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr()); 838 $self->{_dbh}->rollback(); 839 return 0; 840 } 841 842 # With ON DUPLICATE KEY UPDATE, the affected-rows value per row is 1 if 843 # the row is inserted as a new row and 2 if an existing row is updated. 844 # 845 # Due to a MySQL server bug a value of 3 can be seen. 846 # See: http://bugs.mysql.com/bug.php?id=46675 847 # When executing the INSERT ... ON DUPLICATE KEY UPDATE statement 848 # and checking the rows return count: 849 # mysql_client_found_rows = 0: The second INSERT returns a row count 850 # of 2 in all MySQL versions. 851 # mysql_client_found_rows = 1: The second INSERT returns this row count: 852 # Before MySQL 5.1.20: 2 853 # MySQL 5.1.20: undef on Mac OS X, 139775481 on Linux (garbage?) 854 # MySQL 5.1.21 and up: 3 855 # 856 my $num_rows = $rc; 857 858 $sth->finish(); 859 860 if ($num_rows == 1 || $num_rows == 2 || $num_rows == 3) { 861 my $token_count_update = ''; 862 863 $token_count_update = "token_count = token_count + 1," if $num_rows == 1; 864 $sql = "UPDATE bayes_vars SET 865 $token_count_update 866 newest_token_age = GREATEST(newest_token_age, ?), 867 oldest_token_age = LEAST(oldest_token_age, ?) 868 WHERE id = ?"; 869 870 $sth = $self->{_dbh}->prepare_cached($sql); 871 872 unless (defined($sth)) { 873 dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr()); 874 $self->{_dbh}->rollback(); 875 return 0; 876 } 877 878 my $rc = $sth->execute($atime, $atime, $self->{_userid}); 879 880 unless ($rc) { 881 dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr()); 882 $self->{_dbh}->rollback(); 883 return 0; 884 } 885 } 886 else { 887 # $num_rows was not what we expected 888 my $token_displ = $token; 889 $token_displ =~ s/(.)/sprintf('%02x',ord($1))/egs; 890 dbg("bayes: _put_token: Updated an unexpected number of rows: %s, ". 891 "id: %s, token (hex): %s", 892 $num_rows, $self->{_userid}, $token_displ); 893 $self->{_dbh}->rollback(); 894 return 0; 895 } 896 } 897 898 $self->{_dbh}->commit(); 899 return 1; 900} 901 902=head2 _put_tokens 903 904private instance (Boolean) _put_tokens (\% $tokens, 905 integer $spam_count, 906 integer $ham_count, 907 string $atime) 908 909Description: 910This method performs the work of either inserting or updating tokens in 911the database. 912 913=cut 914 915sub _put_tokens { 916 my ($self, $tokens, $spam_count, $ham_count, $atime) = @_; 917 918 return 0 unless (defined($self->{_dbh})); 919 920 $spam_count ||= 0; 921 $ham_count ||= 0; 922 923 if ($spam_count == 0 && $ham_count == 0) { 924 return 1; 925 } 926 927 # the case where spam_count of ham_count is < 0 is special, it assumes 928 # that there already exists a token (although there might actually not be 929 # be one) that will be updated. So we just do the update, being careful 930 # to not allow the spam_count or ham_count to not drop below 0 931 # In addition, when lowering the spam_count or ham_count we will not be 932 # updating the atime value 933 if ($spam_count < 0 || $ham_count < 0) { 934 # we only need to cleanup when we subtract counts for a token and the 935 # counts may have both reached 0 936 $self->{needs_cleanup} = 1; 937 938 my $sql = "UPDATE bayes_token SET spam_count = GREATEST(spam_count + ?, 0), 939 ham_count = GREATEST(ham_count + ?, 0) 940 WHERE id = ? 941 AND token = ?"; 942 943 my $sth = $self->{_dbh}->prepare_cached($sql); 944 945 unless (defined($sth)) { 946 dbg("bayes: _put_tokens: SQL error: ".$self->{_dbh}->errstr()); 947 $self->{_dbh}->rollback(); 948 return 0; 949 } 950 951 my $error_p = 0; 952 foreach my $token (keys %{$tokens}) { 953 my $rc = $sth->execute($spam_count, 954 $ham_count, 955 $self->{_userid}, 956 $token); 957 958 unless ($rc) { 959 dbg("bayes: _put_tokens: SQL error: ".$self->{_dbh}->errstr()); 960 $error_p = 1; 961 } 962 } 963 964 $sth->finish(); 965 966 if ($error_p) { 967 $self->{_dbh}->rollback(); 968 return 0; 969 } 970 } 971 else { 972 my $sql = "INSERT INTO bayes_token 973 (id, token, spam_count, ham_count, atime) 974 VALUES (?,?,?,?,?) 975 ON DUPLICATE KEY UPDATE spam_count = GREATEST(spam_count + ?, 0), 976 ham_count = GREATEST(ham_count + ?, 0), 977 atime = GREATEST(atime, ?)"; 978 979 my $sth = $self->{_dbh}->prepare_cached($sql); 980 981 unless (defined($sth)) { 982 dbg("bayes: _put_tokens: SQL error: ".$self->{_dbh}->errstr()); 983 $self->{_dbh}->rollback(); 984 return 0; 985 } 986 987 my $error_p = 0; 988 my $new_tokens = 0; 989 my $need_atime_update_p = 0; 990 foreach my $token (keys %{$tokens}) { 991 my $rc = $sth->execute($self->{_userid}, 992 $token, 993 $spam_count, 994 $ham_count, 995 $atime, 996 $spam_count, 997 $ham_count, 998 $atime); 999 1000 if (!$rc) { 1001 dbg("bayes: _put_tokens: SQL error: ".$self->{_dbh}->errstr()); 1002 $error_p = 1; 1003 } 1004 else { 1005 my $num_rows = $rc; 1006 1007 # With ON DUPLICATE KEY UPDATE, the affected-rows value per row is 1 if 1008 # the row is inserted as a new row and 2 if an existing row is updated. 1009 # But see MySQL bug (as above): http://bugs.mysql.com/bug.php?id=46675 1010 1011 if ($num_rows == 1) { 1012 $new_tokens++; 1013 $need_atime_update_p = 1; 1014 } elsif ($num_rows == 2 || $num_rows == 3) { 1015 $need_atime_update_p = 1; 1016 } else { 1017 # $num_rows was not what we expected 1018 my $token_displ = $token; 1019 $token_displ =~ s/(.)/sprintf('%02x',ord($1))/egs; 1020 dbg("bayes: _put_tokens: Updated an unexpected number of rows: %s, ". 1021 "id: %s, token (hex): %s", 1022 $num_rows, $self->{_userid}, $token_displ); 1023 $error_p = 1; 1024 } 1025 } 1026 } 1027 1028 $sth->finish(); 1029 1030 if ($error_p) { 1031 $self->{_dbh}->rollback(); 1032 return 0; 1033 } 1034 1035 if ($need_atime_update_p) { 1036 my $token_count_update = ''; 1037 1038 $token_count_update = "token_count = token_count + $new_tokens," if ($new_tokens); 1039 $sql = "UPDATE bayes_vars SET 1040 $token_count_update 1041 newest_token_age = GREATEST(newest_token_age, ?), 1042 oldest_token_age = LEAST(oldest_token_age, ?) 1043 WHERE id = ?"; 1044 1045 $sth = $self->{_dbh}->prepare_cached($sql); 1046 1047 unless (defined($sth)) { 1048 dbg("bayes: _put_tokens: SQL error: ".$self->{_dbh}->errstr()); 1049 $self->{_dbh}->rollback(); 1050 return 0; 1051 } 1052 1053 my $rc = $sth->execute($atime, $atime, $self->{_userid}); 1054 1055 unless ($rc) { 1056 dbg("bayes: _put_tokens: SQL error: ".$self->{_dbh}->errstr()); 1057 $self->{_dbh}->rollback(); 1058 return 0; 1059 } 1060 } 1061 else { 1062 info("bayes: _put_tokens: no atime updates needed? Num of tokens: %d", 1063 scalar keys %{$tokens}); 1064# $self->{_dbh}->rollback(); 1065# return 0; 1066 } 1067 } 1068 1069 $self->{_dbh}->commit(); 1070 return 1; 1071} 1072 1073sub sa_die { Mail::SpamAssassin::sa_die(@_); } 1074 10751; 1076