1# <@LICENSE>
2# Licensed to the Apache Software Foundation (ASF) under one or more
3# contributor license agreements.  See the NOTICE file distributed with
4# this work for additional information regarding copyright ownership.
5# The ASF licenses this file to you under the Apache License, Version 2.0
6# (the "License"); you may not use this file except in compliance with
7# the License.  You may obtain a copy of the License at:
8#
9#     http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16# </@LICENSE>
17
18=head1 NAME
19
20Mail::SpamAssassin::BayesStore::MySQL - MySQL Specific Bayesian Storage Module Implementation
21
22=head1 DESCRIPTION
23
24This module implements a MySQL specific based bayesian storage module.  It
25requires that you are running at least version 4.1 of MySQL, if you are running
26a version of MySQL < 4.1 then several aspects of this module will fail and
27possibly corrupt your bayes database data.
28
29In addition, this module will support rollback on error, if you are
30using the InnoDB database table type in MySQL.  For more information
31please review the instructions in sql/README.bayes.
32
33=cut
34
35package Mail::SpamAssassin::BayesStore::MySQL;
36
37use strict;
38use warnings;
39# use bytes;
40use re 'taint';
41
42use Mail::SpamAssassin::BayesStore::SQL;
43use Mail::SpamAssassin::Logger;
44
45our @ISA = qw( Mail::SpamAssassin::BayesStore::SQL );
46
47use constant HAS_DBI => eval { require DBI; };
48
49=head1 METHODS
50
51=head2 token_expiration
52
53public instance (Integer, Integer,
54                 Integer, Integer) token_expiration(\% $opts,
55                                                    Integer $newdelta,
56                                                    @ @vars)
57
58Description:
59This method performs the database specific expiration of tokens based on
60the passed in C<$newdelta> and C<@vars>.
61
62=cut
63
64sub token_expiration {
65  my ($self, $opts, $newdelta, @vars) = @_;
66
67  my $num_hapaxes;
68  my $num_lowfreq;
69  my $deleted;
70
71  # Figure out how old is too old...
72  my $too_old = $vars[10] - $newdelta; # tooold = newest - delta
73
74  # if token atime > newest, reset to newest ...
75  my $sql = "UPDATE bayes_token SET atime = ?
76              WHERE id  = ?
77                AND atime > ?";
78
79  my $rows = $self->{_dbh}->do($sql, undef, $vars[10], $self->{_userid}, $vars[10]);
80
81  unless (defined($rows)) {
82    dbg("bayes: token_expiration: SQL error: ".$self->{_dbh}->errstr());
83    $deleted = 0;
84    $self->{_dbh}->rollback();
85    goto token_expiration_final;
86  }
87
88  # Check to make sure the expire won't remove too many tokens
89  $sql = "SELECT count(token) FROM bayes_token
90           WHERE id = ?
91             AND atime < ?";
92
93  my $sth = $self->{_dbh}->prepare_cached($sql);
94
95  unless (defined($sth)) {
96    dbg("bayes: token_expiration: SQL error: ".$self->{_dbh}->errstr());
97    $deleted = 0;
98    $self->{_dbh}->rollback();
99    goto token_expiration_final;
100  }
101
102  my $rc = $sth->execute($self->{_userid}, $too_old);
103
104  unless ($rc) {
105    dbg("bayes: token_expiration: SQL error: ".$self->{_dbh}->errstr());
106    $deleted = 0;
107    $self->{_dbh}->rollback();
108    goto token_expiration_final;
109  }
110
111  my ($count) = $sth->fetchrow_array();
112
113  $sth->finish();
114
115  # Sanity check: if we expired too many tokens, abort!
116  if ($vars[3] - $count < 100000) {
117    dbg("bayes: token expiration would expire too many tokens, aborting");
118    # set these appropriately so the next expire pass does the first pass
119    $deleted = 0;
120    $newdelta = 0;
121  }
122  else {
123    # Do the expire
124    $sql = "DELETE from bayes_token
125             WHERE id = ?
126               AND atime < ?";
127
128    $rows = $self->{_dbh}->do($sql, undef, $self->{_userid}, $too_old);
129
130    unless (defined($rows)) {
131      dbg("bayes: token_expiration: SQL error: ".$self->{_dbh}->errstr());
132      $deleted = 0;
133      $self->{_dbh}->rollback();
134      goto token_expiration_final;
135    }
136
137    $deleted = ($rows eq '0E0') ? 0 : $rows;
138  }
139
140  # Update the magic tokens as appropriate
141  $sql = "UPDATE bayes_vars SET token_count = token_count - ?,
142                                last_expire = ?,
143                                last_atime_delta = ?,
144                                last_expire_reduce = ?,
145                                oldest_token_age = (SELECT min(atime)
146                                                      FROM bayes_token
147                                                     WHERE id = ?)
148				WHERE id = ?";
149
150  $rows = $self->{_dbh}->do($sql, undef, $deleted, time(), $newdelta, $deleted, $self->{_userid}, $self->{_userid});
151
152  unless (defined($rows)) {
153    # Very bad, we actually deleted the tokens, but were unable to update
154    # bayes_vars with the new data.
155    dbg("bayes: token_expiration: SQL error: ".$self->{_dbh}->errstr());
156    $self->{_dbh}->rollback();
157    $deleted = 0;
158    goto token_expiration_final;
159  }
160
161  $self->{_dbh}->commit();
162
163token_expiration_final:
164  my $kept = $vars[3] - $deleted;
165
166  $num_hapaxes = $self->_get_num_hapaxes() if ($opts->{verbose});
167  $num_lowfreq = $self->_get_num_lowfreq() if ($opts->{verbose});
168
169  # Call untie_db() first so we unlock correctly etc. first
170  $self->untie_db();
171
172  return ($kept, $deleted, $num_hapaxes, $num_lowfreq);
173}
174
175=head2 seen_put
176
177public (Boolean) seen_put (string $msgid, char $flag)
178
179Description:
180This method records C<$msgid> as the type given by C<$flag>.  C<$flag> is one of
181two values 's' for spam and 'h' for ham.
182
183=cut
184
185sub seen_put {
186  my ($self, $msgid, $flag) = @_;
187
188  return 0 if (!$msgid);
189  return 0 if (!$flag);
190
191  return 0 unless (defined($self->{_dbh}));
192
193  my $sql = "INSERT INTO bayes_seen (id, msgid, flag)
194             VALUES (?,?,?)
195	     ON DUPLICATE KEY UPDATE flag=VALUES(flag)";
196
197  #added ON DUPLICATE KEY UPDATE flag=VALUES(flag) per bug 5998 on 4/8/2015
198
199  my $rows = $self->{_dbh}->do($sql,
200			       undef,
201			       $self->{_userid}, $msgid, $flag);
202
203  unless (defined($rows)) {
204    dbg("bayes: seen_put: SQL error: ".$self->{_dbh}->errstr());
205    $self->{_dbh}->rollback();
206    return 0;
207  }
208
209  dbg("bayes: seen ($msgid) put");
210  $self->{_dbh}->commit();
211  return 1;
212}
213
214=head2 seen_delete
215
216public instance (Boolean) seen_delete (string $msgid)
217
218Description:
219This method removes C<$msgid> from the database.
220
221=cut
222
223sub seen_delete {
224  my ($self, $msgid) = @_;
225
226  return 0 if (!$msgid);
227
228  return 0 unless (defined($self->{_dbh}));
229
230  my $sql = "DELETE FROM bayes_seen
231              WHERE id = ?
232                AND msgid = ?";
233
234  my $rows = $self->{_dbh}->do($sql,
235			       undef,
236			       $self->{_userid}, $msgid);
237
238  unless (defined($rows)) {
239    dbg("bayes: seen_delete: SQL error: ".$self->{_dbh}->errstr());
240    $self->{_dbh}->rollback();
241    return 0;
242  }
243
244  $self->{_dbh}->commit();
245  return 1;
246}
247
248=head2 set_last_expire
249
250public instance (Boolean) set_last_expire (Integer $time)
251
252Description:
253This method sets the last expire time.
254
255=cut
256
257sub set_last_expire {
258  my ($self, $time) = @_;
259
260  return 0 unless (defined($time));
261
262  return 0 unless (defined($self->{_dbh}));
263
264  my $sql = "UPDATE bayes_vars SET last_expire = ? WHERE id = ?";
265
266  my $rows = $self->{_dbh}->do($sql,
267			       undef,
268			       $time,
269			       $self->{_userid});
270
271  unless (defined($rows)) {
272    dbg("bayes: set_last_expire: SQL error: ".$self->{_dbh}->errstr());
273    $self->{_dbh}->rollback();
274    return 0;
275  }
276
277  $self->{_dbh}->commit();
278  return 1;
279}
280
281=head2 set_running_expire_tok
282
283public instance (String $time) set_running_expire_tok ()
284
285Description:
286This method sets the time that an expire starts running.
287
288=cut
289
290sub set_running_expire_tok {
291  my ($self) = @_;
292
293  return 0 unless (defined($self->{_dbh}));
294
295  my $sql = "INSERT INTO bayes_expire (id,runtime) VALUES (?,?)";
296
297  my $time = time();
298
299  my $rows = $self->{_dbh}->do($sql,
300			       undef,
301			       $self->{_userid}, $time);
302  unless (defined($rows)) {
303    dbg("bayes: set_running_expire_tok: SQL error: ".$self->{_dbh}->errstr());
304    $self->{_dbh}->rollback();
305    return;
306  }
307
308  $self->{_dbh}->commit();
309  return $time;
310}
311
312=head2 remove_running_expire_tok
313
314public instance (Boolean) remove_running_expire_tok ()
315
316Description:
317This method removes the row in the database that indicates that
318and expire is currently running.
319
320=cut
321
322sub remove_running_expire_tok {
323  my ($self) = @_;
324
325  return 0 unless (defined($self->{_dbh}));
326
327  my $sql = "DELETE from bayes_expire
328              WHERE id = ?";
329
330  my $rows = $self->{_dbh}->do($sql, undef, $self->{_userid});
331
332  unless (defined($rows)) {
333    dbg("bayes: remove_running_expire_tok: SQL error: ".$self->{_dbh}->errstr());
334    $self->{_dbh}->rollback();
335    return 0;
336  }
337
338  $self->{_dbh}->commit();
339  return 1;
340}
341
342=head2 nspam_nham_change
343
344public instance (Boolean) nspam_nham_change (Integer $num_spam,
345                                             Integer $num_ham)
346
347Description:
348This method updates the number of spam and the number of ham in the database.
349
350=cut
351
352sub nspam_nham_change {
353  my ($self, $num_spam, $num_ham) = @_;
354
355  return 0 unless (defined($self->{_dbh}));
356
357  my $sql;
358  my @bindings;
359
360  if ($num_spam != 0 && $num_ham != 0) {
361    $sql = "UPDATE bayes_vars
362               SET spam_count = spam_count + ?,
363                   ham_count = ham_count + ?
364             WHERE id = ?";
365    @bindings = ($num_spam, $num_ham, $self->{_userid});
366  }
367  elsif ($num_spam != 0) {
368    $sql = "UPDATE bayes_vars
369              SET spam_count = spam_count + ?
370             WHERE id = ?";
371    @bindings = ($num_spam, $self->{_userid});
372  }
373  elsif ($num_ham != 0) {
374    $sql = "UPDATE bayes_vars
375               SET ham_count = ham_count + ?
376            WHERE id = ?";
377    @bindings = ($num_ham, $self->{_userid});
378  }
379  else {
380    # For some reason called with no delta, it's ok though so just return
381    dbg("bayes: nspam_nham_change: Called with no delta on spam or ham");
382    return 1;
383  }
384
385  my $rows = $self->{_dbh}->do($sql,
386			       undef,
387			       @bindings);
388
389  unless (defined($rows)) {
390    dbg("bayes: nspam_nham_change: SQL error: ".$self->{_dbh}->errstr());
391    $self->{_dbh}->rollback();
392    return 0;
393  }
394
395  $self->{_dbh}->commit();
396  return 1;
397}
398
399=head2 tok_touch
400
401public instance (Boolean) tok_touch (String $token,
402                                     String $atime)
403
404Description:
405This method updates the given tokens (C<$token>) atime.
406
407The assumption is that the token already exists in the database.
408
409=cut
410
411sub tok_touch {
412  my ($self, $token, $atime) = @_;
413
414  return 0 unless (defined($self->{_dbh}));
415
416  # shortcut, will only update atime for the token if the atime is less than
417  # what we are updating to
418  my $sql = "UPDATE bayes_token
419                SET atime = ?
420              WHERE id = ?
421                AND token = ?
422                AND atime < ?";
423
424  my $rows = $self->{_dbh}->do($sql, undef, $atime, $self->{_userid},
425			       $token, $atime);
426
427  unless (defined($rows)) {
428    dbg("bayes: tok_touch: SQL error: ".$self->{_dbh}->errstr());
429    $self->{_dbh}->rollback();
430    return 0;
431  }
432
433  # if we didn't update a row then no need to update newest_token_age
434  return 1 if ($rows eq '0E0');
435
436  # need to check newest_token_age
437  # no need to check oldest_token_age since we would only update if the
438  # atime was newer than what is in the database
439  $sql = "UPDATE bayes_vars
440             SET newest_token_age = ?
441           WHERE id = ?
442             AND newest_token_age < ?";
443
444  $rows = $self->{_dbh}->do($sql, undef, $atime, $self->{_userid}, $atime);
445
446  unless (defined($rows)) {
447    dbg("bayes: tok_touch: SQL error: ".$self->{_dbh}->errstr());
448    $self->{_dbh}->rollback();
449    return 0;
450  }
451
452  $self->{_dbh}->commit();
453  return 1;
454}
455
456=head2 tok_touch_all
457
458public instance (Boolean) tok_touch (\@ $tokens
459                                     String $atime)
460
461Description:
462This method does a mass update of the given list of tokens C<$tokens>, if the existing token
463atime is < C<$atime>.
464
465The assumption is that the tokens already exist in the database.
466
467We should never be touching more than N_SIGNIFICANT_TOKENS, so we can make
468some assumptions about how to handle the data (ie no need to batch like we
469do in tok_get_all)
470
471=cut
472
473sub tok_touch_all {
474  my ($self, $tokens, $atime) = @_;
475
476  return 0 unless (defined($self->{_dbh}));
477
478  return 1 unless (scalar(@{$tokens}));
479
480  my $sql = "UPDATE bayes_token SET atime = ? WHERE id = ? AND token IN (";
481
482  my @bindings = ($atime, $self->{_userid});
483  foreach my $token (@{$tokens}) {
484    $sql .= "?,";
485    push(@bindings, $token);
486  }
487  chop($sql); # get rid of trailing ,
488
489  $sql .= ") AND atime < ?";
490  push(@bindings, $atime);
491
492  my $rows = $self->{_dbh}->do($sql, undef, @bindings);
493
494  unless (defined($rows)) {
495    dbg("bayes: tok_touch_all: SQL error: ".$self->{_dbh}->errstr());
496    $self->{_dbh}->rollback();
497    return 0;
498  }
499
500  # if we didn't update a row then no need to update newest_token_age
501  return 1 if ($rows eq '0E0');
502
503  # need to check newest_token_age
504  # no need to check oldest_token_age since we would only update if the
505  # atime was newer than what is in the database
506  $sql = "UPDATE bayes_vars
507             SET newest_token_age = ?
508           WHERE id = ?
509             AND newest_token_age < ?";
510
511  $rows = $self->{_dbh}->do($sql, undef, $atime, $self->{_userid}, $atime);
512
513  unless (defined($rows)) {
514    dbg("bayes: tok_touch_all: SQL error: ".$self->{_dbh}->errstr());
515    $self->{_dbh}->rollback();
516    return 0;
517  }
518
519  $self->{_dbh}->commit();
520  return 1;
521}
522
523=head2 cleanup
524
525public instance (Boolean) cleanup ()
526
527Description:
528This method performs any cleanup necessary before moving onto the next
529operation.
530
531=cut
532
533sub cleanup {
534  my ($self) = @_;
535
536  return 1 unless ($self->{needs_cleanup});
537
538  # cleanup was needed, go ahead and clear the cleanup flag
539  $self->{needs_cleanup} = 0;
540
541  my $sql = "DELETE from bayes_token
542              WHERE id = ?
543                AND spam_count <= 0
544                AND ham_count <= 0";
545
546  my $toks_deleted = $self->{_dbh}->do($sql, undef, $self->{_userid});
547
548  unless (defined($toks_deleted)) {
549    dbg("bayes: cleanup: SQL error: ".$self->{_dbh}->errstr());
550    $self->{_dbh}->rollback();
551    return 0;
552  }
553
554  # check to see if any tokens where deleted
555  return 1 if ($toks_deleted eq '0E0');
556
557  $sql = "UPDATE bayes_vars SET token_count = token_count - ? WHERE id = ?";
558
559  my $rows = $self->{_dbh}->do($sql, undef, $toks_deleted, $self->{_userid});
560
561  unless (defined($rows)) {
562    dbg("bayes: cleanup: SQL error: ".$self->{_dbh}->errstr());
563    $self->{_dbh}->rollback();
564    return 0;
565  }
566
567  $self->{_dbh}->commit();
568  return 1;
569}
570
571=head2 clear_database
572
573public instance (Boolean) clear_database ()
574
575Description:
576This method deletes all records for a particular user.
577
578Callers should be aware that any errors returned by this method
579could causes the database to be inconsistent for the given user.
580
581=cut
582
583sub clear_database {
584  my ($self) = @_;
585
586  # We want to open readonly first, because if they don't already have
587  # a db entry, we want to avoid creating one, just to delete it in a few secs
588  if ($self->tie_db_readonly()) {
589    # Ok, they must have had a db entry, so now make the connection writable
590    $self->tie_db_writable();
591  }
592  else {
593    # If we were unable to create a readonly connection then they must
594    # not have a db entry, so no need to clear.
595    # But it should be considered a success.
596    return 1;
597  }
598
599  return 0 unless (defined($self->{_dbh}));
600
601  my $rows = $self->{_dbh}->do("DELETE FROM bayes_vars WHERE id = ?",
602			       undef,
603			       $self->{_userid});
604  unless (defined($rows)) {
605    dbg("bayes: SQL error removing user (bayes_vars) data: ".$self->{_dbh}->errstr());
606    $self->{_dbh}->rollback();
607    return 0;
608  }
609
610  $rows = $self->{_dbh}->do("DELETE FROM bayes_seen WHERE id = ?",
611			    undef,
612			    $self->{_userid});
613  unless (defined($rows)) {
614    dbg("bayes: SQL error removing seen data: ".$self->{_dbh}->errstr());
615    $self->{_dbh}->rollback();
616    return 0;
617  }
618
619  $rows = $self->{_dbh}->do("DELETE FROM bayes_token WHERE id = ?",
620			    undef,
621			    $self->{_userid});
622  unless (defined($rows)) {
623    dbg("bayes: SQL error removing token data: ".$self->{_dbh}->errstr());
624    $self->{_dbh}->rollback();
625    return 0;
626  }
627
628  $self->{_dbh}->commit();
629  return 1;
630}
631
632=head1 Private Methods
633
634=head2 _connect_db
635
636private instance (Boolean) _connect_db ()
637
638Description:
639This method connects to the SQL database.
640
641=cut
642
643sub _connect_db {
644  my ($self) = @_;
645
646  $self->{_dbh} = undef;
647
648  # Turn off PrintError and explicitly set AutoCommit to off
649  my $dbh = DBI->connect($self->{_dsn}, $self->{_dbuser}, $self->{_dbpass},
650                        {'PrintError' => 0, 'AutoCommit' => 0});
651
652  if (!$dbh) {
653    dbg("bayes: unable to connect to database: ".DBI->errstr());
654    return 0;
655  }
656  else {
657    dbg("bayes: database connection established");
658  }
659
660  $self->{_dbh} = $dbh;
661
662 return 1;
663}
664
665=head2 _initialize_db
666
667private instance (Boolean) _initialize_db ()
668
669Description:
670This method will check to see if a user has had their bayes variables
671initialized. If not then it will perform this initialization.
672
673=cut
674
675sub _initialize_db {
676  my ($self, $create_entry_p) = @_;
677
678  return 0 if !defined $self->{_dbh};
679  return 0 if !defined $self->{_username} || $self->{_username} eq '';
680
681  # Check to see if we should call the services_authorized_for_username plugin
682  # hook to see if this user is allowed/able to use bayes.  If not, do nothing
683  # and return 0.
684  if ($self->{bayes}->{conf}->{bayes_sql_username_authorized}) {
685    my $services = { 'bayessql' => 0 };
686    $self->{bayes}->{main}->call_plugins("services_allowed_for_username",
687					 { services => $services,
688					   username => $self->{_username},
689					   conf => $self->{bayes}->{conf},
690					 });
691
692    unless ($services->{bayessql}) {
693      dbg("bayes: username not allowed by services_allowed_for_username plugin call");
694      return 0;
695    }
696  }
697
698  my $sqlselect = "SELECT id FROM bayes_vars WHERE username = ?";
699
700  my $sthselect = $self->{_dbh}->prepare_cached($sqlselect);
701
702  unless (defined($sthselect)) {
703    dbg("bayes: _initialize_db: SQL error: ".$self->{_dbh}->errstr());
704    return 0;
705  }
706
707  my $rc = $sthselect->execute($self->{_username});
708
709  unless ($rc) {
710    dbg("bayes: _initialize_db: SQL error: ".$self->{_dbh}->errstr());
711    return 0;
712  }
713
714  my ($id) = $sthselect->fetchrow_array();
715
716  if ($id) {
717    $self->{_userid} = $id;
718    dbg("bayes: Using userid: ".$self->{_userid});
719    $sthselect->finish();
720    return 1;
721  }
722
723  # Do not create an entry for this user unless we were specifically asked to
724  return 0 unless ($create_entry_p);
725
726  # For now let the database setup the other variables as defaults
727  my $sqlinsert = "INSERT INTO bayes_vars (username) VALUES (?)";
728
729  my $rows = $self->{_dbh}->do($sqlinsert,
730			       undef,
731			       $self->{_username});
732  unless (defined($rows)) {
733    dbg("bayes: _initialize_db: SQL error: ".$self->{_dbh}->errstr());
734    $self->{_dbh}->rollback();
735    return 0;
736  }
737
738  $id = $self->{_dbh}->{'mysql_insertid'};
739
740  $self->{_dbh}->commit();
741
742  if ($id) {
743    $self->{_userid} = $id;
744    dbg("bayes: using userid: ".$self->{_userid});
745    return 1;
746  }
747
748  return 1;
749}
750
751=head2 _put_token
752
753private instance (Boolean) _put_token (string $token,
754                                       integer $spam_count,
755                                       integer $ham_count,
756				       string $atime)
757
758Description:
759This method performs the work of either inserting or updating a token in
760the database.
761
762=cut
763
764sub _put_token {
765  my ($self, $token, $spam_count, $ham_count, $atime) = @_;
766
767  return 0 unless (defined($self->{_dbh}));
768
769  $spam_count ||= 0;
770  $ham_count ||= 0;
771
772  if ($spam_count == 0 && $ham_count == 0) {
773    return 1;
774  }
775
776  # the case where spam_count of ham_count is < 0 is special, it assumes
777  # that there already exists a token (although there might actually not be
778  # be one) that will be updated.  So we just do the update, being careful
779  # to not allow the spam_count or ham_count to not drop below 0
780  # In addition, when lowering the spam_count or ham_count we will not be
781  # updating the atime value
782  if ($spam_count < 0 || $ham_count < 0) {
783    # we only need to cleanup when we subtract counts for a token and the
784    # counts may have both reached 0
785    $self->{needs_cleanup} = 1;
786
787    my $sql = "UPDATE bayes_token SET spam_count = GREATEST(spam_count + ?, 0),
788                                      ham_count = GREATEST(ham_count + ?, 0)
789                WHERE id = ?
790                  AND token = ?";
791
792    my $sth = $self->{_dbh}->prepare_cached($sql);
793
794    unless (defined($sth)) {
795      dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
796      $self->{_dbh}->rollback();
797      return 0;
798    }
799
800    my $rc = $sth->execute($spam_count,
801			   $ham_count,
802			   $self->{_userid},
803			   $token);
804
805    unless ($rc) {
806      dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
807      $self->{_dbh}->rollback();
808      return 0;
809    }
810  }
811  else {
812    my $sql = "INSERT INTO bayes_token
813               (id, token, spam_count, ham_count, atime)
814               VALUES (?,?,?,?,?)
815               ON DUPLICATE KEY UPDATE spam_count = GREATEST(spam_count + ?, 0),
816                                       ham_count = GREATEST(ham_count + ?, 0),
817                                       atime = GREATEST(atime, ?)";
818
819    my $sth = $self->{_dbh}->prepare_cached($sql);
820
821    unless (defined($sth)) {
822      dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
823      $self->{_dbh}->rollback();
824      return 0;
825    }
826
827    my $rc = $sth->execute($self->{_userid},
828			   $token,
829			   $spam_count,
830			   $ham_count,
831			   $atime,
832			   $spam_count,
833			   $ham_count,
834			   $atime);
835
836    unless ($rc) {
837      dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
838      $self->{_dbh}->rollback();
839      return 0;
840    }
841
842    # With ON DUPLICATE KEY UPDATE, the affected-rows value per row is 1 if
843    # the row is inserted as a new row and 2 if an existing row is updated.
844    #
845    # Due to a MySQL server bug a value of 3 can be seen.
846    # See: http://bugs.mysql.com/bug.php?id=46675
847    #   When executing the INSERT ... ON DUPLICATE KEY UPDATE statement
848    #   and checking the rows return count:
849    #   mysql_client_found_rows = 0: The second INSERT returns a row count
850    #                                of 2 in all MySQL versions.
851    #   mysql_client_found_rows = 1: The second INSERT returns this row count:
852    #     Before MySQL 5.1.20: 2
853    #     MySQL 5.1.20: undef on Mac OS X, 139775481 on Linux (garbage?)
854    #     MySQL 5.1.21 and up: 3
855    #
856    my $num_rows = $rc;
857
858    $sth->finish();
859
860    if ($num_rows == 1 || $num_rows == 2 || $num_rows == 3) {
861      my $token_count_update = '';
862
863      $token_count_update = "token_count = token_count + 1," if $num_rows == 1;
864      $sql = "UPDATE bayes_vars SET
865                     $token_count_update
866                     newest_token_age = GREATEST(newest_token_age, ?),
867                     oldest_token_age = LEAST(oldest_token_age, ?)
868               WHERE id = ?";
869
870      $sth = $self->{_dbh}->prepare_cached($sql);
871
872      unless (defined($sth)) {
873	dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
874	$self->{_dbh}->rollback();
875	return 0;
876      }
877
878      my $rc = $sth->execute($atime, $atime, $self->{_userid});
879
880      unless ($rc) {
881	dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
882	$self->{_dbh}->rollback();
883	return 0;
884      }
885    }
886    else {
887      # $num_rows was not what we expected
888      my $token_displ = $token;
889      $token_displ =~ s/(.)/sprintf('%02x',ord($1))/egs;
890      dbg("bayes: _put_token: Updated an unexpected number of rows: %s, ".
891          "id: %s, token (hex): %s",
892          $num_rows, $self->{_userid}, $token_displ);
893      $self->{_dbh}->rollback();
894      return 0;
895    }
896  }
897
898  $self->{_dbh}->commit();
899  return 1;
900}
901
902=head2 _put_tokens
903
904private instance (Boolean) _put_tokens (\% $tokens,
905                                        integer $spam_count,
906                                        integer $ham_count,
907			 	        string $atime)
908
909Description:
910This method performs the work of either inserting or updating tokens in
911the database.
912
913=cut
914
915sub _put_tokens {
916  my ($self, $tokens, $spam_count, $ham_count, $atime) = @_;
917
918  return 0 unless (defined($self->{_dbh}));
919
920  $spam_count ||= 0;
921  $ham_count ||= 0;
922
923  if ($spam_count == 0 && $ham_count == 0) {
924    return 1;
925  }
926
927  # the case where spam_count of ham_count is < 0 is special, it assumes
928  # that there already exists a token (although there might actually not be
929  # be one) that will be updated.  So we just do the update, being careful
930  # to not allow the spam_count or ham_count to not drop below 0
931  # In addition, when lowering the spam_count or ham_count we will not be
932  # updating the atime value
933  if ($spam_count < 0 || $ham_count < 0) {
934    # we only need to cleanup when we subtract counts for a token and the
935    # counts may have both reached 0
936    $self->{needs_cleanup} = 1;
937
938    my $sql = "UPDATE bayes_token SET spam_count = GREATEST(spam_count + ?, 0),
939                                      ham_count = GREATEST(ham_count + ?, 0)
940                WHERE id = ?
941                  AND token = ?";
942
943    my $sth = $self->{_dbh}->prepare_cached($sql);
944
945    unless (defined($sth)) {
946      dbg("bayes: _put_tokens: SQL error: ".$self->{_dbh}->errstr());
947      $self->{_dbh}->rollback();
948      return 0;
949    }
950
951    my $error_p = 0;
952    foreach my $token (keys %{$tokens}) {
953      my $rc = $sth->execute($spam_count,
954			     $ham_count,
955			     $self->{_userid},
956			     $token);
957
958      unless ($rc) {
959	dbg("bayes: _put_tokens: SQL error: ".$self->{_dbh}->errstr());
960	$error_p = 1;
961      }
962    }
963
964    $sth->finish();
965
966    if ($error_p) {
967      $self->{_dbh}->rollback();
968      return 0;
969    }
970  }
971  else {
972    my $sql = "INSERT INTO bayes_token
973               (id, token, spam_count, ham_count, atime)
974               VALUES (?,?,?,?,?)
975               ON DUPLICATE KEY UPDATE spam_count = GREATEST(spam_count + ?, 0),
976                                       ham_count = GREATEST(ham_count + ?, 0),
977                                       atime = GREATEST(atime, ?)";
978
979    my $sth = $self->{_dbh}->prepare_cached($sql);
980
981    unless (defined($sth)) {
982      dbg("bayes: _put_tokens: SQL error: ".$self->{_dbh}->errstr());
983      $self->{_dbh}->rollback();
984      return 0;
985    }
986
987    my $error_p = 0;
988    my $new_tokens = 0;
989    my $need_atime_update_p = 0;
990    foreach my $token (keys %{$tokens}) {
991      my $rc = $sth->execute($self->{_userid},
992			     $token,
993			     $spam_count,
994			     $ham_count,
995			     $atime,
996			     $spam_count,
997			     $ham_count,
998			     $atime);
999
1000      if (!$rc) {
1001	dbg("bayes: _put_tokens: SQL error: ".$self->{_dbh}->errstr());
1002	$error_p = 1;
1003      }
1004      else {
1005	my $num_rows = $rc;
1006
1007        # With ON DUPLICATE KEY UPDATE, the affected-rows value per row is 1 if
1008        # the row is inserted as a new row and 2 if an existing row is updated.
1009        # But see MySQL bug (as above): http://bugs.mysql.com/bug.php?id=46675
1010
1011        if ($num_rows == 1) {
1012          $new_tokens++;
1013          $need_atime_update_p = 1;
1014        } elsif ($num_rows == 2 || $num_rows == 3) {
1015          $need_atime_update_p = 1;
1016        } else {
1017          # $num_rows was not what we expected
1018          my $token_displ = $token;
1019          $token_displ =~ s/(.)/sprintf('%02x',ord($1))/egs;
1020          dbg("bayes: _put_tokens: Updated an unexpected number of rows: %s, ".
1021              "id: %s, token (hex): %s",
1022              $num_rows, $self->{_userid}, $token_displ);
1023          $error_p = 1;
1024        }
1025      }
1026    }
1027
1028    $sth->finish();
1029
1030    if ($error_p) {
1031      $self->{_dbh}->rollback();
1032      return 0;
1033    }
1034
1035    if ($need_atime_update_p) {
1036      my $token_count_update = '';
1037
1038      $token_count_update = "token_count = token_count + $new_tokens," if ($new_tokens);
1039      $sql = "UPDATE bayes_vars SET
1040                     $token_count_update
1041                     newest_token_age = GREATEST(newest_token_age, ?),
1042                     oldest_token_age = LEAST(oldest_token_age, ?)
1043               WHERE id = ?";
1044
1045      $sth = $self->{_dbh}->prepare_cached($sql);
1046
1047      unless (defined($sth)) {
1048	dbg("bayes: _put_tokens: SQL error: ".$self->{_dbh}->errstr());
1049	$self->{_dbh}->rollback();
1050	return 0;
1051      }
1052
1053      my $rc = $sth->execute($atime, $atime, $self->{_userid});
1054
1055      unless ($rc) {
1056	dbg("bayes: _put_tokens: SQL error: ".$self->{_dbh}->errstr());
1057	$self->{_dbh}->rollback();
1058	return 0;
1059      }
1060    }
1061    else {
1062      info("bayes: _put_tokens: no atime updates needed?  Num of tokens: %d",
1063           scalar keys %{$tokens});
1064#     $self->{_dbh}->rollback();
1065#     return 0;
1066    }
1067  }
1068
1069  $self->{_dbh}->commit();
1070  return 1;
1071}
1072
1073sub sa_die { Mail::SpamAssassin::sa_die(@_); }
1074
10751;
1076