1#!/usr/local/bin/perl
2# rebuilds bayesian spam database
3our $VERSION = "RB-1.99";
4our $modversion = '(13177)';
5# (c) John Hanna 2003 under the terms of the GPL
6# Updated July 2004 for simple proxy support.
7# (c) Fritz Borgstedt 2006 under the terms of the GPL
8# Updated Feb 2008 refactoring and rewrites
9# (c) Kevin 2008 under the terms of the GPL
10use bytes;    # get rid of anoying 'Malformed UTF-8' messages
11use Digest::MD5 qw(md5_hex);
12use English '-no_match_vars';
13use File::Copy;
14use IO::Handle;
15use IO::Socket;
16use Time::Local;
17use Time::HiRes;
18use Cwd;
19use strict qw(vars subs);
20our $AvailLWP  = eval('use LWP::Simple; 1');    # LWP::Simple module installed
21our $CanUseLWP = $AvailLWP;
22our	$noGriplistUpload;
23our	$noGriplistDownload;
24our $gripListDownUrl = 'http://*HOST*/cgi-bin/assp_griplist?binary';
25our $gripListUpUrl = 'http://*HOST*/cgi-bin/assp_griplist?binary';
26our $gripListUpHost = 'assp.sourceforge.net';
27$gripListDownUrl =~ s/\*HOST\*/$gripListUpHost/o;
28$gripListUpUrl  =~ s/\*HOST\*/$gripListUpHost/o;
29
30our %Config;
31our $ReplaceOldSpamdb=1;
32our $onlyNewCorrected=0;
33
34our $HeaderValueRe=qr/[ \t]*[^\r\n]*(?:\r?\n[ \t]+\S[^\r\n]*)*(?:\r?\n)?/o;
35# IP address representations
36our $IPprivate;
37our $IPQuadSectRE;
38our $IPQuadSectDotRE;
39our $IPQuadRE;
40our $IPStrictQuadRE;
41
42# Host
43our $IPSectRe;
44our $IPSectHexRe;
45our $IPSectDotRe;
46our $IPSectHexDotRe;
47our $IPRe;
48our $IPv4Re;
49our $IPv6Re;
50our $IPv6LikeRe;
51our $PortRe;
52our $HostRe;
53our $HostPortRe;
54# IP address representations
55my $sep;
56my $v6Re = '[0-9A-Fa-f]{1,4}';
57$IPSectRe = '(?:25[0-5]|2[0-4]\d|1\d\d|0?\d?\d)';
58$IPSectHexRe = '(?:(?:0x)?(?:[A-Fa-f][A-Fa-f0-9]?|[A-Fa-f0-9]?[A-Fa-f]))';
59
60$IPprivate  = '^(?:0{1,3}\.0{1,3}\.0{1,3}\.0{1,3}|127(?:\.'.$IPSectRe.'){3}|169\.254(?:\.'.$IPSectRe.'){2}|0?10(?:\.'.$IPSectRe.'){3}|192\.168(?:\.'.$IPSectRe.'){2}|172\.0?1[6-9](?:\.'.$IPSectRe.'){2}|172\.0?2[0-9](?:\.'.$IPSectRe.'){2}|172\.0?3[01](?:\.'.$IPSectRe.'){2})$';   #RFC 1918 decimal
61$IPprivate .= '|^(?:(?:0x)?0{1,2}\.(?:0x)?0{1,2}\.(?:0x)?0{1,2}\.(?:0x)?0{1,2}|(?:0x)?7[Ff](?:\.'.$IPSectHexRe.'){3}|(?:0x)?[aA]9\.(?:0x)?[Ff][Ee](?:\.'.$IPSectHexRe.'){2}|(?:0x)?0[aA](?:\.'.$IPSectHexRe.'){3}|(?:0x)?[Cc]0\.(?:0x)?[Aa]8(?:\.'.$IPSectHexRe.'){2}|(?:0x)[Aa][Cc]\.(?:0x)1[0-9a-fA-F](?:\.'.$IPSectHexRe.'){2})$';   #RFC 1918 Hex
62$IPprivate .= '|^(?:0{0,4}:){2,6}'.$IPprivate.'$';  # privat IPv4 in IPv6
63$IPprivate .= '|^(?:0{0,4}:){2,7}[1:]?$';  # IPv6 loopback and universal
64
65$IPQuadSectRE='(?:0([0-7]+)|0x([0-9a-fA-F]+)|(\d+))';
66$IPQuadSectDotRE='(?:'.$IPQuadSectRE.'\.)';
67$IPQuadRE=qr/$IPQuadSectDotRE?$IPQuadSectDotRE?$IPQuadSectDotRE?$IPQuadSectRE/o;
68
69
70
71$IPSectDotRe = '(?:'.$IPSectRe.'\.)';
72$IPSectHexDotRe = '(?:'.$IPSectHexRe.'\.)';
73$IPv4Re = qr/(?:
74(?:$IPSectDotRe){3}$IPSectRe
75|
76(?:$IPSectHexDotRe){3}$IPSectHexRe
77)/xo;
78
79# privat IPv6 addresses
80$IPprivate .= <<EOT;
81|^(?i:FE[89A-F][0-9A-F]):
82(?:
83(?:(?:$v6Re:){6}(?:                                $v6Re      |:))|
84(?:(?:$v6Re:){5}(?:                   $IPv4Re |   :$v6Re      |:))|
85(?:(?:$v6Re:){4}(?:                  :$IPv4Re |(?::$v6Re){1,2}|:))|
86(?:(?:$v6Re:){3}(?:(?:(?::$v6Re)?    :$IPv4Re)|(?::$v6Re){1,3}|:))|
87(?:(?:$v6Re:){2}(?:(?:(?::$v6Re){0,2}:$IPv4Re)|(?::$v6Re){1,4}|:))|
88(?:(?:$v6Re:)   (?:(?:(?::$v6Re){0,3}:$IPv4Re)|(?::$v6Re){1,5}|:))|
89                (?:(?:(?::$v6Re){0,4}:$IPv4Re)|(?::$v6Re){1,6}|:)
90)\$
91EOT
92$IPprivate = qr/$IPprivate/xo;
93
94# RFC4291, section 2.2, "Text Representation of addresses"
95$sep = '[:-]';
96$IPv6Re = $IPv6LikeRe = <<EOT;
97(?:
98(?:(?:$v6Re$sep){7}(?:                                         $v6Re      |$sep))|
99(?:(?:$v6Re$sep){6}(?:                         $IPv4Re |   $sep$v6Re      |$sep))|
100(?:(?:$v6Re$sep){5}(?:                     $sep$IPv4Re |(?:$sep$v6Re){1,2}|$sep))|
101(?:(?:$v6Re$sep){4}(?:(?:(?:$sep$v6Re)?    $sep$IPv4Re)|(?:$sep$v6Re){1,3}|$sep))|
102(?:(?:$v6Re$sep){3}(?:(?:(?:$sep$v6Re){0,2}$sep$IPv4Re)|(?:$sep$v6Re){1,4}|$sep))|
103(?:(?:$v6Re$sep){2}(?:(?:(?:$sep$v6Re){0,3}$sep$IPv4Re)|(?:$sep$v6Re){1,5}|$sep))|
104(?:(?:$v6Re$sep)   (?:(?:(?:$sep$v6Re){0,4}$sep$IPv4Re)|(?:$sep$v6Re){1,6}|$sep))|
105(?:        $sep    (?:(?:(?:$sep$v6Re){0,5}$sep$IPv4Re)|(?:$sep$v6Re){1,7}|$sep))
106)
107EOT
108
109$IPv6Re =~ s/\Q$sep\E/:/go;
110$IPv6Re = qr/$IPv6Re/xo;
111$IPv6LikeRe = qr/$IPv6LikeRe/xo;
112
113$IPRe = qr/(?:
114$IPv4Re
115|
116$IPv6Re
117)/xo;
118our $EmailDomainRe;
119$EmailDomainRe=qr/(?:\w[\w\.\-]*\.\w\w+|\[[\d\.]*\.\d+\])/o;
120# re for a single port - could be number 1 to 65535
121$PortRe = qr/(?:(?:[1-6]\d{4})|(?:[1-9]\d{0,3}))/o;
122# re for a single host - could be an IP a name or a fqdn
123$HostRe = qr/(?:(?:$IPv4Re|\[?$IPv6Re\]?)|$EmailDomainRe|\w\w+)/o;
124$HostPortRe = qr/$HostRe:$PortRe/o;
125
126our %m;
127our %GpOK;
128if ($CanUseLWP) {
129#        my $ver        = eval('LWP::Simple->VERSION');
130#        print "LWP::Simple $ver installed - download griplist available\n" ;
131    } elsif ( !$AvailLWP ) {
132        print "LWP::Simple module not installed - download griplist not available\n";
133    }
134#use warnings;
135
136#no output buffering to screen
137*STDOUT->autoflush();
138
139#holy predeclarations Batman!
140use vars qw(
141    $autoCorrectCorpus $base $DropList $correctednotspam $correctednotspamcount $correctedspam
142    $correctedspamcount $discarded $DoDropList $DoNotCollectRed $EmailAdrRe $EmailDomainRe $EmailFrom $EmailAdminReportsTo $griplist $HamWordCount
143    $KeepwhitelistedSpam $lowernorm $logfile $Log $LogDateFormat $maillogExt $MaxBytes $MaxCorrectedDays $MaxBayesFileAge $MaxNoBayesFileAge  $MaintBayesCollection $MaxFiles $MaxKeepDeleted $MaxWhitelistDays
144    $MaxWhitelistLength  $maintbayescollection $minimumfiles $minimumdays $mydb $myhost $mypassword $myuser $myName $notspamlog $processTime
145    $notspamlogcount $npRe $incomingOkMail $OrderedTieHashSize $pbdbfile $proxyserver $proxyuser $proxypass $noGriplist
146    $RebuildLog $rebuildrun $redlistdb $redRe $redReRE $resendmail $setFilePermOnStart $silent $spamdb $spamdberror $spamdbFile $spamdberrorFile $RegExLength
147    $spam $spamdbFname $spamdberrorFname $spamlog $spamlogcount $SpamWordCount $starttime
148    $usesubject $Whitelistrb_cleanFreq $whitelistdb $WhitelistObject $RebuildNotify $RedlistObject $viruslog $whiteRe $whiteReRE $wildcardUser
149    %HamHash %Helo %Redlist %spam %SpamHash %Whitelist $asspLog $DoNotCollectRedList $DoNotCollectRedRe
150    $DoFullGripDownload $UseLocalTime $uppernorm $TrashObject %Trashlist
151    $runAsUser $runAsGroup
152);
153# load from command line if specified
154if($ARGV[0]) {
155 $base=$ARGV[0];
156} else {
157 # the last one is the one used if all else fails
158 $base = cwd();
159 unless (-e "$base/assp.cfg") {
160   foreach ('.','/usr/local/assp','/home/assp','/etc/assp','/usr/assp','/applications/assp','/assp','/var/db/assp') {
161    if (-e "$_/assp.cfg") {
162      $base=$_;
163      last ;
164    }
165   }
166 }
167 $base = cwd() if $base eq '.';
168}
169unless (chdir $base) {
170print
171"Usage:
172  perl rebuildspamdb.pl  c:\\assp  -- runs the programm in basedirectory c:\\assp\n
173";
174 die "Abort: unable to change to basedirectory $base";
175}
176$silent = 1 if (lc $ARGV[1] =~ /silent/i ||  lc $ARGV[0] =~ /silent/i);
177
178#load configuration options from assp.cfg file
179&loadconfig();
180
181fork() && exit;
182
183# open log file
184if ( -e "$rebuildrun.bak" ) {
185    unlink("$rebuildrun.bak") or die "unable to remove file: $!";
186}
187if ( -e $rebuildrun ) {
188    copy( $rebuildrun, "$rebuildrun.bak" ) or die "unable to copy file for: $!";
189}
190open( $RebuildLog, '>', "$rebuildrun" ) or die "unable to open file for logging: $!";
191
192our $silentlog;
193$starttime = time;
194&rb_printlog("\n");
195for ( my $c = 10; $c >= 1; $c-- ) { &rb_printlog(q{*}); }
196my $savesilent=$silent;
197$silent=0;
198rb_printlog (&timestring(time) . " RebuildSpamDB $VERSION $modversion is starting;\n") ;
199$silent=$savesilent;
200
201&rb_printlog( "\nRunning in $myName basedirectory '$base'\n");
202#-- check if running as root
203&rb_printlog( "Running as root!!\n") if $< == 0 && $^O ne "MSWin32";
204
205#-- print username
206&rb_printlog( "Running as user '" . (getpwuid($<))[0] . "'\n") if $< != 0 && $^O ne "MSWin32";
207
208&rb_printlog("\n--- ASSP $myName Settings ---\n");
209
210my $AvailTieRDBM  = eval "use Tie::RDBM; 1";    # Is the required module installed?
211my $CanUseTieRDBM = $AvailTieRDBM;              # this looks wierd but it's the only way it works
212undef $AvailTieRDBM;
213$EmailAdrRe    = "[^()<>@,;:\\\"\\[\\]\000-\040]+";
214$EmailDomainRe = '(?:\w[\w\.\-]*\.\w+|\[[\d\.]*\.\d+\])';
215
216# set counts
217$HamWordCount          = $SpamWordCount = $correctedspamcount = 0;
218$correctednotspamcount = $spamlogcount  = $notspamlogcount    = 0;
219
220# read old norm
221our $Normfile;
222our ($oldnorm, $oldcorrectedspamcount, $oldcorrectednotspamcount, $oldspamlogcount, $oldnotspamlogcount);
223open( $Normfile, '<', "$base/normfile" ) || warn "unable to open $base/normfile: $!\n";
224if ($Normfile) {
225	while (<$Normfile>) {
226    	($oldnorm, $oldcorrectedspamcount, $oldcorrectednotspamcount, $oldspamlogcount, $oldnotspamlogcount) = split(" ",$_);
227    }
228    close $Normfile;
229}
230
231
232
233if ($DoNotCollectRedList) {
234    &rb_printlog(
235        "Do Not Collect Messages with redlisted address: Enabled\n**Messages with redlisted addresses will be removed from the corpus!**\n\n"
236    );
237}
238
239&rb_printlog("Maxbytes: $MaxBytes \n");
240&rb_printlog("Maxfiles: $MaxFiles \n");
241
242
243#rebuild various cache files and lists
244&repair();
245
246# Let's rb_clean the old deleted entries
247
248 &rb_cleanTrashlist();
249
250# Let's rb_clean the non bayesian folder of old entries
251# Let's rb_clean the bayesian folder of old entries
252# Let's rb_clean the corrected spam/notspam folder of old entries
253
254 &rb_cleanUpCollection();
255
256
257# name, contents, refrence to "compiled" object
258#&compileregex( "whiteRe", $whiteRe, \$whiteReRE );
259&compileregex( "redRe",   $redRe,   \$redReRE );
260
261# redlist,whitelist
262&createlistobjects();
263
264# isspam?, path, filter, weight, processing sub
265$correctedspamcount    = &processfolder( 1, $correctedspam,    "*",      2, \&dospamhash );
266$correctednotspamcount = &processfolder( 0, $correctednotspam, "*",      4, \&dohamhash );
267$spamlogcount          = &processfolder( 1, $spamlog,          "*", 1, \&checkspam );
268$notspamlogcount       = &processfolder( 0, $notspamlog,       "*", 1, \&checkham );
269our $norm = $HamWordCount ? ( $SpamWordCount / $HamWordCount ) : 1;
270$norm = sprintf("%.4f",$norm);
271open( my $normFile, '>', "$base/normfile" ) || warn "unable to open $base/normfile: $!\n";
272if ($normFile) {
273    print { $normFile } "$norm $correctedspamcount $correctednotspamcount $spamlogcount $notspamlogcount";
274    close $normFile;
275}
276
277# Create Bayesian DB
278&generatescores();
279our %HeloBlack;
280our $HeloBlackObject = tie %HeloBlack, 'orderedtie', "$base/$spamdb.helo";
281# Create HELo blacklist
282&createheloblacklist();
283&rb_printlog(
284    "\nSpam Weight:\t   " . commify($SpamWordCount) . "\nNot-Spam Weight:   " . commify($HamWordCount) . "\n\n" );
285if ( !($norm) ) {    #invalid norm
286    &rb_printlog("Warning: Corpus insufficent to calculate normality!\n");
287}
288else {               #norm exists, print it
289
290
291        my $normdesc = '';
292
293        &rb_printlog( "Wanted Corpus norm:\t%.4f %s $normdesc \n", $autoCorrectCorpus  );
294        &rb_printlog( "Reached Corpus norm:\t%.4f %s $normdesc \n", $norm  );
295
296
297}
298
299
300$lowernorm = 0.5 if $lowernorm && ($lowernorm > 1 or $lowernorm < 0.5);
301
302
303if   ( time - $starttime != 0 ) { $processTime = time - $starttime; }
304else                            { $processTime = 1; }
305&rb_printlog( "\nTotal processing time: %d second(s)\n\n", $processTime );
306
307&downloadGripConf();
308&uploadgriplist() if ! $noGriplistUpload && !$noGriplist;
309&downloadgriplist() if ! $noGriplistDownload && !$noGriplist;
310
311
312&downloaddroplist();
313
314$savesilent=$silent;
315$silent=0;
316&rb_printlog( "\n");
317&rb_printlog( &timestring(time) . " RebuildSpamDB $VERSION $modversion ended;\n");
318$silent=$savesilent;
319&rb_printlog( "Sending Notify to $RebuildNotify\n") if $RebuildNotify;
320&rb_printlog( "Sending Notify not possible, address in RebuildNotify missing\n") if !$RebuildNotify;
321close $RebuildLog;
322if ($RebuildNotify) {
323        &sendNotification(
324          $EmailFrom,
325          $RebuildNotify,
326          'RebuildSpamDB - report',
327          "File rebuildrun.txt follows:\r\n\r\n",
328          "$base/rebuildrun.txt");
329    }
330
331##########################################
332#           script ends here
333##########################################
334sub createlistobjects {
335
336    if ( $CanUseTieRDBM && $whitelistdb =~ /mysql/ && !$KeepwhitelistedSpam ) {
337        eval {
338            $WhitelistObject = tie %Whitelist, 'Tie::RDBM', "dbi:mysql:database=$mydb;host=$myhost",
339                { user => "$myuser", password => "$mypassword", table => 'whitelist', create => 0 };
340        };
341        if ($EVAL_ERROR) {
342            &rb_printlog("whitelist mysql error: $@");
343            $CanUseTieRDBM = 0;
344            $whitelistdb   = "whitelist";
345        }
346    }
347    elsif ( !$KeepwhitelistedSpam ) {
348        if ( -e $whitelistdb ) { $WhitelistObject = tie( %Whitelist, 'orderedtie', "$whitelistdb" ); }
349    }
350    if ( $CanUseTieRDBM && $redlistdb =~ /mysql/ && ( $DoNotCollectRed || $DoNotCollectRedList ) ) {
351        eval {
352            $RedlistObject = tie %Redlist, 'Tie::RDBM', "dbi:mysql:database=$mydb;host=$myhost",
353                { user => "$myuser", password => "$mypassword", table => 'redlist', create => 0 };
354        };
355        if ($EVAL_ERROR) {
356            &rb_printlog("redlist mysql error: $@");
357            $CanUseTieRDBM = 0;
358            $redlistdb     = "redlist";
359        }
360    }
361    elsif ($DoNotCollectRed) {
362        if ( -e $redlistdb ) { $RedlistObject = tie( %Redlist, 'orderedtie', "$redlistdb" ); }
363    }
364    return;
365} ## end sub createlistobjects
366
367sub generatescores {
368    my ( $t, $s, @result, $pair, $v );
369    &rb_printlog("\nGenerating weighted Bayesian tuplets...");
370    open( $spamdbFile, '>', "$spamdb.tmp" ) || die "unable to open $spamdb.tmp: $!\n";
371    binmode $spamdbFile;
372    print { $spamdbFile } "\n";
373    while ( ( $pair, $v ) = each(%spam) ) {
374        ( $s, $t ) = split( q{ }, $v );
375        $t = ( $t - $s ) * $norm + $s;    # normalize t
376        if ( $t < 5 ) {
377
378            #$unknowns+=$s; $unknownt+=$t;
379            next;
380        }
381
382        # if token represents all spam or all ham then square its value
383        if ( $s == $t || $s == 0 ) {
384            $s = $s * $s;
385            $t = $t * $t;
386        }
387        $v = ( 1 + $s ) / ( $t + 2 );
388        $v = sprintf( "%.7f", $v );
389        $v = '0.9999999' if $v >= 1;
390        $v = '0.0000001' if $v <= 0;
391        push( @result, "$pair\002$v\n" ) if abs( $v - .5 ) > .09;
392    }
393    &rb_printlog("done\n");
394    undef %spam;    # free some memory
395    &rb_printlog("\nSaving rebuilt SPAM database...");
396    for ( sort @result ) { print { $spamdbFile } $_; }
397    close $spamdbFile;
398    if ( -e "$spamdb.bak" ) { unlink("$spamdb.bak") || &rb_printlog("unable to remove '$spamdb.bak' $!\n") }
399    if ( -e $spamdb ) {
400        rename( $spamdb, "$spamdb.bak" ) || &rb_printlog("unable to rename '$spamdb' to '$spamdb.bak' $!\n");
401    }
402    rename( "$spamdb.tmp", $spamdb ) || &rb_printlog("unable to rename '$spamdb.tmp' to '$spamdb' $!\n");
403    &rb_printlog("done\n");
404    my $filesize = -s "$spamdb";
405    &rb_printlog( "\nResulting file '$spamdbFname' is " . commify($filesize) . " bytes\n" );
406    my $pairs = scalar @result;
407    &rb_printlog( "Bayesian Pairs: " . commify($pairs) . "\n" );
408    return;
409} ## end sub generatescores
410
411
412sub createheloblacklist {
413    my (@Helo);
414    open( my $FheloBlack, '>', "$spamdb.helo.tmp" ) || &rb_printlog("unable to open '$spamdb.helo.tmp' $!\n");
415    binmode $FheloBlack;
416    print { $FheloBlack } "\n";
417    my $allcount = 0;
418    my $notnew = 0;
419    while ( my ( $helostr, $weights ) = each(%Helo) ) {
420
421		if ( $weights->[1] / ( $weights->[0] + $weights->[1] + .1 ) > .98 ) { 	push( @Helo, "$helostr\0021\n" ); }
422		elsif 	( $weights->[1] / ( $weights->[0] + $weights->[1] + .1 ) < .2 ) { push( @Helo, "$helostr\0020.01\n" ); }
423
424	}
425    print { $FheloBlack } sort @Helo;
426    eval{close $FheloBlack;};
427    &rb_printlog( "\nHELO Blacklist: " . scalar(@Helo) . " HELOs\n" );
428    if ( -e "$spamdb.helo.bak" ) {
429        unlink("$spamdb.helo.bak") || &rb_printlog("unable to remove '$spamdb.helo.bak' $!\n");
430    }
431    if ( -e "$spamdb.helo" ) {
432        rename( "$spamdb.helo", "$spamdb.helo.bak" )
433            || &rb_printlog("unable to rename '$spamdb.helo' to '$spamdb.helo.bak' $!\n");
434    }
435    rename( "$spamdb.helo.tmp", "$spamdb.helo" )
436        || &rb_printlog("unable to rename '$spamdb.helo.tmp' to '$spamdb.helo' $!\n");
437    return;
438}
439
440sub loadconfig {
441
442    my $RCF;
443    open($RCF,"<$base/assp.cfg");
444    while (<$RCF>) {
445        s/\r|\n//go;
446    	my ($k,$v) = split(/:=/o,$_,2);
447        next unless $k;
448        $Config{$k} = $v;
449    }
450    close $RCF;
451    $TrashObject       = tie %Trashlist,   'orderedtie', "$base/trashlist.db";
452
453    $DoDropList     	 = $Config{ DoDropList };
454    $runAsUser			 = $Config{ runAsUser };
455    $runAsGroup			 = $Config{ runAsGroup };
456    $correctednotspam    = $Config{ correctednotspam } && "$Config{base}/$Config{correctednotspam}" || 'errors/notspam';
457    $correctedspam       = $Config{ correctedspam } && "$Config{base}/$Config{correctedspam}" || 'errors/spam';
458	$incomingOkMail      		 = $Config{ incomingOkMail } && "$Config{base}/$Config{incomingOkMail}" || 'okmail';
459    $DoNotCollectRed     = $Config{ DoNotCollectRed };
460    $DoNotCollectRedRe   = $Config{ DoNotCollectRedRe };
461    $DoNotCollectRedList = $Config{ DoNotCollectRedList };
462    $KeepwhitelistedSpam = $Config{ KeepwhitelistedSpam };
463    $logfile             = $Config{ logfile } && "$Config{base}/$Config{logfile}" || 'maillog.txt';
464	$Log                 = $Config{ logfile } && "$Config{base}/$Config{logfile}" || 'maillog.txt';
465    $maillogExt          = $Config{ maillogExt };
466    $MaxBytes            = $Config{ MaxBytes } || 10000;
467    $MaxFiles            = $Config{ MaxFiles } || 14000;
468
469    $MaxWhitelistDays    = $Config{ MaxWhitelistDays } || 90;
470    $MaxCorrectedDays    		= $Config{ MaxCorrectedDays } || 1000;
471    $MaxNoBayesFileAge	 		= $Config{ MaxNoBayesFileAge } || 30;
472    $MaxBayesFileAge	 		= $Config{ MaxBayesFileAge };
473    $MaintBayesCollection	 	= $Config{ MaintBayesCollection } || 1;
474    $maintbayescollection		= $MaintBayesCollection;
475    $MaxWhitelistLength  	= $Config{ MaxWhitelistLength } || 60;
476	$MaxKeepDeleted  		= $Config{ MaxKeepDeleted } || 0;
477    $notspamlog          	= $Config{ notspamlog } && "$Config{base}/$Config{notspamlog}" || 'notspam';
478    $npRe                = $Config{ npRe };
479    $OrderedTieHashSize  = $Config{ OrderedTieHashSize } || 10_000;
480    $pbdbfile            = $Config{ pbdb };
481
482    $proxyserver         = $Config{ proxyserver };
483    $proxyuser			 = $Config{ proxyuser };
484    $proxypass			 = $Config{ proxypass };
485    $resendmail			 = $Config{ resendmail };
486    $redlistdb           = $Config{ redlistdb } && "$Config{base}/$Config{redlistdb}" || 'redlist';
487    $redRe               = $Config{ redRe };
488	$myName				 = $Config{ myName };
489    $setFilePermOnStart	 = $Config{ setFilePermOnStart };
490    $spamdb              = $Config{ spamdb } && "$Config{base}/$Config{spamdb}" || 'spamdb';
491    $spamdbFname         = $Config{ spamdb } || 'spamdb';
492    $spamdberrorFname    = $spamdbFname.'error';
493    $spamdberror    	 = $spamdb.'error';
494    $spamlog             = $Config{ spamlog } && "$Config{base}/$Config{spamlog}" || 'spam';
495    $discarded           = $Config{ discarded } && "$Config{base}/$Config{discarded}" || 'discarded';
496	$viruslog            = $Config{ viruslog } && "$Config{base}/$Config{viruslog}" || 'viruslog';
497    $usesubject          = $Config{ UseSubjectsAsMaillogNames };
498    $whitelistdb         = $Config{ whitelistdb } && "$Config{base}/$Config{whitelistdb}" || 'whitelist';
499	$griplist			 = $Config{ griplist };
500	$DropList            = $Config{ DropList } || 'file:files/droplist.txt';
501	($DropList) 		 = $DropList =~ /^ *file: *(.+)/i if $DropList =~ /^ *file:/;
502    $noGriplist    		 = $Config{ noGriplist };
503	$noGriplistUpload    = $Config{ noGriplistUpload };
504	$noGriplistDownload  = $Config{ noGriplistDownload };
505    $asspLog             = $Config{ asspLog };
506    $whiteRe             = $Config{ whiteRe };
507    $wildcardUser        = $Config{ wildcardUser };
508    $mydb                = $Config{ mydb };
509    $myhost              = $Config{ myhost };
510    $myuser              = $Config{ myuser };
511    $mypassword          = $Config{ mypassword };
512    $rebuildrun          = &fixPath($base) . "/rebuildrun.txt";
513    $EmailAdminReportsTo = $Config{ EmailAdminReportsTo };
514    $RebuildNotify		 = $Config{ RebuildNotify };
515
516
517    $autoCorrectCorpus	 = $Config{ autoCorrectCorpus } || "1";
518
519
520	$minimumfiles = 10000 if !$minimumfiles;
521	$minimumfiles = 5000 if  $minimumfiles < 5000;
522	$minimumdays = 14 if !$minimumdays;
523	$minimumdays = 7 if  $minimumdays < 7;
524
525    $EmailFrom		 	 = $Config{ EmailFrom };
526    $RegExLength         = $Config{ RegExLength };
527    $UseLocalTime        = $Config{ UseLocalTime };
528    $LogDateFormat		 = $Config{ LogDateFormat } || "MMM-DD-YY hh:mm:ss";
529
530    $DoFullGripDownload  = $Config{ DoFullGripDownload };
531
532    return;
533} ## end sub loadconfig
534
535sub processfolder {
536    my ( $fldrType, $fldrpath, $filter, $weight, $sub ) = @_;
537    my ( $count, $processFolderTime, $folderStartTime, $fileCount, @files );
538    our ( $WhiteCount, $RedCount );
539    $folderStartTime = time;
540    $fldrpath        = &fixPath($fldrpath);
541    &rb_printlog( "\n" . $fldrpath . "\n" );
542    $fldrpath .= "/*";
543    $fileCount = &countfiles($fldrpath);
544    &rb_printlog( "File Count:\t" . commify($fileCount) );
545    &rb_printlog("\nProcessing...");
546    $count = $WhiteCount = 0;
547    @files = glob($fldrpath);
548    my $importmaxfiles = $MaxFiles;
549
550	my $percent;
551
552
553	my $filenum;
554
555
556
557
558
559    #while( glob($fldrpath) && $count <= $MaxFiles ) {
560    foreach my $file (@files) {
561
562        &add( $fldrType, $file, $weight, $sub );
563        last if  ($spamlogcount && $autoCorrectCorpus && !$fldrType && $SpamWordCount && ($HamWordCount * $autoCorrectCorpus) > $SpamWordCount);
564        $count++;
565        last if $count >= $importmaxfiles;    #too many files
566    }
567    if   ( time - $folderStartTime != 0 ) { $processFolderTime = time - $folderStartTime; }
568    else                                  { $processFolderTime = 1; }
569    $count = $count - $WhiteCount ;
570
571
572    if ($WhiteCount) {
573        &rb_printlog( "\nRemoved White:\t" . commify($WhiteCount) );
574    }
575
576    &rb_printlog( "\nImported Files:\t" . commify($count) );
577    &rb_printlog( "\nImported SpamWordCount(total):\t" . commify($SpamWordCount) );
578    &rb_printlog( "\nImported HamWordCount(total):\t" . commify($HamWordCount) );
579
580
581    if ( $count  > $MaxFiles ) {
582        $maintbayescollection = 1;
583    }
584
585    #&rb_printlog( "\n " . commify($SpamWordCount) . " spam weight \n " . commify($HamWordCount) . " non-spam weight." );
586    &rb_printlog("\nFinished in $processFolderTime second(s)\n");
587
588    return $count;
589} ## end sub processfolder
590
591sub countfiles {
592    my ($fldrpath) = @_;
593    my @fileCount = glob("$fldrpath");
594    return scalar(@fileCount);
595}
596
597sub commify {
598    local $_ = shift;
599    1 while s/^([-+]?\d+)(\d{3})/$1,$2/;
600    return $_;
601}
602
603sub hash {
604    my ($msgText) = @_;
605
606    my ( $head, $body );
607
608    # creates a md5 hash of $msg body
609    if ( $msgText =~ /^(.*?)\n\r?\n(.*)/s ) {
610
611        $head = $1;
612        $body = $2;
613
614        return md5_hex($body);
615    }
616    else {
617
618        #return q;
619        #There is no split, the message has no valid body
620        return md5_hex($msgText);
621    }
622
623    #return $value;
624    return;
625}
626
627sub dospamhash {
628    my ( $FileName, $msgText ) = @_;
629    $SpamHash{ &hash($msgText) } = '1';
630    return;
631}
632
633sub dohamhash {
634    my ( $FileName, $msgText ) = @_;
635    $HamHash{ &hash($msgText) } = q{};
636    return;
637}
638
639sub checkspam {
640    my ( $FileName, $msgText ) = @_;
641    our $HamHash;
642    $msgText = &hash($msgText);
643    my ( $return, $reason );
644    if ( defined( $HamHash{ $msgText } ) ) {
645
646        # we've found a message in the spam database that is the same as one in the corrected Ham group
647        my $fn = shift;
648        &deletefile( $fn, "found in $correctednotspam" );
649        return 1;
650    } elsif ( $reason = &redlisted( $_[1] ) ) {
651        my $fn = shift;
652        &deletefile( $fn, $reason );
653        return 1;
654
655    } elsif ( $reason = &whitelisted( $_[1] ) ) {
656        my $fn = shift;
657        &deletefile( $fn, $reason );
658        return 1;
659    }
660    return 0;
661}
662
663sub checkham {
664    my ( $FileName, $msgText ) = @_;
665    our $SpamHash;
666    my ( $return, $reason );
667    $msgText = &hash($msgText);
668    if ( defined( $SpamHash{ $msgText } ) ) {
669
670        # we've found a message in the ham database that is the same as one in the corrected spam group
671        my $fn = shift;
672        &deletefile( $fn, "found in $correctedspam" );
673        return 1;
674
675    }
676    return 0;
677}
678
679sub getrecontent {
680    my ( $value, $name ) = @_;
681    my $fromfile = 0;
682    if ( $value =~ /^ *file: *(.+)/i ) {
683
684        # the option list is actually saved in a file.
685        $fromfile = 1;
686        my $fil = $1;
687        $fil = "$base/$fil" if $fil !~ /^\Q$base\E/i;
688        local $/;
689        if ( open( my $File, '<', $fil ) ) {
690            $value = <$File>;
691
692            # rb_clean off comments
693            $value =~ s/#.*//g;
694
695            # replace newlines (and the whitespace that surrounds them) with a |(pipe character)
696            $value =~ s/\s*\n\s*/|/g;
697            close $File;
698        }
699        else { $value = q{}; }
700    }
701    $value =~ s/\|\|/\|/g;
702    $value =~ s/\s*\|/\|/g;
703    $value =~ s/\|\s*/\|/g;
704    $value =~ s/\|\|+/\|/g;
705    $value =~ s/^\s*\|?//;
706    $value =~ s/\|?\s*$//;
707    $value =~ s/\|$//;
708    return $value;
709} ## end sub getrecontent
710
711sub batv_remove_tag {
712    my $mailfrom = shift;
713
714    if ($mailfrom =~ /^(prvs=.*=)(.*)/o) {
715
716        $mailfrom = lc $2;
717
718    }
719    return $mailfrom;
720}
721
722sub whitelisted {
723    return 0 if $KeepwhitelistedSpam;
724    my $m = shift;
725    my $curaddr;
726    my %seen;
727
728
729    $m =~ s/\n\r?\n.*//s;    # remove body
730    while ( $m =~ 	/(?:from|sender|reply-to|errors-to|envelope-from|list-\w+):($HeaderValueRe)/igo) {
731    	my $s = $1;
732
733    	if ($s !~ /($EmailAdrRe\@$EmailDomainRe)/io) {
734    		next;
735    	} else {
736            $curaddr = batv_remove_tag($1);
737
738   		}
739
740        if ( exists $seen{ $curaddr } ) {
741
742            next;                #we already checked this address
743        } else {
744        	$seen{ $curaddr } = 1;
745        }
746
747        if ( $Whitelist{ $curaddr } ) {
748            my $reason = $curaddr;
749            $reason =~ s/\s+$/ /g;
750            $reason =~ s/[\r\n]/ /g;
751            our $WhiteCount++;
752            return ( " -- '$reason' is in Whitelist");
753        }
754        if ($wildcardUser) {
755            my ( $mfdd, $alldd, $reason );
756            $mfdd = $1 if $curaddr =~ /(\@.*)/;
757            $alldd = "$wildcardUser$mfdd";
758            if ( $Whitelist{ lc $alldd } ) {
759                $reason = $curaddr;
760                $reason =~ s/\s+$/ /g;
761                $reason =~ s/[\r\n]/ /g;
762                our $WhiteCount++;
763                return ( " -- '$reason' is in Whitelist ($wildcardUser)");
764            }
765        }
766    } ## end while ( $m =~ /($EmailAdrRe\@$EmailDomainRe)/igo)
767    return 0;
768} ## end sub whitelisted
769
770sub redlisted {
771    my $m = shift;
772    my (%seen);
773	my $isasspheader;
774
775    if ( $DoNotCollectRedList) {
776        $m =~ s/\n\r?\n.*//s;                            # remove body
777        while ( $m =~ /($EmailAdrRe\@$EmailDomainRe)/igo ) {
778            my $curaddr = lc($1);
779
780            #$curaddr = lc( $1 . $2 );
781            if ( exists $seen{ $curaddr } ) {
782                next;                                    #we already checked this address
783            }
784            else { $seen{ $curaddr } = 1; }
785            if ( $Redlist{ $curaddr } ) {
786                my $reason = $curaddr;
787                $reason =~ s/\s+$/ /g;
788                $reason =~ s/[\r\n]/ /g;
789                our $RedCount++;
790                return ( " -- '$reason' is in Redlist");
791            }
792        }
793    }
794    return 0;
795} ## end sub redlisted
796
797sub deletefile {
798    my ( $fn, $reason, $nolog, $notrashlist ) = @_;
799
800	if ( -e $fn ) {
801        	if ( -w $fn || -W $fn ) {
802            	&rb_printlog( "\nremoving " . $fn . q{ } . $reason );
803            	if ($MaxKeepDeleted  && !$notrashlist ) {
804    				$Trashlist{$fn}=time;
805    			} else {
806
807            		unlink($fn);
808
809            	}
810        	} else {
811        		rb_printlog( "\ncannot delete " . $reason . " message " . $fn . ": file is not writable: $!" ) ;
812			}
813	} else {
814    		rb_printlog( "\ncannot delete " . $reason . " message " . $fn . ": $!" ) if !$nolog;
815    }
816
817
818}
819
820
821sub getfile {
822    my ( $fn, $sub ) = @_;
823    my $message;
824    my $count;
825    my $numreadchars;
826	return if exists $Trashlist{$fn};
827    open( my $file, '<', "$fn" ) || return;
828
829#	my $dtime=(stat($fn))[9]-time;
830
831#    return if $dtime > 0;
832    # Maxbytes or 10000, whichever is less
833    $numreadchars = $MaxBytes <= 10_000 ? $MaxBytes : 10_000;
834    $count = read( $file, $message, $numreadchars );    # read characters into memory
835    close $file;
836    return if $sub->( $fn, $message );                  # have i read this before?
837
838    return $message;
839}
840
841sub add {
842    my ( $isspam, $fn, $factor, $sub ) = @_;
843
844    return if -d $fn;
845    my ( $curHelo, $CurWord, $PrevWord, $sfac, $tfac );
846    $PrevWord = $CurWord = q{};
847    my $content = &getfile( $fn, $sub );
848    return unless $content;
849    if ( $content =~ /helo=(.*?)\)/i ) {
850        $curHelo = lc($1);
851        if ( $Helo{ $curHelo } ) { $Helo{ $curHelo }->[$isspam] += $factor; }
852        else {    #it doesn't seem to exist. create it.
853            $Helo{ $curHelo }->[$isspam] = $factor;
854        }
855    }
856    my $OK;
857    ($content,$OK) = &rb_clean($content);
858    while ( $content =~ /([-\$A-Za-z0-9\'\.!\240-\377]{2,})/g ) {
859        if  ($spamlogcount && $autoCorrectCorpus && !$isspam && $SpamWordCount && ($HamWordCount * $autoCorrectCorpus) > $SpamWordCount) {
860
861        return;
862        }
863        if ( length($1) > 20 || length($1) < 2 ) { next }
864        $PrevWord = $CurWord;
865
866		$CurWord  = BayesWordrb_clean($1);
867
868        if ( !$PrevWord ) { next }  # We only want word pairs
869        if ( length($CurWord) < 2 || length($PrevWord) < 2 ) { next }    # too short after rb_cleaning
870
871        # increment global weights, they are not really word counts
872        if   ($isspam) { $SpamWordCount += $factor; }
873        else           { $HamWordCount  += $factor; }
874        if ( exists( $spam{ "$PrevWord $CurWord" } ) ) {
875            ( $sfac, $tfac ) = split( q{ }, $spam{ "$PrevWord $CurWord" } );
876        }
877        else {
878
879            # the pair does not exist, create it
880            $spam{ "$PrevWord $CurWord" } = "0 0";
881            ( $sfac, $tfac ) = split( q{ }, $spam{ "$PrevWord $CurWord" } );
882        }
883        $sfac += $isspam ? $factor : 0;
884        $tfac += $factor;
885        $spam{ "$PrevWord $CurWord" } = "$sfac $tfac";
886    } ## end while ( $content =~ /([-\$A-Za-z0-9\'\.!\240-\377]{2,})/g)
887    return;
888} ## end sub add
889
890sub Umlaute {
891	my $string = shift;
892	my %umlaute = ("ä" => "ae", "Ä" => "Ae", "ü" => "ue", "Ü" => "Ue", "ö" => "oe", "Ö" => "Oe", "ß" => "ss" );
893	my $umlautkeys = join ("|", keys(%umlaute));
894	$string =~ s/($umlautkeys)/$umlaute{$1}/g;
895	return $string;
896} ##
897
898sub BayesWordrb_clean {
899    my $word = lc(shift);
900    $word =~ s/#(?:[a-f0-9]{2})+/randcolor/go;
901    $word =~ s/^#\d+/randdecnum/go;
902
903    $word =~ s/[_\[\]\~\@\%\$\&\{\}<>#(),.'";:=!?*+\/\\\-]+$//o;
904    $word =~s/^[_\[\]\~\@\%\$\&\{\}<>#(),.'";:=!?*+\/\\\-]+//o;
905    $word =~ s/!!!+/!!/go;
906    $word =~ s/\*\*+/**/go;
907    $word =~ s/--+/-/go;
908    $word =~ s/__+/_/go;
909    $word =~ s/[\d,.]{2,}/randnumber/go;
910    $word =~ s/^[\d:\.\-+();:<>,!"'\/%]+(?:[ap]m)?$/randwildnum/o;    # ignore numbers , dates, times, versions ...
911#    $word = &Umlaute($word);
912
913    return if length($word) > 20 or length($word) < 2;
914    return $word;
915}
916# rb_clean up source email
917sub rb_clean {
918    my $m = shift;
919
920    my $msg = ref($m) ? $$m : $m;
921    my $t = time + 15;     # max 15 seconds for this rb_cleaning
922    my $body;
923    my $header;
924    my $undec = 1;
925
926
927
928    local $_= "\n". (($header) ? $header : $msg);
929    my ($helo,$rcpt);
930    if ($header) {
931        ($helo)=/helo=([^)]+)\)/io;
932        $helo=~s/(\w+)/ hlo $1 /go if length($helo) > 19; # if the helo string is long, break it up
933        $rcpt="rcpt ".join(" rcpt ",/($EmailAdrRe\@$EmailDomainRe)/go);
934        return "helo: $helo\n$rcpt\n",0 if (time > $t);
935        # mark the subject
936        $rcpt .= "\n".fixsub($1) if /\nsubject: (.*)/io;
937        return "helo: $helo\n$rcpt\n",0 if (time > $t);
938    }
939
940    # from now only do the body if possible
941    local $_ = $body if $body;
942
943    # replace &#ddd encoding
944    s/\&\#(\d{1,5})\;?/chr($1)/geo;
945    s/\&\#x(\d{1,4})\;?/chr(hex($1))/geo;
946    s/([^\\])?[%=]([0-9A-F]{2})/$1.chr(hex($2))/gieo; # replace url encoding
947    return "helo: $helo\n$rcpt\n",0 if (time > $t);
948
949    if ($undec) {
950      # replace base64 encoding
951      s/\n([a-zA-Z0-9+\/=]{40,}\r?\n[a-zA-Z0-9+\/=\r\n]+)/base64decode($1)/gseo;
952      return "helo: $helo\n$rcpt\n",0 if (time > $t);
953
954      # rb_clean up quoted-printable references
955      s/(Subject: .*)=\r?\n/$1\n/o;
956      return "helo: $helo\n$rcpt\n",0 if (time > $t);
957      s/=\r?\n//go;
958      return "helo: $helo\n$rcpt\n",0 if (time > $t);
959      # strip out mime continuation
960      s/.*---=_NextPart_.*\n//go;
961      return "helo: $helo\n$rcpt\n",0 if (time > $t);
962    }
963
964    # rb_clean up MIME quoted-printable line breakings
965    s/=\r?\n//gos;
966    return "helo: $helo\n$rcpt\n",0 if (time > $t);
967
968    # rb_clean up &nbsp; and &amp;
969    s/&nbsp;?/ /gio; s/&amp;?/and/gio;
970    return "helo: $helo\n$rcpt\n",0 if (time > $t);
971    s/(\d),(\d)/$1$2/go;
972    return "helo: $helo\n$rcpt\n",0 if (time > $t);
973    s/\r//go; s/ *\n/\n/go;
974    return "helo: $helo\n$rcpt\n",0 if (time > $t);
975    s/\n\n\n\n\n+/\nblines blines\n/go;
976    return "helo: $helo\n$rcpt\n",0 if (time > $t);
977
978    # rb_clean up html stuff
979    s/<\s*script[^>]+>.*?<\s*\/\s*script\s*>/ jscripttag /igos;
980    s/<\s*script[^>]+>/ jscripttag /igos;
981    return "helo: $helo\n$rcpt\n",0 if (time > $t);
982    # remove style sheets
983    s/<\s*style[^>]*>.*?<\s*\/\s*style\s*>//igso;
984    return "helo: $helo\n$rcpt\n",0 if (time > $t);
985
986#    while (s/(\w+)(<[^>]*>)((?:<[^>]*>)*\w+)/$2$1$3/go){return "helo: $helo\n$rcpt\n",0 if (time > $t);} # move html out of words
987    s/<\s*(?:[biu]|strong)\s*>/ boldifytext /gio;
988    return "helo: $helo\n$rcpt\n",0 if (time > $t);
989
990    # remove some tags that are not informative
991    s/<\s*\/?\s*(?:p|br|div|t[drh]|li|[uo]l|center)[^>]*>/\n/gios;
992    s/<\s*\/?\s*(?:[biuo]|strong)\s*>//gio;
993    return "helo: $helo\n$rcpt\n",0 if (time > $t);
994    s/<\s*\/?\s*(?:html|meta|head|body|span|table|font|col|map)[^>]*>//igos;
995    return "helo: $helo\n$rcpt\n",0 if (time > $t);
996    s/(<\s*a\s[^>]*>)(.*?)(<\s*\/a\s*>)/$1.fixlinktext($2)/igseo;
997    return "helo: $helo\n$rcpt\n",0 if (time > $t);
998
999    # treat titles like subjects
1000    s/<\s*title[^>]*>(.*?)<\s*\/\s*title\s*>/fixsub($1)/igeos;
1001    return "helo: $helo\n$rcpt\n",0 if (time > $t);
1002
1003    # remove html comments
1004    s/<\s*!.*?-->//gso; s/<\s*![^>]*>//go;
1005    return "helo: $helo\n$rcpt\n",0 if (time > $t);
1006
1007    # look for random words
1008    s/[ a-z0-9][ghjklmnpqrstvwxz_]{2}[bcdfghjklmnpqrstvwxz_0-9]{3}\S*/ randword /gio;
1009    return "helo: $helo\n$rcpt\n",0 if (time > $t);
1010
1011    # remove mime separators
1012    s/\n--.*?randword.*//go;
1013    return "helo: $helo\n$rcpt\n",0 if (time > $t);
1014
1015    # look for linked images
1016    s/(<\s*a[^>]*>[^<]*<\s*img)/ linkedimage $1/giso;
1017    return "helo: $helo\n$rcpt\n",0 if (time > $t);
1018    s/<[^>]*href\s*=\s*("[^"]*"|\S*)/fixhref($1)/isgeo;
1019    return "helo: $helo\n$rcpt\n",0 if (time > $t);
1020    s/(?:ht|f)tps?:\/\/(\S*)/fixhref($1)/isgeo;
1021    return "helo: $helo\n$rcpt\n",0 if (time > $t);
1022    s/(\S+\@\S+\.\w{2,5})\b/fixhref($1)/geo;
1023    s/<?\s*img .{0,50}src\s*=\s*['"]([^'"]*)['"][^>]+>/$1/gois;
1024    s/["']\s*\/?s*>|target\s*=\s*['"]_blank['"]|<\s*\/|:\/\/ //go;
1025    s/ \d{2,} / 1234 /go;
1026
1027
1028    return ("helo: $helo\n$rcpt\n$_",1);
1029
1030}
1031
1032sub rb_cleanwhite {
1033    &rb_printlog("\n---rb_cleaning whitelist ($whitelistdb)---\n");
1034
1035    &rb_printlog( "whitelist entries older than " . $MaxWhitelistDays . " days (MaxWhitelistDays) will be removed\n" );
1036    my $calcTime = time - 24 * 3600 * $MaxWhitelistDays;
1037
1038    my $wlbefore = 0;
1039    my $wlafter = 0;
1040    if ( !( $whitelistdb =~ /mysql/ ) ) {
1041        if ( open( F, "<", "$whitelistdb" ) ) {
1042            binmode(F);
1043	    $_ = <F>; # ignore blank line at start of file
1044            my $nwhite;
1045            local $/ = "\n";
1046            $nwhite = "\n";
1047            while (<F>) {
1048                chomp;
1049                $wlbefore++;
1050                my ( $adr, $time ) = split( "\002", $_ );
1051                next if ( !$time || !$adr );
1052                $adr =~ s/^\'//g;
1053                $adr =~ s/^\"//g;
1054                $adr = batv_remove_tag($adr);
1055                next if ($adr =~ m/^'/);    #skip addresses with leading ' chars
1056                next if $calcTime > $time || length($adr) > $MaxWhitelistLength;
1057                $nwhite .= "$adr\002$time\n";
1058                $wlafter++;
1059            }
1060            close F;
1061
1062                unlink "$whitelistdb.bak";
1063                rename( $whitelistdb, "$whitelistdb.bak" );
1064                open( O, ">", "$whitelistdb" );
1065                binmode(O);
1066                print O $nwhite;
1067                close O;
1068
1069        }
1070    } ## end if ( !( $whitelistdb =~...
1071    else {
1072        my %Whitelist;
1073        my $WhitelistObject;
1074        eval {
1075            $WhitelistObject = tie %Whitelist, 'Tie::RDBM', "dbi:mysql:database=$mydb;host=$myhost",
1076                { user => "$myuser", password => "$mypassword", table => 'whitelist', create => 0 };
1077        };
1078        if ($EVAL_ERROR) {
1079            &rb_printlog("whitelist mysql error: $@");
1080            $CanUseTieRDBM = 0;
1081            $whitelistdb   = "whitelist";
1082        }
1083        $wlbefore = scalar keys %Whitelist;
1084        $wlafter  = $wlbefore;
1085        while ( my ( $key, $value ) = each %Whitelist ) {
1086
1087            #my $date1 = localtime($value); #debugging stuff
1088            #my $date2 = localtime($calcTime);
1089            #print "$key=$value\n";
1090            if ( $value < $calcTime || length($key) > $MaxWhitelistLength ) {
1091                if ( $Whitelist{ $key } ) {
1092                    delete $Whitelist{ $key };
1093                    $wlafter--;
1094                }
1095            }
1096        }
1097        $WhitelistObject->flush() if $WhitelistObject && $whitelistdb !~ /mysql/;
1098
1099        #untie %Whitelist;
1100    } ## end else [ if ( !( $whitelistdb =~...
1101    &rb_printlog( "whitelist before: " . commify($wlbefore) . "\n" );
1102    &rb_printlog( "whitelist after:  " . commify($wlafter) . "\n" );
1103    return;
1104} ## end sub rb_cleanwhite
1105
1106sub dayofweek {
1107
1108    # this is mercilessly hacked from John Von Essen's Date::Day
1109    my ( $d, $m, $y ) = $_[0] =~ /(\S+) +(\S+) +(\S+)/;
1110
1111    # data for DayOfWeek function
1112    my %Months = (
1113        'Jan', 1, 'Feb', 2, 'Mar', 3, 'Apr', 4,  'May', 5,  'Jun', 6,
1114        'Jul', 7, 'Aug', 8, 'Sep', 9, 'Oct', 10, 'Nov', 11, 'Dec', 12,
1115    );
1116    my %Month = ( 1, 0, 2, 3, 3, 2, 4, 5, 5, 0, 6, 3, 7, 5, 8, 1, 9, 4, 10, 6, 11, 2, 12, 4, );
1117    my %Weekday = ( 0, 'srdSUN', 1, 'srdMON', 2, 'srdTUE', 3, 'srdWED', 4, 'srdTHU', 5, 'srdFRI', 6, 'srdSAT', );
1118    $y += 2000;
1119    $m = $Months{ $m };
1120    if ( $m <= 2 ) { $y--; }
1121    my $wday = ( ( $d + $Month{ $m } + $y + ( int( $y / 4 ) ) - ( int( $y / 100 ) ) + ( int( $y / 400 ) ) ) % 7 );
1122    return $Weekday{ $wday };
1123}
1124sub fixhref     { my $t = shift; $t =~ s/(\w+)/ href $1 /g; return $t; }
1125sub fixlinktext { my $t = shift; $t =~ s/(\w+)/atxt $1/g;   return $t; }
1126
1127sub fixurl {
1128    my $a = shift;
1129    $a =~ s/%([0-9a-fA-F][0-9a-fA-F])/pack('C',hex($1))/ge;
1130    return $a;
1131}
1132
1133sub fixsub {
1134    my $s = shift;
1135
1136    #print "$s=>";
1137    $s =~ s/ {3,}/ lotsaspaces /g;
1138    $s =~ s/(\S+)/ssub $1/g;
1139
1140    #print "$s\n";
1141    return "\n$s ssub";
1142}
1143
1144sub base64decode {
1145    my $str = shift;
1146    my $res = "\n\n";
1147    $str =~ tr|A-Za-z0-9+/||cd;
1148    $str =~ tr|A-Za-z0-9+/| -_|;
1149    while ( $str =~ /(.{1,60})/gs ) {
1150        my $len = chr( 32 + length($1) * 3 / 4 );
1151        $res .= unpack( "u", $len . $1 );
1152    }
1153    return $res;
1154}
1155
1156sub rb_printlog {
1157    return if $silentlog;
1158    my ( $text, $format ) = @_;
1159    if ( !$format ) {
1160        print "$text" unless $silent;
1161        print $RebuildLog "$text";
1162    }
1163    if ($format) {
1164        printf "$text", $format unless $silent;
1165        printf $RebuildLog "$text", $format;
1166    }
1167    return;
1168}
1169sub timestring {
1170    my ($time,$what) = @_;
1171    my @m = $time ? localtime($time) : localtime();
1172    my $tstr = $time ? scalar(localtime($time)) : scalar(localtime());
1173    my ($day,$month) = $tstr =~ /(...) (...)/;
1174    my $format = $LogDateFormat;
1175    if (lc $what eq 'd') {   # date only - remove time part from format
1176        $format =~ s/[^YMD]*(?:hh|mm|ss)[^YMD]*//g;
1177    } elsif (lc $what eq 't') { # time only - remove date part from format
1178        $format =~ s/[^hms]*(?:Y{2,4}|M{2,3}|D{2,3})[^hms]*//g;
1179    }
1180    $format =~ s/^[^YMDhms]//;
1181    $format =~ s/[^YMDhms]$//;
1182    $format =~ s/YYYY/sprintf("%04d",$m[5]+1900)/e;
1183    $format =~ s/YY/sprintf("%02d",$m[5]-100)/e;
1184    $format =~ s/MMM/$month/;
1185    $format =~ s/MM/sprintf("%02d",$m[4]+1)/e;
1186    $format =~ s/DDD/$day/e;
1187    $format =~ s/DD/sprintf("%02d",$m[3])/e;
1188    $format =~ s/hh/sprintf("%02d",$m[2])/e;
1189    $format =~ s/mm/sprintf("%02d",$m[1])/e;
1190    $format =~ s/ss/sprintf("%02d",$m[0])/e;
1191
1192    return $format;
1193}
1194
1195sub downloadGripConf {
1196
1197
1198    my $rc;
1199
1200    my $griplistUrl = "http://downloads.sourceforge.net/project/assp/griplist/griplist.conf";
1201    my $file     = "$base/griplist.conf";
1202
1203    $file		 = &fixPath($file);
1204
1205
1206    if ( !$CanUseLWP ) {
1207        &rb_printlog("$file download failed: LWP::Simple Perl module not available\n");
1208        return;
1209    }
1210
1211    if ( -e $file  ) {
1212        if ( !-r $file  ) {
1213            &rb_printlog( "$file download failed: $file  not readable!\n" );
1214            return;
1215        } elsif ( !-w $file  ) {
1216            &rb_printlog( "$file download failed: $file  not writable!\n" );
1217            return;
1218        }
1219    }
1220    else {
1221        if ( open( TEMPFILE, ">", $file ) ) {
1222            #we can create the file, this is good, now close the file and keep going.
1223            close TEMPFILE;
1224	    unlink($file);
1225        } else {
1226            &rb_printlog("$file download failed: Cannot create $file - $!\n" );
1227            return;
1228        }
1229    }
1230
1231    # Create LWP object
1232    use LWP::Simple qw(mirror is_success status_message $ua);
1233
1234    # Set useragent to Rebuild version
1235    $ua->agent(
1236        "rebuildspamdb/$VERSION ($^O; Perl/$]; LWP::Simple/$LWP::VERSION)");
1237    $ua->timeout(20);
1238    if ($proxyserver) {
1239        $ua->proxy( 'http', "http://" . $proxyserver );
1240
1241    } else {
1242
1243    }
1244
1245    # call LWP mirror command
1246    $rc = mirror( $griplistUrl, $file );
1247
1248    if ( $rc == 304 ) {
1249        # HTTP 304 not modified status returned
1250        return;
1251    } elsif ( !is_success($rc) ) {
1252        #download failed-error code output to logfile
1253        &rb_printlog("$file download failed: $rc " . status_message($rc). "\n" );
1254        return;
1255    } elsif ( is_success($rc) ) {
1256        # download complete
1257        my $filesize = -s "$file";
1258        &rb_printlog("$file download complete: $filesize bytes\n" );
1259        chmod 0644, "$file";
1260    }
1261    my $ret = $rc;
1262#    &rb_printlog("info: updated GRIPLIST upload and download URL's in $file\n") if $ret;
1263    $ret = 0;
1264    open my $GC , '<', $file or return 0;
1265    binmode $GC;
1266    while (<$GC>) {
1267        s/\r|\n//o;
1268        if (/^\s*(gripList(?:DownUrl|UpUrl|UpHost))\s*:\s*(.+)$/) {
1269            ${$1} = $2;
1270            $ret++;
1271        }
1272    }
1273    close
1274#    &rb_printlog("info: loaded GRIPLIST upload and download URL's from $file\n") if $ret;
1275    &rb_printlog("info: GRIPLIST config $file is possibly incomplete\n") if $ret < 3;
1276    $gripListDownUrl =~ s/\*HOST\*/$gripListUpHost/o;
1277    $gripListUpUrl  =~ s/\*HOST\*/$gripListUpHost/o;
1278    return $ret;
1279}
1280
1281
1282sub uploadgriplist {
1283    local $/ = "\n";
1284
1285    #&rb_printlog("Start building Griplist \n");
1286    open( my $FLogFile, '<', "$Log" ) || &rb_printlog("Unable to create Griplist.\n unable to open logfile '$Log': $!\n");
1287    my ( $date, $ip, $ipnet, %m, %ok, %locals, $match, $peeraddress, $hostaddress, $connect, $day, $gooddays, $st );
1288    my $buf;
1289	my $iday;
1290    #build list of the last 2 days
1291    my $time = Time::HiRes::time();
1292    my $dayoffset = $time % ( 24 * 3600 );
1293
1294    for ( my $i = 0 ; $i < 2 ; $i++ ) {
1295        $gooddays .= '|' if ( $i > 0 );
1296        $day = localtime( $time - $i * 24 * 3600 );
1297        $day =~ s/^... (...) +(\d+) (\S+) ..(..)/$1-$2-$4/;
1298        $gooddays .= $day;
1299    }
1300    if ($LogDateFormat !~ /MMM-DD-YY/)  {
1301	$gooddays .= '|';
1302    for ( my $i = 0 ; $i < 2 ; $i++ ) {
1303        $gooddays .= '|' if ( $i > 0 );
1304        $day = &timestring( $time - $i * 24 * 3600 , 'd');
1305        $gooddays .= $day;
1306    }}
1307
1308
1309    undef $day;
1310    %locals = ( '127', 1, '10', 1, '192.168', 1, '169.254', 1, '::1', 1, 'fe80:', 1 );    #RFC 1918, IPv6
1311    for ( 16 .. 31 ) { $locals{ "172.$_" } = 1 }                                          #RFC 1918
1312
1313    while (<$FLogFile>) {
1314        next unless ( $date, $ip, $match ) = /($gooddays) .*\s([0-9a-f\.:]+) .* to: \S+ (.*)/io;
1315	$ipnet = $ip;
1316	if ($ipnet =~ /:.*:/) {
1317		$ipnet =~ s/:.*/:/ if ($ipnet !~ /^:/);
1318        	next if $locals{ $ipnet };		# ignore local IP ranges
1319	}
1320	else {
1321		$ipnet =~ s/^(\d+\.\d+)\..*/$1/;
1322        	next if $locals{ $ipnet };		# ignore local IP ranges
1323		$ipnet =~ s/^(\d+)\..*/$1/;
1324        	next if $locals{ $ipnet };		# ignore local IP ranges
1325	}
1326	$ipnet = $ip;
1327	if ($ipnet =~ /:.*:/) {
1328		$ipnet =~ s/^([0-9a-f]+:[0-9a-f]+:[0-9a-f]+:[0-9a-f]*:).*/$1/i;	# yes: "+++*" so as to allow "2001:123:456::"
1329	}
1330	else {
1331		$ipnet =~ s/(\d+)\.(\d+)\.(\d+)\.(\d+)/$1.$2.$3/;
1332	}
1333
1334        if (m/(\[Local]|\[MessageOK]|\[RWL]|\[whitelisted])|\[NoProcessing]/i) {
1335
1336            #Good IP
1337            $m{ $ipnet }  += 1;
1338            $ok{ $ipnet } += 1;
1339            next;
1340        }
1341        if (m/(Connection idle for|\[Backscatter]|\[Bayesian]|\[BlackDomain]|\[BlackHELO]|\[BombBlack]|\[BombData]|\[BombHeader]|\[BombRaw]|\[BombScript]|\[BombSender]|\[Collect]|\[Connection]|\[DNSBL]|\[DenyIP]|\[DenyStrict]|\[Extreme]|\[ForgedHELO]|\[ForgedLocalSender]|\[FromMissing]|\[IPfrequency]|\[IPperDomain]|\[InvalidHELO]|\[Malformedaddress]|\[MaxErrors]|\[MessageScore]|\[MissingMX]|\[MsgID]|\[OversizedHeader]|\[PTRinvalid]|\[PTRmissing]|\[PenaltyBox]|\[Penalty]|\[RelayAttempt]|\[SpoofedSender]|\[Trap]|\[URIBL]|\[VIRUS]|\[ValidHELO]|spam found|\[blocked\])/i) {
1342
1343            #Bad IP
1344            $m{ $ipnet }  += 1;
1345            $ok{ $ipnet } += 0;
1346            next;
1347        }
1348    }
1349    close $FLogFile;
1350    if ( !%m ) {
1351        &rb_printlog( "Skipping Griplist upload. Not enough messages processed.\n");
1352        return;
1353    }
1354    &rb_printlog("Preparing binary Griplist upload...");
1355    my $n6 = 0;
1356    my $n4 = 0;
1357    my ($buf6, $buf4);
1358    foreach (keys %m) {
1359        next if (!$m{$_});
1360        if ($_ =~ /:/) {
1361            my $ip = $_;
1362            $ip =~ s/([0-9a-f]*):/0000$1:/gi;
1363            $ip =~ s/0*([0-9a-f]{4}):/$1:/gi;
1364            $buf6 .= pack("H4H4H4H4", split(/:/, $ip));
1365            $buf6 .= pack("C", (1 - $ok{$_} / $m{$_}) * 255);
1366            $n6++;
1367        } else {
1368            $buf4 .= pack("C3C", split(/\./, $_), (1 - $ok{$_} / $m{$_}) * 255);
1369            $n4++;
1370        }
1371    }
1372    $st = pack("N2", $n6 / 2**32, $n6);
1373    $st .= pack("N", $n4);
1374    $st .= $buf6 . $buf4;
1375    &rb_printlog(" done\n");
1376    if ($proxyserver) {
1377        &rb_printlog("Uploading Griplist via Proxy: $proxyserver\n");
1378
1379        my $user = $proxyuser ? "$proxyuser:$proxypass\@": '';
1380        $peeraddress = $user . $proxyserver;
1381        $hostaddress = $proxyserver;
1382        $connect     = "POST $gripListUpUrl HTTP/1.0";
1383    } else {
1384        &rb_printlog("Uploading Griplist via Direct Connection\n");
1385        $peeraddress = $gripListUpHost . ':80';
1386        $hostaddress = $gripListUpHost;
1387        my ($url) = $gripListUpUrl =~ /http:\/\/[^\/](\/.+)/oi;
1388        $connect     = <<"EOF";
1389POST $url HTTP/1.1
1390User-Agent: rebuildspamdb/$VERSION ($^O; Perl/$];)
1391Host: $gripListUpHost
1392EOF
1393    }
1394    my $socket = new IO::Socket::INET( Proto => 'tcp', Peeraddr => $peeraddress, Timeout => 2 );
1395    if ( defined $socket ) {
1396        my $len = length($st);
1397        $connect .= <<"EOF";
1398Content-Type: application/x-www-form-urlencoded
1399Content-Length: $len
1400
1401$st
1402EOF
1403        print { $socket } $connect;
1404        $socket->sysread($buf, 4096);
1405        $socket->close;
1406        &rb_printlog("Submitted $len bytes: $n6 IPv6 addresses, $n4 IPv4 addresses\n");
1407    }
1408    else {
1409        &rb_printlog("unable to connect to assp.sourceforge.net to upload griplist\n");
1410        return;
1411    }
1412    return;
1413} ## end sub uploadgriplist
1414
1415sub downloadgriplist {
1416    &rb_printlog("Griplist download disabled\n")  if $noGriplist;
1417    return if $noGriplist;
1418    &rb_printlog("Griplist file not configured\n")  if (!$griplist);
1419    return if (!$griplist);
1420
1421    my $rc;
1422
1423    my $gripListUrl = "http://assp.sourceforge.net/cgi-bin/assp_griplist?binary";
1424    my $gripFile    = "$base/$griplist";
1425
1426    ## let's check if we really need to
1427    if (-e $gripFile) {
1428        my @s     = stat($gripFile);
1429        my $mtime = $s[9];
1430        if (time - $mtime < 8*60*60) {
1431            &rb_printlog("Griplist download failed: last download too recent\n");
1432            return;
1433        }
1434    }
1435
1436    # check for previous download timestamp, so we can do delta now
1437    my %lastdownload;
1438    $lastdownload{full} = 0;
1439    $lastdownload{fullUTC} = 0;
1440    $lastdownload{delta} = 0;
1441    $lastdownload{deltaUTC} = 0;
1442    my $delta = "";
1443    if (open(UTC, "$gripFile.utc")) {
1444        local $/;
1445        my $buf = <UTC>;
1446        close(UTC);
1447        chop($buf);
1448        if ($buf =~ /full/ && $buf =~ /delta/) {
1449            %lastdownload = split(/\s+|\n/, $buf);
1450        } else {
1451            $lastdownload{delta} = $buf;
1452        }
1453        if (! ($DoFullGripDownload && time - $lastdownload{fullUTC} > $DoFullGripDownload*24*60*60)) {
1454            my $lasttime;
1455            $lasttime = $lastdownload{full};
1456            $lasttime = $lastdownload{delta} if ($lastdownload{delta} > $lastdownload{full});
1457            $gripListUrl .= "&delta=$lasttime";
1458            $delta = " (delta)";
1459        }
1460    }
1461
1462    if (!$CanUseLWP) {
1463        &rb_printlog("Griplist download failed: LWP::Simple Perl module not available\n");
1464        return;
1465    }
1466
1467    if (open(TEMPFILE, ">", "$gripFile.tmp")) {
1468        #we can create the file, this is good, now close the file and keep going.
1469        close TEMPFILE;
1470        unlink("$gripFile.tmp");
1471    } else {
1472        &rb_printlog("Griplist download failed: Cannot create $gripFile.tmp\n");
1473        return;
1474    }
1475
1476    # Create LWP ogject
1477    use LWP::Simple qw(mirror is_success status_message $ua);
1478
1479    # Set useragent to Rebuild version
1480    $ua->agent("rebuildspamdb/$VERSION ($^O; Perl/$]; LWP::Simple/$LWP::VERSION)");
1481    $ua->timeout(20);
1482    if ($proxyserver) {
1483        $ua->proxy('http', "http://" . $proxyserver);
1484        &rb_printlog("Downloading Griplist$delta via HTTP proxy: $proxyserver\n");
1485    } else {
1486        &rb_printlog("Downloading Griplist$delta via direct HTTP connection\n");
1487    }
1488
1489    # call LWP mirror command
1490    my $dltime = time;
1491    $rc = mirror($gripListDownUrl, "$gripFile.tmp");
1492
1493    if ($rc == 304) {
1494        # HTTP 304 not modified status returned
1495        # can't happen - we ALWAYS get new data
1496        unlink("$gripFile.tmp");
1497        return;
1498    } elsif (!is_success($rc)) {
1499        # download failed-error code output to logfile
1500        # &rb_printlog("Griplist download failed: $rc " . status_message($rc). "\n");
1501        unlink("$gripFile.tmp");
1502        return;
1503    }
1504
1505    # download complete
1506    my $filesize = -s "$gripFile.tmp";
1507    &rb_printlog("Griplist download complete: binary download $filesize bytes\n");
1508
1509    # enough data?
1510    if ($filesize < 12) {
1511        &rb_printlog("Griplist download error: grip data too small\n");
1512        unlink("$gripFile.tmp");
1513        return;
1514    }
1515
1516    # record download time so we can do delta next time
1517    unlink("$gripFile.utc");
1518    if (open(UTC, ">$gripFile.utc")) {
1519        my ($sec, $min, $hour, $day, $mon, $year, $wday, $yday, $isdst) = gmtime($dltime);
1520        $year += 1900;
1521        $mon += 1;
1522        if (! $delta) {
1523            $lastdownload{full} = sprintf "%04d%02d%02d%02d%02d%02d", $year, $mon, $day, $hour, $min, $sec;
1524            $lastdownload{fullUTC} = $dltime;
1525        } else {
1526            $lastdownload{delta} = sprintf "%04d%02d%02d%02d%02d%02d", $year, $mon, $day, $hour, $min, $sec;
1527            $lastdownload{deltaUTC} = $dltime;
1528        }
1529        printf UTC "full\t%s\n", $lastdownload{full};
1530        printf UTC "fullUTC\t%s\n", $lastdownload{fullUTC};
1531        printf UTC "delta\t%s\n", $lastdownload{delta};
1532        printf UTC "deltaUTC\t%s\n", $lastdownload{deltaUTC};
1533        close(UTC);
1534    }
1535
1536    # if we did a delta download, read in previous data so we can merge
1537    my @binFiles;
1538    push(@binFiles, "$gripFile.bin") if ($gripListUrl =~ /delta=/);
1539    push(@binFiles, "$gripFile.tmp");
1540
1541    # convert binary download form to text form used by ASSP
1542    my $buf;
1543    my %grip;
1544    my $action = "read";
1545    foreach my $binF (@binFiles) {
1546        my $binSize = -s $binF;
1547        open(BIN, $binF);
1548        binmode(BIN);
1549        read(BIN, $buf, $binSize);
1550        close(BIN);
1551
1552    # IPv6 count
1553    	my ($n6h, $n6l) = unpack("N2", $buf);
1554    	my $n6 = $n6h * 2**32 + $n6l;
1555
1556    # IPv4 count
1557    	my $n4;
1558    	eval { $n4 = unpack("x[N2] N", $buf); };
1559
1560
1561    # decode IPv6 data
1562    	my $x6 = 0;
1563    	eval {
1564    	for (my $i = 0; $i < $n6; $i++) {
1565        my ($bip, $grey) = unpack("x[N2] x[N] x$x6 a8 C", $buf);
1566        my $ip = join(":", unpack("H4H4H4H4", $bip)) . ":";
1567        $ip =~ s/:0+([0-9a-f])/:$1/gio;
1568        $ip =~ s/:0:$/::/o;
1569
1570        #                $grip{$ip} = $grey / 255;
1571        #                $gripdelta{$ip} = $grey / 255 if $deltayonly;
1572        $x6 += 9;
1573    	}
1574    	};
1575
1576    # decode IPv4 data
1577    	my $x4 = 0;
1578    	for (my $i = 0; $i < $n4; $i++) {
1579        my ($bip, $grey) = unpack("x[N2] x[N] x$x6 x$x4 a3 C", $buf);
1580        my $ip = join(".", unpack("C3", $bip));
1581        $grip{$ip} = $grey / 255;
1582
1583        $x4 += 4;
1584    }
1585        &rb_printlog("Griplist binary $action OK: $binF, $n6 IPv6 addresses, $n4 IPv4 addresses\n");
1586        $action = "merge";
1587    }
1588
1589    # remove download file
1590    unlink("$gripFile.tmp");
1591
1592    # output binary version, so we can do a delta next time
1593    &rb_printlog("Writing merged Griplist binary...");
1594    my $buf;
1595    my $n6 = 0;
1596    my $n4 = 0;
1597    my ($buf6, $buf4);
1598    foreach my $ip (keys %grip) {
1599        if ($ip =~ /:/) {
1600            my $ip2 = $ip;
1601            $ip2 =~ s/([0-9a-f]*):/0000$1:/gi;
1602            $ip2 =~ s/0*([0-9a-f]{4}):/$1:/gi;
1603            $buf6 .= pack("H4H4H4H4", split(/:/, $ip2));
1604            $buf6 .= pack("C", int($grip{$ip} * 255));
1605            $n6++;
1606        } else {
1607            $buf4 .= pack("C3C", split(/\./, $ip), int($grip{$ip} * 255));
1608            $n4++;
1609        }
1610    }
1611    $buf = pack("N2", $n6/2**32, $n6);
1612    $buf .= pack("N", $n4);
1613    $buf .= $buf6 . $buf4;
1614    unlink("$gripFile.bin");
1615    open (BIN, ">$gripFile.bin");
1616    binmode(BIN);
1617    print BIN $buf;
1618    close(BIN);
1619    chmod 0644, "$gripFile.bin";
1620    &rb_printlog(" done\n");
1621
1622    # output text version
1623    &rb_printlog("Writing merged Griplist text...");
1624    unlink("$gripFile");
1625    open (TEXT, ">$gripFile");
1626    binmode(TEXT);
1627    print TEXT "\n";
1628    foreach my $ip (sort keys %grip) {
1629
1630        printf TEXT "$ip\002%.2f\n", $grip{$ip};
1631    }
1632    close(TEXT);
1633    chmod 0644, "$gripFile";
1634    &rb_printlog(" done\n");
1635
1636    &rb_printlog("Griplist writing complete: $n6 IPv6 addresses, $n4 IPv4 addresses\n\n");
1637}
1638
1639
1640sub downloaddroplist {
1641
1642	&rb_printlog("Droplist download disabled\n\n")  if !$DoDropList;
1643	return if !$DoDropList;
1644    my $rc;
1645
1646    my $droplistUrl = "http://www.spamhaus.org/drop/drop.lasso";
1647    my $dropFile     = "$base/$DropList";
1648
1649    $dropFile		 = &fixPath($dropFile);
1650
1651    # let's check if we really need to
1652    if (-e $dropFile) {
1653        my @s     = stat($dropFile);
1654        my $mtime = $s[9];
1655        my $random = int(rand(144-72)+72)+1;
1656        if (time - $mtime < $random*60*60) {
1657
1658            &rb_printlog("Droplist download skipped: last download too recent\n");
1659            return;
1660        }
1661    }
1662
1663    if ( !$CanUseLWP ) {
1664        &rb_printlog("Droplist download failed: LWP::Simple Perl module not available\n");
1665        return;
1666    }
1667
1668    if ( -e $dropFile  ) {
1669        if ( !-r $dropFile  ) {
1670            &rb_printlog( "Droplist download failed: $dropFile  not readable!\n" );
1671            return;
1672        } elsif ( !-w $dropFile  ) {
1673            &rb_printlog( "Droplist download failed: $dropFile  not writable!\n" );
1674            return;
1675        }
1676    }
1677    else {
1678        if ( open( TEMPFILE, ">", $dropFile ) ) {
1679            #we can create the file, this is good, now close the file and keep going.
1680            close TEMPFILE;
1681	    unlink($dropFile);
1682        } else {
1683            &rb_printlog("Droplist download failed: Cannot create $dropFile \n" );
1684            return;
1685        }
1686    }
1687
1688    # Create LWP ogject
1689    use LWP::Simple qw(mirror is_success status_message $ua);
1690
1691    # Set useragent to Rebuild version
1692    $ua->agent(
1693        "rebuildspamdb/$VERSION ($^O; Perl/$]; LWP::Simple/$LWP::VERSION)");
1694    $ua->timeout(20);
1695    if ($proxyserver) {
1696        $ua->proxy( 'http', "http://" . $proxyserver );
1697        &rb_printlog("Downloading $dropFile via HTTP proxy: $proxyserver\n" );
1698    } else {
1699        &rb_printlog("Downloading $dropFile via direct HTTP connection\n" );
1700    }
1701
1702    # call LWP mirror command
1703    $rc = mirror( $droplistUrl, $dropFile );
1704
1705    if ( $rc == 304 ) {
1706        # HTTP 304 not modified status returned
1707        return;
1708    } elsif ( !is_success($rc) ) {
1709        #download failed-error code output to logfile
1710        &rb_printlog("$dropFile download failed: $rc " . status_message($rc). "\n" );
1711        return;
1712    } elsif ( is_success($rc) ) {
1713        # download complete
1714        my $filesize = -s "$dropFile";
1715        &rb_printlog("$dropFile download complete: $filesize bytes\n" );
1716        chmod 0644, "$dropFile";
1717    }
1718}
1719
1720sub compileregex {
1721    use re 'eval';
1722    my ( $name, $contents, $REname ) = @_;
1723    $contents = getrecontent( $contents, $name );
1724    $contents ||= '^(?!)';    # regexp that never matches
1725
1726    # trim long matches to 32 chars including '...' at the end
1727    eval { $$REname = qr/(?si)$contents/ };
1728    if ($EVAL_ERROR) { print "regular expression error in '$contents' for $name: $@\n"; }
1729    return q{};
1730}
1731
1732sub optionList {
1733
1734    # this converts a | separated list into a RE
1735    my ( $d, $configname ) = @_;
1736    $d = getrecontent( $d, $configname );
1737    $d =~ s/([\.\[\]\-\(\)\*\+\\])/\\$1/g;
1738    return $d;
1739}
1740
1741sub fixPath {
1742    my ($path) = @_;
1743    my $len = length($path);
1744    if   ( !substr( $path, ( $len - 1 ), 1 ) eq q{/} ) { return $path . q{/}; }
1745    else                                               { return $path; }
1746    return;
1747}
1748
1749sub repair {
1750    $/ = "\n";
1751
1752    # mxa ptr rbl spf uribl white black
1753    my $pbdb = "$base/$pbdbfile";
1754    my ( @files, %w );
1755    my ( $k,     $v );
1756    if ( !( $pbdbfile =~ /mysql/ ) ) {
1757        foreach ( glob("$pbdb.*.db") ) { push( @files, $_ ); }
1758    }
1759    if ( !( $whitelistdb =~ /mysql/ ) ) { push( @files, $whitelistdb ); }
1760    if ( !( $redlistdb   =~ /mysql/ ) ) { push( @files, $redlistdb ); }
1761    foreach my $f (@files) {
1762        if ( !-e $f ) { next }
1763        open( my $curfile, "<", $f );
1764
1765        #<$curfile>;
1766        while (<$curfile>) {
1767            ( $k, $v ) = split( /[\001\002\n]/, $_ );
1768            if ( $k eq q{} || $v eq q{} ) { next }
1769
1770            #print "$k=$v\n";
1771            $w{ $k } = $v;
1772        }
1773        close $curfile;
1774        open( my $newfile, ">", "$f.new" );
1775        binmode $newfile;
1776        print { $newfile } "\n";
1777        for ( sort keys %w ) { print { $newfile } "$_\002$w{$_}\n"; }
1778        close $newfile;
1779        rename( $f, "$f.bak" );
1780        rename( "$f.new", $f );
1781        undef %w;
1782    }
1783    return;
1784} ## end sub repair
1785
1786
1787
1788sub sendNotification {
1789    my ($from,$to,$sub,$body,$file) = @_;
1790    my $text;
1791    return unless $to;
1792    return unless $resendmail;
1793    my $date=$UseLocalTime ? localtime() : gmtime();
1794    my $tz=$UseLocalTime ? tzStr() : '+0000';
1795    $date=~s/(\w+) +(\w+) +(\d+) +(\S+) +(\d+)/$1, $3 $2 $5 $4/;
1796    $text = "Date: $date $tz\r\n";
1797    $text .= "X-Assp-Notification: YES\r\n";
1798  	$text .= "From: <$from>\r\nTo:" if $from !~ /\</;
1799    $text .= "From: $from\r\nTo:" if $from =~ /\</;
1800
1801    foreach (split(/\|/, $to)) {
1802        $text .= " <$_>,";
1803    }
1804    chop $text;
1805    $text .= "\r\n";
1806    $text .= "Subject: $sub\r\n";
1807    $text .= "Content-Type: text/plain;	charset=\"ISO-8859-1\"\r\n";
1808    $text .= "Content-Transfer-Encoding: 7bit\r\n";
1809    my $msgid = int(rand(1000000));
1810    $text .= "Message-ID: a$msgid\@$myName\r\n";
1811    $text = &headerWrap($text);
1812    $text .= "\r\n";           # end header
1813    foreach (split(/\r?\n/,$body)) {
1814        $text .= "$_\r\n";
1815    }
1816
1817    my $f;
1818    if ($file && -e $file && open($f,"<",$file)) {
1819
1820        while (<$f>) {
1821             s/\r?\n//g;
1822             $text .= "$_\r\n";
1823
1824        }
1825        close $f;
1826    }
1827
1828    $text .= ".\r\n";
1829    $text =~ tr/\x80-\xFF/_/;   # 7bit only
1830    my $rfile = "$base/$resendmail/n$msgid$maillogExt";
1831	-d "$base/$resendmail" or mkdir "$base/$resendmail", 0777;
1832    if (open($f,">",$rfile)) {
1833        binmode $f;
1834        print $f $text;
1835        close $f;
1836        &rb_printlog( "write notify message to $rfile\n" );
1837    } else {
1838
1839        &rb_printlog( "error: unable to write notify message to $rfile - $!\n" );
1840    }
1841
1842}
1843sub tzStr {
1844
1845    # calculate the time difference in minutes
1846    my $minoffset =
1847      ( Time::Local::timelocal( localtime() ) -
1848          Time::Local::timelocal( gmtime() ) ) / 60;
1849
1850   # translate it to "hour-format", so that 90 will be 130, and -90 will be -130
1851    my $sign = $minoffset < 0 ? -1 : +1;
1852    $minoffset = abs($minoffset) + 0.5;
1853    my $tzoffset = 0;
1854    $tzoffset = $sign * ( int( $minoffset / 60 ) * 100 + ( $minoffset % 60 ) )
1855      if $minoffset;
1856
1857    # apply final formatting, including +/- sign and 4 digits
1858    return sprintf( "%+05d", $tzoffset );
1859}
1860# wrap long headers
1861sub headerWrap {
1862    my $header = shift;
1863    $header =~
1864s/(?:([^\r\n]{60,75}?;)|([^\r\n]{60,75}) ) {0,5}(?=[^\r\n]{10,})/$1$2\r\n\t/g;
1865
1866    return $header;
1867}
1868sub rb_cleanUpFiles {
1869    my ($folder, $filter, $filetime) = @_;
1870
1871    my $textfilter = " (*$filter)" if $filter;
1872    my @files;
1873    my $file;
1874    my $count;
1875    my $dir = &fixPath($folder);
1876    $dir =~ s/\\/\//g;
1877    return unless -e $dir;
1878    &rb_printlog( "starting rb_cleanup old files$textfilter for folder $dir\n" );
1879
1880    opendir(my $DIR,"$dir");
1881    @files = readdir($DIR);
1882    close $DIR;
1883	my $fldrpath        = $dir . "/*";
1884
1885    my $filecount = &countfiles($fldrpath);
1886    foreach $file (@files) {
1887        next if $file eq '.';
1888        next if $file eq '..';
1889        next unless $file =~ /$maillogExt$/i or $file =~ /\.rpt$/i;
1890        next if ($filter && $file !~ /$filter$/i);
1891        next if ($filter && $file =~ /^$filter$/i);
1892        $file = "$dir/$file";
1893        next if -d $file;
1894        next unless -w $file;
1895        my $dtime=(stat($file))[9]-time;
1896        if (($dtime < $filetime * -1) or ($dtime > 0 && $dtime < $MaxKeepDeleted - $filetime)) {
1897            $count++;
1898
1899        }
1900    }
1901
1902    my $filecountafter = &countfiles($fldrpath);
1903    &rb_printlog( "folder $dir before: $filecount\n" ) ;
1904	&rb_printlog( "folder $dir deleted: $count\n" ) if $count;
1905	&rb_printlog( "folder $dir after: $filecountafter\n\n" ) ;
1906	$correctedspamcount = $filecountafter if $dir =~ 'errors\/spam';
1907	$correctednotspamcount = $filecountafter if $dir =~ 'errors\/notspam';
1908
1909}
1910
1911sub rb_cleanUpMaxFiles {
1912    my $folder = shift;
1913    my $percent = shift;
1914
1915    my @files;
1916    my $file;
1917    my $count;
1918    my $info;
1919    my $dir = ($folder !~ /\Q$base\E/i) ? "$base/$folder" : $folder ;
1920    my $maxfiles = $MaxFiles;
1921    my $importmaxfiles = $MaxFiles;
1922
1923
1924    my $adiff = abs ($correctedspamcount - $correctednotspamcount);
1925    $maxfiles = $MaxFiles + $adiff if $dir =~ /assp\/spam/ && $correctednotspamcount > $correctedspamcount;
1926    $maxfiles = $MaxFiles + $adiff if $dir =~ /assp\/notspam/ && $correctedspamcount > $correctednotspamcount;
1927
1928    $dir =~ s/\\/\//g;
1929    return unless -e $dir;
1930
1931
1932    opendir(my $DIR,"$dir");
1933    @files = readdir($DIR);
1934    close $DIR;
1935    my $filecount = @files - 2;
1936
1937    rb_printlog("rb_cleaning $dir skipped - filecount: $filecount < maxfiles: $maxfiles\n") if $filecount <= $maxfiles;
1938    return $info if  $filecount <= $maxfiles;
1939
1940    my %filelist = ();
1941    while (@files ) {
1942        $file = shift @files;
1943        next if $file eq '.';
1944        next if $file eq '..';
1945        $file = "$dir/$file";
1946        if (-d $file) {
1947            $filecount--;
1948            next;
1949        }
1950        my $ft = (stat($file))[9];
1951        $ft = $ft - (60 * 24 * 3600) if $ft > time;
1952        while (exists $filelist{$ft}) {
1953            $ft++;
1954        }
1955        $filelist{$ft} = $file;
1956        $count++;
1957
1958    }
1959    return $info if $filecount <= $maxfiles;
1960
1961
1962    rb_printlog("\nstarting rb_cleaning $dir - delete files from $dir - old filecount: $filecount: \n");
1963    my $toFilenumber;
1964    my $filenum;
1965    my $time = time - ($minimumdays * 24 * 3600);   # two weeks ago
1966	my $savecount;
1967
1968    $filenum = $maxfiles - $filecount;
1969    $toFilenumber = $maxfiles;
1970
1971    $count = 0;
1972
1973    foreach my $filetime (sort keys %filelist) {
1974        last if $filecount-- < $toFilenumber;
1975
1976        unlink "$filelist{$filetime}";
1977        $count++;
1978
1979    }
1980
1981    opendir(my $DIR,"$dir");
1982    @files = readdir($DIR);
1983    close $DIR;
1984    my $newfilecount = @files - 2;
1985
1986
1987    rb_printlog("finished rb_cleaning $dir - new filecount: $newfilecount\n") ;
1988
1989    return $info;
1990}
1991
1992
1993sub rb_cleanUpCollection {
1994
1995
1996
1997
1998	my $age = $MaxNoBayesFileAge * 3600 * 24;
1999    my @dirs = ('incomingOkMail','discarded','viruslog');
2000    my $dir;
2001
2002
2003    	&rb_printlog( "\n--- rb_cleaning NoBayesian folders ---\n" );
2004    	&rb_printlog( "entries older than $MaxNoBayesFileAge days will be removed\n" ) if $MaxNoBayesFileAge;
2005
2006    foreach my $dir (@dirs) {
2007        if ($age) {
2008            &rb_cleanUpFiles(${$dir},'',$age) if ${$dir};
2009        }
2010    }
2011
2012    $age = $MaxCorrectedDays * 3600 * 24;
2013    @dirs = ('correctedspam','correctednotspam');
2014
2015    &rb_printlog( "\n--- rb_cleaning corrected (errors) spam/notspam folders ---\n" );
2016    &rb_printlog( "entries older than $MaxCorrectedDays days will be removed\n" ) if $MaxCorrectedDays;
2017
2018    foreach my $dir (@dirs) {
2019        if ($age) {
2020            &rb_cleanUpFiles(${$dir},'',$age) if ${$dir};
2021        } else {
2022            &rb_cleanUpMaxFiles(${$dir}) if ${$dir};
2023        }
2024    }
2025
2026    return unless $maintbayescollection;
2027    $age = 0;
2028    @dirs = ('spamlog','notspamlog');
2029
2030    &rb_printlog( "\n--- rb_cleaning Bayesian folders ---\n" );
2031    &rb_printlog( "entries older than $MaxBayesFileAge days will be removed\n" ) if $MaxBayesFileAge;
2032
2033    foreach my $dir (@dirs) {
2034        if ($age) {
2035            &rb_cleanUpFiles(${$dir},'',$age) if ${$dir};
2036        } else {
2037            &rb_cleanUpMaxFiles(${$dir}) if ${$dir};
2038        }
2039    }
2040}
2041
2042sub rb_cleanTrashlist {
2043    my $files_before = my $files_deleted = 0;
2044    my $t = time;
2045    my $mcount;
2046
2047    while ( my ( $k, $v ) = each(%Trashlist) ) {
2048		if (!-e $k) {
2049			delete $Trashlist{$k};
2050			$files_deleted++;
2051			next;
2052		}
2053        my $ct = $v;
2054        $files_before++;
2055
2056        if (!$MaxKeepDeleted or ( $t - $ct >= $MaxKeepDeleted * 3600 * 24)
2057            )
2058        {
2059        	unlink $k;
2060            delete $Trashlist{$k};
2061            $files_deleted++;
2062        }
2063    }
2064    &rb_printlog(
2065"\nTrashlist: rb_cleaning finished; before=$files_before, deleted=$files_deleted\n"
2066    );
2067
2068}
2069
2070sub getUidGid {
2071    my ( $uname, $gname ) = @_;
2072
2073    my $rname = "root";
2074    eval('getgrnam($rname);getpwnam($rname);');
2075    if ($@) {
2076
2077        # windows pukes "unimplemented" for these -- just skip it
2078        rb_printlog(
2079"warning:   uname and/or gname are set ($uname,$gname) but getgrnam / getpwnam give errors: $@"
2080        );
2081        return;
2082    }
2083    my $gid;
2084    if ($gname) {
2085        $gid = getgrnam($gname);
2086        if ( defined $gid ) {
2087        } else {
2088            my $msg =
2089"could not find gid for group '$gname' -- not switching effective gid ";
2090            rb_printlog( $msg );
2091            return;
2092        }
2093    }
2094    my $uid;
2095    if ($uname) {
2096        $uid = getpwnam($uname);
2097        if ( defined $uid ) {
2098        } else {
2099            my $msg =
2100"could not find uid for user '$uname' -- not switching effective uid ";
2101            rb_printlog( $msg );
2102            return;
2103        }
2104    }
2105    ( $uid, $gid );
2106}
2107
2108sub mlog {
2109}
2110
2111#####################################################################################
2112#                orderedtie
2113{
2114
2115    package orderedtie;
2116
2117    # This is a tied value that caches lookups from a sorted file; \n separates records,
2118    # \002 separates the key from the value. After OrderedTieHashSize lookups the cache is
2119    # cleared. This give us most of the speed of the hash without the huge memory overhead of storing
2120    # the entire hash and should be totally portable. Picking the best value for n requires some
2121    # tuning. A \n is required to start the file.
2122    # if you're updating entries it behoves you to call flush every so often to make sure that your
2123    # changes are saved. This also frees the memory used to remember updated values.
2124    # for my purposes a value of undef and a nonexistant key are the same
2125    # Obviosly if your keys or values contain \n or \002 it will totally goof things up.
2126    sub TIEHASH {
2127        my ( $c, $fn ) = @_;
2128        my $self = { fn => $fn, age => mtime($fn), cnt => 0, cache => {}, updated => {}, ptr => 1, };
2129        bless $self, $c;
2130        return $self;
2131    }
2132    sub DESTROY { $_[0]->flush(); }
2133    sub mtime { my @s = stat( $_[0] ); $s[9]; }
2134
2135    sub flush {
2136        my $this = shift;
2137        return unless %{ $this->{ updated } };
2138        my $f = $this->{ fn };
2139        open( O, '>', "$f.tmp" ) || return;
2140        binmode(O);
2141        open( I, '<', "$f" ) || print O"\n";
2142        binmode(I);
2143        local $/ = "\n";
2144        my @l = ( sort keys %{ $this->{ updated } } );
2145        my ( $k, $d, $r, $v );
2146
2147        while ( $r = <I> ) {
2148            ( $k, $d ) = split( "\002", $r );
2149            while ( @l && $l[0] lt $k ) {
2150                $v = $this->{ updated }{ $l[0] };
2151                print O"$l[0]\002$v\n" if $v;
2152                shift(@l);
2153            }
2154            if ( $l[0] eq $k ) {
2155                $v = $this->{ updated }{ $l[0] };
2156                print O"$l[0]\002$v\n" if $v;
2157                shift(@l);
2158            }
2159            else { print O$r; }
2160        }
2161        while (@l) {
2162            $v = $this->{ updated }{ $l[0] };
2163            print O"$l[0]\002$v\n" if $v;
2164            shift(@l);
2165        }
2166        close I;
2167        close O;
2168        unlink($f);
2169        rename( "$f.tmp", $f );
2170        $this->{ updated } = {};
2171    } ## end sub flush
2172
2173    sub STORE {
2174        my ( $this, $key, $value ) = @_;
2175        $this->{ cache }{ $key } = $this->{ updated }{ $key } = $value;
2176    }
2177
2178    sub FETCH {
2179        my ( $this, $key ) = @_;
2180        return $this->{ cache }{ $key } if exists $this->{ cache }{ $key };
2181        $this->resetCache()
2182            if ( $this->{ cnt }++ > 10000
2183            || ( $this->{ cnt } & 0x1f ) == 0 && mtime( $this->{ fn } ) != $this->{ age } );
2184        return $this->{ cache }{ $key } = binsearch( $this->{ fn }, $key );
2185    }
2186
2187    sub resetCache {
2188        my $this = shift;
2189        $this->{ cnt }   = 0;
2190        $this->{ age }   = mtime( $this->{ fn } );
2191        $this->{ cache } = { %{ $this->{ updated } } };
2192    }
2193
2194    sub binsearch {
2195        my ( $f, $k ) = @_;
2196        open( F, '<', "$f" ) || return;
2197        binmode(F);
2198        my $siz = my $h = -s $f;
2199        $siz -= 1024;
2200        my $l  = 0;
2201        my $k0 = $k;
2202        $k =~ s/([\[\]\(\)\*\^\!\|\+\.\\\/\?\`\$\@\{\}])/\\$1/g;    # make sure there's no re chars unqutoed in the key
2203
2204        #print "k=$k ($_[1])\n";
2205        while (1) {
2206            my $m = ( ( $l + $h ) >> 1 ) - 1024;
2207            $m = 0 if $m < 0;
2208
2209            #print "($l $m $h) ";
2210            seek( F, $m, 0 );
2211            my $d;
2212            my $read = read( F, $d, 2048 );
2213            if ( $d =~ /\n$k\002([^\n]*)\n/ ) {
2214                close F;
2215
2216                #print "got $1\n";
2217                return $1;
2218            }
2219            my ( $pre, $first, $last, $post ) = $d =~ /^(.*?)\n(.*?)\002.*\n(.*?)\002.*?\n(.*?)$/s;
2220
2221            #print "f=$first ";
2222            last unless defined $first;
2223            if ( $k0 gt $first && $k0 lt $last ) {
2224
2225                #print "got miss\n";
2226                last;
2227            }
2228            if ( $k0 lt $first ) {
2229                last if $m == 0;
2230                $h = $m - 1024 + length($pre);
2231                $h = 0 if $h < 0;
2232            }
2233            if ( $k0 gt $last ) {
2234                last if $m >= $siz;
2235                $l = $m + $read - length($post);
2236            }
2237
2238            #print "l=$l h=$h ";
2239        } ## end while (1)
2240        close F;
2241        return;
2242    } ## end sub binsearch
2243
2244    sub FIRSTKEY {
2245        my $this = shift;
2246        $this->flush();
2247        $this->{ ptr } = 1;
2248        $this->NEXTKEY();
2249    }
2250
2251    sub NEXTKEY {
2252        my ( $this, $lastkey ) = @_;
2253        local $/ = "\n";
2254        open( F, '<', "$this->{fn}" ) || return;
2255        binmode(F);
2256        seek( F, $this->{ ptr }, 0 );
2257        my $r = <F>;
2258        return unless $r;
2259        $this->{ ptr } = tell F;
2260        close F;
2261        my ( $k, $v ) = $r =~ /(.*?)\002(.*?)\n/s;
2262
2263        if ( !exists( $this->{ cache }{ $k } ) && $this->{ cnt }++ > 10000 ) {
2264            $this->{ cnt }   = 0;
2265            $this->{ cache } = { %{ $this->{ updated } } };
2266        }
2267        $this->{ cache }{ $k } = $v;
2268        $k;
2269    }
2270
2271    sub EXISTS {
2272        my ( $this, $key ) = @_;
2273        return FETCH( $this, $key );
2274    }
2275
2276    sub DELETE {
2277        my ( $this, $key ) = @_;
2278        $this->{ cache }{ $key } = $this->{ updated }{ $key } = undef;
2279    }
2280
2281    sub CLEAR {
2282        my ($this) = @_;
2283        open( F, '>', "$this->{fn}" );
2284        binmode(F);
2285        print "\n";
2286        close F;
2287        $this->{ cache }   = {};
2288        $this->{ updated } = {};
2289        $this->{ cnt }     = 0;
2290    }
2291}
2292
2293
2294