1#!/usr/local/bin/perl 2# rebuilds bayesian spam database 3our $VERSION = "RB-1.99"; 4our $modversion = '(13177)'; 5# (c) John Hanna 2003 under the terms of the GPL 6# Updated July 2004 for simple proxy support. 7# (c) Fritz Borgstedt 2006 under the terms of the GPL 8# Updated Feb 2008 refactoring and rewrites 9# (c) Kevin 2008 under the terms of the GPL 10use bytes; # get rid of anoying 'Malformed UTF-8' messages 11use Digest::MD5 qw(md5_hex); 12use English '-no_match_vars'; 13use File::Copy; 14use IO::Handle; 15use IO::Socket; 16use Time::Local; 17use Time::HiRes; 18use Cwd; 19use strict qw(vars subs); 20our $AvailLWP = eval('use LWP::Simple; 1'); # LWP::Simple module installed 21our $CanUseLWP = $AvailLWP; 22our $noGriplistUpload; 23our $noGriplistDownload; 24our $gripListDownUrl = 'http://*HOST*/cgi-bin/assp_griplist?binary'; 25our $gripListUpUrl = 'http://*HOST*/cgi-bin/assp_griplist?binary'; 26our $gripListUpHost = 'assp.sourceforge.net'; 27$gripListDownUrl =~ s/\*HOST\*/$gripListUpHost/o; 28$gripListUpUrl =~ s/\*HOST\*/$gripListUpHost/o; 29 30our %Config; 31our $ReplaceOldSpamdb=1; 32our $onlyNewCorrected=0; 33 34our $HeaderValueRe=qr/[ \t]*[^\r\n]*(?:\r?\n[ \t]+\S[^\r\n]*)*(?:\r?\n)?/o; 35# IP address representations 36our $IPprivate; 37our $IPQuadSectRE; 38our $IPQuadSectDotRE; 39our $IPQuadRE; 40our $IPStrictQuadRE; 41 42# Host 43our $IPSectRe; 44our $IPSectHexRe; 45our $IPSectDotRe; 46our $IPSectHexDotRe; 47our $IPRe; 48our $IPv4Re; 49our $IPv6Re; 50our $IPv6LikeRe; 51our $PortRe; 52our $HostRe; 53our $HostPortRe; 54# IP address representations 55my $sep; 56my $v6Re = '[0-9A-Fa-f]{1,4}'; 57$IPSectRe = '(?:25[0-5]|2[0-4]\d|1\d\d|0?\d?\d)'; 58$IPSectHexRe = '(?:(?:0x)?(?:[A-Fa-f][A-Fa-f0-9]?|[A-Fa-f0-9]?[A-Fa-f]))'; 59 60$IPprivate = '^(?:0{1,3}\.0{1,3}\.0{1,3}\.0{1,3}|127(?:\.'.$IPSectRe.'){3}|169\.254(?:\.'.$IPSectRe.'){2}|0?10(?:\.'.$IPSectRe.'){3}|192\.168(?:\.'.$IPSectRe.'){2}|172\.0?1[6-9](?:\.'.$IPSectRe.'){2}|172\.0?2[0-9](?:\.'.$IPSectRe.'){2}|172\.0?3[01](?:\.'.$IPSectRe.'){2})$'; #RFC 1918 decimal 61$IPprivate .= '|^(?:(?:0x)?0{1,2}\.(?:0x)?0{1,2}\.(?:0x)?0{1,2}\.(?:0x)?0{1,2}|(?:0x)?7[Ff](?:\.'.$IPSectHexRe.'){3}|(?:0x)?[aA]9\.(?:0x)?[Ff][Ee](?:\.'.$IPSectHexRe.'){2}|(?:0x)?0[aA](?:\.'.$IPSectHexRe.'){3}|(?:0x)?[Cc]0\.(?:0x)?[Aa]8(?:\.'.$IPSectHexRe.'){2}|(?:0x)[Aa][Cc]\.(?:0x)1[0-9a-fA-F](?:\.'.$IPSectHexRe.'){2})$'; #RFC 1918 Hex 62$IPprivate .= '|^(?:0{0,4}:){2,6}'.$IPprivate.'$'; # privat IPv4 in IPv6 63$IPprivate .= '|^(?:0{0,4}:){2,7}[1:]?$'; # IPv6 loopback and universal 64 65$IPQuadSectRE='(?:0([0-7]+)|0x([0-9a-fA-F]+)|(\d+))'; 66$IPQuadSectDotRE='(?:'.$IPQuadSectRE.'\.)'; 67$IPQuadRE=qr/$IPQuadSectDotRE?$IPQuadSectDotRE?$IPQuadSectDotRE?$IPQuadSectRE/o; 68 69 70 71$IPSectDotRe = '(?:'.$IPSectRe.'\.)'; 72$IPSectHexDotRe = '(?:'.$IPSectHexRe.'\.)'; 73$IPv4Re = qr/(?: 74(?:$IPSectDotRe){3}$IPSectRe 75| 76(?:$IPSectHexDotRe){3}$IPSectHexRe 77)/xo; 78 79# privat IPv6 addresses 80$IPprivate .= <<EOT; 81|^(?i:FE[89A-F][0-9A-F]): 82(?: 83(?:(?:$v6Re:){6}(?: $v6Re |:))| 84(?:(?:$v6Re:){5}(?: $IPv4Re | :$v6Re |:))| 85(?:(?:$v6Re:){4}(?: :$IPv4Re |(?::$v6Re){1,2}|:))| 86(?:(?:$v6Re:){3}(?:(?:(?::$v6Re)? :$IPv4Re)|(?::$v6Re){1,3}|:))| 87(?:(?:$v6Re:){2}(?:(?:(?::$v6Re){0,2}:$IPv4Re)|(?::$v6Re){1,4}|:))| 88(?:(?:$v6Re:) (?:(?:(?::$v6Re){0,3}:$IPv4Re)|(?::$v6Re){1,5}|:))| 89 (?:(?:(?::$v6Re){0,4}:$IPv4Re)|(?::$v6Re){1,6}|:) 90)\$ 91EOT 92$IPprivate = qr/$IPprivate/xo; 93 94# RFC4291, section 2.2, "Text Representation of addresses" 95$sep = '[:-]'; 96$IPv6Re = $IPv6LikeRe = <<EOT; 97(?: 98(?:(?:$v6Re$sep){7}(?: $v6Re |$sep))| 99(?:(?:$v6Re$sep){6}(?: $IPv4Re | $sep$v6Re |$sep))| 100(?:(?:$v6Re$sep){5}(?: $sep$IPv4Re |(?:$sep$v6Re){1,2}|$sep))| 101(?:(?:$v6Re$sep){4}(?:(?:(?:$sep$v6Re)? $sep$IPv4Re)|(?:$sep$v6Re){1,3}|$sep))| 102(?:(?:$v6Re$sep){3}(?:(?:(?:$sep$v6Re){0,2}$sep$IPv4Re)|(?:$sep$v6Re){1,4}|$sep))| 103(?:(?:$v6Re$sep){2}(?:(?:(?:$sep$v6Re){0,3}$sep$IPv4Re)|(?:$sep$v6Re){1,5}|$sep))| 104(?:(?:$v6Re$sep) (?:(?:(?:$sep$v6Re){0,4}$sep$IPv4Re)|(?:$sep$v6Re){1,6}|$sep))| 105(?: $sep (?:(?:(?:$sep$v6Re){0,5}$sep$IPv4Re)|(?:$sep$v6Re){1,7}|$sep)) 106) 107EOT 108 109$IPv6Re =~ s/\Q$sep\E/:/go; 110$IPv6Re = qr/$IPv6Re/xo; 111$IPv6LikeRe = qr/$IPv6LikeRe/xo; 112 113$IPRe = qr/(?: 114$IPv4Re 115| 116$IPv6Re 117)/xo; 118our $EmailDomainRe; 119$EmailDomainRe=qr/(?:\w[\w\.\-]*\.\w\w+|\[[\d\.]*\.\d+\])/o; 120# re for a single port - could be number 1 to 65535 121$PortRe = qr/(?:(?:[1-6]\d{4})|(?:[1-9]\d{0,3}))/o; 122# re for a single host - could be an IP a name or a fqdn 123$HostRe = qr/(?:(?:$IPv4Re|\[?$IPv6Re\]?)|$EmailDomainRe|\w\w+)/o; 124$HostPortRe = qr/$HostRe:$PortRe/o; 125 126our %m; 127our %GpOK; 128if ($CanUseLWP) { 129# my $ver = eval('LWP::Simple->VERSION'); 130# print "LWP::Simple $ver installed - download griplist available\n" ; 131 } elsif ( !$AvailLWP ) { 132 print "LWP::Simple module not installed - download griplist not available\n"; 133 } 134#use warnings; 135 136#no output buffering to screen 137*STDOUT->autoflush(); 138 139#holy predeclarations Batman! 140use vars qw( 141 $autoCorrectCorpus $base $DropList $correctednotspam $correctednotspamcount $correctedspam 142 $correctedspamcount $discarded $DoDropList $DoNotCollectRed $EmailAdrRe $EmailDomainRe $EmailFrom $EmailAdminReportsTo $griplist $HamWordCount 143 $KeepwhitelistedSpam $lowernorm $logfile $Log $LogDateFormat $maillogExt $MaxBytes $MaxCorrectedDays $MaxBayesFileAge $MaxNoBayesFileAge $MaintBayesCollection $MaxFiles $MaxKeepDeleted $MaxWhitelistDays 144 $MaxWhitelistLength $maintbayescollection $minimumfiles $minimumdays $mydb $myhost $mypassword $myuser $myName $notspamlog $processTime 145 $notspamlogcount $npRe $incomingOkMail $OrderedTieHashSize $pbdbfile $proxyserver $proxyuser $proxypass $noGriplist 146 $RebuildLog $rebuildrun $redlistdb $redRe $redReRE $resendmail $setFilePermOnStart $silent $spamdb $spamdberror $spamdbFile $spamdberrorFile $RegExLength 147 $spam $spamdbFname $spamdberrorFname $spamlog $spamlogcount $SpamWordCount $starttime 148 $usesubject $Whitelistrb_cleanFreq $whitelistdb $WhitelistObject $RebuildNotify $RedlistObject $viruslog $whiteRe $whiteReRE $wildcardUser 149 %HamHash %Helo %Redlist %spam %SpamHash %Whitelist $asspLog $DoNotCollectRedList $DoNotCollectRedRe 150 $DoFullGripDownload $UseLocalTime $uppernorm $TrashObject %Trashlist 151 $runAsUser $runAsGroup 152); 153# load from command line if specified 154if($ARGV[0]) { 155 $base=$ARGV[0]; 156} else { 157 # the last one is the one used if all else fails 158 $base = cwd(); 159 unless (-e "$base/assp.cfg") { 160 foreach ('.','/usr/local/assp','/home/assp','/etc/assp','/usr/assp','/applications/assp','/assp','/var/db/assp') { 161 if (-e "$_/assp.cfg") { 162 $base=$_; 163 last ; 164 } 165 } 166 } 167 $base = cwd() if $base eq '.'; 168} 169unless (chdir $base) { 170print 171"Usage: 172 perl rebuildspamdb.pl c:\\assp -- runs the programm in basedirectory c:\\assp\n 173"; 174 die "Abort: unable to change to basedirectory $base"; 175} 176$silent = 1 if (lc $ARGV[1] =~ /silent/i || lc $ARGV[0] =~ /silent/i); 177 178#load configuration options from assp.cfg file 179&loadconfig(); 180 181fork() && exit; 182 183# open log file 184if ( -e "$rebuildrun.bak" ) { 185 unlink("$rebuildrun.bak") or die "unable to remove file: $!"; 186} 187if ( -e $rebuildrun ) { 188 copy( $rebuildrun, "$rebuildrun.bak" ) or die "unable to copy file for: $!"; 189} 190open( $RebuildLog, '>', "$rebuildrun" ) or die "unable to open file for logging: $!"; 191 192our $silentlog; 193$starttime = time; 194&rb_printlog("\n"); 195for ( my $c = 10; $c >= 1; $c-- ) { &rb_printlog(q{*}); } 196my $savesilent=$silent; 197$silent=0; 198rb_printlog (×tring(time) . " RebuildSpamDB $VERSION $modversion is starting;\n") ; 199$silent=$savesilent; 200 201&rb_printlog( "\nRunning in $myName basedirectory '$base'\n"); 202#-- check if running as root 203&rb_printlog( "Running as root!!\n") if $< == 0 && $^O ne "MSWin32"; 204 205#-- print username 206&rb_printlog( "Running as user '" . (getpwuid($<))[0] . "'\n") if $< != 0 && $^O ne "MSWin32"; 207 208&rb_printlog("\n--- ASSP $myName Settings ---\n"); 209 210my $AvailTieRDBM = eval "use Tie::RDBM; 1"; # Is the required module installed? 211my $CanUseTieRDBM = $AvailTieRDBM; # this looks wierd but it's the only way it works 212undef $AvailTieRDBM; 213$EmailAdrRe = "[^()<>@,;:\\\"\\[\\]\000-\040]+"; 214$EmailDomainRe = '(?:\w[\w\.\-]*\.\w+|\[[\d\.]*\.\d+\])'; 215 216# set counts 217$HamWordCount = $SpamWordCount = $correctedspamcount = 0; 218$correctednotspamcount = $spamlogcount = $notspamlogcount = 0; 219 220# read old norm 221our $Normfile; 222our ($oldnorm, $oldcorrectedspamcount, $oldcorrectednotspamcount, $oldspamlogcount, $oldnotspamlogcount); 223open( $Normfile, '<', "$base/normfile" ) || warn "unable to open $base/normfile: $!\n"; 224if ($Normfile) { 225 while (<$Normfile>) { 226 ($oldnorm, $oldcorrectedspamcount, $oldcorrectednotspamcount, $oldspamlogcount, $oldnotspamlogcount) = split(" ",$_); 227 } 228 close $Normfile; 229} 230 231 232 233if ($DoNotCollectRedList) { 234 &rb_printlog( 235 "Do Not Collect Messages with redlisted address: Enabled\n**Messages with redlisted addresses will be removed from the corpus!**\n\n" 236 ); 237} 238 239&rb_printlog("Maxbytes: $MaxBytes \n"); 240&rb_printlog("Maxfiles: $MaxFiles \n"); 241 242 243#rebuild various cache files and lists 244&repair(); 245 246# Let's rb_clean the old deleted entries 247 248 &rb_cleanTrashlist(); 249 250# Let's rb_clean the non bayesian folder of old entries 251# Let's rb_clean the bayesian folder of old entries 252# Let's rb_clean the corrected spam/notspam folder of old entries 253 254 &rb_cleanUpCollection(); 255 256 257# name, contents, refrence to "compiled" object 258#&compileregex( "whiteRe", $whiteRe, \$whiteReRE ); 259&compileregex( "redRe", $redRe, \$redReRE ); 260 261# redlist,whitelist 262&createlistobjects(); 263 264# isspam?, path, filter, weight, processing sub 265$correctedspamcount = &processfolder( 1, $correctedspam, "*", 2, \&dospamhash ); 266$correctednotspamcount = &processfolder( 0, $correctednotspam, "*", 4, \&dohamhash ); 267$spamlogcount = &processfolder( 1, $spamlog, "*", 1, \&checkspam ); 268$notspamlogcount = &processfolder( 0, $notspamlog, "*", 1, \&checkham ); 269our $norm = $HamWordCount ? ( $SpamWordCount / $HamWordCount ) : 1; 270$norm = sprintf("%.4f",$norm); 271open( my $normFile, '>', "$base/normfile" ) || warn "unable to open $base/normfile: $!\n"; 272if ($normFile) { 273 print { $normFile } "$norm $correctedspamcount $correctednotspamcount $spamlogcount $notspamlogcount"; 274 close $normFile; 275} 276 277# Create Bayesian DB 278&generatescores(); 279our %HeloBlack; 280our $HeloBlackObject = tie %HeloBlack, 'orderedtie', "$base/$spamdb.helo"; 281# Create HELo blacklist 282&createheloblacklist(); 283&rb_printlog( 284 "\nSpam Weight:\t " . commify($SpamWordCount) . "\nNot-Spam Weight: " . commify($HamWordCount) . "\n\n" ); 285if ( !($norm) ) { #invalid norm 286 &rb_printlog("Warning: Corpus insufficent to calculate normality!\n"); 287} 288else { #norm exists, print it 289 290 291 my $normdesc = ''; 292 293 &rb_printlog( "Wanted Corpus norm:\t%.4f %s $normdesc \n", $autoCorrectCorpus ); 294 &rb_printlog( "Reached Corpus norm:\t%.4f %s $normdesc \n", $norm ); 295 296 297} 298 299 300$lowernorm = 0.5 if $lowernorm && ($lowernorm > 1 or $lowernorm < 0.5); 301 302 303if ( time - $starttime != 0 ) { $processTime = time - $starttime; } 304else { $processTime = 1; } 305&rb_printlog( "\nTotal processing time: %d second(s)\n\n", $processTime ); 306 307&downloadGripConf(); 308&uploadgriplist() if ! $noGriplistUpload && !$noGriplist; 309&downloadgriplist() if ! $noGriplistDownload && !$noGriplist; 310 311 312&downloaddroplist(); 313 314$savesilent=$silent; 315$silent=0; 316&rb_printlog( "\n"); 317&rb_printlog( ×tring(time) . " RebuildSpamDB $VERSION $modversion ended;\n"); 318$silent=$savesilent; 319&rb_printlog( "Sending Notify to $RebuildNotify\n") if $RebuildNotify; 320&rb_printlog( "Sending Notify not possible, address in RebuildNotify missing\n") if !$RebuildNotify; 321close $RebuildLog; 322if ($RebuildNotify) { 323 &sendNotification( 324 $EmailFrom, 325 $RebuildNotify, 326 'RebuildSpamDB - report', 327 "File rebuildrun.txt follows:\r\n\r\n", 328 "$base/rebuildrun.txt"); 329 } 330 331########################################## 332# script ends here 333########################################## 334sub createlistobjects { 335 336 if ( $CanUseTieRDBM && $whitelistdb =~ /mysql/ && !$KeepwhitelistedSpam ) { 337 eval { 338 $WhitelistObject = tie %Whitelist, 'Tie::RDBM', "dbi:mysql:database=$mydb;host=$myhost", 339 { user => "$myuser", password => "$mypassword", table => 'whitelist', create => 0 }; 340 }; 341 if ($EVAL_ERROR) { 342 &rb_printlog("whitelist mysql error: $@"); 343 $CanUseTieRDBM = 0; 344 $whitelistdb = "whitelist"; 345 } 346 } 347 elsif ( !$KeepwhitelistedSpam ) { 348 if ( -e $whitelistdb ) { $WhitelistObject = tie( %Whitelist, 'orderedtie', "$whitelistdb" ); } 349 } 350 if ( $CanUseTieRDBM && $redlistdb =~ /mysql/ && ( $DoNotCollectRed || $DoNotCollectRedList ) ) { 351 eval { 352 $RedlistObject = tie %Redlist, 'Tie::RDBM', "dbi:mysql:database=$mydb;host=$myhost", 353 { user => "$myuser", password => "$mypassword", table => 'redlist', create => 0 }; 354 }; 355 if ($EVAL_ERROR) { 356 &rb_printlog("redlist mysql error: $@"); 357 $CanUseTieRDBM = 0; 358 $redlistdb = "redlist"; 359 } 360 } 361 elsif ($DoNotCollectRed) { 362 if ( -e $redlistdb ) { $RedlistObject = tie( %Redlist, 'orderedtie', "$redlistdb" ); } 363 } 364 return; 365} ## end sub createlistobjects 366 367sub generatescores { 368 my ( $t, $s, @result, $pair, $v ); 369 &rb_printlog("\nGenerating weighted Bayesian tuplets..."); 370 open( $spamdbFile, '>', "$spamdb.tmp" ) || die "unable to open $spamdb.tmp: $!\n"; 371 binmode $spamdbFile; 372 print { $spamdbFile } "\n"; 373 while ( ( $pair, $v ) = each(%spam) ) { 374 ( $s, $t ) = split( q{ }, $v ); 375 $t = ( $t - $s ) * $norm + $s; # normalize t 376 if ( $t < 5 ) { 377 378 #$unknowns+=$s; $unknownt+=$t; 379 next; 380 } 381 382 # if token represents all spam or all ham then square its value 383 if ( $s == $t || $s == 0 ) { 384 $s = $s * $s; 385 $t = $t * $t; 386 } 387 $v = ( 1 + $s ) / ( $t + 2 ); 388 $v = sprintf( "%.7f", $v ); 389 $v = '0.9999999' if $v >= 1; 390 $v = '0.0000001' if $v <= 0; 391 push( @result, "$pair\002$v\n" ) if abs( $v - .5 ) > .09; 392 } 393 &rb_printlog("done\n"); 394 undef %spam; # free some memory 395 &rb_printlog("\nSaving rebuilt SPAM database..."); 396 for ( sort @result ) { print { $spamdbFile } $_; } 397 close $spamdbFile; 398 if ( -e "$spamdb.bak" ) { unlink("$spamdb.bak") || &rb_printlog("unable to remove '$spamdb.bak' $!\n") } 399 if ( -e $spamdb ) { 400 rename( $spamdb, "$spamdb.bak" ) || &rb_printlog("unable to rename '$spamdb' to '$spamdb.bak' $!\n"); 401 } 402 rename( "$spamdb.tmp", $spamdb ) || &rb_printlog("unable to rename '$spamdb.tmp' to '$spamdb' $!\n"); 403 &rb_printlog("done\n"); 404 my $filesize = -s "$spamdb"; 405 &rb_printlog( "\nResulting file '$spamdbFname' is " . commify($filesize) . " bytes\n" ); 406 my $pairs = scalar @result; 407 &rb_printlog( "Bayesian Pairs: " . commify($pairs) . "\n" ); 408 return; 409} ## end sub generatescores 410 411 412sub createheloblacklist { 413 my (@Helo); 414 open( my $FheloBlack, '>', "$spamdb.helo.tmp" ) || &rb_printlog("unable to open '$spamdb.helo.tmp' $!\n"); 415 binmode $FheloBlack; 416 print { $FheloBlack } "\n"; 417 my $allcount = 0; 418 my $notnew = 0; 419 while ( my ( $helostr, $weights ) = each(%Helo) ) { 420 421 if ( $weights->[1] / ( $weights->[0] + $weights->[1] + .1 ) > .98 ) { push( @Helo, "$helostr\0021\n" ); } 422 elsif ( $weights->[1] / ( $weights->[0] + $weights->[1] + .1 ) < .2 ) { push( @Helo, "$helostr\0020.01\n" ); } 423 424 } 425 print { $FheloBlack } sort @Helo; 426 eval{close $FheloBlack;}; 427 &rb_printlog( "\nHELO Blacklist: " . scalar(@Helo) . " HELOs\n" ); 428 if ( -e "$spamdb.helo.bak" ) { 429 unlink("$spamdb.helo.bak") || &rb_printlog("unable to remove '$spamdb.helo.bak' $!\n"); 430 } 431 if ( -e "$spamdb.helo" ) { 432 rename( "$spamdb.helo", "$spamdb.helo.bak" ) 433 || &rb_printlog("unable to rename '$spamdb.helo' to '$spamdb.helo.bak' $!\n"); 434 } 435 rename( "$spamdb.helo.tmp", "$spamdb.helo" ) 436 || &rb_printlog("unable to rename '$spamdb.helo.tmp' to '$spamdb.helo' $!\n"); 437 return; 438} 439 440sub loadconfig { 441 442 my $RCF; 443 open($RCF,"<$base/assp.cfg"); 444 while (<$RCF>) { 445 s/\r|\n//go; 446 my ($k,$v) = split(/:=/o,$_,2); 447 next unless $k; 448 $Config{$k} = $v; 449 } 450 close $RCF; 451 $TrashObject = tie %Trashlist, 'orderedtie', "$base/trashlist.db"; 452 453 $DoDropList = $Config{ DoDropList }; 454 $runAsUser = $Config{ runAsUser }; 455 $runAsGroup = $Config{ runAsGroup }; 456 $correctednotspam = $Config{ correctednotspam } && "$Config{base}/$Config{correctednotspam}" || 'errors/notspam'; 457 $correctedspam = $Config{ correctedspam } && "$Config{base}/$Config{correctedspam}" || 'errors/spam'; 458 $incomingOkMail = $Config{ incomingOkMail } && "$Config{base}/$Config{incomingOkMail}" || 'okmail'; 459 $DoNotCollectRed = $Config{ DoNotCollectRed }; 460 $DoNotCollectRedRe = $Config{ DoNotCollectRedRe }; 461 $DoNotCollectRedList = $Config{ DoNotCollectRedList }; 462 $KeepwhitelistedSpam = $Config{ KeepwhitelistedSpam }; 463 $logfile = $Config{ logfile } && "$Config{base}/$Config{logfile}" || 'maillog.txt'; 464 $Log = $Config{ logfile } && "$Config{base}/$Config{logfile}" || 'maillog.txt'; 465 $maillogExt = $Config{ maillogExt }; 466 $MaxBytes = $Config{ MaxBytes } || 10000; 467 $MaxFiles = $Config{ MaxFiles } || 14000; 468 469 $MaxWhitelistDays = $Config{ MaxWhitelistDays } || 90; 470 $MaxCorrectedDays = $Config{ MaxCorrectedDays } || 1000; 471 $MaxNoBayesFileAge = $Config{ MaxNoBayesFileAge } || 30; 472 $MaxBayesFileAge = $Config{ MaxBayesFileAge }; 473 $MaintBayesCollection = $Config{ MaintBayesCollection } || 1; 474 $maintbayescollection = $MaintBayesCollection; 475 $MaxWhitelistLength = $Config{ MaxWhitelistLength } || 60; 476 $MaxKeepDeleted = $Config{ MaxKeepDeleted } || 0; 477 $notspamlog = $Config{ notspamlog } && "$Config{base}/$Config{notspamlog}" || 'notspam'; 478 $npRe = $Config{ npRe }; 479 $OrderedTieHashSize = $Config{ OrderedTieHashSize } || 10_000; 480 $pbdbfile = $Config{ pbdb }; 481 482 $proxyserver = $Config{ proxyserver }; 483 $proxyuser = $Config{ proxyuser }; 484 $proxypass = $Config{ proxypass }; 485 $resendmail = $Config{ resendmail }; 486 $redlistdb = $Config{ redlistdb } && "$Config{base}/$Config{redlistdb}" || 'redlist'; 487 $redRe = $Config{ redRe }; 488 $myName = $Config{ myName }; 489 $setFilePermOnStart = $Config{ setFilePermOnStart }; 490 $spamdb = $Config{ spamdb } && "$Config{base}/$Config{spamdb}" || 'spamdb'; 491 $spamdbFname = $Config{ spamdb } || 'spamdb'; 492 $spamdberrorFname = $spamdbFname.'error'; 493 $spamdberror = $spamdb.'error'; 494 $spamlog = $Config{ spamlog } && "$Config{base}/$Config{spamlog}" || 'spam'; 495 $discarded = $Config{ discarded } && "$Config{base}/$Config{discarded}" || 'discarded'; 496 $viruslog = $Config{ viruslog } && "$Config{base}/$Config{viruslog}" || 'viruslog'; 497 $usesubject = $Config{ UseSubjectsAsMaillogNames }; 498 $whitelistdb = $Config{ whitelistdb } && "$Config{base}/$Config{whitelistdb}" || 'whitelist'; 499 $griplist = $Config{ griplist }; 500 $DropList = $Config{ DropList } || 'file:files/droplist.txt'; 501 ($DropList) = $DropList =~ /^ *file: *(.+)/i if $DropList =~ /^ *file:/; 502 $noGriplist = $Config{ noGriplist }; 503 $noGriplistUpload = $Config{ noGriplistUpload }; 504 $noGriplistDownload = $Config{ noGriplistDownload }; 505 $asspLog = $Config{ asspLog }; 506 $whiteRe = $Config{ whiteRe }; 507 $wildcardUser = $Config{ wildcardUser }; 508 $mydb = $Config{ mydb }; 509 $myhost = $Config{ myhost }; 510 $myuser = $Config{ myuser }; 511 $mypassword = $Config{ mypassword }; 512 $rebuildrun = &fixPath($base) . "/rebuildrun.txt"; 513 $EmailAdminReportsTo = $Config{ EmailAdminReportsTo }; 514 $RebuildNotify = $Config{ RebuildNotify }; 515 516 517 $autoCorrectCorpus = $Config{ autoCorrectCorpus } || "1"; 518 519 520 $minimumfiles = 10000 if !$minimumfiles; 521 $minimumfiles = 5000 if $minimumfiles < 5000; 522 $minimumdays = 14 if !$minimumdays; 523 $minimumdays = 7 if $minimumdays < 7; 524 525 $EmailFrom = $Config{ EmailFrom }; 526 $RegExLength = $Config{ RegExLength }; 527 $UseLocalTime = $Config{ UseLocalTime }; 528 $LogDateFormat = $Config{ LogDateFormat } || "MMM-DD-YY hh:mm:ss"; 529 530 $DoFullGripDownload = $Config{ DoFullGripDownload }; 531 532 return; 533} ## end sub loadconfig 534 535sub processfolder { 536 my ( $fldrType, $fldrpath, $filter, $weight, $sub ) = @_; 537 my ( $count, $processFolderTime, $folderStartTime, $fileCount, @files ); 538 our ( $WhiteCount, $RedCount ); 539 $folderStartTime = time; 540 $fldrpath = &fixPath($fldrpath); 541 &rb_printlog( "\n" . $fldrpath . "\n" ); 542 $fldrpath .= "/*"; 543 $fileCount = &countfiles($fldrpath); 544 &rb_printlog( "File Count:\t" . commify($fileCount) ); 545 &rb_printlog("\nProcessing..."); 546 $count = $WhiteCount = 0; 547 @files = glob($fldrpath); 548 my $importmaxfiles = $MaxFiles; 549 550 my $percent; 551 552 553 my $filenum; 554 555 556 557 558 559 #while( glob($fldrpath) && $count <= $MaxFiles ) { 560 foreach my $file (@files) { 561 562 &add( $fldrType, $file, $weight, $sub ); 563 last if ($spamlogcount && $autoCorrectCorpus && !$fldrType && $SpamWordCount && ($HamWordCount * $autoCorrectCorpus) > $SpamWordCount); 564 $count++; 565 last if $count >= $importmaxfiles; #too many files 566 } 567 if ( time - $folderStartTime != 0 ) { $processFolderTime = time - $folderStartTime; } 568 else { $processFolderTime = 1; } 569 $count = $count - $WhiteCount ; 570 571 572 if ($WhiteCount) { 573 &rb_printlog( "\nRemoved White:\t" . commify($WhiteCount) ); 574 } 575 576 &rb_printlog( "\nImported Files:\t" . commify($count) ); 577 &rb_printlog( "\nImported SpamWordCount(total):\t" . commify($SpamWordCount) ); 578 &rb_printlog( "\nImported HamWordCount(total):\t" . commify($HamWordCount) ); 579 580 581 if ( $count > $MaxFiles ) { 582 $maintbayescollection = 1; 583 } 584 585 #&rb_printlog( "\n " . commify($SpamWordCount) . " spam weight \n " . commify($HamWordCount) . " non-spam weight." ); 586 &rb_printlog("\nFinished in $processFolderTime second(s)\n"); 587 588 return $count; 589} ## end sub processfolder 590 591sub countfiles { 592 my ($fldrpath) = @_; 593 my @fileCount = glob("$fldrpath"); 594 return scalar(@fileCount); 595} 596 597sub commify { 598 local $_ = shift; 599 1 while s/^([-+]?\d+)(\d{3})/$1,$2/; 600 return $_; 601} 602 603sub hash { 604 my ($msgText) = @_; 605 606 my ( $head, $body ); 607 608 # creates a md5 hash of $msg body 609 if ( $msgText =~ /^(.*?)\n\r?\n(.*)/s ) { 610 611 $head = $1; 612 $body = $2; 613 614 return md5_hex($body); 615 } 616 else { 617 618 #return q; 619 #There is no split, the message has no valid body 620 return md5_hex($msgText); 621 } 622 623 #return $value; 624 return; 625} 626 627sub dospamhash { 628 my ( $FileName, $msgText ) = @_; 629 $SpamHash{ &hash($msgText) } = '1'; 630 return; 631} 632 633sub dohamhash { 634 my ( $FileName, $msgText ) = @_; 635 $HamHash{ &hash($msgText) } = q{}; 636 return; 637} 638 639sub checkspam { 640 my ( $FileName, $msgText ) = @_; 641 our $HamHash; 642 $msgText = &hash($msgText); 643 my ( $return, $reason ); 644 if ( defined( $HamHash{ $msgText } ) ) { 645 646 # we've found a message in the spam database that is the same as one in the corrected Ham group 647 my $fn = shift; 648 &deletefile( $fn, "found in $correctednotspam" ); 649 return 1; 650 } elsif ( $reason = &redlisted( $_[1] ) ) { 651 my $fn = shift; 652 &deletefile( $fn, $reason ); 653 return 1; 654 655 } elsif ( $reason = &whitelisted( $_[1] ) ) { 656 my $fn = shift; 657 &deletefile( $fn, $reason ); 658 return 1; 659 } 660 return 0; 661} 662 663sub checkham { 664 my ( $FileName, $msgText ) = @_; 665 our $SpamHash; 666 my ( $return, $reason ); 667 $msgText = &hash($msgText); 668 if ( defined( $SpamHash{ $msgText } ) ) { 669 670 # we've found a message in the ham database that is the same as one in the corrected spam group 671 my $fn = shift; 672 &deletefile( $fn, "found in $correctedspam" ); 673 return 1; 674 675 } 676 return 0; 677} 678 679sub getrecontent { 680 my ( $value, $name ) = @_; 681 my $fromfile = 0; 682 if ( $value =~ /^ *file: *(.+)/i ) { 683 684 # the option list is actually saved in a file. 685 $fromfile = 1; 686 my $fil = $1; 687 $fil = "$base/$fil" if $fil !~ /^\Q$base\E/i; 688 local $/; 689 if ( open( my $File, '<', $fil ) ) { 690 $value = <$File>; 691 692 # rb_clean off comments 693 $value =~ s/#.*//g; 694 695 # replace newlines (and the whitespace that surrounds them) with a |(pipe character) 696 $value =~ s/\s*\n\s*/|/g; 697 close $File; 698 } 699 else { $value = q{}; } 700 } 701 $value =~ s/\|\|/\|/g; 702 $value =~ s/\s*\|/\|/g; 703 $value =~ s/\|\s*/\|/g; 704 $value =~ s/\|\|+/\|/g; 705 $value =~ s/^\s*\|?//; 706 $value =~ s/\|?\s*$//; 707 $value =~ s/\|$//; 708 return $value; 709} ## end sub getrecontent 710 711sub batv_remove_tag { 712 my $mailfrom = shift; 713 714 if ($mailfrom =~ /^(prvs=.*=)(.*)/o) { 715 716 $mailfrom = lc $2; 717 718 } 719 return $mailfrom; 720} 721 722sub whitelisted { 723 return 0 if $KeepwhitelistedSpam; 724 my $m = shift; 725 my $curaddr; 726 my %seen; 727 728 729 $m =~ s/\n\r?\n.*//s; # remove body 730 while ( $m =~ /(?:from|sender|reply-to|errors-to|envelope-from|list-\w+):($HeaderValueRe)/igo) { 731 my $s = $1; 732 733 if ($s !~ /($EmailAdrRe\@$EmailDomainRe)/io) { 734 next; 735 } else { 736 $curaddr = batv_remove_tag($1); 737 738 } 739 740 if ( exists $seen{ $curaddr } ) { 741 742 next; #we already checked this address 743 } else { 744 $seen{ $curaddr } = 1; 745 } 746 747 if ( $Whitelist{ $curaddr } ) { 748 my $reason = $curaddr; 749 $reason =~ s/\s+$/ /g; 750 $reason =~ s/[\r\n]/ /g; 751 our $WhiteCount++; 752 return ( " -- '$reason' is in Whitelist"); 753 } 754 if ($wildcardUser) { 755 my ( $mfdd, $alldd, $reason ); 756 $mfdd = $1 if $curaddr =~ /(\@.*)/; 757 $alldd = "$wildcardUser$mfdd"; 758 if ( $Whitelist{ lc $alldd } ) { 759 $reason = $curaddr; 760 $reason =~ s/\s+$/ /g; 761 $reason =~ s/[\r\n]/ /g; 762 our $WhiteCount++; 763 return ( " -- '$reason' is in Whitelist ($wildcardUser)"); 764 } 765 } 766 } ## end while ( $m =~ /($EmailAdrRe\@$EmailDomainRe)/igo) 767 return 0; 768} ## end sub whitelisted 769 770sub redlisted { 771 my $m = shift; 772 my (%seen); 773 my $isasspheader; 774 775 if ( $DoNotCollectRedList) { 776 $m =~ s/\n\r?\n.*//s; # remove body 777 while ( $m =~ /($EmailAdrRe\@$EmailDomainRe)/igo ) { 778 my $curaddr = lc($1); 779 780 #$curaddr = lc( $1 . $2 ); 781 if ( exists $seen{ $curaddr } ) { 782 next; #we already checked this address 783 } 784 else { $seen{ $curaddr } = 1; } 785 if ( $Redlist{ $curaddr } ) { 786 my $reason = $curaddr; 787 $reason =~ s/\s+$/ /g; 788 $reason =~ s/[\r\n]/ /g; 789 our $RedCount++; 790 return ( " -- '$reason' is in Redlist"); 791 } 792 } 793 } 794 return 0; 795} ## end sub redlisted 796 797sub deletefile { 798 my ( $fn, $reason, $nolog, $notrashlist ) = @_; 799 800 if ( -e $fn ) { 801 if ( -w $fn || -W $fn ) { 802 &rb_printlog( "\nremoving " . $fn . q{ } . $reason ); 803 if ($MaxKeepDeleted && !$notrashlist ) { 804 $Trashlist{$fn}=time; 805 } else { 806 807 unlink($fn); 808 809 } 810 } else { 811 rb_printlog( "\ncannot delete " . $reason . " message " . $fn . ": file is not writable: $!" ) ; 812 } 813 } else { 814 rb_printlog( "\ncannot delete " . $reason . " message " . $fn . ": $!" ) if !$nolog; 815 } 816 817 818} 819 820 821sub getfile { 822 my ( $fn, $sub ) = @_; 823 my $message; 824 my $count; 825 my $numreadchars; 826 return if exists $Trashlist{$fn}; 827 open( my $file, '<', "$fn" ) || return; 828 829# my $dtime=(stat($fn))[9]-time; 830 831# return if $dtime > 0; 832 # Maxbytes or 10000, whichever is less 833 $numreadchars = $MaxBytes <= 10_000 ? $MaxBytes : 10_000; 834 $count = read( $file, $message, $numreadchars ); # read characters into memory 835 close $file; 836 return if $sub->( $fn, $message ); # have i read this before? 837 838 return $message; 839} 840 841sub add { 842 my ( $isspam, $fn, $factor, $sub ) = @_; 843 844 return if -d $fn; 845 my ( $curHelo, $CurWord, $PrevWord, $sfac, $tfac ); 846 $PrevWord = $CurWord = q{}; 847 my $content = &getfile( $fn, $sub ); 848 return unless $content; 849 if ( $content =~ /helo=(.*?)\)/i ) { 850 $curHelo = lc($1); 851 if ( $Helo{ $curHelo } ) { $Helo{ $curHelo }->[$isspam] += $factor; } 852 else { #it doesn't seem to exist. create it. 853 $Helo{ $curHelo }->[$isspam] = $factor; 854 } 855 } 856 my $OK; 857 ($content,$OK) = &rb_clean($content); 858 while ( $content =~ /([-\$A-Za-z0-9\'\.!\240-\377]{2,})/g ) { 859 if ($spamlogcount && $autoCorrectCorpus && !$isspam && $SpamWordCount && ($HamWordCount * $autoCorrectCorpus) > $SpamWordCount) { 860 861 return; 862 } 863 if ( length($1) > 20 || length($1) < 2 ) { next } 864 $PrevWord = $CurWord; 865 866 $CurWord = BayesWordrb_clean($1); 867 868 if ( !$PrevWord ) { next } # We only want word pairs 869 if ( length($CurWord) < 2 || length($PrevWord) < 2 ) { next } # too short after rb_cleaning 870 871 # increment global weights, they are not really word counts 872 if ($isspam) { $SpamWordCount += $factor; } 873 else { $HamWordCount += $factor; } 874 if ( exists( $spam{ "$PrevWord $CurWord" } ) ) { 875 ( $sfac, $tfac ) = split( q{ }, $spam{ "$PrevWord $CurWord" } ); 876 } 877 else { 878 879 # the pair does not exist, create it 880 $spam{ "$PrevWord $CurWord" } = "0 0"; 881 ( $sfac, $tfac ) = split( q{ }, $spam{ "$PrevWord $CurWord" } ); 882 } 883 $sfac += $isspam ? $factor : 0; 884 $tfac += $factor; 885 $spam{ "$PrevWord $CurWord" } = "$sfac $tfac"; 886 } ## end while ( $content =~ /([-\$A-Za-z0-9\'\.!\240-\377]{2,})/g) 887 return; 888} ## end sub add 889 890sub Umlaute { 891 my $string = shift; 892 my %umlaute = ("ä" => "ae", "Ä" => "Ae", "ü" => "ue", "Ü" => "Ue", "ö" => "oe", "Ö" => "Oe", "ß" => "ss" ); 893 my $umlautkeys = join ("|", keys(%umlaute)); 894 $string =~ s/($umlautkeys)/$umlaute{$1}/g; 895 return $string; 896} ## 897 898sub BayesWordrb_clean { 899 my $word = lc(shift); 900 $word =~ s/#(?:[a-f0-9]{2})+/randcolor/go; 901 $word =~ s/^#\d+/randdecnum/go; 902 903 $word =~ s/[_\[\]\~\@\%\$\&\{\}<>#(),.'";:=!?*+\/\\\-]+$//o; 904 $word =~s/^[_\[\]\~\@\%\$\&\{\}<>#(),.'";:=!?*+\/\\\-]+//o; 905 $word =~ s/!!!+/!!/go; 906 $word =~ s/\*\*+/**/go; 907 $word =~ s/--+/-/go; 908 $word =~ s/__+/_/go; 909 $word =~ s/[\d,.]{2,}/randnumber/go; 910 $word =~ s/^[\d:\.\-+();:<>,!"'\/%]+(?:[ap]m)?$/randwildnum/o; # ignore numbers , dates, times, versions ... 911# $word = &Umlaute($word); 912 913 return if length($word) > 20 or length($word) < 2; 914 return $word; 915} 916# rb_clean up source email 917sub rb_clean { 918 my $m = shift; 919 920 my $msg = ref($m) ? $$m : $m; 921 my $t = time + 15; # max 15 seconds for this rb_cleaning 922 my $body; 923 my $header; 924 my $undec = 1; 925 926 927 928 local $_= "\n". (($header) ? $header : $msg); 929 my ($helo,$rcpt); 930 if ($header) { 931 ($helo)=/helo=([^)]+)\)/io; 932 $helo=~s/(\w+)/ hlo $1 /go if length($helo) > 19; # if the helo string is long, break it up 933 $rcpt="rcpt ".join(" rcpt ",/($EmailAdrRe\@$EmailDomainRe)/go); 934 return "helo: $helo\n$rcpt\n",0 if (time > $t); 935 # mark the subject 936 $rcpt .= "\n".fixsub($1) if /\nsubject: (.*)/io; 937 return "helo: $helo\n$rcpt\n",0 if (time > $t); 938 } 939 940 # from now only do the body if possible 941 local $_ = $body if $body; 942 943 # replace &#ddd encoding 944 s/\&\#(\d{1,5})\;?/chr($1)/geo; 945 s/\&\#x(\d{1,4})\;?/chr(hex($1))/geo; 946 s/([^\\])?[%=]([0-9A-F]{2})/$1.chr(hex($2))/gieo; # replace url encoding 947 return "helo: $helo\n$rcpt\n",0 if (time > $t); 948 949 if ($undec) { 950 # replace base64 encoding 951 s/\n([a-zA-Z0-9+\/=]{40,}\r?\n[a-zA-Z0-9+\/=\r\n]+)/base64decode($1)/gseo; 952 return "helo: $helo\n$rcpt\n",0 if (time > $t); 953 954 # rb_clean up quoted-printable references 955 s/(Subject: .*)=\r?\n/$1\n/o; 956 return "helo: $helo\n$rcpt\n",0 if (time > $t); 957 s/=\r?\n//go; 958 return "helo: $helo\n$rcpt\n",0 if (time > $t); 959 # strip out mime continuation 960 s/.*---=_NextPart_.*\n//go; 961 return "helo: $helo\n$rcpt\n",0 if (time > $t); 962 } 963 964 # rb_clean up MIME quoted-printable line breakings 965 s/=\r?\n//gos; 966 return "helo: $helo\n$rcpt\n",0 if (time > $t); 967 968 # rb_clean up and & 969 s/ ?/ /gio; s/&?/and/gio; 970 return "helo: $helo\n$rcpt\n",0 if (time > $t); 971 s/(\d),(\d)/$1$2/go; 972 return "helo: $helo\n$rcpt\n",0 if (time > $t); 973 s/\r//go; s/ *\n/\n/go; 974 return "helo: $helo\n$rcpt\n",0 if (time > $t); 975 s/\n\n\n\n\n+/\nblines blines\n/go; 976 return "helo: $helo\n$rcpt\n",0 if (time > $t); 977 978 # rb_clean up html stuff 979 s/<\s*script[^>]+>.*?<\s*\/\s*script\s*>/ jscripttag /igos; 980 s/<\s*script[^>]+>/ jscripttag /igos; 981 return "helo: $helo\n$rcpt\n",0 if (time > $t); 982 # remove style sheets 983 s/<\s*style[^>]*>.*?<\s*\/\s*style\s*>//igso; 984 return "helo: $helo\n$rcpt\n",0 if (time > $t); 985 986# while (s/(\w+)(<[^>]*>)((?:<[^>]*>)*\w+)/$2$1$3/go){return "helo: $helo\n$rcpt\n",0 if (time > $t);} # move html out of words 987 s/<\s*(?:[biu]|strong)\s*>/ boldifytext /gio; 988 return "helo: $helo\n$rcpt\n",0 if (time > $t); 989 990 # remove some tags that are not informative 991 s/<\s*\/?\s*(?:p|br|div|t[drh]|li|[uo]l|center)[^>]*>/\n/gios; 992 s/<\s*\/?\s*(?:[biuo]|strong)\s*>//gio; 993 return "helo: $helo\n$rcpt\n",0 if (time > $t); 994 s/<\s*\/?\s*(?:html|meta|head|body|span|table|font|col|map)[^>]*>//igos; 995 return "helo: $helo\n$rcpt\n",0 if (time > $t); 996 s/(<\s*a\s[^>]*>)(.*?)(<\s*\/a\s*>)/$1.fixlinktext($2)/igseo; 997 return "helo: $helo\n$rcpt\n",0 if (time > $t); 998 999 # treat titles like subjects 1000 s/<\s*title[^>]*>(.*?)<\s*\/\s*title\s*>/fixsub($1)/igeos; 1001 return "helo: $helo\n$rcpt\n",0 if (time > $t); 1002 1003 # remove html comments 1004 s/<\s*!.*?-->//gso; s/<\s*![^>]*>//go; 1005 return "helo: $helo\n$rcpt\n",0 if (time > $t); 1006 1007 # look for random words 1008 s/[ a-z0-9][ghjklmnpqrstvwxz_]{2}[bcdfghjklmnpqrstvwxz_0-9]{3}\S*/ randword /gio; 1009 return "helo: $helo\n$rcpt\n",0 if (time > $t); 1010 1011 # remove mime separators 1012 s/\n--.*?randword.*//go; 1013 return "helo: $helo\n$rcpt\n",0 if (time > $t); 1014 1015 # look for linked images 1016 s/(<\s*a[^>]*>[^<]*<\s*img)/ linkedimage $1/giso; 1017 return "helo: $helo\n$rcpt\n",0 if (time > $t); 1018 s/<[^>]*href\s*=\s*("[^"]*"|\S*)/fixhref($1)/isgeo; 1019 return "helo: $helo\n$rcpt\n",0 if (time > $t); 1020 s/(?:ht|f)tps?:\/\/(\S*)/fixhref($1)/isgeo; 1021 return "helo: $helo\n$rcpt\n",0 if (time > $t); 1022 s/(\S+\@\S+\.\w{2,5})\b/fixhref($1)/geo; 1023 s/<?\s*img .{0,50}src\s*=\s*['"]([^'"]*)['"][^>]+>/$1/gois; 1024 s/["']\s*\/?s*>|target\s*=\s*['"]_blank['"]|<\s*\/|:\/\/ //go; 1025 s/ \d{2,} / 1234 /go; 1026 1027 1028 return ("helo: $helo\n$rcpt\n$_",1); 1029 1030} 1031 1032sub rb_cleanwhite { 1033 &rb_printlog("\n---rb_cleaning whitelist ($whitelistdb)---\n"); 1034 1035 &rb_printlog( "whitelist entries older than " . $MaxWhitelistDays . " days (MaxWhitelistDays) will be removed\n" ); 1036 my $calcTime = time - 24 * 3600 * $MaxWhitelistDays; 1037 1038 my $wlbefore = 0; 1039 my $wlafter = 0; 1040 if ( !( $whitelistdb =~ /mysql/ ) ) { 1041 if ( open( F, "<", "$whitelistdb" ) ) { 1042 binmode(F); 1043 $_ = <F>; # ignore blank line at start of file 1044 my $nwhite; 1045 local $/ = "\n"; 1046 $nwhite = "\n"; 1047 while (<F>) { 1048 chomp; 1049 $wlbefore++; 1050 my ( $adr, $time ) = split( "\002", $_ ); 1051 next if ( !$time || !$adr ); 1052 $adr =~ s/^\'//g; 1053 $adr =~ s/^\"//g; 1054 $adr = batv_remove_tag($adr); 1055 next if ($adr =~ m/^'/); #skip addresses with leading ' chars 1056 next if $calcTime > $time || length($adr) > $MaxWhitelistLength; 1057 $nwhite .= "$adr\002$time\n"; 1058 $wlafter++; 1059 } 1060 close F; 1061 1062 unlink "$whitelistdb.bak"; 1063 rename( $whitelistdb, "$whitelistdb.bak" ); 1064 open( O, ">", "$whitelistdb" ); 1065 binmode(O); 1066 print O $nwhite; 1067 close O; 1068 1069 } 1070 } ## end if ( !( $whitelistdb =~... 1071 else { 1072 my %Whitelist; 1073 my $WhitelistObject; 1074 eval { 1075 $WhitelistObject = tie %Whitelist, 'Tie::RDBM', "dbi:mysql:database=$mydb;host=$myhost", 1076 { user => "$myuser", password => "$mypassword", table => 'whitelist', create => 0 }; 1077 }; 1078 if ($EVAL_ERROR) { 1079 &rb_printlog("whitelist mysql error: $@"); 1080 $CanUseTieRDBM = 0; 1081 $whitelistdb = "whitelist"; 1082 } 1083 $wlbefore = scalar keys %Whitelist; 1084 $wlafter = $wlbefore; 1085 while ( my ( $key, $value ) = each %Whitelist ) { 1086 1087 #my $date1 = localtime($value); #debugging stuff 1088 #my $date2 = localtime($calcTime); 1089 #print "$key=$value\n"; 1090 if ( $value < $calcTime || length($key) > $MaxWhitelistLength ) { 1091 if ( $Whitelist{ $key } ) { 1092 delete $Whitelist{ $key }; 1093 $wlafter--; 1094 } 1095 } 1096 } 1097 $WhitelistObject->flush() if $WhitelistObject && $whitelistdb !~ /mysql/; 1098 1099 #untie %Whitelist; 1100 } ## end else [ if ( !( $whitelistdb =~... 1101 &rb_printlog( "whitelist before: " . commify($wlbefore) . "\n" ); 1102 &rb_printlog( "whitelist after: " . commify($wlafter) . "\n" ); 1103 return; 1104} ## end sub rb_cleanwhite 1105 1106sub dayofweek { 1107 1108 # this is mercilessly hacked from John Von Essen's Date::Day 1109 my ( $d, $m, $y ) = $_[0] =~ /(\S+) +(\S+) +(\S+)/; 1110 1111 # data for DayOfWeek function 1112 my %Months = ( 1113 'Jan', 1, 'Feb', 2, 'Mar', 3, 'Apr', 4, 'May', 5, 'Jun', 6, 1114 'Jul', 7, 'Aug', 8, 'Sep', 9, 'Oct', 10, 'Nov', 11, 'Dec', 12, 1115 ); 1116 my %Month = ( 1, 0, 2, 3, 3, 2, 4, 5, 5, 0, 6, 3, 7, 5, 8, 1, 9, 4, 10, 6, 11, 2, 12, 4, ); 1117 my %Weekday = ( 0, 'srdSUN', 1, 'srdMON', 2, 'srdTUE', 3, 'srdWED', 4, 'srdTHU', 5, 'srdFRI', 6, 'srdSAT', ); 1118 $y += 2000; 1119 $m = $Months{ $m }; 1120 if ( $m <= 2 ) { $y--; } 1121 my $wday = ( ( $d + $Month{ $m } + $y + ( int( $y / 4 ) ) - ( int( $y / 100 ) ) + ( int( $y / 400 ) ) ) % 7 ); 1122 return $Weekday{ $wday }; 1123} 1124sub fixhref { my $t = shift; $t =~ s/(\w+)/ href $1 /g; return $t; } 1125sub fixlinktext { my $t = shift; $t =~ s/(\w+)/atxt $1/g; return $t; } 1126 1127sub fixurl { 1128 my $a = shift; 1129 $a =~ s/%([0-9a-fA-F][0-9a-fA-F])/pack('C',hex($1))/ge; 1130 return $a; 1131} 1132 1133sub fixsub { 1134 my $s = shift; 1135 1136 #print "$s=>"; 1137 $s =~ s/ {3,}/ lotsaspaces /g; 1138 $s =~ s/(\S+)/ssub $1/g; 1139 1140 #print "$s\n"; 1141 return "\n$s ssub"; 1142} 1143 1144sub base64decode { 1145 my $str = shift; 1146 my $res = "\n\n"; 1147 $str =~ tr|A-Za-z0-9+/||cd; 1148 $str =~ tr|A-Za-z0-9+/| -_|; 1149 while ( $str =~ /(.{1,60})/gs ) { 1150 my $len = chr( 32 + length($1) * 3 / 4 ); 1151 $res .= unpack( "u", $len . $1 ); 1152 } 1153 return $res; 1154} 1155 1156sub rb_printlog { 1157 return if $silentlog; 1158 my ( $text, $format ) = @_; 1159 if ( !$format ) { 1160 print "$text" unless $silent; 1161 print $RebuildLog "$text"; 1162 } 1163 if ($format) { 1164 printf "$text", $format unless $silent; 1165 printf $RebuildLog "$text", $format; 1166 } 1167 return; 1168} 1169sub timestring { 1170 my ($time,$what) = @_; 1171 my @m = $time ? localtime($time) : localtime(); 1172 my $tstr = $time ? scalar(localtime($time)) : scalar(localtime()); 1173 my ($day,$month) = $tstr =~ /(...) (...)/; 1174 my $format = $LogDateFormat; 1175 if (lc $what eq 'd') { # date only - remove time part from format 1176 $format =~ s/[^YMD]*(?:hh|mm|ss)[^YMD]*//g; 1177 } elsif (lc $what eq 't') { # time only - remove date part from format 1178 $format =~ s/[^hms]*(?:Y{2,4}|M{2,3}|D{2,3})[^hms]*//g; 1179 } 1180 $format =~ s/^[^YMDhms]//; 1181 $format =~ s/[^YMDhms]$//; 1182 $format =~ s/YYYY/sprintf("%04d",$m[5]+1900)/e; 1183 $format =~ s/YY/sprintf("%02d",$m[5]-100)/e; 1184 $format =~ s/MMM/$month/; 1185 $format =~ s/MM/sprintf("%02d",$m[4]+1)/e; 1186 $format =~ s/DDD/$day/e; 1187 $format =~ s/DD/sprintf("%02d",$m[3])/e; 1188 $format =~ s/hh/sprintf("%02d",$m[2])/e; 1189 $format =~ s/mm/sprintf("%02d",$m[1])/e; 1190 $format =~ s/ss/sprintf("%02d",$m[0])/e; 1191 1192 return $format; 1193} 1194 1195sub downloadGripConf { 1196 1197 1198 my $rc; 1199 1200 my $griplistUrl = "http://downloads.sourceforge.net/project/assp/griplist/griplist.conf"; 1201 my $file = "$base/griplist.conf"; 1202 1203 $file = &fixPath($file); 1204 1205 1206 if ( !$CanUseLWP ) { 1207 &rb_printlog("$file download failed: LWP::Simple Perl module not available\n"); 1208 return; 1209 } 1210 1211 if ( -e $file ) { 1212 if ( !-r $file ) { 1213 &rb_printlog( "$file download failed: $file not readable!\n" ); 1214 return; 1215 } elsif ( !-w $file ) { 1216 &rb_printlog( "$file download failed: $file not writable!\n" ); 1217 return; 1218 } 1219 } 1220 else { 1221 if ( open( TEMPFILE, ">", $file ) ) { 1222 #we can create the file, this is good, now close the file and keep going. 1223 close TEMPFILE; 1224 unlink($file); 1225 } else { 1226 &rb_printlog("$file download failed: Cannot create $file - $!\n" ); 1227 return; 1228 } 1229 } 1230 1231 # Create LWP object 1232 use LWP::Simple qw(mirror is_success status_message $ua); 1233 1234 # Set useragent to Rebuild version 1235 $ua->agent( 1236 "rebuildspamdb/$VERSION ($^O; Perl/$]; LWP::Simple/$LWP::VERSION)"); 1237 $ua->timeout(20); 1238 if ($proxyserver) { 1239 $ua->proxy( 'http', "http://" . $proxyserver ); 1240 1241 } else { 1242 1243 } 1244 1245 # call LWP mirror command 1246 $rc = mirror( $griplistUrl, $file ); 1247 1248 if ( $rc == 304 ) { 1249 # HTTP 304 not modified status returned 1250 return; 1251 } elsif ( !is_success($rc) ) { 1252 #download failed-error code output to logfile 1253 &rb_printlog("$file download failed: $rc " . status_message($rc). "\n" ); 1254 return; 1255 } elsif ( is_success($rc) ) { 1256 # download complete 1257 my $filesize = -s "$file"; 1258 &rb_printlog("$file download complete: $filesize bytes\n" ); 1259 chmod 0644, "$file"; 1260 } 1261 my $ret = $rc; 1262# &rb_printlog("info: updated GRIPLIST upload and download URL's in $file\n") if $ret; 1263 $ret = 0; 1264 open my $GC , '<', $file or return 0; 1265 binmode $GC; 1266 while (<$GC>) { 1267 s/\r|\n//o; 1268 if (/^\s*(gripList(?:DownUrl|UpUrl|UpHost))\s*:\s*(.+)$/) { 1269 ${$1} = $2; 1270 $ret++; 1271 } 1272 } 1273 close 1274# &rb_printlog("info: loaded GRIPLIST upload and download URL's from $file\n") if $ret; 1275 &rb_printlog("info: GRIPLIST config $file is possibly incomplete\n") if $ret < 3; 1276 $gripListDownUrl =~ s/\*HOST\*/$gripListUpHost/o; 1277 $gripListUpUrl =~ s/\*HOST\*/$gripListUpHost/o; 1278 return $ret; 1279} 1280 1281 1282sub uploadgriplist { 1283 local $/ = "\n"; 1284 1285 #&rb_printlog("Start building Griplist \n"); 1286 open( my $FLogFile, '<', "$Log" ) || &rb_printlog("Unable to create Griplist.\n unable to open logfile '$Log': $!\n"); 1287 my ( $date, $ip, $ipnet, %m, %ok, %locals, $match, $peeraddress, $hostaddress, $connect, $day, $gooddays, $st ); 1288 my $buf; 1289 my $iday; 1290 #build list of the last 2 days 1291 my $time = Time::HiRes::time(); 1292 my $dayoffset = $time % ( 24 * 3600 ); 1293 1294 for ( my $i = 0 ; $i < 2 ; $i++ ) { 1295 $gooddays .= '|' if ( $i > 0 ); 1296 $day = localtime( $time - $i * 24 * 3600 ); 1297 $day =~ s/^... (...) +(\d+) (\S+) ..(..)/$1-$2-$4/; 1298 $gooddays .= $day; 1299 } 1300 if ($LogDateFormat !~ /MMM-DD-YY/) { 1301 $gooddays .= '|'; 1302 for ( my $i = 0 ; $i < 2 ; $i++ ) { 1303 $gooddays .= '|' if ( $i > 0 ); 1304 $day = ×tring( $time - $i * 24 * 3600 , 'd'); 1305 $gooddays .= $day; 1306 }} 1307 1308 1309 undef $day; 1310 %locals = ( '127', 1, '10', 1, '192.168', 1, '169.254', 1, '::1', 1, 'fe80:', 1 ); #RFC 1918, IPv6 1311 for ( 16 .. 31 ) { $locals{ "172.$_" } = 1 } #RFC 1918 1312 1313 while (<$FLogFile>) { 1314 next unless ( $date, $ip, $match ) = /($gooddays) .*\s([0-9a-f\.:]+) .* to: \S+ (.*)/io; 1315 $ipnet = $ip; 1316 if ($ipnet =~ /:.*:/) { 1317 $ipnet =~ s/:.*/:/ if ($ipnet !~ /^:/); 1318 next if $locals{ $ipnet }; # ignore local IP ranges 1319 } 1320 else { 1321 $ipnet =~ s/^(\d+\.\d+)\..*/$1/; 1322 next if $locals{ $ipnet }; # ignore local IP ranges 1323 $ipnet =~ s/^(\d+)\..*/$1/; 1324 next if $locals{ $ipnet }; # ignore local IP ranges 1325 } 1326 $ipnet = $ip; 1327 if ($ipnet =~ /:.*:/) { 1328 $ipnet =~ s/^([0-9a-f]+:[0-9a-f]+:[0-9a-f]+:[0-9a-f]*:).*/$1/i; # yes: "+++*" so as to allow "2001:123:456::" 1329 } 1330 else { 1331 $ipnet =~ s/(\d+)\.(\d+)\.(\d+)\.(\d+)/$1.$2.$3/; 1332 } 1333 1334 if (m/(\[Local]|\[MessageOK]|\[RWL]|\[whitelisted])|\[NoProcessing]/i) { 1335 1336 #Good IP 1337 $m{ $ipnet } += 1; 1338 $ok{ $ipnet } += 1; 1339 next; 1340 } 1341 if (m/(Connection idle for|\[Backscatter]|\[Bayesian]|\[BlackDomain]|\[BlackHELO]|\[BombBlack]|\[BombData]|\[BombHeader]|\[BombRaw]|\[BombScript]|\[BombSender]|\[Collect]|\[Connection]|\[DNSBL]|\[DenyIP]|\[DenyStrict]|\[Extreme]|\[ForgedHELO]|\[ForgedLocalSender]|\[FromMissing]|\[IPfrequency]|\[IPperDomain]|\[InvalidHELO]|\[Malformedaddress]|\[MaxErrors]|\[MessageScore]|\[MissingMX]|\[MsgID]|\[OversizedHeader]|\[PTRinvalid]|\[PTRmissing]|\[PenaltyBox]|\[Penalty]|\[RelayAttempt]|\[SpoofedSender]|\[Trap]|\[URIBL]|\[VIRUS]|\[ValidHELO]|spam found|\[blocked\])/i) { 1342 1343 #Bad IP 1344 $m{ $ipnet } += 1; 1345 $ok{ $ipnet } += 0; 1346 next; 1347 } 1348 } 1349 close $FLogFile; 1350 if ( !%m ) { 1351 &rb_printlog( "Skipping Griplist upload. Not enough messages processed.\n"); 1352 return; 1353 } 1354 &rb_printlog("Preparing binary Griplist upload..."); 1355 my $n6 = 0; 1356 my $n4 = 0; 1357 my ($buf6, $buf4); 1358 foreach (keys %m) { 1359 next if (!$m{$_}); 1360 if ($_ =~ /:/) { 1361 my $ip = $_; 1362 $ip =~ s/([0-9a-f]*):/0000$1:/gi; 1363 $ip =~ s/0*([0-9a-f]{4}):/$1:/gi; 1364 $buf6 .= pack("H4H4H4H4", split(/:/, $ip)); 1365 $buf6 .= pack("C", (1 - $ok{$_} / $m{$_}) * 255); 1366 $n6++; 1367 } else { 1368 $buf4 .= pack("C3C", split(/\./, $_), (1 - $ok{$_} / $m{$_}) * 255); 1369 $n4++; 1370 } 1371 } 1372 $st = pack("N2", $n6 / 2**32, $n6); 1373 $st .= pack("N", $n4); 1374 $st .= $buf6 . $buf4; 1375 &rb_printlog(" done\n"); 1376 if ($proxyserver) { 1377 &rb_printlog("Uploading Griplist via Proxy: $proxyserver\n"); 1378 1379 my $user = $proxyuser ? "$proxyuser:$proxypass\@": ''; 1380 $peeraddress = $user . $proxyserver; 1381 $hostaddress = $proxyserver; 1382 $connect = "POST $gripListUpUrl HTTP/1.0"; 1383 } else { 1384 &rb_printlog("Uploading Griplist via Direct Connection\n"); 1385 $peeraddress = $gripListUpHost . ':80'; 1386 $hostaddress = $gripListUpHost; 1387 my ($url) = $gripListUpUrl =~ /http:\/\/[^\/](\/.+)/oi; 1388 $connect = <<"EOF"; 1389POST $url HTTP/1.1 1390User-Agent: rebuildspamdb/$VERSION ($^O; Perl/$];) 1391Host: $gripListUpHost 1392EOF 1393 } 1394 my $socket = new IO::Socket::INET( Proto => 'tcp', Peeraddr => $peeraddress, Timeout => 2 ); 1395 if ( defined $socket ) { 1396 my $len = length($st); 1397 $connect .= <<"EOF"; 1398Content-Type: application/x-www-form-urlencoded 1399Content-Length: $len 1400 1401$st 1402EOF 1403 print { $socket } $connect; 1404 $socket->sysread($buf, 4096); 1405 $socket->close; 1406 &rb_printlog("Submitted $len bytes: $n6 IPv6 addresses, $n4 IPv4 addresses\n"); 1407 } 1408 else { 1409 &rb_printlog("unable to connect to assp.sourceforge.net to upload griplist\n"); 1410 return; 1411 } 1412 return; 1413} ## end sub uploadgriplist 1414 1415sub downloadgriplist { 1416 &rb_printlog("Griplist download disabled\n") if $noGriplist; 1417 return if $noGriplist; 1418 &rb_printlog("Griplist file not configured\n") if (!$griplist); 1419 return if (!$griplist); 1420 1421 my $rc; 1422 1423 my $gripListUrl = "http://assp.sourceforge.net/cgi-bin/assp_griplist?binary"; 1424 my $gripFile = "$base/$griplist"; 1425 1426 ## let's check if we really need to 1427 if (-e $gripFile) { 1428 my @s = stat($gripFile); 1429 my $mtime = $s[9]; 1430 if (time - $mtime < 8*60*60) { 1431 &rb_printlog("Griplist download failed: last download too recent\n"); 1432 return; 1433 } 1434 } 1435 1436 # check for previous download timestamp, so we can do delta now 1437 my %lastdownload; 1438 $lastdownload{full} = 0; 1439 $lastdownload{fullUTC} = 0; 1440 $lastdownload{delta} = 0; 1441 $lastdownload{deltaUTC} = 0; 1442 my $delta = ""; 1443 if (open(UTC, "$gripFile.utc")) { 1444 local $/; 1445 my $buf = <UTC>; 1446 close(UTC); 1447 chop($buf); 1448 if ($buf =~ /full/ && $buf =~ /delta/) { 1449 %lastdownload = split(/\s+|\n/, $buf); 1450 } else { 1451 $lastdownload{delta} = $buf; 1452 } 1453 if (! ($DoFullGripDownload && time - $lastdownload{fullUTC} > $DoFullGripDownload*24*60*60)) { 1454 my $lasttime; 1455 $lasttime = $lastdownload{full}; 1456 $lasttime = $lastdownload{delta} if ($lastdownload{delta} > $lastdownload{full}); 1457 $gripListUrl .= "&delta=$lasttime"; 1458 $delta = " (delta)"; 1459 } 1460 } 1461 1462 if (!$CanUseLWP) { 1463 &rb_printlog("Griplist download failed: LWP::Simple Perl module not available\n"); 1464 return; 1465 } 1466 1467 if (open(TEMPFILE, ">", "$gripFile.tmp")) { 1468 #we can create the file, this is good, now close the file and keep going. 1469 close TEMPFILE; 1470 unlink("$gripFile.tmp"); 1471 } else { 1472 &rb_printlog("Griplist download failed: Cannot create $gripFile.tmp\n"); 1473 return; 1474 } 1475 1476 # Create LWP ogject 1477 use LWP::Simple qw(mirror is_success status_message $ua); 1478 1479 # Set useragent to Rebuild version 1480 $ua->agent("rebuildspamdb/$VERSION ($^O; Perl/$]; LWP::Simple/$LWP::VERSION)"); 1481 $ua->timeout(20); 1482 if ($proxyserver) { 1483 $ua->proxy('http', "http://" . $proxyserver); 1484 &rb_printlog("Downloading Griplist$delta via HTTP proxy: $proxyserver\n"); 1485 } else { 1486 &rb_printlog("Downloading Griplist$delta via direct HTTP connection\n"); 1487 } 1488 1489 # call LWP mirror command 1490 my $dltime = time; 1491 $rc = mirror($gripListDownUrl, "$gripFile.tmp"); 1492 1493 if ($rc == 304) { 1494 # HTTP 304 not modified status returned 1495 # can't happen - we ALWAYS get new data 1496 unlink("$gripFile.tmp"); 1497 return; 1498 } elsif (!is_success($rc)) { 1499 # download failed-error code output to logfile 1500 # &rb_printlog("Griplist download failed: $rc " . status_message($rc). "\n"); 1501 unlink("$gripFile.tmp"); 1502 return; 1503 } 1504 1505 # download complete 1506 my $filesize = -s "$gripFile.tmp"; 1507 &rb_printlog("Griplist download complete: binary download $filesize bytes\n"); 1508 1509 # enough data? 1510 if ($filesize < 12) { 1511 &rb_printlog("Griplist download error: grip data too small\n"); 1512 unlink("$gripFile.tmp"); 1513 return; 1514 } 1515 1516 # record download time so we can do delta next time 1517 unlink("$gripFile.utc"); 1518 if (open(UTC, ">$gripFile.utc")) { 1519 my ($sec, $min, $hour, $day, $mon, $year, $wday, $yday, $isdst) = gmtime($dltime); 1520 $year += 1900; 1521 $mon += 1; 1522 if (! $delta) { 1523 $lastdownload{full} = sprintf "%04d%02d%02d%02d%02d%02d", $year, $mon, $day, $hour, $min, $sec; 1524 $lastdownload{fullUTC} = $dltime; 1525 } else { 1526 $lastdownload{delta} = sprintf "%04d%02d%02d%02d%02d%02d", $year, $mon, $day, $hour, $min, $sec; 1527 $lastdownload{deltaUTC} = $dltime; 1528 } 1529 printf UTC "full\t%s\n", $lastdownload{full}; 1530 printf UTC "fullUTC\t%s\n", $lastdownload{fullUTC}; 1531 printf UTC "delta\t%s\n", $lastdownload{delta}; 1532 printf UTC "deltaUTC\t%s\n", $lastdownload{deltaUTC}; 1533 close(UTC); 1534 } 1535 1536 # if we did a delta download, read in previous data so we can merge 1537 my @binFiles; 1538 push(@binFiles, "$gripFile.bin") if ($gripListUrl =~ /delta=/); 1539 push(@binFiles, "$gripFile.tmp"); 1540 1541 # convert binary download form to text form used by ASSP 1542 my $buf; 1543 my %grip; 1544 my $action = "read"; 1545 foreach my $binF (@binFiles) { 1546 my $binSize = -s $binF; 1547 open(BIN, $binF); 1548 binmode(BIN); 1549 read(BIN, $buf, $binSize); 1550 close(BIN); 1551 1552 # IPv6 count 1553 my ($n6h, $n6l) = unpack("N2", $buf); 1554 my $n6 = $n6h * 2**32 + $n6l; 1555 1556 # IPv4 count 1557 my $n4; 1558 eval { $n4 = unpack("x[N2] N", $buf); }; 1559 1560 1561 # decode IPv6 data 1562 my $x6 = 0; 1563 eval { 1564 for (my $i = 0; $i < $n6; $i++) { 1565 my ($bip, $grey) = unpack("x[N2] x[N] x$x6 a8 C", $buf); 1566 my $ip = join(":", unpack("H4H4H4H4", $bip)) . ":"; 1567 $ip =~ s/:0+([0-9a-f])/:$1/gio; 1568 $ip =~ s/:0:$/::/o; 1569 1570 # $grip{$ip} = $grey / 255; 1571 # $gripdelta{$ip} = $grey / 255 if $deltayonly; 1572 $x6 += 9; 1573 } 1574 }; 1575 1576 # decode IPv4 data 1577 my $x4 = 0; 1578 for (my $i = 0; $i < $n4; $i++) { 1579 my ($bip, $grey) = unpack("x[N2] x[N] x$x6 x$x4 a3 C", $buf); 1580 my $ip = join(".", unpack("C3", $bip)); 1581 $grip{$ip} = $grey / 255; 1582 1583 $x4 += 4; 1584 } 1585 &rb_printlog("Griplist binary $action OK: $binF, $n6 IPv6 addresses, $n4 IPv4 addresses\n"); 1586 $action = "merge"; 1587 } 1588 1589 # remove download file 1590 unlink("$gripFile.tmp"); 1591 1592 # output binary version, so we can do a delta next time 1593 &rb_printlog("Writing merged Griplist binary..."); 1594 my $buf; 1595 my $n6 = 0; 1596 my $n4 = 0; 1597 my ($buf6, $buf4); 1598 foreach my $ip (keys %grip) { 1599 if ($ip =~ /:/) { 1600 my $ip2 = $ip; 1601 $ip2 =~ s/([0-9a-f]*):/0000$1:/gi; 1602 $ip2 =~ s/0*([0-9a-f]{4}):/$1:/gi; 1603 $buf6 .= pack("H4H4H4H4", split(/:/, $ip2)); 1604 $buf6 .= pack("C", int($grip{$ip} * 255)); 1605 $n6++; 1606 } else { 1607 $buf4 .= pack("C3C", split(/\./, $ip), int($grip{$ip} * 255)); 1608 $n4++; 1609 } 1610 } 1611 $buf = pack("N2", $n6/2**32, $n6); 1612 $buf .= pack("N", $n4); 1613 $buf .= $buf6 . $buf4; 1614 unlink("$gripFile.bin"); 1615 open (BIN, ">$gripFile.bin"); 1616 binmode(BIN); 1617 print BIN $buf; 1618 close(BIN); 1619 chmod 0644, "$gripFile.bin"; 1620 &rb_printlog(" done\n"); 1621 1622 # output text version 1623 &rb_printlog("Writing merged Griplist text..."); 1624 unlink("$gripFile"); 1625 open (TEXT, ">$gripFile"); 1626 binmode(TEXT); 1627 print TEXT "\n"; 1628 foreach my $ip (sort keys %grip) { 1629 1630 printf TEXT "$ip\002%.2f\n", $grip{$ip}; 1631 } 1632 close(TEXT); 1633 chmod 0644, "$gripFile"; 1634 &rb_printlog(" done\n"); 1635 1636 &rb_printlog("Griplist writing complete: $n6 IPv6 addresses, $n4 IPv4 addresses\n\n"); 1637} 1638 1639 1640sub downloaddroplist { 1641 1642 &rb_printlog("Droplist download disabled\n\n") if !$DoDropList; 1643 return if !$DoDropList; 1644 my $rc; 1645 1646 my $droplistUrl = "http://www.spamhaus.org/drop/drop.lasso"; 1647 my $dropFile = "$base/$DropList"; 1648 1649 $dropFile = &fixPath($dropFile); 1650 1651 # let's check if we really need to 1652 if (-e $dropFile) { 1653 my @s = stat($dropFile); 1654 my $mtime = $s[9]; 1655 my $random = int(rand(144-72)+72)+1; 1656 if (time - $mtime < $random*60*60) { 1657 1658 &rb_printlog("Droplist download skipped: last download too recent\n"); 1659 return; 1660 } 1661 } 1662 1663 if ( !$CanUseLWP ) { 1664 &rb_printlog("Droplist download failed: LWP::Simple Perl module not available\n"); 1665 return; 1666 } 1667 1668 if ( -e $dropFile ) { 1669 if ( !-r $dropFile ) { 1670 &rb_printlog( "Droplist download failed: $dropFile not readable!\n" ); 1671 return; 1672 } elsif ( !-w $dropFile ) { 1673 &rb_printlog( "Droplist download failed: $dropFile not writable!\n" ); 1674 return; 1675 } 1676 } 1677 else { 1678 if ( open( TEMPFILE, ">", $dropFile ) ) { 1679 #we can create the file, this is good, now close the file and keep going. 1680 close TEMPFILE; 1681 unlink($dropFile); 1682 } else { 1683 &rb_printlog("Droplist download failed: Cannot create $dropFile \n" ); 1684 return; 1685 } 1686 } 1687 1688 # Create LWP ogject 1689 use LWP::Simple qw(mirror is_success status_message $ua); 1690 1691 # Set useragent to Rebuild version 1692 $ua->agent( 1693 "rebuildspamdb/$VERSION ($^O; Perl/$]; LWP::Simple/$LWP::VERSION)"); 1694 $ua->timeout(20); 1695 if ($proxyserver) { 1696 $ua->proxy( 'http', "http://" . $proxyserver ); 1697 &rb_printlog("Downloading $dropFile via HTTP proxy: $proxyserver\n" ); 1698 } else { 1699 &rb_printlog("Downloading $dropFile via direct HTTP connection\n" ); 1700 } 1701 1702 # call LWP mirror command 1703 $rc = mirror( $droplistUrl, $dropFile ); 1704 1705 if ( $rc == 304 ) { 1706 # HTTP 304 not modified status returned 1707 return; 1708 } elsif ( !is_success($rc) ) { 1709 #download failed-error code output to logfile 1710 &rb_printlog("$dropFile download failed: $rc " . status_message($rc). "\n" ); 1711 return; 1712 } elsif ( is_success($rc) ) { 1713 # download complete 1714 my $filesize = -s "$dropFile"; 1715 &rb_printlog("$dropFile download complete: $filesize bytes\n" ); 1716 chmod 0644, "$dropFile"; 1717 } 1718} 1719 1720sub compileregex { 1721 use re 'eval'; 1722 my ( $name, $contents, $REname ) = @_; 1723 $contents = getrecontent( $contents, $name ); 1724 $contents ||= '^(?!)'; # regexp that never matches 1725 1726 # trim long matches to 32 chars including '...' at the end 1727 eval { $$REname = qr/(?si)$contents/ }; 1728 if ($EVAL_ERROR) { print "regular expression error in '$contents' for $name: $@\n"; } 1729 return q{}; 1730} 1731 1732sub optionList { 1733 1734 # this converts a | separated list into a RE 1735 my ( $d, $configname ) = @_; 1736 $d = getrecontent( $d, $configname ); 1737 $d =~ s/([\.\[\]\-\(\)\*\+\\])/\\$1/g; 1738 return $d; 1739} 1740 1741sub fixPath { 1742 my ($path) = @_; 1743 my $len = length($path); 1744 if ( !substr( $path, ( $len - 1 ), 1 ) eq q{/} ) { return $path . q{/}; } 1745 else { return $path; } 1746 return; 1747} 1748 1749sub repair { 1750 $/ = "\n"; 1751 1752 # mxa ptr rbl spf uribl white black 1753 my $pbdb = "$base/$pbdbfile"; 1754 my ( @files, %w ); 1755 my ( $k, $v ); 1756 if ( !( $pbdbfile =~ /mysql/ ) ) { 1757 foreach ( glob("$pbdb.*.db") ) { push( @files, $_ ); } 1758 } 1759 if ( !( $whitelistdb =~ /mysql/ ) ) { push( @files, $whitelistdb ); } 1760 if ( !( $redlistdb =~ /mysql/ ) ) { push( @files, $redlistdb ); } 1761 foreach my $f (@files) { 1762 if ( !-e $f ) { next } 1763 open( my $curfile, "<", $f ); 1764 1765 #<$curfile>; 1766 while (<$curfile>) { 1767 ( $k, $v ) = split( /[\001\002\n]/, $_ ); 1768 if ( $k eq q{} || $v eq q{} ) { next } 1769 1770 #print "$k=$v\n"; 1771 $w{ $k } = $v; 1772 } 1773 close $curfile; 1774 open( my $newfile, ">", "$f.new" ); 1775 binmode $newfile; 1776 print { $newfile } "\n"; 1777 for ( sort keys %w ) { print { $newfile } "$_\002$w{$_}\n"; } 1778 close $newfile; 1779 rename( $f, "$f.bak" ); 1780 rename( "$f.new", $f ); 1781 undef %w; 1782 } 1783 return; 1784} ## end sub repair 1785 1786 1787 1788sub sendNotification { 1789 my ($from,$to,$sub,$body,$file) = @_; 1790 my $text; 1791 return unless $to; 1792 return unless $resendmail; 1793 my $date=$UseLocalTime ? localtime() : gmtime(); 1794 my $tz=$UseLocalTime ? tzStr() : '+0000'; 1795 $date=~s/(\w+) +(\w+) +(\d+) +(\S+) +(\d+)/$1, $3 $2 $5 $4/; 1796 $text = "Date: $date $tz\r\n"; 1797 $text .= "X-Assp-Notification: YES\r\n"; 1798 $text .= "From: <$from>\r\nTo:" if $from !~ /\</; 1799 $text .= "From: $from\r\nTo:" if $from =~ /\</; 1800 1801 foreach (split(/\|/, $to)) { 1802 $text .= " <$_>,"; 1803 } 1804 chop $text; 1805 $text .= "\r\n"; 1806 $text .= "Subject: $sub\r\n"; 1807 $text .= "Content-Type: text/plain; charset=\"ISO-8859-1\"\r\n"; 1808 $text .= "Content-Transfer-Encoding: 7bit\r\n"; 1809 my $msgid = int(rand(1000000)); 1810 $text .= "Message-ID: a$msgid\@$myName\r\n"; 1811 $text = &headerWrap($text); 1812 $text .= "\r\n"; # end header 1813 foreach (split(/\r?\n/,$body)) { 1814 $text .= "$_\r\n"; 1815 } 1816 1817 my $f; 1818 if ($file && -e $file && open($f,"<",$file)) { 1819 1820 while (<$f>) { 1821 s/\r?\n//g; 1822 $text .= "$_\r\n"; 1823 1824 } 1825 close $f; 1826 } 1827 1828 $text .= ".\r\n"; 1829 $text =~ tr/\x80-\xFF/_/; # 7bit only 1830 my $rfile = "$base/$resendmail/n$msgid$maillogExt"; 1831 -d "$base/$resendmail" or mkdir "$base/$resendmail", 0777; 1832 if (open($f,">",$rfile)) { 1833 binmode $f; 1834 print $f $text; 1835 close $f; 1836 &rb_printlog( "write notify message to $rfile\n" ); 1837 } else { 1838 1839 &rb_printlog( "error: unable to write notify message to $rfile - $!\n" ); 1840 } 1841 1842} 1843sub tzStr { 1844 1845 # calculate the time difference in minutes 1846 my $minoffset = 1847 ( Time::Local::timelocal( localtime() ) - 1848 Time::Local::timelocal( gmtime() ) ) / 60; 1849 1850 # translate it to "hour-format", so that 90 will be 130, and -90 will be -130 1851 my $sign = $minoffset < 0 ? -1 : +1; 1852 $minoffset = abs($minoffset) + 0.5; 1853 my $tzoffset = 0; 1854 $tzoffset = $sign * ( int( $minoffset / 60 ) * 100 + ( $minoffset % 60 ) ) 1855 if $minoffset; 1856 1857 # apply final formatting, including +/- sign and 4 digits 1858 return sprintf( "%+05d", $tzoffset ); 1859} 1860# wrap long headers 1861sub headerWrap { 1862 my $header = shift; 1863 $header =~ 1864s/(?:([^\r\n]{60,75}?;)|([^\r\n]{60,75}) ) {0,5}(?=[^\r\n]{10,})/$1$2\r\n\t/g; 1865 1866 return $header; 1867} 1868sub rb_cleanUpFiles { 1869 my ($folder, $filter, $filetime) = @_; 1870 1871 my $textfilter = " (*$filter)" if $filter; 1872 my @files; 1873 my $file; 1874 my $count; 1875 my $dir = &fixPath($folder); 1876 $dir =~ s/\\/\//g; 1877 return unless -e $dir; 1878 &rb_printlog( "starting rb_cleanup old files$textfilter for folder $dir\n" ); 1879 1880 opendir(my $DIR,"$dir"); 1881 @files = readdir($DIR); 1882 close $DIR; 1883 my $fldrpath = $dir . "/*"; 1884 1885 my $filecount = &countfiles($fldrpath); 1886 foreach $file (@files) { 1887 next if $file eq '.'; 1888 next if $file eq '..'; 1889 next unless $file =~ /$maillogExt$/i or $file =~ /\.rpt$/i; 1890 next if ($filter && $file !~ /$filter$/i); 1891 next if ($filter && $file =~ /^$filter$/i); 1892 $file = "$dir/$file"; 1893 next if -d $file; 1894 next unless -w $file; 1895 my $dtime=(stat($file))[9]-time; 1896 if (($dtime < $filetime * -1) or ($dtime > 0 && $dtime < $MaxKeepDeleted - $filetime)) { 1897 $count++; 1898 1899 } 1900 } 1901 1902 my $filecountafter = &countfiles($fldrpath); 1903 &rb_printlog( "folder $dir before: $filecount\n" ) ; 1904 &rb_printlog( "folder $dir deleted: $count\n" ) if $count; 1905 &rb_printlog( "folder $dir after: $filecountafter\n\n" ) ; 1906 $correctedspamcount = $filecountafter if $dir =~ 'errors\/spam'; 1907 $correctednotspamcount = $filecountafter if $dir =~ 'errors\/notspam'; 1908 1909} 1910 1911sub rb_cleanUpMaxFiles { 1912 my $folder = shift; 1913 my $percent = shift; 1914 1915 my @files; 1916 my $file; 1917 my $count; 1918 my $info; 1919 my $dir = ($folder !~ /\Q$base\E/i) ? "$base/$folder" : $folder ; 1920 my $maxfiles = $MaxFiles; 1921 my $importmaxfiles = $MaxFiles; 1922 1923 1924 my $adiff = abs ($correctedspamcount - $correctednotspamcount); 1925 $maxfiles = $MaxFiles + $adiff if $dir =~ /assp\/spam/ && $correctednotspamcount > $correctedspamcount; 1926 $maxfiles = $MaxFiles + $adiff if $dir =~ /assp\/notspam/ && $correctedspamcount > $correctednotspamcount; 1927 1928 $dir =~ s/\\/\//g; 1929 return unless -e $dir; 1930 1931 1932 opendir(my $DIR,"$dir"); 1933 @files = readdir($DIR); 1934 close $DIR; 1935 my $filecount = @files - 2; 1936 1937 rb_printlog("rb_cleaning $dir skipped - filecount: $filecount < maxfiles: $maxfiles\n") if $filecount <= $maxfiles; 1938 return $info if $filecount <= $maxfiles; 1939 1940 my %filelist = (); 1941 while (@files ) { 1942 $file = shift @files; 1943 next if $file eq '.'; 1944 next if $file eq '..'; 1945 $file = "$dir/$file"; 1946 if (-d $file) { 1947 $filecount--; 1948 next; 1949 } 1950 my $ft = (stat($file))[9]; 1951 $ft = $ft - (60 * 24 * 3600) if $ft > time; 1952 while (exists $filelist{$ft}) { 1953 $ft++; 1954 } 1955 $filelist{$ft} = $file; 1956 $count++; 1957 1958 } 1959 return $info if $filecount <= $maxfiles; 1960 1961 1962 rb_printlog("\nstarting rb_cleaning $dir - delete files from $dir - old filecount: $filecount: \n"); 1963 my $toFilenumber; 1964 my $filenum; 1965 my $time = time - ($minimumdays * 24 * 3600); # two weeks ago 1966 my $savecount; 1967 1968 $filenum = $maxfiles - $filecount; 1969 $toFilenumber = $maxfiles; 1970 1971 $count = 0; 1972 1973 foreach my $filetime (sort keys %filelist) { 1974 last if $filecount-- < $toFilenumber; 1975 1976 unlink "$filelist{$filetime}"; 1977 $count++; 1978 1979 } 1980 1981 opendir(my $DIR,"$dir"); 1982 @files = readdir($DIR); 1983 close $DIR; 1984 my $newfilecount = @files - 2; 1985 1986 1987 rb_printlog("finished rb_cleaning $dir - new filecount: $newfilecount\n") ; 1988 1989 return $info; 1990} 1991 1992 1993sub rb_cleanUpCollection { 1994 1995 1996 1997 1998 my $age = $MaxNoBayesFileAge * 3600 * 24; 1999 my @dirs = ('incomingOkMail','discarded','viruslog'); 2000 my $dir; 2001 2002 2003 &rb_printlog( "\n--- rb_cleaning NoBayesian folders ---\n" ); 2004 &rb_printlog( "entries older than $MaxNoBayesFileAge days will be removed\n" ) if $MaxNoBayesFileAge; 2005 2006 foreach my $dir (@dirs) { 2007 if ($age) { 2008 &rb_cleanUpFiles(${$dir},'',$age) if ${$dir}; 2009 } 2010 } 2011 2012 $age = $MaxCorrectedDays * 3600 * 24; 2013 @dirs = ('correctedspam','correctednotspam'); 2014 2015 &rb_printlog( "\n--- rb_cleaning corrected (errors) spam/notspam folders ---\n" ); 2016 &rb_printlog( "entries older than $MaxCorrectedDays days will be removed\n" ) if $MaxCorrectedDays; 2017 2018 foreach my $dir (@dirs) { 2019 if ($age) { 2020 &rb_cleanUpFiles(${$dir},'',$age) if ${$dir}; 2021 } else { 2022 &rb_cleanUpMaxFiles(${$dir}) if ${$dir}; 2023 } 2024 } 2025 2026 return unless $maintbayescollection; 2027 $age = 0; 2028 @dirs = ('spamlog','notspamlog'); 2029 2030 &rb_printlog( "\n--- rb_cleaning Bayesian folders ---\n" ); 2031 &rb_printlog( "entries older than $MaxBayesFileAge days will be removed\n" ) if $MaxBayesFileAge; 2032 2033 foreach my $dir (@dirs) { 2034 if ($age) { 2035 &rb_cleanUpFiles(${$dir},'',$age) if ${$dir}; 2036 } else { 2037 &rb_cleanUpMaxFiles(${$dir}) if ${$dir}; 2038 } 2039 } 2040} 2041 2042sub rb_cleanTrashlist { 2043 my $files_before = my $files_deleted = 0; 2044 my $t = time; 2045 my $mcount; 2046 2047 while ( my ( $k, $v ) = each(%Trashlist) ) { 2048 if (!-e $k) { 2049 delete $Trashlist{$k}; 2050 $files_deleted++; 2051 next; 2052 } 2053 my $ct = $v; 2054 $files_before++; 2055 2056 if (!$MaxKeepDeleted or ( $t - $ct >= $MaxKeepDeleted * 3600 * 24) 2057 ) 2058 { 2059 unlink $k; 2060 delete $Trashlist{$k}; 2061 $files_deleted++; 2062 } 2063 } 2064 &rb_printlog( 2065"\nTrashlist: rb_cleaning finished; before=$files_before, deleted=$files_deleted\n" 2066 ); 2067 2068} 2069 2070sub getUidGid { 2071 my ( $uname, $gname ) = @_; 2072 2073 my $rname = "root"; 2074 eval('getgrnam($rname);getpwnam($rname);'); 2075 if ($@) { 2076 2077 # windows pukes "unimplemented" for these -- just skip it 2078 rb_printlog( 2079"warning: uname and/or gname are set ($uname,$gname) but getgrnam / getpwnam give errors: $@" 2080 ); 2081 return; 2082 } 2083 my $gid; 2084 if ($gname) { 2085 $gid = getgrnam($gname); 2086 if ( defined $gid ) { 2087 } else { 2088 my $msg = 2089"could not find gid for group '$gname' -- not switching effective gid "; 2090 rb_printlog( $msg ); 2091 return; 2092 } 2093 } 2094 my $uid; 2095 if ($uname) { 2096 $uid = getpwnam($uname); 2097 if ( defined $uid ) { 2098 } else { 2099 my $msg = 2100"could not find uid for user '$uname' -- not switching effective uid "; 2101 rb_printlog( $msg ); 2102 return; 2103 } 2104 } 2105 ( $uid, $gid ); 2106} 2107 2108sub mlog { 2109} 2110 2111##################################################################################### 2112# orderedtie 2113{ 2114 2115 package orderedtie; 2116 2117 # This is a tied value that caches lookups from a sorted file; \n separates records, 2118 # \002 separates the key from the value. After OrderedTieHashSize lookups the cache is 2119 # cleared. This give us most of the speed of the hash without the huge memory overhead of storing 2120 # the entire hash and should be totally portable. Picking the best value for n requires some 2121 # tuning. A \n is required to start the file. 2122 # if you're updating entries it behoves you to call flush every so often to make sure that your 2123 # changes are saved. This also frees the memory used to remember updated values. 2124 # for my purposes a value of undef and a nonexistant key are the same 2125 # Obviosly if your keys or values contain \n or \002 it will totally goof things up. 2126 sub TIEHASH { 2127 my ( $c, $fn ) = @_; 2128 my $self = { fn => $fn, age => mtime($fn), cnt => 0, cache => {}, updated => {}, ptr => 1, }; 2129 bless $self, $c; 2130 return $self; 2131 } 2132 sub DESTROY { $_[0]->flush(); } 2133 sub mtime { my @s = stat( $_[0] ); $s[9]; } 2134 2135 sub flush { 2136 my $this = shift; 2137 return unless %{ $this->{ updated } }; 2138 my $f = $this->{ fn }; 2139 open( O, '>', "$f.tmp" ) || return; 2140 binmode(O); 2141 open( I, '<', "$f" ) || print O"\n"; 2142 binmode(I); 2143 local $/ = "\n"; 2144 my @l = ( sort keys %{ $this->{ updated } } ); 2145 my ( $k, $d, $r, $v ); 2146 2147 while ( $r = <I> ) { 2148 ( $k, $d ) = split( "\002", $r ); 2149 while ( @l && $l[0] lt $k ) { 2150 $v = $this->{ updated }{ $l[0] }; 2151 print O"$l[0]\002$v\n" if $v; 2152 shift(@l); 2153 } 2154 if ( $l[0] eq $k ) { 2155 $v = $this->{ updated }{ $l[0] }; 2156 print O"$l[0]\002$v\n" if $v; 2157 shift(@l); 2158 } 2159 else { print O$r; } 2160 } 2161 while (@l) { 2162 $v = $this->{ updated }{ $l[0] }; 2163 print O"$l[0]\002$v\n" if $v; 2164 shift(@l); 2165 } 2166 close I; 2167 close O; 2168 unlink($f); 2169 rename( "$f.tmp", $f ); 2170 $this->{ updated } = {}; 2171 } ## end sub flush 2172 2173 sub STORE { 2174 my ( $this, $key, $value ) = @_; 2175 $this->{ cache }{ $key } = $this->{ updated }{ $key } = $value; 2176 } 2177 2178 sub FETCH { 2179 my ( $this, $key ) = @_; 2180 return $this->{ cache }{ $key } if exists $this->{ cache }{ $key }; 2181 $this->resetCache() 2182 if ( $this->{ cnt }++ > 10000 2183 || ( $this->{ cnt } & 0x1f ) == 0 && mtime( $this->{ fn } ) != $this->{ age } ); 2184 return $this->{ cache }{ $key } = binsearch( $this->{ fn }, $key ); 2185 } 2186 2187 sub resetCache { 2188 my $this = shift; 2189 $this->{ cnt } = 0; 2190 $this->{ age } = mtime( $this->{ fn } ); 2191 $this->{ cache } = { %{ $this->{ updated } } }; 2192 } 2193 2194 sub binsearch { 2195 my ( $f, $k ) = @_; 2196 open( F, '<', "$f" ) || return; 2197 binmode(F); 2198 my $siz = my $h = -s $f; 2199 $siz -= 1024; 2200 my $l = 0; 2201 my $k0 = $k; 2202 $k =~ s/([\[\]\(\)\*\^\!\|\+\.\\\/\?\`\$\@\{\}])/\\$1/g; # make sure there's no re chars unqutoed in the key 2203 2204 #print "k=$k ($_[1])\n"; 2205 while (1) { 2206 my $m = ( ( $l + $h ) >> 1 ) - 1024; 2207 $m = 0 if $m < 0; 2208 2209 #print "($l $m $h) "; 2210 seek( F, $m, 0 ); 2211 my $d; 2212 my $read = read( F, $d, 2048 ); 2213 if ( $d =~ /\n$k\002([^\n]*)\n/ ) { 2214 close F; 2215 2216 #print "got $1\n"; 2217 return $1; 2218 } 2219 my ( $pre, $first, $last, $post ) = $d =~ /^(.*?)\n(.*?)\002.*\n(.*?)\002.*?\n(.*?)$/s; 2220 2221 #print "f=$first "; 2222 last unless defined $first; 2223 if ( $k0 gt $first && $k0 lt $last ) { 2224 2225 #print "got miss\n"; 2226 last; 2227 } 2228 if ( $k0 lt $first ) { 2229 last if $m == 0; 2230 $h = $m - 1024 + length($pre); 2231 $h = 0 if $h < 0; 2232 } 2233 if ( $k0 gt $last ) { 2234 last if $m >= $siz; 2235 $l = $m + $read - length($post); 2236 } 2237 2238 #print "l=$l h=$h "; 2239 } ## end while (1) 2240 close F; 2241 return; 2242 } ## end sub binsearch 2243 2244 sub FIRSTKEY { 2245 my $this = shift; 2246 $this->flush(); 2247 $this->{ ptr } = 1; 2248 $this->NEXTKEY(); 2249 } 2250 2251 sub NEXTKEY { 2252 my ( $this, $lastkey ) = @_; 2253 local $/ = "\n"; 2254 open( F, '<', "$this->{fn}" ) || return; 2255 binmode(F); 2256 seek( F, $this->{ ptr }, 0 ); 2257 my $r = <F>; 2258 return unless $r; 2259 $this->{ ptr } = tell F; 2260 close F; 2261 my ( $k, $v ) = $r =~ /(.*?)\002(.*?)\n/s; 2262 2263 if ( !exists( $this->{ cache }{ $k } ) && $this->{ cnt }++ > 10000 ) { 2264 $this->{ cnt } = 0; 2265 $this->{ cache } = { %{ $this->{ updated } } }; 2266 } 2267 $this->{ cache }{ $k } = $v; 2268 $k; 2269 } 2270 2271 sub EXISTS { 2272 my ( $this, $key ) = @_; 2273 return FETCH( $this, $key ); 2274 } 2275 2276 sub DELETE { 2277 my ( $this, $key ) = @_; 2278 $this->{ cache }{ $key } = $this->{ updated }{ $key } = undef; 2279 } 2280 2281 sub CLEAR { 2282 my ($this) = @_; 2283 open( F, '>', "$this->{fn}" ); 2284 binmode(F); 2285 print "\n"; 2286 close F; 2287 $this->{ cache } = {}; 2288 $this->{ updated } = {}; 2289 $this->{ cnt } = 0; 2290 } 2291} 2292 2293 2294