1 2package URBL::Prepare; 3 4use strict; 5#use diagnostics; 6use AutoLoader 'AUTOLOAD'; 7use vars qw($VERSION); 8 9$VERSION = do { my @r = (q$Revision: 0.09 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; 10 11sub loadcache; 12sub Destroy {}; 13 14=head1 NAME 15 16URPL::Prepare -- prepare hostname for URBL domain lookup 17 18=head1 SYNOPSIS 19 20 require URBL::Prepare; 21 22 my $ubp = new URBL::Prepare; 23 24 $tlds = $blessed->cachetlds($localfilelistptr); 25 $whitelist = $blessed->cachewhite($localfilelistptr); 26 $domain = $blessed->urbldomain($hostname); 27 $response_code = $proto->loadcache($url,$localfile); 28 ($response,$message) = $proto->loadcache($url,$localfile); 29 $rv = $blessed->urblblack($hostname); 30 $rv = $blessed->urblwhite($hostname); 31 32=head1 DESCRIPTION 33 34=over 2 35 36=item * my $urbl = new URBL::Prepare; 37 38This method returns a blessed reference to an empty hash. 39 40For use with other modules: 41 42 require URBL::Prepare; 43 44 @ISA = qw(URBL::Prepare); 45 46=back 47 48=cut 49 50sub new { 51 my $proto = shift; 52 my $class = ref $proto || $proto || __PACKAGE__; 53 bless {}, $class; 54} 55 56=head1 URBL Preparation for lookup methods 57 58The following three methods are for facilitating URBL lookups. 59 60 SEE: http://www.uribl.com/about.shtml 61 and http://www.surbl.org/guidelines 62 63=over 2 64 65=item * $tldlist = $blessed->cachetlds($localfilelistptr); 66 67This method opens local files in "file list" and extracts the tld's found 68therein. 69 70 input: ptr to array of local/file/path/names 71 return: array ptr to list of tld's 72 73NOTE: place level 3 tld's ahead of level 2 tld's 74 75=cut 76 77# do level3 tld's first 78sub cachetlds { 79 my($bls,$files) = @_; 80 my @tldlist; 81 foreach my $infile (@$files) { 82 my $tldf; 83 next unless open $tldf, $infile; 84 foreach (<$tldf>) { 85 chomp; 86 next unless $_ =~ /\S/; 87 $_ =~ s/\./\\./g; 88 push @tldlist, lc $_; 89 } 90 } 91 $bls->{-urblpreparebad} = \@tldlist; 92} 93 94=item * $whitelist = $blessed->cachewhite($localfilelistptr); 95 96This method opens local file(s) in "file list" and extracts the domains 97found therein. 98 99 See http://wiki.apache.org/spamassassin/DnsBlocklists and 100 http://spamassasin.googlecode.com/svn-history/r6/trunk/share/spamassassin/ 101 102Note:: these URL's may change 103 104 input: ptr to array of local/file/path/names 105 return: array ptr to whitelist domain names 106 107=cut 108 109sub cachewhite { 110 my($bls,$files) = @_; 111 my @whitelist; 112 foreach my $infile (@$files) { 113 my $wfile; 114 next unless open $wfile, $infile; 115 foreach(<$wfile>) { 116 next unless $_ =~ /uridnsbl_skip_domain\s+(.+)/; 117 (my $white = $1) =~ s/\./\\./g; 118 chomp $white; 119 my @wtmp = split /\s+/, lc $white; 120 push @whitelist, @wtmp; 121 } 122 } 123 $bls->{-urblpreparewhite} = \@whitelist; 124} 125 126=item * $blacklist = $blessed->cacheblack($localfilelistptr); 127 128This method opens local file(s) in "file list" and extracts the domains found 129therein. The domains may be space seperated many to a line. 130 131 input: ptr to array of local/file/path/names 132 return: ptr to blacklist domain names 133 134=cut 135 136sub cacheblack { 137 my($bls,$files) = @_; 138 my @blacklist; 139 foreach my $infile (@$files) { 140 my $bkfile; 141 next unless open $bkfile, $infile; 142 foreach(<$bkfile>) { 143 chomp; 144 (my $black = $_) =~ s/\./\\./g; 145 my @btmp = split /\s+/, lc $black; 146 push @blacklist, @btmp; 147 } 148 } 149 $bls->{-urblprepareblack} = \@blacklist; 150} 151 152=item * $domain = $blessed->urbldomain($hostname) 153 154This method extracts a domain name to check against an SURBL. If the 155hostname is whitelisted, the return value is false, otherwise a domain name 156is returned. 157 158 input: hostname 159 return: false if whitelisted 160 else domain to check against SURBL 161 162NOTE: optionally white or tld testing will be bypassed if the pointer 163is undefined or points to an empty array. 164 165=cut 166 167# Implementation Guidelines 168# 169# http://www.surbl.org/guidelines 170# 171# Extract base (registered) domains from those URIs. This includes removing 172# all leading host names, subdomains, www., randomized subdomains, etc. In 173# order to determine the level of domain to check, use our tables of two level 174# and three level TLDs. Originally these were CCTLDs, but they now also 175# include some frequently abused hosting domains. (Note that these files only 176# rarely update. Please don't retrieve them more often than once per day.) The 177# usage is: 178# 179# For any domain on the three level list, check it at the fourth level. 180# For any domain on the two level list, check it at the third level. 181# For any other domain, check it at the second level. 182# 183# For example, any domain found in the two level list should have a third 184# level domain name (such as foo.co.uk) checked against SURBLs. Domains not 185# specifically on the two level list (such as foo.com or foo.fr) should be 186# checked at two levels. Please do not check at levels other than these as 187# doing so will cause unnecessary queries that won't result in matches. 188 189sub urbldomain { 190 my $bls = shift; 191 my $host = lc shift; 192 my $white = $bls->{-urblpreparewhite} || []; 193 my $tlds = $bls->{-urblpreparebad} || []; 194 195 foreach(@$white) { 196 return undef if $host =~ /$_$/; # whitelisted? 197 } 198 foreach (@$tlds) { 199 if ($host =~ /([^\.]+\.$_)$/) { 200 ($host = $1) =~ s/\\//g; 201 return $host; 202 } 203 } 204 $host =~ /([^\.]+\.[^\.]+)$/; 205 return $1; 206} 207 208=item * $rv = $blessed->urblblack($hostname) 209 210This method check if a hostname is found within the local black list(s). 211 212 input: hostname 213 return: domain found, else false 214 215=cut 216 217sub urblblack { 218 my $bls = shift; 219 my $host = lc shift; 220 my $tlds = $bls->{-urblprepareblack} || []; 221 foreach (@$tlds) { 222 if ($host =~ /$_$/) { 223 ($host = $_) =~ s/\\//g; 224 return $host; 225 } 226 } 227 return undef; 228} 229 230=item * $rv = $blessed->urbwhite($hostname) 231 232This method check if a hostname is found within the local white list. 233 234 input: hostname 235 return: domain found, else false 236 237=cut 238 239sub urblwhite { 240 my $bls = shift; 241 my $host = lc shift; 242 my $white = $bls->{-urblpreparewhite} || []; 243 foreach (@$white) { 244 if ($host =~ /$_$/) { 245 ($host = $_) =~ s/\\//g; 246 return $host; 247 } 248 } 249 return undef; 250} 251 2521; 253__END__ 254 255=item * $response_code = $proto->loadcache($url,$localfile); 256 257=item * ($response,$message) = $proto->loadcache($url,$localfile); 258 259This method uses LWP::UserAgent::mirror to conditionally retrieve files 260to fill local cache with WHITELIST and TLD names. The response code is the 261result returned by the HTTP fetch and should be one of 200 or 304. At the 262time this module was released the files were as follows: 263 264 WHITE LIST URL 265 http://spamassasin.googlecode.com/svn-history/r6/trunk/share/spamassassin/25_uribl.cf 266 267and 268 269 TLDLIST URL (include some known abusive tlds) 270 http://george.surbl.org/three-level-tlds 271 http://george.surbl.org/two-level-tlds 272 273 input: path/name/for/localfile 274 return: http response code, 275 response message 276 277In scalar context only the http response code is returned. In array context 278the numeric response code and a related text message are returned. 279 280 200 OK file cached 281 304 Not Modified file is up-to-date 282 283Any other response code indicates and error. 284 285 Usage: 286 $rv = URBL::Prepare->loadcache($url,$localfile); 287 288=back 289 290=cut 291 292sub loadcache { 293 my($bls,$url,$file) = @_; 294 require LWP::UserAgent; 295 my $ua = new LWP::UserAgent( 296 timeout => 30 297 ); 298 my $r = $ua->mirror($url,$file); 299 return $r->code unless wantarray; 300 return ($r->code,$r->message); 301} 302 303=head1 APPLICATION EXAMPLES 304 305This example shows how to include URBL::Prepare in another module 306 307 #!/usr/bin/perl 308 package = Some::Package 309 310 use vars qw(@ISA); 311 require URBL::Prepare; 312 313 @ISA = qw( URBL::Prepare ); 314 315 sub new { 316 my $proto = shift; 317 my $class = ref $proto || $proto || __PACKAGE__; 318 my $methodptr = { 319 .... 320 }; 321 bless $methodptr, $class; 322 } 323 ... package code ... 324 1; 325 326 ...end 327...................... 328 329 #!/usr/bin/perl 330 # my application 331 # 332 use Net::DNS::Dig; 333 use Some::Package; 334 335 my $dig = new Net::DNS::Dig; 336 my $sp = new Some::Package; 337 # 338 # initialiaze URBL::Prepare 339 # 340 $sp->cachewhite($localwhitefiles); 341 $sp->cachetlds($localtldfiles); 342 343 # set multi.surbl.org bit mask 344 # 2 = comes from SC 345 # 4 = comes from WS 346 # 8 = comes from PH 347 # 16 = comes from OB (OB is deprecated as of 22 October 2012.) 348 # 16 = comes from MW (MW active as of 1 May 2013.) 349 # 32 = comes from AB 350 # 64 = comes from JP 351 352 # test as: surbl-org-permanent-test-point.com.multi.surbl.org 353 354 my $mask = 0xDE; 355 356 ... application ... 357 ... generates ... 358 ... hostname ... 359 360 my $domain = $sp->urbldomain($hostname) 361 362 my $response = $dig->for($hostname . 'multi.surbl.org') 363 if $domain; # if not whitelisted 364 365 # if an answer is returned 366 if ($domain && $response->{HEADER}->{ANCOUNT}) { 367 # get packed ipV4 answer 368 my $answer = $response->{ANSWER}->[0]->{RDATA}->[0]; 369 if ($mask & unpack("N",$answer)) { 370 # answer is found in selected surbl list 371 } else { 372 # answer not found in selected surbl list 373 } 374 } 375 # domain not found in surbl 376 377 ...end 378 379This is an example of a script file to keep the whitelist and tldlist 380current. Run as a cron job daily. 381 382 #!/usr/bin/perl 383 # 384 # cache refresh cron job 385 # 386 require URBL::Prepare; 387 388 my $whiteurl = 389 'http://spamassasin.googlecode.com/svn-history/r6/trunk/share/spamassassin/25_uribl.cf'; 390 391 my $tld2url = 'http://george.surbl.org/two-level-tlds'; 392 my $tld3url = 'http://george.surbl.org/three-level-tlds'; 393 394 my $cachedir = './cache'; 395 my $lvl2file = $cachedir .'/level2'; 396 my $lvl3file = $cachedir .'/level3'; 397 my $whtfile = $cachedir .'/white'; 398 399 mkdir $cachedir unless -d $cachedir; 400 401 URBL::Prepare->loadcache($whiteurl,$whtfile); 402 URBL::Prepare->loadcache($tld2url,$lvl2file); 403 URBL::Prepare->loadcache($tld3url,$lvl3file); 404 405=cut 406 407=head1 AUTHOR 408 409Michael Robinton E<lt>michael@bizsystems.comE<gt> 410 411=head1 COPYRIGHT 412 413 Copyright 2013-2014, Michael Robinton <michael@bizsystems.com> 414 415This program is free software; you may redistribute it and/or modify it 416under the same terms as Perl itself. 417 418This program is distributed in the hope that it will be useful, 419but WITHOUT ANY WARRANTY; without even the implied warranty of 420MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 421 422=head1 See also: 423 424L<LWP::Request>, L<Net::DNS::Dig> 425 426=cut 427 4281; 429