1
2package URBL::Prepare;
3
4use strict;
5#use diagnostics;
6use AutoLoader 'AUTOLOAD';
7use vars qw($VERSION);
8
9$VERSION = do { my @r = (q$Revision: 0.09 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
10
11sub loadcache;
12sub Destroy {};
13
14=head1 NAME
15
16URPL::Prepare -- prepare hostname for URBL domain lookup
17
18=head1 SYNOPSIS
19
20  require URBL::Prepare;
21
22  my $ubp = new URBL::Prepare;
23
24  $tlds = $blessed->cachetlds($localfilelistptr);
25  $whitelist = $blessed->cachewhite($localfilelistptr);
26  $domain = $blessed->urbldomain($hostname);
27  $response_code = $proto->loadcache($url,$localfile);
28  ($response,$message) = $proto->loadcache($url,$localfile);
29  $rv = $blessed->urblblack($hostname);
30  $rv = $blessed->urblwhite($hostname);
31
32=head1 DESCRIPTION
33
34=over 2
35
36=item * my $urbl = new URBL::Prepare;
37
38This method returns a blessed reference to an empty hash.
39
40For use with other modules:
41
42  require URBL::Prepare;
43
44  @ISA = qw(URBL::Prepare);
45
46=back
47
48=cut
49
50sub new {
51  my $proto = shift;
52  my $class = ref $proto || $proto || __PACKAGE__;
53  bless {}, $class;
54}
55
56=head1 URBL Preparation for lookup methods
57
58The following three methods are for facilitating URBL lookups.
59
60  SEE:	http://www.uribl.com/about.shtml
61  and	http://www.surbl.org/guidelines
62
63=over 2
64
65=item * $tldlist = $blessed->cachetlds($localfilelistptr);
66
67This method opens local files in "file list" and extracts the tld's found
68therein.
69
70  input: 	ptr to array of local/file/path/names
71  return:	array ptr to list of tld's
72
73NOTE: place level 3 tld's ahead of level 2 tld's
74
75=cut
76
77# do level3 tld's first
78sub cachetlds {
79  my($bls,$files) = @_;
80  my @tldlist;
81  foreach my $infile (@$files) {
82    my $tldf;
83    next unless open $tldf, $infile;
84    foreach (<$tldf>) {
85      chomp;
86      next unless $_ =~ /\S/;
87      $_ =~ s/\./\\./g;
88      push @tldlist, lc $_;
89    }
90  }
91  $bls->{-urblpreparebad} = \@tldlist;
92}
93
94=item * $whitelist = $blessed->cachewhite($localfilelistptr);
95
96This method opens local file(s) in "file list" and extracts the domains
97found therein.
98
99  See http://wiki.apache.org/spamassassin/DnsBlocklists		and
100  http://spamassasin.googlecode.com/svn-history/r6/trunk/share/spamassassin/
101
102Note:: these URL's may change
103
104  input:	ptr to array of local/file/path/names
105  return:	array ptr to whitelist domain names
106
107=cut
108
109sub cachewhite {
110  my($bls,$files) = @_;
111  my @whitelist;
112  foreach my $infile (@$files) {
113    my $wfile;
114    next unless open $wfile, $infile;
115    foreach(<$wfile>) {
116      next unless $_ =~ /uridnsbl_skip_domain\s+(.+)/;
117      (my $white = $1) =~ s/\./\\./g;
118      chomp $white;
119      my @wtmp = split /\s+/, lc $white;
120      push @whitelist, @wtmp;
121    }
122  }
123  $bls->{-urblpreparewhite} = \@whitelist;
124}
125
126=item * $blacklist = $blessed->cacheblack($localfilelistptr);
127
128This method opens local file(s) in "file list" and extracts the domains found
129therein. The domains may be space seperated many to a line.
130
131  input:	ptr to array of local/file/path/names
132  return:	ptr to blacklist domain names
133
134=cut
135
136sub cacheblack {
137  my($bls,$files) = @_;
138  my @blacklist;
139  foreach my $infile (@$files) {
140    my $bkfile;
141    next unless open $bkfile, $infile;
142    foreach(<$bkfile>) {
143      chomp;
144      (my $black = $_) =~ s/\./\\./g;
145      my @btmp = split /\s+/, lc $black;
146      push @blacklist, @btmp;
147    }
148  }
149  $bls->{-urblprepareblack} = \@blacklist;
150}
151
152=item * $domain = $blessed->urbldomain($hostname)
153
154This method extracts a domain name to check against an SURBL. If the
155hostname is whitelisted, the return value is false, otherwise a domain name
156is returned.
157
158  input:	hostname
159  return:	false if whitelisted
160	else	domain to check against SURBL
161
162NOTE: optionally white or tld testing will be bypassed if the pointer
163is undefined or points to an empty array.
164
165=cut
166
167# Implementation Guidelines
168#
169# http://www.surbl.org/guidelines
170#
171# Extract base (registered) domains from those URIs. This includes removing
172# all leading host names, subdomains, www., randomized subdomains, etc. In
173# order to determine the level of domain to check, use our tables of two level
174# and three level TLDs. Originally these were CCTLDs, but they now also
175# include some frequently abused hosting domains. (Note that these files only
176# rarely update. Please don't retrieve them more often than once per day.) The
177# usage is:
178#
179#    For any domain on the three level list, check it at the fourth level.
180#    For any domain on the two level list, check it at the third level.
181#    For any other domain, check it at the second level.
182#
183# For example, any domain found in the two level list should have a third
184# level domain name (such as foo.co.uk) checked against SURBLs. Domains not
185# specifically on the two level list (such as foo.com or foo.fr) should be
186# checked at two levels. Please do not check at levels other than these as
187# doing so will cause unnecessary queries that won't result in matches.
188
189sub urbldomain {
190  my $bls   = shift;
191  my $host  = lc shift;
192  my $white = $bls->{-urblpreparewhite} || [];
193  my $tlds  = $bls->{-urblpreparebad} || [];
194
195  foreach(@$white) {
196    return undef if $host =~ /$_$/;	# whitelisted?
197  }
198  foreach (@$tlds) {
199    if ($host =~ /([^\.]+\.$_)$/) {
200      ($host = $1) =~ s/\\//g;
201      return $host;
202    }
203  }
204  $host =~ /([^\.]+\.[^\.]+)$/;
205  return $1;
206}
207
208=item * $rv = $blessed->urblblack($hostname)
209
210This method check if a hostname is found within the local black list(s).
211
212  input:	hostname
213  return:	domain found, else false
214
215=cut
216
217sub urblblack {
218  my $bls   = shift;
219  my $host  = lc shift;
220  my $tlds  = $bls->{-urblprepareblack} || [];
221  foreach (@$tlds) {
222    if ($host =~ /$_$/) {
223      ($host = $_) =~ s/\\//g;
224      return $host;
225    }
226  }
227  return undef;
228}
229
230=item * $rv = $blessed->urbwhite($hostname)
231
232This method check if a hostname is found within the local white list.
233
234  input:	hostname
235  return:	domain found, else false
236
237=cut
238
239sub urblwhite {
240  my $bls   = shift;
241  my $host  = lc shift;
242  my $white  = $bls->{-urblpreparewhite} || [];
243  foreach (@$white) {
244    if ($host =~ /$_$/) {
245      ($host = $_) =~ s/\\//g;
246      return $host;
247    }
248  }
249  return undef;
250}
251
2521;
253__END__
254
255=item * $response_code = $proto->loadcache($url,$localfile);
256
257=item * ($response,$message) = $proto->loadcache($url,$localfile);
258
259This method uses LWP::UserAgent::mirror to conditionally retrieve files
260to fill local cache with WHITELIST and TLD names. The response code is the
261result returned by the HTTP fetch and should be one of 200 or 304. At the
262time this module was released the files were as follows:
263
264  WHITE LIST URL
265  http://spamassasin.googlecode.com/svn-history/r6/trunk/share/spamassassin/25_uribl.cf
266
267and
268
269  TLDLIST URL (include some known abusive tlds)
270  http://george.surbl.org/three-level-tlds
271  http://george.surbl.org/two-level-tlds
272
273  input:	path/name/for/localfile
274  return:	http response code,
275		response message
276
277In scalar context only the http response code is returned. In array context
278the numeric response code and a related text message are returned.
279
280  200	OK		file cached
281  304	Not Modified	file is up-to-date
282
283Any other response code indicates and error.
284
285  Usage:
286  $rv = URBL::Prepare->loadcache($url,$localfile);
287
288=back
289
290=cut
291
292sub loadcache {
293  my($bls,$url,$file) = @_;
294  require LWP::UserAgent;
295  my $ua = new LWP::UserAgent(
296        timeout => 30
297  );
298  my $r = $ua->mirror($url,$file);
299  return $r->code unless wantarray;
300  return ($r->code,$r->message);
301}
302
303=head1 APPLICATION EXAMPLES
304
305This example shows how to include URBL::Prepare in another module
306
307  #!/usr/bin/perl
308  package = Some::Package
309
310  use vars qw(@ISA);
311  require URBL::Prepare;
312
313  @ISA = qw( URBL::Prepare );
314
315  sub new {
316    my $proto = shift;
317    my $class = ref $proto || $proto || __PACKAGE__;
318    my $methodptr = {
319	....
320    };
321    bless $methodptr, $class;
322  }
323  ... package code ...
324  1;
325
326  ...end
327......................
328
329  #!/usr/bin/perl
330  # my application
331  #
332  use Net::DNS::Dig;
333  use Some::Package;
334
335  my $dig = new Net::DNS::Dig;
336  my $sp = new Some::Package;
337  #
338  # initialiaze URBL::Prepare
339  #
340  $sp->cachewhite($localwhitefiles);
341  $sp->cachetlds($localtldfiles);
342
343  # set multi.surbl.org bit mask
344  #	2 = comes from SC
345  #	4 = comes from WS
346  #	8 = comes from PH
347  #	16 = comes from OB (OB is deprecated as of 22 October 2012.)
348  #	16 = comes from MW (MW active as of 1 May 2013.)
349  #	32 = comes from AB
350  #	64 = comes from JP
351
352  # test as: surbl-org-permanent-test-point.com.multi.surbl.org
353
354  my $mask = 0xDE;
355
356    ... application ...
357    ... generates   ...
358    ... hostname    ...
359
360  my $domain = $sp->urbldomain($hostname)
361
362  my $response = $dig->for($hostname . 'multi.surbl.org')
363	if $domain;	# if not whitelisted
364
365  # if an answer is returned
366  if ($domain && $response->{HEADER}->{ANCOUNT}) {
367    # get packed ipV4 answer
368    my $answer = $response->{ANSWER}->[0]->{RDATA}->[0];
369    if ($mask & unpack("N",$answer)) {
370	# answer is found in selected surbl list
371    } else {
372	# answer not found in selected surbl list
373    }
374  }
375  # domain not found in surbl
376
377  ...end
378
379This is an example of a script file to keep the whitelist and tldlist
380current. Run as a cron job daily.
381
382  #!/usr/bin/perl
383  #
384  # cache refresh cron job
385  #
386  require URBL::Prepare;
387
388  my $whiteurl =
389  'http://spamassasin.googlecode.com/svn-history/r6/trunk/share/spamassassin/25_uribl.cf';
390
391  my $tld2url = 'http://george.surbl.org/two-level-tlds';
392  my $tld3url = 'http://george.surbl.org/three-level-tlds';
393
394  my $cachedir	= './cache';
395  my $lvl2file	= $cachedir .'/level2';
396  my $lvl3file	= $cachedir .'/level3';
397  my $whtfile	= $cachedir .'/white';
398
399  mkdir $cachedir unless -d $cachedir;
400
401  URBL::Prepare->loadcache($whiteurl,$whtfile);
402  URBL::Prepare->loadcache($tld2url,$lvl2file);
403  URBL::Prepare->loadcache($tld3url,$lvl3file);
404
405=cut
406
407=head1 AUTHOR
408
409Michael Robinton E<lt>michael@bizsystems.comE<gt>
410
411=head1 COPYRIGHT
412
413    Copyright 2013-2014, Michael Robinton <michael@bizsystems.com>
414
415This program is free software; you may redistribute it and/or modify it
416under the same terms as Perl itself.
417
418This program is distributed in the hope that it will be useful,
419but WITHOUT ANY WARRANTY; without even the implied warranty of
420MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
421
422=head1 See also:
423
424L<LWP::Request>, L<Net::DNS::Dig>
425
426=cut
427
4281;
429