1# <@LICENSE>
2# Copyright 2019 Spamhaus Technology Ltd.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15# </@LICENSE>
16
17# The Spamhaus Technology SpamAssassin development crew can be reached
18# at <spamassassin at spamteq.com> for questions/suggestions related
19# with this plug-in exclusively.
20
21# version 20200430
22
23package Mail::SpamAssassin::Plugin::SH;
24
25use strict;
26use warnings;
27
28use Net::DNS;
29use Mail::SpamAssassin;
30use Mail::SpamAssassin::Plugin;
31use Mail::SpamAssassin::PerMsgStatus;
32use Socket;
33use Mail::SpamAssassin::Logger;
34use Digest::SHA qw(sha256 );
35use Sys::Syslog qw( :DEFAULT setlogsock);
36
37our @ISA = qw(Mail::SpamAssassin::Plugin);
38
39sub new {
40  my ($class, $mailsa) = @_;
41  $class = ref($class) || $class;
42  my $self = $class->SUPER::new( $mailsa );
43  bless ($self, $class);
44  $self->set_config($mailsa->{conf});
45  # are network tests enabled?
46  if ($mailsa->{local_tests_only}) {
47    $self->{sh_available} = 0;
48    dbg("SHPlugin: local tests only, disabled");
49  } else {
50    $self->{sh_available} = 1;
51  }
52  # Finds email in the email body and check their @domains
53  $self->register_eval_rule ( 'check_sh_bodyemail' );
54  # Finds email in the email body and check their @domain's authoritative name servers IPs
55  $self->register_eval_rule ( 'check_sh_bodyemail_ns' );
56  # Checks envelope and body headers (Return-Path, From, Reply-To etc..) @domains
57  $self->register_eval_rule ( 'check_sh_headers' );
58  # Checks envelope and body headers (Return-Path, From, Reply-To etc..) @domains's authoritative name servers IPs
59  $self->register_eval_rule ( 'check_sh_headers_ns' );
60  # Checks the HELO string
61  $self->register_eval_rule ( 'check_sh_helo' );
62  # Checks the reverse DNS of the last untrusted relay
63  $self->register_eval_rule ( 'check_sh_reverse' );
64  # Finds URIs in the email body and checks their corresponding A record
65  $self->register_eval_rule ( 'check_sh_bodyuri_a' );
66  # Finds URIs in the email body and checks their domain's authoritative name servers IPs
67  $self->register_eval_rule ( 'check_sh_bodyuri_ns' );
68  # Finds Cryptowallets in the body and check if hey are being used in spam campaigns
69  $self->register_eval_rule ( 'check_sh_crypto' );
70  # Check attachment's hashes
71  $self->register_eval_rule ( 'check_sh_attachment' );
72  # Check email hashes
73  $self->register_eval_rule ( 'check_sh_emails' );
74
75  return $self;
76}
77
78sub log_syslog {
79 my ($priority, $msg) = @_;
80 return 0 unless ($priority =~ /info|err|debug/);
81 setlogsock('unix');
82 openlog("SHPlugin",'pid','mail');
83 syslog($priority, $msg);
84 closelog();
85 return 1;
86}
87
88sub finish_parsing_end {
89  my ($self, $opts) = @_;
90
91  return 0 if !$self->{sh_available};
92
93  # valid_tlds_re will be available at finish_parsing_end, compile it now,
94  # we only need to do it once and before possible forking
95  if (!exists $self->{email_regex}) {
96    $self->_init_email_re();
97  }
98  return 0;
99}
100
101sub _init_email_re {
102  my ($self) = @_;
103  my $sa_version = Mail::SpamAssassin::Version();
104  $sa_version =~ tr/\.//d;
105  # This is an ugly hack to make the regex work with SA 3.4.1 and possibly 3.4.0. Not recommended as TLDs are not updated
106  # dinamically like in 3.4.2 where they are updated via sa-update
107  if ($sa_version < 342) {
108    $self->{main}->{registryboundaries}->{valid_tlds_re} = '(?^i:(?:education|ch|watch|coffee|bo|fr|capital|et|yandex|yachts|ag|kh|es|events|gmail|systems|by|xn--3e0b707e|land|global|nl|xn--lgbbat1ad8j|kim|vi|no|gop|lgbt|cz|ni|gifts|xn--c1avg|praxi|sl|consulting|uk|xn--mgberp4a5d4ar|hm|za|kitchen|xn--wgbh1c|xn--45brj9c|xn--3bst00m|fo|mn|xn--p1ai|co|camera|voto|gw|actor|is|berlin|sa|kn|ovh|care|int|joburg|audio|shoes|cab|uy|schmidt|ceo|auction|pg|wed|neustar|xn--6frz82g|vote|reisen|company|gi|cat|vacations|xn--fzc2c9e2c|ong|onl|us|ng|cy|li|church|pr|ac|life|sz|xn--4gbrim|wang|top|tatar|ls|exposed|expert|pw|marketing|nu|py|cn|gl|cologne|sv|cr|mortgage|tirol|brussels|guitars|ad|nr|vlaanderen|eg|blackfriday|fishing|xn--czru2d|gp|xn--vhquv|xn--fpcrj9c3d|country|press|international|re|coop|nagoya|physio|cc|cleaning|rs|voyage|juegos|sy|mp|xn--fiq228c5hs|xn--nqv7fs00ema|social|er|city|recipes|ck|st|kw|healthcare|computer|koeln|hk|bnpparibas|industries|paris|university|sarl|host|xn--cg4bki|cd|diet|org|xn--ses554g|vodka|td|sn|day|lt|tokyo|rich|diamonds|gt|credit|club|qpon|gb|sm|xn--io0a7i|xn--6qq986b3xl|ge|cl|ink|nz|bargains|kz|hiv|mz|menu|sh|xn--o3cw4h|jo|post|xn--80adxhks|lighting|id|nc|plumbing|nrw|np|sd|uz|arpa|tl|clinic|photography|bio|pizza|ninja|website|il|tz|pm|tm|tv|restaurant|associates|sj|scb|organic|bb|equipment|gratis|boutique|enterprises|mx|foundation|xn--80ao21a|dentist|xn--l1acc|navy|christmas|democrat|villas|in|gbiz|fi|futbol|name|gu|xn--j6w193g|meet|ne|ventures|net|technology|vc|mk|photos|cancerresearch|bzh|xn--h2brj9c|financial|dnp|xn--90a3ac|tax|cv|estate|tj|cern|sohu|horse|gg|fitness|xn--mgbc0a9azcg|whoswho|camp|am|nra|condos|beer|kr|uno|institute|construction|dental|tattoo|accountants|ua|xyz|bv|zw|im|academy|fish|ru|quebec|mt|nhk|gm|gf|autos|me|suzuki|tools|ma|mango|ai|republican|ky|tw|flights|gq|rocks|md|black|boo|az|meme|bd|bi|hu|xn--pgbs0dh|ar|xn--ngbc5azd|durban|mobi|xxx|sexy|jobs|ae|cash|at|youtube|citic|tf|okinawa|ie|network|pf|domains|scot|support|london|rodeo|zone|nyc|pt|ws|sb|holiday|versicherung|productions|tk|vu|limo|br|maison|frogans|space|xn--i1b6b1a6a2e|xn--xkc2al3hye2a|au|xn--rhqv96g|works|wales|edu|miami|active|eu|pa|cruises|soy|furniture|xn--mgbx4cd0ab|ms|mini|gives|toys|lease|ing|hn|bike|eus|place|finance|vet|gmo|degree|sg|gov|io|software|reviews|motorcycles|vegas|bmw|immo|homes|xn--unup4y|mo|builders|green|xn--q9jyb4c|jp|sr|feedback|repair|lc|rentals|gift|info|pl|florist|archi|rest|bid|caravan|pub|tg|xn--zfr164b|fj|red|xn--mgbayh7gpa|business|gd|supplies|ro|otsuka|tienda|cards|wien|direct|exchange|xn--kprw13d|om|gn|mw|ye|pink|digital|deals|ca|ph|ve|bn|attorney|museum|xn--gecrj9c|ci|so|moscow|bt|glass|gy|dad|mg|gripe|dm|ao|cu|guide|tt|sk|dj|cheap|guru|xn--fiq64b|mh|jetzt|luxury|haus|email|catering|bg|cf|ec|partners|aq|ruhr|tr|cool|xn--ygbi2ammx|gal|xn--xhq521b|xn--ogbpf8fl|my|tn|bj|xn--mgba3a4f16a|vn|lk|xn--nqv7f|lb|yokohama|aero|xn--mgb9awbf|dz|al|properties|kaufen|aw|wiki|fund|gh|property|pe|com|ba|loans|tips|here|xn--80asehdb|spiegel|lr|krd|ir|na|hamburg|la|luxe|mc|airforce|ps|gr|iq|house|sx|hosting|bz|schule|uol|cw|af|pro|training|cg|community|as|se|kred|clothing|xn--yfro4i67o|mil|ryukyu|bw|blue|best|media|hiphop|it|build|contractors|zm|holdings|nf|rsvp|bayern|esq|cooking|dk|bs|ly|ga|gent|help|foo|xn--j1amh|directory|ug|gs|monash|mov|today|kg|ren|asia|desi|xn--fiqz9s|pn|va|ke|bm|fm|si|solar|capetown|viajes|eat|surf|ee|be|km|career|pictures|lv|dating|ngo|army|reise|cuisinella|xn--mgbbh1a71e|wf|supply|an|xn--55qw42g|insure|xn--1qqw23a|sc|pk|graphics|dance|voting|tel|center|su|lacaixa|xn--fiqs8s|bf|singles|xn--s9brj9c|frl|trade|de|mq|ht|realtor|moe|parts|mm|cm|pics|bh|xn--wgbl6a|gallery|college|engineer|farm|new|photo|services|rehab|rw|moda|xn--xkc2dl3a5ee0h|lu|report|xn--clchc0ea0b2g2a9gcd|je|kp|fail|management|cymru|ooo|tc|webcam|codes|xn--80aswg|ml|rio|qa|buzz|careers|agency|vg|fk|ax|engineering|axa|immobilien|limited|lotto|lawyer|xn--3ds443g|ki|xn--czr694b|investments|solutions|mr|travel|prod|cx|williamhill|to|surgery|hr|creditcard|xn--kput3i|market|mu|xn--kpry57d|vision|bar|xn--mgbaam7a8h|discount|biz|saarland|wtc|claims|xn--55qx5d|mv|xn--mgbab2bd|wtf|yt|link|globo|how|melbourne|kiwi|xn--d1acj3b|shiksha|town|ltda|th|do|jm|sca|click))';
109    dbg("SHPlugin: Email regex hack for SA < 3.4.2 engaged. Consider switching to 3.4.2+");
110  }
111  # Some regexp tips courtesy of http://www.regular-expressions.info/email.html
112  # full email regex v0.02
113  $self->{email_regex} = qr/
114    (?=.{0,64}\@)                       # limit userpart to 64 chars (and speed up searching?)
115    (?<![a-z0-9!#\$%&'*+\/=?^_`{|}~-])  # start boundary
116    (                                   # capture email
117    [a-z0-9!#\$%&'*+\/=?^_`{|}~-]+      # no dot in beginning
118    (?:\.[a-z0-9!#\$%&'*+\/=?^_`{|}~-]+)* # no consecutive dots, no ending dot
119    \@
120    (?:[a-z0-9](?:[a-z0-9-]{0,59}[a-z0-9])?\.){1,4} # max 4x61 char parts (should be enough?)
121    $self->{main}->{registryboundaries}->{valid_tlds_re} # ends with valid tld
122    )
123  /xi;
124# lazy man debug
125#open(my $fh, '>', "/tmp/reg") or die "Could not open file $!";
126#print $fh $self->{email_regex};
127#close $fh;
128}
129
130sub set_config {
131  my($self, $conf) = @_;
132  my @cmds;
133
134  push (@cmds, {
135    setting => 'uridnsbl_skip_domain',
136    default => {},
137    type => $Mail::SpamAssassin::Conf::CONF_TYPE_HASH_KEY_VALUE,
138    code => sub {
139      my ($self, $key, $value, $line) = @_;
140      if ($value =~ /^$/) {
141        return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE;
142      }
143      foreach my $domain (split(/\s+/, $value)) {
144        $self->{uridnsbl_skip_domains}->{lc $domain} = 1;
145      }
146    }
147  });
148
149}
150
151sub _get_body_uris {
152  my ($self,$pms, $bodyref) = @_;
153  my $body = join('', @{$bodyref});
154  my %seen;
155  my @uris;
156  foreach my $this_uri ( $body =~ /[a-zA-Z][a-zA-Z0-9+\-.]*:\/\/(?:[a-zA-Z0-9\-._~%!$&'()*+,;=]+@)?([a-zA-Z0-9\-._~%]+|↵\[[a-zA-Z0-9\-._~%!$&'()*+,;=:]+\])/g) {
157    push (@uris, lc $this_uri) unless defined $seen{lc $this_uri};
158    $seen{lc $this_uri} = 1;
159  }
160  foreach my $this_uri (@uris) {
161    dbg("SHPlugin: (_get_body_uris) found  ".$this_uri." in body");
162  }
163  return (@uris);
164}
165
166sub _get_part_details {
167  my ($pms, $part) = @_;
168  #https://en.wikipedia.org/wiki/MIME#Content-Disposition
169  #https://github.com/mikel/mail/pull/464
170  my $ctt = $part->get_header('content-type');
171  return undef unless defined $ctt;
172  my $cte = lc($part->get_header('content-transfer-encoding') || '');
173  return undef unless ($cte =~ /^(?:base64|quoted\-printable)$/);
174  $ctt = _decode_part_header($part, $ctt || '');
175  my $name = '';
176  my $cttname = '';
177  my $ctdname = '';
178  if($ctt =~ m/(?:file)?name\s*=\s*["']?([^"';]*)["']?/is){
179    $cttname = $1;
180    $cttname =~ s/\s+$//;
181  }
182  my $ctd = $part->get_header('content-disposition');
183  $ctd = _decode_part_header($part, $ctd || '');
184  if($ctd =~ m/filename\s*=\s*["']?([^"';]*)["']?/is){
185    $ctdname = $1;
186    $ctdname =~ s/\s+$//;
187  }
188  if (lc $ctdname eq lc $cttname) {
189    $name = $ctdname;
190  } elsif ($ctdname eq '') {
191    $name = $cttname;
192  } elsif ($cttname eq '') {
193    $name = $ctdname;
194  } else {
195    if ($pms->{conf}->{olemacro_prefer_contentdisposition}) {
196      $name = $ctdname;
197    } else {
198      $name = $cttname;
199    }
200  }
201  return $ctt, $ctd, $cte, lc $name;
202}
203
204sub _decode_part_header {
205  my($part, $header_field_body) = @_;
206  return '' unless defined $header_field_body && $header_field_body ne '';
207  # deal with folding and cream the newlines and such
208  $header_field_body =~ s/\n[ \t]+/\n /g;
209  $header_field_body =~ s/\015?\012//gs;
210  local($1,$2,$3);
211  # Multiple encoded sections must ignore the interim whitespace.
212  # To avoid possible FPs with (\s+(?==\?))?, look for the whole RE
213  # separated by whitespace.
214  1 while $header_field_body =~
215            s{ ( = \? [A-Za-z0-9_-]+ \? [bqBQ] \? [^?]* \? = ) \s+
216               ( = \? [A-Za-z0-9_-]+ \? [bqBQ] \? [^?]* \? = ) }
217             {$1$2}xsg;
218  # transcode properly encoded RFC 2047 substrings into UTF-8 octets,
219  # leave everything else unchanged as it is supposed to be UTF-8 (RFC 6532)
220  # or plain US-ASCII
221  $header_field_body =~
222    s{ (?: = \? ([A-Za-z0-9_-]+) \? ([bqBQ]) \? ([^?]*) \? = ) }
223     { $part->__decode_header($1, uc($2), $3) }xsge;
224  return $header_field_body;
225}
226
227sub _get_full_body_uris {
228  my ($self,$pms, $bodyref) = @_;
229  my $body = join('', @{$bodyref});
230  my %seen;
231  my @uris;
232  foreach my $this_uri ( $body =~ /([a-zA-Z][a-zA-Z0-9+\-.]*:\/\/(?:[a-zA-Z0-9\-._~%!$&'()*+,;=]+@)?[a-zA-Z0-9\-._~%\/]+)/g) {
233    push (@uris, lc $this_uri) unless defined $seen{lc $this_uri};
234    $seen{lc $this_uri} = 1;
235  }
236  return (@uris);
237}
238
239sub _get_domains_from_body_emails {
240  my ($self,$pms) = @_;
241  # This extraction code has been heavily copypasted and slightly adapted from https://github.com/smfreegard/HashBL/blob/master/HashBL.pm
242  my %seen;
243  my @body_domains;
244  # get all <a href="mailto:", since they don't show up on stripped_body
245  my $parsed = $pms->get_uri_detail_list();
246  while (my($uri, $info) = each %{$parsed}) {
247    if (defined $info->{types}->{a} and not defined $info->{types}->{parsed}) {
248      if ($uri =~ /^(?:(?i)mailto):$self->{email_regex}/) {
249        my $email = lc($1);
250        my ($this_user, $this_domain )       = split('@', $email);
251        push(@body_domains, $this_domain) unless defined $seen{$this_domain};
252        $seen{$this_domain} = 1;
253        last if scalar @body_domains >= 20; # sanity
254      }
255    }
256  }
257  # scan stripped normalized body
258  # have to do this way since get_uri_detail_list doesn't know what mails are inside <>
259  my $body = $pms->get_decoded_stripped_body_text_array();
260  BODY: foreach (@$body) {
261    # strip urls with possible emails inside
262    s#<?https?://\S{0,255}(?:\@|%40)\S{0,255}# #gi;
263    # strip emails contained in <>, not mailto:
264    # also strip ones followed by quote-like "wrote:" (but not fax: and tel: etc)
265    s#<?(?<!mailto:)$self->{email_regex}(?:>|\s{1,10}(?!(?:fa(?:x|csi)|tel|phone|e?-?mail))[a-z]{2,11}:)# #gi;
266    while (/$self->{email_regex}/g) {
267      my $email = lc($1);
268      my ($this_user, $this_domain )       = split('@', $email);
269      push(@body_domains, $this_domain) unless defined $seen{$this_domain};
270      $seen{$this_domain} = 1;
271      last BODY if scalar @body_domains >= 40; # sanity
272    }
273  }
274  foreach my $this_domain (@body_domains) {
275    dbg("SHPlugin: (_get_domains_from_body_emails) found domain ".$this_domain." in body email");
276  }
277  return (@body_domains);
278}
279
280sub _get_body_emails {
281  my ($self,$pms) = @_;
282  # This extraction code has been heavily copypasted and slightly adapted from https://github.com/smfreegard/HashBL/blob/master/HashBL.pm
283  my %seen;
284  my @body_emails;
285  # get all <a href="mailto:", since they don't show up on stripped_body
286  my $parsed = $pms->get_uri_detail_list();
287  while (my($uri, $info) = each %{$parsed}) {
288    if (defined $info->{types}->{a} and not defined $info->{types}->{parsed}) {
289      if ($uri =~ /^(?:(?i)mailto):$self->{email_regex}/) {
290        my $this_email = lc($1);
291        push(@body_emails, $this_email) unless defined $seen{$this_email};
292        $seen{$this_email} = 1;
293        last if scalar @body_emails >= 20; # sanity
294      }
295    }
296  }
297  # scan stripped normalized body
298  # have to do this way since get_uri_detail_list doesn't know what mails are inside <>
299  my $body = $pms->get_decoded_stripped_body_text_array();
300  BODY: foreach (@$body) {
301    # strip urls with possible emails inside
302    s#<?https?://\S{0,255}(?:\@|%40)\S{0,255}# #gi;
303    # strip emails contained in <>, not mailto:
304    # also strip ones followed by quote-like "wrote:" (but not fax: and tel: etc)
305    s#<?(?<!mailto:)$self->{email_regex}(?:>|\s{1,10}(?!(?:fa(?:x|csi)|tel|phone|e?-?mail))[a-z]{2,11}:)# #gi;
306    while (/$self->{email_regex}/g) {
307      my $this_email = lc($1);
308      push(@body_emails, $this_email) unless defined $seen{$this_email};
309      $seen{$this_email} = 1;
310      last BODY if scalar @body_emails >= 40; # sanity
311    }
312  }
313  foreach my $this_email (@body_emails) {
314    dbg("SHPlugin: (_get_body_emails) found email ".$this_email." in body");
315  }
316  return (@body_emails);
317}
318
319sub _get_headers_domains {
320  my ($self,$pms) = @_;
321  # This extraction code has been heavily copypasted and slightly adapted from https://github.com/smfreegard/HashBL/blob/master/HashBL.pm
322  my %seen;
323  my @headers_domains;
324  my @headers = ('EnvelopeFrom', 'Sender', 'From', 'Reply-To');
325  foreach my $header (@headers) {
326    if ($pms->get($header . ':addr')) {
327      my $this_domain = $self->{'main'}->{'registryboundaries'}->uri_to_domain($pms->get( $header.':addr' ));
328      if ($this_domain) {
329        dbg("SHPlugin: (_get_headers_domains) found domain ".$this_domain." in header ".$header);
330        push(@headers_domains, $this_domain) unless defined $seen{$this_domain};
331        $seen{$this_domain} = 1;
332      }
333    }
334  }
335  return (@headers_domains);
336}
337
338sub _get_headers_emails {
339  my ($self,$pms) = @_;
340  # This extraction code has been heavily copypasted and slightly adapted from https://github.com/smfreegard/HashBL/blob/master/HashBL.pm
341  my %seen;
342  my @headers_emails;
343  my @headers = ('EnvelopeFrom', 'Sender', 'From', 'Reply-To');
344  foreach my $header (@headers) {
345    my $email = lc($pms->get($header . ':addr'));
346    if ($email) {
347        dbg("SHPlugin: (_get_headers_emails) found email ".$email." in header ".$header);
348        push(@headers_emails, $email) unless defined $seen{$email};
349        $seen{$email} = 1;
350    }
351  }
352  return (@headers_emails);
353}
354
355sub check_sh_headers {
356
357  my ($self, $pms, $list, $subtest) = @_;
358
359  return 0 unless $self->{sh_available};
360  return 0 unless defined $list;
361
362  my $conf = $pms->{conf};
363  my $skip_domains = $conf->{uridnsbl_skip_domains};
364  $skip_domains = {}  if !$skip_domains;
365  my @header_domains;
366
367  (@header_domains) = _get_headers_domains($self,$pms);
368
369  my $rulename = $pms->get_current_eval_rule_name();
370  if (@header_domains) {
371    foreach my $this_domain (@header_domains) {
372      if (!($skip_domains->{$this_domain})) {
373        my $lookup = $this_domain.".".$list;
374        my $key = "SH:$lookup";
375        my $ent = {
376          key => $key,
377          zone => $list,
378          type => 'SH',
379          rulename => $rulename,
380          addr => $this_domain,
381        };
382        $ent = $pms->{async}->bgsend_and_start_lookup($lookup, 'A', undef, $ent, sub {
383          my ($ent, $pkt) = @_;
384          $self->_finish_lookup($pms, $ent, $pkt, $subtest);
385        }, master_deadline => $pms->{master_deadline});
386      }
387    }
388  }
389  return 0;
390}
391
392sub check_sh_emails {
393
394  my ($self, $pms, $list, $subtest) = @_;
395
396  return 0 unless $self->{sh_available};
397  return 0 unless defined $list;
398
399  my $conf = $pms->{conf};
400  my $skip_domains = $conf->{sh_emailbl_skip_domains};
401  $skip_domains = {}  if !$skip_domains;
402  my @header_emails;
403  my @body_emails;
404  my @emails;
405  (@header_emails) = _get_headers_emails($self,$pms);
406  (@body_emails) = _get_body_emails($self,$pms);
407  push(@emails,@body_emails);
408  push(@emails,@header_emails);
409  my $rulename = $pms->get_current_eval_rule_name();
410  if (@emails) {
411    foreach my $email (@emails) {
412      # Normalize googlemail.com -> gmail.com
413      $email =~ s/\@googlemail\.com/\@gmail\.com/;
414      # Remove plus sign if present
415      $email =~ s/(\+.*\@)/@/;
416      my ($this_user, $this_domain )       = split('@', $email);
417      if (!($skip_domains->{$this_domain})) {
418	# Remove dots from left part if rightpart is gmail.com
419        if ($email =~ /\@gmail\.com/) {
420          $this_user =~ s/(\.)//g;
421          $email = $this_user.'@'.$this_domain;
422        }
423        my $hash = encode_base32(sha256($email));
424        my $lookup = $hash.".".$list;
425        my $key = "SH:$lookup";
426        my $ent = {
427          key => $key,
428          zone => $list,
429          type => 'SH',
430          rulename => $rulename,
431          addr => "$email",
432        };
433        $ent = $pms->{async}->bgsend_and_start_lookup($lookup, 'A', undef, $ent, sub {
434          my ($ent, $pkt) = @_;
435          $self->_finish_lookup($pms, $ent, $pkt, $subtest);
436        }, master_deadline => $pms->{master_deadline});
437      }
438    }
439  }
440  return 0;
441}
442
443sub check_sh_headers_ns {
444
445  my ($self, $pms, $list, $subtest) = @_;
446
447  return 0 unless $self->{sh_available};
448  return 0 unless defined $list;
449
450  my $conf = $pms->{conf};
451  my $skip_domains = $conf->{uridnsbl_skip_domains};
452  $skip_domains = {}  if !$skip_domains;
453
454  my @headers_domains;
455
456  (@headers_domains) = _get_headers_domains($self,$pms);
457  my $rulename = $pms->get_current_eval_rule_name();
458
459  foreach my $this_domain (@headers_domains) {
460    if (!($skip_domains->{$this_domain})) {
461      dbg("SHPlugin: (check_sh_headers_ns) checking authoritative NS for domain ".$this_domain);
462      my $res   = Net::DNS::Resolver->new;
463      $res->udp_timeout(3);
464      $res->tcp_timeout(3);
465      my $reply_ns = $res->query("$this_domain", "NS");
466      if ($reply_ns) {
467        foreach my $rr_ns (grep { $_->type eq "NS" } $reply_ns->answer) {
468          dbg("SHPlugin: (check_sh_headers_ns) found authoritative NS for %s: %s", $this_domain, $rr_ns->nsdname);
469          $self->lookup_a_record($pms, $rr_ns->nsdname, $list, $rulename, $subtest);
470        }
471      }
472    }
473  }
474  return 0;
475}
476
477sub check_sh_helo {
478
479  my ($self, $pms, $list, $subtest) = @_;
480
481  return 0 unless $self->{sh_available};
482  return 0 unless defined $list;
483
484  my $conf = $pms->{conf};
485  my $skip_domains = $conf->{uridnsbl_skip_domains};
486  $skip_domains = {}  if !$skip_domains;
487
488  my $rulename = $pms->get_current_eval_rule_name();
489
490  my $lasthop = $pms->{relays_untrusted}->[0];
491  if (!defined $lasthop) {
492    dbg ("SHPlugin: message was delivered entirely via trusted relays, not required");
493    return;
494  }
495
496  my $helo = $lasthop->{helo};
497  if (!($skip_domains->{$helo})) {
498    dbg ("SHPlugin: (check_sh_helo) checking HELO (helo=$helo)");
499    my $lookup = $helo.".".$list;
500    my $key = "SH:$lookup";
501    my $ent = {
502      key => $key,
503      zone => $list,
504      type => 'SH',
505      rulename => $rulename,
506      addr => $helo,
507    };
508    $ent = $pms->{async}->bgsend_and_start_lookup($lookup, 'A', undef, $ent, sub {
509      my ($ent, $pkt) = @_;
510      $self->_finish_lookup($pms, $ent, $pkt, $subtest);
511    }, master_deadline => $pms->{master_deadline});
512  }
513  return 0;
514}
515
516sub check_sh_bodyemail_ns {
517
518  my ($self, $pms, $bodyref, $list, $subtest) = @_;
519
520  return 0 unless $self->{sh_available};
521  return 0 unless defined $list;
522
523  my $conf = $pms->{conf};
524  my $skip_domains = $conf->{uridnsbl_skip_domains};
525  $skip_domains = {}  if !$skip_domains;
526  my $rulename = $pms->get_current_eval_rule_name();
527  my (@domains) = _get_domains_from_body_emails($self,$pms);
528  foreach my $this_domain (@domains) {
529    if (!($skip_domains->{$this_domain})) {
530      dbg("SHPlugin: (check_sh_bodyemail_ns) checking authoritative NS for domain ".$this_domain);
531      my $res   = Net::DNS::Resolver->new;
532      $res->udp_timeout(3);
533      $res->tcp_timeout(3);
534      my $reply_ns = $res->query("$this_domain", "NS");
535      if ($reply_ns) {
536        foreach my $rr_ns (grep { $_->type eq "NS" } $reply_ns->answer) {
537          dbg("SHPlugin: (check_sh_bodyemail_ns) found authoritative NS for %s: %s", $this_domain, $rr_ns->nsdname);
538          $self->lookup_a_record($pms, $rr_ns->nsdname, $list, $rulename, $subtest);
539        }
540      }
541    }
542  }
543  return 0;
544}
545
546sub check_sh_bodyemail {
547
548  my ($self, $pms, $bodyref, $list, $subtest) = @_;
549
550  return 0 unless $self->{sh_available};
551  return 0 unless defined $list;
552
553  my $conf = $pms->{conf};
554  my $skip_domains = $conf->{uridnsbl_skip_domains};
555  $skip_domains = {}  if !$skip_domains;
556  my $rulename = $pms->get_current_eval_rule_name();
557  my (@domains) = _get_domains_from_body_emails($self,$pms);
558  foreach my $this_domain (@domains) {
559    if (!($skip_domains->{$this_domain})) {
560      dbg("SHPlugin: (check_sh_bodyemail) checking body domain ".$this_domain);
561      my $lookup = $this_domain.".".$list;
562      my $key = "SH:$lookup";
563      my $ent = {
564        key => $key,
565        zone => $list,
566        type => 'SH',
567        rulename => $rulename,
568        addr => $this_domain,
569      };
570      $ent = $pms->{async}->bgsend_and_start_lookup($lookup, 'A', undef, $ent, sub {
571        my ($ent, $pkt) = @_;
572        $self->_finish_lookup($pms, $ent, $pkt, $subtest);
573      }, master_deadline => $pms->{master_deadline});
574    }
575  }
576  return 0;
577}
578
579sub check_sh_bodyuri_a {
580
581  my ($self, $pms, $bodyref, $list, $subtest) = @_;
582  my $conf = $pms->{conf};
583  return 0 unless $self->{sh_available};
584  return 0 unless defined $list;
585
586  my $skip_domains = $conf->{uridnsbl_skip_domains};
587  $skip_domains = {}  if !$skip_domains;
588
589  my $body = join('', @{$bodyref});
590  my $rulename = $pms->get_current_eval_rule_name();
591
592  my @uris;
593  (@uris) = _get_body_uris($self,$pms,$bodyref);
594
595  foreach my $this_hostname (@uris) {
596    if (!($skip_domains->{$this_hostname})) {
597      dbg("SHPlugin: (check_sh_bodyuri_a) lookup_a_record for URI ".$this_hostname);
598      $self->lookup_a_record($pms, $this_hostname, $list, $rulename, $subtest);
599    }
600  }
601  return 0;
602}
603
604sub check_sh_bodyuri_ns {
605
606  my ($self, $pms, $bodyref, $list, $subtest) = @_;
607  my $conf = $pms->{conf};
608  return 0 unless $self->{sh_available};
609  return 0 unless defined $list;
610
611  my $skip_domains = $conf->{uridnsbl_skip_domains};
612  $skip_domains = {}  if !$skip_domains;
613
614  my $body = join('', @{$bodyref});
615  my $rulename = $pms->get_current_eval_rule_name();
616  my @uris;
617  (@uris) = _get_body_uris($self,$pms,$bodyref);
618  foreach my $this_hostname (@uris) {
619    my $this_domain = $self->{'main'}->{'registryboundaries'}->uri_to_domain($this_hostname);
620    if (!($skip_domains->{$this_hostname})) {
621      dbg("SHPlugin: (check_sh_bodyuri_ns) checking authoritative NS for domain ".$this_domain." from URI ".$this_hostname." found in body");
622      my $res   = Net::DNS::Resolver->new;
623      $res->udp_timeout(3);
624      $res->tcp_timeout(3);
625      my $reply_ns = $res->query("$this_domain", "NS");
626      if ($reply_ns) {
627        foreach my $rr_ns (grep { $_->type eq "NS" } $reply_ns->answer) {
628          dbg("SHPlugin: (check_sh_bodyuri_ns) found authoritative NS for %s: %s", $this_domain, $rr_ns->nsdname);
629          $self->lookup_a_record($pms, $rr_ns->nsdname, $list, $rulename, $subtest);
630        }
631      }
632    }
633  }
634  return 0;
635}
636
637sub check_sh_reverse {
638
639  my ($self, $pms, $list, $subtest) = @_;
640
641  return 0 unless $self->{sh_available};
642  return 0 unless defined $list;
643
644  my $rulename = $pms->get_current_eval_rule_name();
645
646  my $lasthop = $pms->{relays_untrusted}->[0];
647  if (!defined $lasthop) {
648    dbg ("SHPlugin: message was delivered entirely via trusted relays, not required");
649    return;
650  }
651
652  my $rdns = $lasthop->{rdns};
653  if ($rdns) {
654    dbg ("SHPlugin: (check_sh_reverse) checking RDNS of the last untrusted relay (rdns=$rdns)");
655    if ((substr $rdns, -1) eq ".") { chop $rdns; }
656    my $lookup = $rdns.".".$list;
657    my $key = "SH:$lookup";
658    my $ent = {
659      key => $key,
660      zone => $list,
661      type => 'SH',
662      rulename => $rulename,
663      addr => $rdns,
664    };
665    $ent = $pms->{async}->bgsend_and_start_lookup($lookup, 'A', undef, $ent, sub {
666      my ($ent, $pkt) = @_;
667      $self->_finish_lookup($pms, $ent, $pkt, $subtest);
668    }, master_deadline => $pms->{master_deadline});
669    return 0;
670  }
671}
672
673sub check_sh_crypto {
674  my ($self, $pms, $bodyref, $list, $subtest, $cr, $cryptovalue) = @_;
675  my $regex = qr/$cr/;
676  return 0 unless $self->{sh_available};
677  return 0 unless defined $list;
678  my %addrs;
679  my $body = join('', @{$bodyref});
680  dbg("SHPlugin: looking for $cryptovalue addresses...");
681  while ($body =~ /($regex)/g) {
682    $addrs{$1} = 1;
683    dbg("SHPlugin: Found possible crypto $cryptovalue address $1");
684    last if keys %addrs >= 10; # max unique
685  }
686  if (!%addrs) {
687    dbg("SHPlugin: no crypto addresses found");
688    return 0;
689  }
690  my $rulename = $pms->get_current_eval_rule_name();
691  foreach my $addr (keys %addrs) {
692    my $hash = encode_base32(sha256($addr));
693    dbg("SHPlugin: Crypto address '$addr' of type $cryptovalue found in body, checking against $list ($hash)");
694    my $lookup = "$hash.$list";
695    my $key = "SH:$lookup";
696    my $ent = {
697        key => $key,
698        zone => $list,
699        type => 'SH',
700        rulename => $rulename,
701        addr => $addr,
702        hash => $hash,
703    };
704    $ent = $pms->{async}->bgsend_and_start_lookup($lookup, 'A', undef, $ent, sub {
705      my ($ent, $pkt) = @_;
706      $self->_finish_lookup($pms, $ent, $pkt, $subtest);
707    }, master_deadline => $pms->{master_deadline});
708  }
709}
710
711sub check_sh_attachment {
712  my ($self,$pms,$body,$list,$subtest) = @_;
713  my $rulename = $pms->get_current_eval_rule_name();
714  foreach my $part ($pms->{msg}->find_parts(qr/./, 1, 1)) {
715    my ($ctt, $ctd, $cte, $name) = _get_part_details($pms, $part);
716    next unless defined $ctt;
717    my $hash = encode_base32(sha256($part->decode()));
718    dbg("SHPlugin: (check_sh_attachment) Found file $name with hash $hash");
719    my $lookup = "$hash.$list";
720    my $key = "SH:$lookup";
721    my $ent = {
722        key => $key,
723        zone => $list,
724        type => 'SH',
725        rulename => $rulename,
726        addr => $hash,
727        hash => $hash,
728    };
729    $ent = $pms->{async}->bgsend_and_start_lookup($lookup, 'A', undef, $ent, sub {
730      my ($ent, $pkt) = @_;
731      $self->_finish_lookup($pms, $ent, $pkt, $subtest);
732    }, master_deadline => $pms->{master_deadline});
733  }
734  return 0;
735}
736
737sub _finish_lookup {
738  my ($self, $pms, $ent, $pkt,$subtest) = @_;
739  my $re;
740  return if !$pkt;
741  dbg("SHPlugin: _finish_lookup on $ent->{addr} / $ent->{rulename} / $subtest");
742  if (!($subtest)) { $re = qr/^127\./; } else { $re = qr/$subtest/; }
743  my @answer = $pkt->answer;
744  foreach my $rr (@answer) {
745    if ($rr->address =~ /$re/) {
746      if ($ent->{rulename} =~ /SH_EMAIL/) { log_syslog("info","Matched email: ".$ent->{addr}); }
747      dbg("SHPlugin: Hit on Item $ent->{addr} for $ent->{rulename}");
748      $pms->test_log($ent->{addr});
749      $pms->got_hit($ent->{rulename}, '', ruletype => 'eval');
750      return;
751    }
752  }
753}
754
755# ---------------------------------------------------------------------------
756
757sub lookup_a_record {
758	my ($self, $pms, $hname, $list, $rulename, $subtest) = @_;
759
760	my $key = "A:" . $hname;
761	my $ent = {
762		key => $key,
763		zone => $list,
764		type => "SH",
765	};
766        dbg("SHPlugin: launching lookup for $hname on $list");
767	$pms->{async}->bgsend_and_start_lookup(
768		$hname, 'A', undef, $ent,
769		sub {
770			my ($ent2,$pkt) = @_;
771			$self->continue_a_record_lookup($pms, $ent2, $pkt, $hname, $rulename, $subtest)
772			}, master_deadline => $pms->{master_deadline} );
773}
774
775sub continue_a_record_lookup
776{
777	my ($self, $pms, $ent, $pkt, $hname, $rulename, $subtest) = @_;
778
779	if (!$pkt)
780	{
781		# $pkt will be undef if the DNS query was aborted (e.g. timed out)
782		dbg("SHPlugin: continue_a_record_lookup aborted %s", $hname);
783		return;
784	}
785	dbg("SHPlugin: continue_a_record_lookup reached for %s", $hname);
786
787	my @answer = $pkt->answer;
788	foreach my $rr (@answer)
789	{
790		if ($rr->type eq 'A')
791		{
792			my $ip_address = $rr->rdatastr;
793			dbg("SHPlugin: continue_a_record_lookup found A record for URI ".$hname.": ".$ip_address);
794			my $reversed = join ".", reverse split /[.]/, $ip_address;
795			my $lookup = $reversed.".".$ent->{zone};
796			my $key = "SH:$lookup";
797			my $ent2 = {
798				key => $key,
799				zone => $ent->{zone},
800				type => 'SH',
801				addr => $ip_address,
802				rulename => $rulename,
803				};
804			$ent = $pms->{async}->bgsend_and_start_lookup($lookup, 'A', undef, $ent2, sub {
805				my ($ent3, $pkt) = @_;
806				$self->_finish_lookup($pms, $ent3, $pkt, $subtest);
807				}, master_deadline => $pms->{master_deadline});
808		}
809	}
810}
811
812sub encode_base32 {
813    my $arg = shift;
814    return '' unless defined($arg);    # mimic MIME::Base64
815
816    $arg = unpack('B*', $arg);
817    $arg =~ s/(.....)/000$1/g;
818    my $l = length($arg);
819    if ($l & 7) {
820        my $e = substr($arg, $l & ~7);
821        $arg = substr($arg, 0, $l & ~7);
822        $arg .= "000$e" . '0' x (5 - length $e);
823    }
824    $arg = pack('B*', $arg);
825    $arg =~ tr|\0-\37|A-Z2-7|;
826    return $arg;
827}
828
8291;
830
831