1# Mail::SpamAssassin::NetSet - object to manipulate CIDR net IP addrs
2# <@LICENSE>
3# Licensed to the Apache Software Foundation (ASF) under one or more
4# contributor license agreements.  See the NOTICE file distributed with
5# this work for additional information regarding copyright ownership.
6# The ASF licenses this file to you under the Apache License, Version 2.0
7# (the "License"); you may not use this file except in compliance with
8# the License.  You may obtain a copy of the License at:
9#
10#     http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17# </@LICENSE>
18
19package Mail::SpamAssassin::NetSet;
20
21use strict;
22use warnings;
23# use bytes;
24use re 'taint';
25use Time::HiRes qw(time);
26use NetAddr::IP 4.000;
27
28use Mail::SpamAssassin::Util;
29use Mail::SpamAssassin::Logger;
30
31our $have_patricia;
32BEGIN {
33  eval {
34    require Net::Patricia;
35    Net::Patricia->VERSION(1.16);  # need AF_INET6 support
36    Net::Patricia->import;
37    $have_patricia = 1;
38  };
39}
40
41###########################################################################
42
43sub new {
44  my ($class,$netset_name) = @_;
45  $class = ref($class) || $class;
46
47  $netset_name = ''  if !defined $netset_name;  # object name for debugging
48  my $self = {
49    name => $netset_name, num_nets => 0,
50    cache_hits => 0, cache_attempts => 0,
51  };
52  $self->{pt} = Net::Patricia->new(&AF_INET6)  if $have_patricia;
53
54  bless $self, $class;
55  $self;
56}
57
58###########################################################################
59
60sub DESTROY {
61  my($self) = shift;
62
63  if (exists $self->{cache}) {
64    local($@, $!, $_);  # protect outer layers from a potential surprise
65    my($hits, $attempts) = ($self->{cache_hits}, $self->{cache_attempts});
66    dbg("netset: cache %s hits/attempts: %d/%d, %.1f %%",
67        $self->{name}, $hits, $attempts, 100*$hits/$attempts) if $attempts > 0;
68  }
69}
70
71###########################################################################
72
73sub add_cidr {
74  my ($self, @nets) = @_;
75
76  $self->{nets} ||= [ ];
77  my $numadded = 0;
78  delete $self->{cache};  # invalidate cache (in case of late additions)
79
80  foreach my $cidr_orig (@nets) {
81    my $cidr = $cidr_orig;  # leave original unchanged, useful for logging
82
83    # recognizes syntax:
84    #   [IPaddr%scope]/len or IPaddr%scope/len or IPv4addr/mask
85    # optionally prefixed by a '!' to indicate negation (exclusion);
86    # the %scope (i.e. interface), /len or /mask are optional
87
88    local($1,$2,$3,$4);
89    $cidr =~ s/^\s+//;
90    my $exclude = ($cidr =~ s/^!\s*//) ? 1 : 0;
91
92    my $masklen;  # netmask or a prefix length
93    $masklen = $1  if $cidr =~ s{ / (.*) \z }{}xs;
94
95    # discard optional brackets
96    $cidr = $1  if $cidr =~ /^ \[ ( [^\]]* ) \] \z/xs;
97
98    my $scope;
99    # IPv6 Scoped Address (RFC 4007, RFC 6874, RFC 3986 "unreserved" charset)
100    if ($cidr =~ s/ % ( [A-Z0-9._~-]* ) \z //xsi) {  # scope <zone_id> ?
101      $scope = $1;  # interface specification
102      # discard interface specification, currently just ignored
103      info("netset: ignoring interface scope '%%%s' in IP address %s",
104           $scope, $cidr_orig);
105    }
106
107    my $is_ip4 = 0;
108    if ($cidr =~ /^ \d+ (\. | \z) /x) {  # looks like an IPv4 address
109      if ($cidr =~ /^ (\d+) \. (\d+) \. (\d+) \. (\d+) \z/x) {
110        # also strips leading zeroes, not liked by inet_pton
111        $cidr = sprintf('%d.%d.%d.%d', $1,$2,$3,$4);
112        $masklen = 32  if !defined $masklen;
113      } elsif ($cidr =~ /^ (\d+) \. (\d+) \. (\d+) \.? \z/x) {
114        $cidr = sprintf('%d.%d.%d.0', $1,$2,$3);
115        $masklen = 24  if !defined $masklen;
116      } elsif ($cidr =~ /^ (\d+) \. (\d+) \.? \z/x) {
117        $cidr = sprintf('%d.%d.0.0', $1,$2);
118        $masklen = 16  if !defined $masklen;
119      } elsif ($cidr =~ /^ (\d+) \.? \z/x) {
120        $cidr = sprintf('%d.0.0.0', $1);
121        $masklen = 8  if !defined $masklen;
122      } else {
123        warn "netset: illegal IPv4 address given: '$cidr_orig'\n";
124        next;
125      }
126      $is_ip4 = 1;
127    }
128
129    if ($self->{pt}) {
130      if (defined $masklen) {
131        $masklen =~ /^\d{1,3}\z/
132          or die "Network mask not supported, use a CIDR syntax: '$cidr_orig'";
133      }
134      my $key = $cidr;
135      my $prefix_len = $masklen;
136      if ($is_ip4) {
137        $key = '::ffff:' . $key;  # turn it into an IPv4-mapped IPv6 addresses
138        $prefix_len += 96  if defined $prefix_len;
139      }
140      $prefix_len = 128  if !defined $prefix_len;
141      $key .= '/' . $prefix_len;
142    # dbg("netset: add_cidr (patricia trie) %s => %s",
143    #     $cidr_orig, $exclude ? '!'.$key : $key);
144      defined eval {
145        $self->{pt}->add_string($key, $exclude ? '!'.$key : $key)
146      } or warn "netset: illegal IP address given (patricia trie): ".
147                "'$key': $@\n";
148    }
149
150    $cidr .= '/' . $masklen  if defined $masklen;
151
152    my $ip = NetAddr::IP->new($cidr);
153    if (!defined $ip) {
154      warn "netset: illegal IP address given: '$cidr_orig'\n";
155      next;
156    }
157  # dbg("netset: add_cidr %s => %s => %s", $cidr_orig, $cidr, $ip);
158
159    # if this is an IPv4 address, create an IPv6 representation, too
160    my ($ip4, $ip6);
161    if ($is_ip4) {
162      $ip4 = $ip;
163      $ip6 = $self->_convert_ipv4_cidr_to_ipv6($cidr);
164    } else {
165      $ip6 = $ip;
166    }
167
168    # bug 5931: this is O(n^2).  bad if there are lots of nets. There are  good
169    # reasons to keep it for linting purposes, though, so don't start skipping
170    # it until we have over 200 nets in our list
171    if (scalar @{$self->{nets}} < 200) {
172      next if ($self->is_net_declared($ip4, $ip6, $exclude, 0));
173    }
174
175    # note: it appears a NetAddr::IP object takes up about 279 bytes
176    push @{$self->{nets}}, {
177      exclude => $exclude,
178      ip4     => $ip4,
179      ip6     => $ip6,
180      as_string => $cidr_orig,
181    };
182    $numadded++;
183  }
184
185  $self->{num_nets} += $numadded;
186  $numadded;
187}
188
189sub get_num_nets {
190  my ($self) = @_;
191  return $self->{num_nets};
192}
193
194sub _convert_ipv4_cidr_to_ipv6 {
195  my ($self, $cidr) = @_;
196
197  # only do this for IPv4 addresses
198  return unless $cidr =~ /^\d+[.\/]/;
199
200  if ($cidr !~ /\//) {      # no mask
201    return NetAddr::IP->new6("::ffff:".$cidr);
202  }
203
204  # else we have a CIDR mask specified. use new6() to do this
205  #
206  my $ip6 = NetAddr::IP->new6($cidr)->cidr;
207  # 127.0.0.1 -> 0:0:0:0:0:0:7F00:0001/128
208  # 127/8 -> 0:0:0:0:0:0:7F00:0/104
209
210  # now, move that from 0:0:0:0:0:0: space to 0:0:0:0:0:ffff: space
211  if (!defined $ip6 || $ip6 !~ /^0:0:0:0:0:0:(.*)$/) {
212    warn "oops! unparseable IPv6 address for $cidr: $ip6";
213    return;
214  }
215
216  return NetAddr::IP->new6("::ffff:$1");
217}
218
219sub _nets_contains_network {
220  my ($self, $net4, $net6, $exclude, $quiet, $netname, $declared) = @_;
221
222  return 0 unless (defined $self->{nets});
223
224  foreach my $net (@{$self->{nets}}) {
225    # check to see if the new network is contained by the old network
226    my $in4 = defined $net4 && defined $net->{ip4} && $net->{ip4}->contains($net4);
227    my $in6 = defined $net6 && defined $net->{ip6} && $net->{ip6}->contains($net6);
228    if ($in4 || $in6) {
229      warn sprintf("netset: cannot %s %s as it has already been %s\n",
230                   $exclude ? "exclude" : "include",
231                   $netname,
232                   $net->{exclude} ? "excluded" : "included") unless $quiet;
233      # a network that matches an excluded network isn't contained by "nets"
234      # return 0 if we're not just looking to see if the network was declared
235      return 0 if (!$declared && $net->{exclude});
236      return 1;
237    }
238  }
239  return 0;
240}
241
242sub is_net_declared {
243  my ($self, $net4, $net6, $exclude, $quiet) = @_;
244  return $self->_nets_contains_network($net4, $net6, $exclude,
245                $quiet, $net4 || $net6, 1);
246}
247
248sub contains_ip {
249  my ($self, $ip) = @_;
250  my $result = 0;
251
252  if (!$self->{num_nets}) { return 0 }
253
254  $self->{cache_attempts}++;
255  if ($self->{cache} && exists $self->{cache}{$ip}) {
256    dbg("netset: %s cached lookup on %s, %d networks, result: %s",
257        $self->{name}, $ip, $self->{num_nets}, $self->{cache}{$ip});
258    $self->{cache_hits}++;
259    return $self->{cache}{$ip};
260
261  } elsif ($self->{pt}) {
262    # do a quick lookup on a Patricia Trie
263    my $t0 = time;
264    local($1,$2,$3,$4); local $_ = $ip;
265    $_ = $1  if /^ \[ ( [^\]]* ) \] \z/xs;  # discard optional brackets
266    s/%[A-Z0-9:._-]+\z//si;  # discard interface specification
267    if (m{^ (\d+) \. (\d+) \. (\d+) \. (\d+) \z}x) {
268      $_ = sprintf('::ffff:%d.%d.%d.%d', $1,$2,$3,$4);
269    } else {
270      s/^IPv6://si;  # discard optional 'IPv6:' prefix
271    }
272    eval { $result = $self->{pt}->match_string($_); 1 }  or undef $result;
273    $result = defined $result && $result !~ /^!/ ? 1 : 0;
274    dbg("netset: %s patricia lookup on %s, %d networks, result: %s, %.3f ms",
275         $self->{name}, $ip, $self->{num_nets}, $result, 1000*(time - $t0));
276  } else {
277    # do a sequential search on a list of NetAddr::IP objects
278    my $t0 = time;
279    my ($ip4, $ip6);
280    if ($ip =~ /^\d+\./) {
281      $ip4 = NetAddr::IP->new($ip);
282      $ip6 = $self->_convert_ipv4_cidr_to_ipv6($ip);
283    } else {
284      $ip6 = NetAddr::IP->new($ip);
285    }
286    foreach my $net (@{$self->{nets}}) {
287      if ((defined $ip4 && defined $net->{ip4} && $net->{ip4}->contains($ip4))
288       || (defined $ip6 && defined $net->{ip6} && $net->{ip6}->contains($ip6))){
289        $result = !$net->{exclude};
290        last;
291      }
292    }
293    dbg("netset: %s lookup on %s, %d networks, result: %s, %.3f ms",
294         $self->{name}, $ip, $self->{num_nets}, $result, 1000*(time - $t0));
295  }
296
297  $self->{cache}{$ip} = $result;
298  return $result;
299}
300
301sub contains_net {
302  my ($self, $net) = @_;
303  my $exclude = $net->{exclude};
304  my $net4 = $net->{ip4};
305  my $net6 = $net->{ip6};
306  return $self->_nets_contains_network($net4, $net6, $exclude, 1, "", 0);
307}
308
309sub ditch_cache {
310  my ($self) = @_;
311  if (exists $self->{cache}) {
312    dbg("netset: ditch cache on %s", $self->{name});
313    delete $self->{cache};
314  }
315}
316
317sub clone {
318  my ($self) = @_;
319  my $dup = Mail::SpamAssassin::NetSet->new($self->{name});
320  if ($self->{nets}) {
321    @{$dup->{nets}} = @{$self->{nets}};
322  }
323  if ($self->{pt}) {
324    my $dup_pt = $dup->{pt};
325    $self->{pt}->climb(sub {
326      my $key = $_[0]; $key =~ s/^!//;
327      defined eval { $dup_pt->add_string($key, $_[0]) }
328        or die "Adding a network $_[0] to a patricia trie failed: $@";
329      1;
330    });
331  }
332  $dup->{num_nets} = $self->{num_nets};
333  return $dup;
334}
335
336###########################################################################
337
3381;
339