1# Copyright 2009 Google Inc. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Utility functions related to IP Addresses & Hostnames."""
16
17# TODO(tstromberg): Investigate replacement with ipaddr library
18
19__author__ = 'tstromberg@google.com (Thomas Stromberg)'
20
21import re
22import zlib
23
24# TODO(tstromberg): Find a way to combine the following two regexps.
25
# Used to decide whether or not to benchmark a name
# Raw strings keep regex escapes such as \. and \w from being parsed as
# (invalid) string escapes; the compiled patterns are unchanged.
INTERNAL_RE = re.compile(r'^0|\.pro[md]z*\.|\.corp|\.bor|\.hot$|internal|dmz|'
                         r'\._[ut][dc]p\.|intra|\.\w$|\.\w{5,}$', re.IGNORECASE)

# Used to decide if a hostname should be censored later.
PRIVATE_RE = re.compile(r'^\w+dc\.|^\w+ds\.|^\w+sv\.|^\w+nt\.|\.corp|internal|'
                        r'intranet|\.local', re.IGNORECASE)

# Matches names that look fully qualified; every alternative requires a dot
# after the last label. Alternative pattern, kept for reference:
# ^.*[\w-]+\.[\w-]+\.[\w-]+\.[a-zA-Z]+\.$|^[\w-]+\.[\w-]{3,}\.[a-zA-Z]+\.$
FQDN_RE = re.compile(r'^.*\..*\..*\..*\.$|^.*\.[\w-]*\.\w{3,4}\.$|^[\w-]+\.[\w-]{4,}\.\w+\.')

# Matches strings made up solely of digits and dots.
IP_RE = re.compile(r'^[0-9.]+$')
38
39
def ExtractIPsFromString(ip_string):
  """Return a list of the IP addresses held in a string.

  Args:
    ip_string: str, free-form text that may contain IPv4 or IPv6 addresses.

  Returns:
    A list of address strings: every IPv6-looking match first, followed by
    every IPv4 match, each group in order of appearance.
  """
  # IPV6: If this regexp is too loose, see Regexp-IPv6 in CPAN for inspiration.
  ipv6_matches = re.findall(r'[\da-f:]+:[\da-f:]+', ip_string, re.IGNORECASE)
  # IPv4: four digit groups separated by exactly one dot each, so malformed
  # text such as '1.2.3..4' is not reported as an address.
  ipv4_matches = re.findall(r'\d+\.\d+\.\d+\.\d+', ip_string)
  return ipv6_matches + ipv4_matches
48
49
def ExtractIPTuplesFromString(ip_string):
  """Build the (ip, name) tuples the NameServer class expects from a string.

  Every address found by ExtractIPsFromString() doubles as its own name.
  """
  return [(address, address) for address in ExtractIPsFromString(ip_string)]
56
57
def IsPrivateHostname(hostname):
  """Report whether PRIVATE_RE finds an 'internal'-looking part in hostname."""
  return PRIVATE_RE.search(hostname) is not None
64
65
def IsLoopbackIP(ip):
  """Boolean check to see if an IP is a loopback address or not.

  Args:
    ip: str

  Returns:
    Boolean: True for '::1' and for anything starting with '127.'.
  """
  return ip == '::1' or ip.startswith('127.')
79
80
def IsPrivateIP(ip):
  """Check whether an IPv4 address is in the 10/8, 172.16/12 or 192.168/16 range.

  Args:
    ip: str, dotted-quad IPv4 address.

  Returns:
    Number of leading octets that should be preserved when the address is
    masked (int), or None if the address is not in a private range.
  """
  if re.match(r'10\.', ip):
    return 1
  elif re.match(r'192\.168\.', ip):
    # The trailing dot keeps strings such as '192.1680.0.1' from matching.
    return 2
  elif re.match(r'172\.(1[6-9]|2[0-9]|3[0-1])\.', ip):
    return 1
  else:
    return None
98
def MaskStringWithIPs(string):
  """Mask all private IP addresses listed in a string.

  Args:
    string: str, free-form text that may mention IP addresses.

  Returns:
    The text with every private address (per IsPrivateIP) replaced by its
    MaskIPBits() form; all other text is returned untouched.
  """
  masked_by_ip = {}
  for ip in ExtractIPsFromString(string):
    use_bits = IsPrivateIP(ip)
    if use_bits:
      masked_by_ip[ip] = MaskIPBits(ip, use_bits)
  if not masked_by_ip:
    return string

  # Substitute in a single pass and only where the address is not embedded in
  # a longer run of digits. Successive str.replace() calls would also rewrite
  # '10.0.0.1' inside '10.0.0.11' or '110.0.0.1', corrupting those addresses.
  alternatives = '|'.join(re.escape(ip) for ip in masked_by_ip)
  pattern = r'(?<!\d)(?:%s)(?!\d)' % alternatives
  return re.sub(pattern, lambda match: masked_by_ip[match.group(0)], string)
109
def MaskIPBits(ip, use_bits):
  """Mask an IP, but still keep a meaningful checksum.

  Args:
    ip: str, dot-separated address to mask.
    use_bits: int, number of leading dot-separated parts to keep in the clear
      (despite the name, this counts parts of the address, not bits).

  Returns:
    str: the preserved parts, then '.x-', then at most the last four decimal
    digits of a CRC32 computed over the hidden parts.
  """
  ip_parts = ip.split('.')
  hidden = ''.join(ip_parts[use_bits:])
  # zlib.crc32() only accepts bytes on Python 3; text has to be encoded first.
  if not isinstance(hidden, bytes):
    hidden = hidden.encode('utf-8')
  # Masking to 32 bits yields the same unsigned value on every Python
  # version and platform, so the suffix never contains a minus sign.
  checksum = zlib.crc32(hidden) & 0xffffffff
  masked_ip = '.'.join(ip_parts[:use_bits])
  return masked_ip + '.x-' + str(checksum)[-4:]
116
117
def MaskPrivateHost(ip, hostname, name):
  """Mask unnamed private IP's.

  Args:
    ip: str, IP address of the host.
    hostname: str, hostname of the host.
    name: str, display name of the host; may be empty or None.

  Returns:
    An (ip, hostname, name) tuple. When a name is given that does not embed
    the IP, only the hostname of a private IP is replaced. Otherwise private
    IPs and private hostnames are masked, and the name is rebuilt as
    'SYS-<masked ip>' if it contained 'SYS-', or set to ''.
  """

  # If we have a name not listed as SYS-x.x.x.x, then we're clear.
  if name and ip not in name:
    # Even if we are listed (Internal 192-0-1 for instance), data can leak via hostname.
    if IsPrivateIP(ip):
      hostname = 'internal.ip'
    return (ip, hostname, name)

  use_bits = IsPrivateIP(ip)
  if use_bits:
    masked_ip = MaskIPBits(ip, use_bits)
    masked_hostname = 'internal.ip'
  elif IsPrivateHostname(hostname):
    masked_ip = MaskIPBits(ip, 2)
    masked_hostname = 'internal.name'
  else:
    masked_ip = ip
    masked_hostname = hostname

  # A falsy name (empty or None) skips the early return above, so check it
  # before the substring test; "'SYS-' in None" would raise TypeError.
  if name and 'SYS-' in name:
    masked_name = 'SYS-%s' % masked_ip
  else:
    masked_name = ''
  return (masked_ip, masked_hostname, masked_name)
144
if __name__ == '__main__':
  # Parenthesised single-argument print works as a statement on Python 2 and
  # as a function call on Python 3.
  print(MaskStringWithIPs('10.0.0.1 has a sharing relationship with 192.168.0.1 and 8.8.8.8'))
147