# Copyright 2009 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Utility functions related to IP Addresses & Hostnames."""

# TODO(tstromberg): Investigate replacement with ipaddr library

__author__ = 'tstromberg@google.com (Thomas Stromberg)'

import re
import zlib

# TODO(tstromberg): Find a way to combine the following two regexps.

# Used to decide whether or not to benchmark a name
INTERNAL_RE = re.compile(r'^0|\.pro[md]z*\.|\.corp|\.bor|\.hot$|internal|dmz|'
                         r'\._[ut][dc]p\.|intra|\.\w$|\.\w{5,}$', re.IGNORECASE)

# Used to decide if a hostname should be censored later.
PRIVATE_RE = re.compile(r'^\w+dc\.|^\w+ds\.|^\w+sv\.|^\w+nt\.|\.corp|internal|'
                        r'intranet|\.local', re.IGNORECASE)

# ^.*[\w-]+\.[\w-]+\.[\w-]+\.[a-zA-Z]+\.$|^[\w-]+\.[\w-]{3,}\.[a-zA-Z]+\.$
FQDN_RE = re.compile(r'^.*\..*\..*\..*\.$|^.*\.[\w-]*\.\w{3,4}\.$|^[\w-]+\.[\w-]{4,}\.\w+\.')

IP_RE = re.compile(r'^[0-9.]+$')

# Patterns used to pull addresses out of free-form text.
# IPV6: If this regexp is too loose, see Regexp-IPv6 in CPAN for inspiration.
_IPV6_RE = re.compile(r'[\dabcdef:]+:[\dabcdef:]+', re.IGNORECASE)
_IPV4_RE = re.compile(r'\d+\.\d+\.\d+\.+\d+')

# (pattern, number of leading octets to keep) for each private IPv4 range,
# checked in this order by IsPrivateIP.
_PRIVATE_RANGES = (
    (re.compile(r'^10\.'), 1),
    (re.compile(r'^192\.168'), 2),
    (re.compile(r'^172\.(1[6-9]|2[0-9]|3[0-1])\.'), 1),
)


def ExtractIPsFromString(ip_string):
  """Return a list of IP addresses held in a string.

  Args:
    ip_string: str, free-form text that may contain IP addresses.

  Returns:
    A list of str: every IPv6-looking match first, followed by every
    IPv4-looking match, each group in order of appearance.
  """
  ips = _IPV6_RE.findall(ip_string)
  ips.extend(_IPV4_RE.findall(ip_string))
  return ips


def ExtractIPTuplesFromString(ip_string):
  """Return a list of (ip, name) tuples for use by NameServer class.

  Args:
    ip_string: str, free-form text that may contain IP addresses.

  Returns:
    A list of (ip, ip) tuples; the IP doubles as the name.
  """
  return [(ip, ip) for ip in ExtractIPsFromString(ip_string)]


def IsPrivateHostname(hostname):
  """Basic matching to determine if the hostname is likely to be 'internal'.

  Args:
    hostname: str

  Returns:
    True if PRIVATE_RE matches anywhere in the hostname, otherwise False.
  """
  return bool(PRIVATE_RE.search(hostname))


def IsLoopbackIP(ip):
  """Boolean check to see if an IP is a loopback address or not.

  Args:
    ip: str

  Returns:
    True for anything starting with '127.' and for exactly '::1'.
  """
  return ip.startswith('127.') or ip == '::1'


def IsPrivateIP(ip):
  """Check whether an IP is in a private IPv4 range.

  Args:
    ip: str

  Returns:
    Number of leading octets that should be preserved when masking
    (1 for 10.* and 172.16-31.*, 2 for 192.168*), or None if the address
    is not private.
  """
  for pattern, keep_octets in _PRIVATE_RANGES:
    if pattern.match(ip):
      return keep_octets
  return None


def MaskStringWithIPs(string):
  """Mask all private IP addresses listed in a string.

  Args:
    string: str, free-form text.

  Returns:
    The text with every private IPv4 address replaced by its masked form.
    Other addresses and all remaining text are left untouched.
  """

  def _MaskMatch(match):
    ip = match.group(0)
    use_bits = IsPrivateIP(ip)
    if use_bits:
      return MaskIPBits(ip, use_bits)
    return ip

  # Substitute match-by-match rather than with str.replace(): a blanket
  # replace of '10.0.0.1' would also rewrite part of '210.0.0.1' or
  # '10.0.0.10'. Only IPv4 matches are visited because IsPrivateIP() can
  # never accept an IPv6 match (its patterns all require a '.').
  return _IPV4_RE.sub(_MaskMatch, string)


def MaskIPBits(ip, use_bits):
  """Mask an IP, but still keep a meaningful checksum.

  Args:
    ip: str, dotted IPv4 address.
    use_bits: int, number of leading octets to keep in the clear.

  Returns:
    The kept octets followed by '.x-' and the last four characters of the
    decimal CRC32 of the hidden octets, e.g. '192.168.x-1234'.
  """
  # NOTE(review): an address without '.' (e.g. IPv6) has nothing to hide
  # after the split and comes back in full with '.x-0' appended - confirm
  # whether IPv6 needs real masking.
  ip_parts = ip.split('.')
  hidden = ''.join(ip_parts[use_bits:])
  # crc32() requires bytes on Python 3; encoding is a no-op for the ASCII
  # text handled here on Python 2.
  checksum = zlib.crc32(hidden.encode('utf-8'))
  masked_ip = '.'.join(ip_parts[0:use_bits])
  return masked_ip + '.x-' + str(checksum)[-4:]


def MaskPrivateHost(ip, hostname, name):
  """Mask unnamed private IP's.

  Args:
    ip: str, IP address of the host.
    hostname: str, hostname of the host.
    name: str (may be empty or None), display name of the host.

  Returns:
    A tuple of (ip, hostname, name) with private details masked.
  """

  # If we have a name not listed as SYS-x.x.x.x, then we're clear.
  if name and ip not in name:
    # Even if we are listed (Internal 192-0-1 for instance), data can leak
    # via hostname.
    if IsPrivateIP(ip):
      hostname = 'internal.ip'
    return (ip, hostname, name)

  use_bits = IsPrivateIP(ip)
  if use_bits:
    masked_ip = MaskIPBits(ip, use_bits)
    masked_hostname = 'internal.ip'
  elif IsPrivateHostname(hostname):
    masked_ip = MaskIPBits(ip, 2)
    masked_hostname = 'internal.name'
  else:
    masked_ip = ip
    masked_hostname = hostname

  # name may be None here (the first check only requires it to be falsy),
  # so guard before the substring test.
  if name and 'SYS-' in name:
    masked_name = 'SYS-%s' % masked_ip
  else:
    masked_name = ''
  return (masked_ip, masked_hostname, masked_name)


if __name__ == '__main__':
  # Single-argument print() behaves the same on Python 2 and Python 3.
  print(MaskStringWithIPs('10.0.0.1 has a sharing relationship with 192.168.0.1 and 8.8.8.8'))