"""tldextract helpers for testing and fetching remote resources."""


import logging
import re
import socket
import sys

import requests
from requests_file import FileAdapter

# pylint: disable=import-error,invalid-name,no-name-in-module,redefined-builtin
if sys.version_info < (3,):  # pragma: no cover
    from urlparse import scheme_chars
else:  # pragma: no cover
    from urllib.parse import scheme_chars
    unicode = str
# pylint: enable=import-error,invalid-name,no-name-in-module,redefined-builtin


# Strict dotted-quad form: four decimal octets (0-255, no leading zeros)
# separated by dots.
IP_RE = re.compile(r'^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$')  # pylint: disable=line-too-long

# Matches a leading ``xn--``, in any letter case.
PUNY_RE = re.compile(r'^xn--', re.IGNORECASE)

# Matches an optional ``<scheme chars>:`` followed by ``//`` at the start of
# a string.
SCHEME_RE = re.compile(r'^([' + scheme_chars + ']+:)?//')

LOG = logging.getLogger('tldextract')


def find_first_response(urls, cache_fetch_timeout=None):
    """ Decode the first successfully fetched URL, from UTF-8 encoding to
    Python unicode.

    Each URL in ``urls`` is tried in order, using one shared session that
    also has a ``file://`` adapter mounted. ``cache_fetch_timeout`` is passed
    through as the ``timeout`` of every request.

    A URL whose request raises ``requests.exceptions.RequestException`` is
    logged and skipped. If no URL can be fetched, an error is logged and an
    empty unicode string is returned.

    NOTE: the HTTP status is not inspected here; any request that does not
    raise counts as a success.
    """
    with requests.Session() as session:
        session.mount('file://', FileAdapter())

        for url in urls:
            try:
                text = session.get(url, timeout=cache_fetch_timeout).text
            except requests.exceptions.RequestException:
                LOG.exception(
                    'Exception reading Public Suffix List url %s',
                    url
                )
            else:
                return _decode_utf8(text)

    LOG.error(
        'No Public Suffix List found. Consider using a mirror or constructing '
        'your TLDExtract with `suffix_list_urls=None`.'
    )
    return unicode('')


def _decode_utf8(text):
    """ Decode from utf8 to Python unicode string.

    The suffix list, wherever its origin, should be UTF-8 encoded. Input
    that is already a unicode string is returned unchanged.
    """
    if isinstance(text, unicode):
        return text
    return unicode(text, 'utf-8')


def looks_like_ip(maybe_ip):
    """Does the given str look like an IP address?

    Always returns a real ``bool``. Empty input and input that does not
    start with a digit are rejected without further parsing.

    NOTE: ``socket.inet_aton`` is the primary check, and per the Python
    documentation it also accepts strings with fewer than three dots, so
    this is more lenient than ``IP_RE``. The regex is consulted only when
    ``inet_aton`` cannot be used for this input.
    """
    # Cheap rejection; the emptiness test also keeps the index below from
    # raising IndexError on ''.
    if not maybe_ip or not maybe_ip[0].isdigit():
        return False

    try:
        socket.inet_aton(maybe_ip)
    except (AttributeError, UnicodeError):
        # inet_aton is missing or could not handle this string (presumably
        # a platform or encoding limitation): fall back to the strict
        # dotted-quad regex, and answer False rather than None on a miss.
        return IP_RE.match(maybe_ip) is not None
    except socket.error:
        # inet_aton rejected the string as an address.
        return False
    return True