1'tldextract helpers for testing and fetching remote resources.'
2
3
4import logging
5import re
6import socket
7import sys
8
9import requests
10from requests_file import FileAdapter
11
12# pylint: disable=import-error,invalid-name,no-name-in-module,redefined-builtin
13if sys.version_info < (3,):  # pragma: no cover
14    from urlparse import scheme_chars
15else:  # pragma: no cover
16    from urllib.parse import scheme_chars
17    unicode = str
18# pylint: enable=import-error,invalid-name,no-name-in-module,redefined-builtin
19
20
# Dotted-quad IPv4 address: exactly four '.'-separated decimal octets, each in
# the range 0-255, anchored at both ends of the string.
IP_RE = re.compile(r'^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$')  # pylint: disable=line-too-long

# Case-insensitive 'xn--' prefix at the start of a string.
PUNY_RE = re.compile(r'^xn--', re.IGNORECASE)

# Optional 'scheme:' prefix followed by '//' at the start of a string.
# scheme_chars is escaped before being placed inside the character class so
# that every character in it is taken literally; unescaped, a '-' sitting
# between two other characters would be read as a range and let characters
# that are not scheme characters (such as ',') through.
SCHEME_RE = re.compile(r'^([' + re.escape(scheme_chars) + ']+:)?//')

# Logger shared by this module, registered under the name 'tldextract'.
LOG = logging.getLogger('tldextract')
28
29
def find_first_response(urls, cache_fetch_timeout=None):
    """ Return the body of the first successfully fetched URL, as Python
    unicode.

    The URLs are tried in order. `file://` URLs are supported in addition to
    whatever the `requests` session handles by default. A URL counts as
    failed when the request raises a `requests` exception or when the
    response carries an error status; each failure is logged and the next
    URL is tried.

    :param urls: iterable of URLs to try, in order of preference.
    :param cache_fetch_timeout: passed through as the `timeout` argument of
        each request; `None` leaves it unset.
    :returns: the text of the first successful response, or an empty unicode
        string (after logging an error) if none of the URLs could be fetched.
    """
    with requests.Session() as session:
        session.mount('file://', FileAdapter())

        for url in urls:
            try:
                response = session.get(url, timeout=cache_fetch_timeout)
                # An error status (e.g. 404) does not raise on its own. Turn
                # it into an HTTPError, which is a RequestException, so the
                # body of an error page is never returned as the suffix list.
                response.raise_for_status()
                text = response.text
            except requests.exceptions.RequestException:
                LOG.exception(
                    'Exception reading Public Suffix List url %s',
                    url
                )
            else:
                return _decode_utf8(text)

    LOG.error(
        'No Public Suffix List found. Consider using a mirror or constructing '
        'your TLDExtract with `suffix_list_urls=None`.'
    )
    return unicode('')
53
54
def _decode_utf8(text):
    """ Return `text` as a Python unicode string.

    A value that is already unicode is handed back untouched; anything else
    is decoded as UTF-8, the encoding the suffix list is expected to use
    regardless of where it was obtained.
    """
    if isinstance(text, unicode):
        return text
    return unicode(text, 'utf-8')
64
65
def looks_like_ip(maybe_ip):
    """Does the given str look like an IP address?

    :param maybe_ip: candidate string, e.g. a hostname.
    :returns: True when `socket.inet_aton` accepts the string (or, when
        `inet_aton` cannot be used on it, when it matches `IP_RE`); False
        otherwise. An empty value is never an IP address. The result is
        always a bool.
    """
    # Cheap rejection: an empty value or one not starting with a digit cannot
    # be an address. The emptiness check also guards the [0] index below.
    if not maybe_ip or not maybe_ip[0].isdigit():
        return False

    try:
        socket.inet_aton(maybe_ip)
    except (AttributeError, UnicodeError):
        # inet_aton could not be applied to this value (presumably it is
        # missing on the platform, or the text is not encodable for it), so
        # fall back to the pure-regex check.
        return IP_RE.match(maybe_ip) is not None
    except socket.error:
        # inet_aton rejected the string as an address.
        return False
    return True
79