1# Copyright (c) 2018  gevent contributors. See LICENSE for details.
2
3# Portions of this code taken from the gogreen project:
4#   http://github.com/slideinc/gogreen
5#
6# Copyright (c) 2005-2010 Slide, Inc.
7# All rights reserved.
8#
9# Redistribution and use in source and binary forms, with or without
10# modification, are permitted provided that the following conditions are
11# met:
12#
13#     * Redistributions of source code must retain the above copyright
14#       notice, this list of conditions and the following disclaimer.
15#     * Redistributions in binary form must reproduce the above
16#       copyright notice, this list of conditions and the following
17#       disclaimer in the documentation and/or other materials provided
18#       with the distribution.
19#     * Neither the name of the author nor the names of other
20#       contributors may be used to endorse or promote products derived
21#       from this software without specific prior written permission.
22#
23# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34
35# Portions of this code taken from the eventlet project:
36# https://github.com/eventlet/eventlet/blob/master/eventlet/support/greendns.py
37
38# Unless otherwise noted, the files in Eventlet are under the following MIT license:
39
40# Copyright (c) 2005-2006, Bob Ippolito
41# Copyright (c) 2007-2010, Linden Research, Inc.
42# Copyright (c) 2008-2010, Eventlet Contributors (see AUTHORS)
43
44# Permission is hereby granted, free of charge, to any person obtaining a copy
45# of this software and associated documentation files (the "Software"), to deal
46# in the Software without restriction, including without limitation the rights
47# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
48# copies of the Software, and to permit persons to whom the Software is
49# furnished to do so, subject to the following conditions:
50
51# The above copyright notice and this permission notice shall be included in
52# all copies or substantial portions of the Software.
53
54# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
55# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
56# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
57# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
58# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
59# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
60# THE SOFTWARE.
61from __future__ import absolute_import, print_function, division
62
63import sys
64import time
65
66from _socket import error
67from _socket import gaierror
68from _socket import herror
69from _socket import NI_NUMERICSERV
70from _socket import AF_INET
71from _socket import AF_INET6
72from _socket import AF_UNSPEC
73from _socket import EAI_NONAME
74from _socket import EAI_FAMILY
75
76
77import socket
78
79from gevent.resolver import AbstractResolver
80from gevent.resolver._hostsfile import HostsFile
81
82from gevent.builtins import __import__ as g_import
83
84from gevent._compat import string_types
85from gevent._compat import iteritems
86from gevent._config import config
87
88
# The only name exported by ``from <this module> import *`` is the
# ``Resolver`` class; everything else here is an implementation detail.
__all__ = [
    'Resolver',
]
92
93# Import the DNS packages to use the gevent modules,
94# even if the system is not monkey-patched. If it *is* already
95# patched, this imports a second copy under a different name,
96# which is probably not strictly necessary, but matches
97# what we've historically done, and allows configuring the resolvers
98# differently.
99
def _patch_dns():
    """
    Import a private copy of the ``dns`` package that uses the gevent modules.

    The import goes through ``gevent._patcher.import_patched``. While that
    import is still in effect, every known (rdclass, rdtype) combination
    is passed through ``dns.rdata.get_rdata_class`` so that later lookups
    never need a dynamic import; dynamic imports from ``dns.rdata`` are
    then disabled.

    Returns the top-level (patched) ``dns`` module.
    """
    from gevent._patcher import import_patched as importer

    # The dns package itself is empty but defines __all__;
    # we make sure to import all of those things now under the
    # patch. Note this triggers two DeprecationWarnings,
    # one of which we could avoid.
    extras = {
        'dns': ('rdata', 'resolver', 'rdtypes'),
        'dns.rdtypes': ('IN', 'ANY', ),
        'dns.rdtypes.IN': ('A', 'AAAA',),
        'dns.rdtypes.ANY': ('SOA', 'PTR'),
    }

    def extra_all(mod_name):
        # Extra names to import for *mod_name*; nothing for unknown modules.
        try:
            return extras[mod_name]
        except KeyError:
            return ()

    def _known_values(module, enum_name):
        # Prefer the enumeration named *enum_name*; when the module does
        # not have it (dnspython < 2.0) fall back to the ``_by_value``
        # mapping.
        try:
            return list(getattr(module, enum_name))
        except AttributeError:
            return module._by_value

    def after_import_hook(dns): # pylint:disable=redefined-outer-name
        # Runs while still in the original patching scope.
        # The dns.rdata:get_rdata_class() function tries to
        # dynamically import modules using __import__ and then walk
        # through the attribute tree to find classes in `dns.rdtypes`.
        # It is critical that this all matches up, otherwise we can
        # get different exception classes that don't get caught.
        # We could patch __import__ to do things at runtime, but it's
        # easier to enumerate the world and populate the cache now
        # before we then switch the names back.
        populate = dns.rdata.get_rdata_class
        all_classes = _known_values(dns.rdataclass, 'RdataClass')
        all_types = _known_values(dns.rdatatype, 'RdataType')

        for rdclass in all_classes:
            for rdtype in all_types:
                populate(rdclass, rdtype)

    top = importer('dns', extra_all, after_import_hook).module

    # Now disable the dynamic imports
    def _no_dynamic_imports(name):
        raise ValueError(name)

    top.rdata.__import__ = _no_dynamic_imports

    return top
154
# Our private, patched copy of the top-level ``dns`` package. Every
# reference to ``dns`` below means this copy.
dns = _patch_dns()

# Shortcuts into the patched copy: the resolver module and its Timeout
# exception class.
resolver = dns.resolver
dTimeout = dns.resolver.Timeout
159
160# This is a wrapper for dns.resolver._getaddrinfo with two crucial changes.
161# First, it backports https://github.com/rthalley/dnspython/issues/316
162# from version 2.0. This can be dropped when we support only dnspython 2
163# (which means only Python 3.)
164
165# Second, it adds calls to sys.exc_clear() to avoid failing tests in
166# test__refcount.py (timeouts) on Python 2. (Actually, this isn't
167# strictly necessary, it was necessary to increase the timeouts in
168# that function because dnspython is doing some parsing/regex/host
169# lookups that are not super fast. But it does have a habit of leaving
170# exceptions around which can complicate our memleak checks.)
def _getaddrinfo(host=None, service=None, family=AF_UNSPEC, socktype=0,
                 proto=0, flags=0,
                 _orig_gai=resolver._getaddrinfo,
                 _exc_clear=getattr(sys, 'exc_clear', lambda: None)):
    """
    Wrap dnspython's ``_getaddrinfo``.

    Raises ``socket.gaierror(EAI_SYSTEM)`` if *flags* contains
    ``AI_ADDRCONFIG`` or ``AI_V4MAPPED``. Otherwise delegates to the
    original function (captured in *_orig_gai* when this function was
    defined), calls *_exc_clear* (``sys.exc_clear`` where it exists,
    otherwise a no-op) and returns the result.
    """
    unsupported_flags = socket.AI_ADDRCONFIG | socket.AI_V4MAPPED
    if flags & unsupported_flags:
        # Not implemented.  We raise a gaierror as opposed to a
        # NotImplementedError as it helps callers handle errors more
        # appropriately.  [Issue #316]
        raise socket.gaierror(socket.EAI_SYSTEM)

    result = _orig_gai(host, service, family, socktype, proto, flags)
    # Only reached on success: drop any exception state left behind.
    _exc_clear()
    return result
183
184
# Install the wrapper above into the patched resolver module in place of
# the function it wraps.
resolver._getaddrinfo = _getaddrinfo

# Default for ``_HostsResolver``'s ``interval``: the number of seconds
# (as measured by ``time.time()``) that must pass before the hosts file
# is loaded again.
HOSTS_TTL = 300.0
188
189
class _HostsAnswer(dns.resolver.Answer):
    """
    Answer class for ``_HostsResolver`` objects.

    The attributes are filled in directly from the constructor arguments;
    the base class initializer is not invoked and ``response`` is always
    ``None``.
    """

    def __init__(self, qname, rdtype, rdclass, rrset, raise_on_no_answer=True):
        """
        Raise ``dns.resolver.NoAnswer`` if *rrset* is false and
        *raise_on_no_answer* is true; otherwise store *rrset* and compute
        ``expiration``.
        """
        self.response = None
        self.qname = qname
        self.rdtype = rdtype
        self.rdclass = rdclass
        self.canonical_name = qname

        no_answer = not rrset
        if no_answer and raise_on_no_answer:
            raise dns.resolver.NoAnswer()

        self.rrset = rrset
        # Absolute expiration time when the rrset carries a TTL,
        # otherwise 0.
        if hasattr(rrset, 'ttl'):
            expires_at = time.time() + rrset.ttl
        else:
            expires_at = 0
        self.expiration = expires_at
204
205
class _HostsResolver(object):
    """
    Class to answer queries from the parsed hosts file.

    The file is loaded lazily: :meth:`query` (re)loads it when more than
    ``interval`` seconds have elapsed since the previous load.
    """

    def __init__(self, fname=None, interval=HOSTS_TTL):
        """
        :param fname: Passed unchanged to ``HostsFile``.
        :param interval: Minimum number of seconds between two loads of
            the hosts file.
        """
        self.hosts_file = HostsFile(fname)
        self.interval = interval
        # Timestamp of the last load; 0 forces a load on the first query.
        self._last_load = 0

    def query(self, qname, rdtype=dns.rdatatype.A, rdclass=dns.rdataclass.IN,
              tcp=False, source=None, raise_on_no_answer=True): # pylint:disable=unused-argument
        """
        Query the hosts file.

        The known rdtypes are ``dns.rdatatype.A``, ``dns.rdatatype.AAAA``,
        ``dns.rdatatype.CNAME`` and ``dns.rdatatype.PTR``. Any other
        rdtype has no data in the hosts file and is treated as an empty
        answer.

        The ``rdclass`` parameter is always treated as
        ``dns.rdataclass.IN`` while the ``tcp`` and ``source`` parameters
        are ignored.

        :return: A ``_HostsAnswer`` instance. Its rrset is empty when
            nothing matched and *raise_on_no_answer* is false.
        :raises dns.resolver.NoAnswer: If nothing matched and
            *raise_on_no_answer* is true.
        """
        now = time.time()
        hosts_file = self.hosts_file
        if self._last_load + self.interval < now:
            self._last_load = now
            hosts_file.load()

        rdclass = dns.rdataclass.IN # Always
        if isinstance(qname, string_types):
            name = qname
            qname = dns.name.from_text(qname)
        else:
            name = str(qname)

        name = name.lower()
        rrset = dns.rrset.RRset(qname, rdclass, rdtype)
        # Time remaining until the hosts file is due to be loaded again.
        rrset.ttl = self._last_load + self.interval - now

        # ``mapping`` is the table to search; ``kind`` builds the rdata
        # from (rdclass, rdtype, value-found-in-the-table). Both stay None
        # for rdtypes the hosts file cannot answer; previously that case
        # crashed with UnboundLocalError.
        mapping = None
        kind = None
        if rdtype == dns.rdatatype.A:
            mapping = hosts_file.v4
            kind = dns.rdtypes.IN.A.A
        elif rdtype == dns.rdatatype.AAAA:
            mapping = hosts_file.v6
            kind = dns.rdtypes.IN.AAAA.AAAA
        elif rdtype == dns.rdatatype.CNAME:
            mapping = hosts_file.aliases
            kind = lambda c, t, addr: dns.rdtypes.ANY.CNAME.CNAME(c, t, dns.name.from_text(addr))
        elif rdtype == dns.rdatatype.PTR:
            mapping = hosts_file.reverse
            kind = lambda c, t, addr: dns.rdtypes.ANY.PTR.PTR(c, t, dns.name.from_text(addr))

        if mapping is not None:
            addr = mapping.get(name)
            if not addr and qname.is_absolute():
                # Retry without the final character of an absolute name
                # (the trailing root dot).
                addr = mapping.get(name[:-1])
            if addr:
                rrset.add(kind(rdclass, rdtype, addr))
        return _HostsAnswer(qname, rdtype, rdclass, rrset, raise_on_no_answer)

    def getaliases(self, hostname):
        """
        Return a list of all the names related to *hostname* through the
        hosts file's alias table: the canonical name plus every alias
        that maps to it, minus one occurrence of *hostname* itself.
        """
        # Due to the way we store aliases this is a bit inefficient; this
        # clearly was an afterthought.  But this is only used by
        # gethostbyname_ex so it's probably fine.
        aliases = self.hosts_file.aliases
        # If *hostname* is itself an alias, start from what it points to.
        cannon = aliases.get(hostname, hostname)
        result = [cannon]
        result.extend(alias for alias, cname in iteritems(aliases)
                      if cannon == cname)
        result.remove(hostname)
        return result
283
class _DualResolver(object):
    """
    Resolver that consults the hosts file first and the network second.
    """

    def __init__(self):
        self.hosts_resolver = _HostsResolver()
        # The patched module's default resolver, given a fresh LRU cache.
        network = resolver.get_default_resolver()
        network.cache = resolver.LRUCache()
        self.network_resolver = network

    def query(self, qname, rdtype=dns.rdatatype.A, rdclass=dns.rdataclass.IN,
              tcp=False, source=None, raise_on_no_answer=True,
              _hosts_rdtypes=(dns.rdatatype.A, dns.rdatatype.AAAA, dns.rdatatype.PTR)):
        """
        Query the resolver, using the hosts file.

        Behavior:

        1. If the query is class IN and its type is one of
           *_hosts_rdtypes*, ask the hosts resolver; a non-empty answer
           is returned immediately. An exception from the hosts resolver
           is reported to the hub and otherwise ignored.
        2. Query the network resolver for *qname*.
        """
        # Normalize the name: None means 0.0.0.0, bytes are IDNA-decoded,
        # and any text is turned into a dns name object.
        if qname is None:
            qname = '0.0.0.0'
        elif isinstance(qname, bytes) and not isinstance(qname, string_types):
            qname = qname.decode("idna")

        if isinstance(qname, string_types):
            qname = dns.name.from_text(qname, None)

        if isinstance(rdtype, string_types):
            rdtype = dns.rdatatype.from_text(rdtype)

        consult_hosts = rdclass == dns.rdataclass.IN and rdtype in _hosts_rdtypes
        if consult_hosts:
            try:
                local_answer = self.hosts_resolver.query(qname, rdtype, raise_on_no_answer=False)
            except Exception: # pylint: disable=broad-except
                from gevent import get_hub
                get_hub().handle_error(self, *sys.exc_info())
            else:
                if local_answer.rrset:
                    return local_answer

        return self.network_resolver.query(qname, rdtype, rdclass,
                                           tcp, source, raise_on_no_answer=raise_on_no_answer)
324
def _family_to_rdtype(family):
    """
    Translate a socket address family into the matching DNS record type.

    ``AF_INET`` maps to ``A`` and ``AF_INET6`` maps to ``AAAA``.

    :raises socket.gaierror: With ``EAI_FAMILY`` for any other family.
    """
    if family == socket.AF_INET:
        return dns.rdatatype.A
    if family == socket.AF_INET6:
        return dns.rdatatype.AAAA
    raise socket.gaierror(socket.EAI_FAMILY,
                          'Address family not supported')
334
335
class Resolver(AbstractResolver):
    """
    An *experimental* resolver that uses `dnspython`_.

    This is typically slower than the default threaded resolver
    (unless there's a cache hit, in which case it can be much faster).
    It is usually much faster than the c-ares resolver. It tends to
    scale well as more concurrent resolutions are attempted.

    Under Python 2, if the ``idna`` package is installed, this
    resolver can resolve Unicode host names that the system resolver
    cannot.

    .. note::

        This **does not** use dnspython's default resolver object, or share any
        classes with ``import dns``. A separate copy of the objects is imported to
        be able to function in a non monkey-patched process. The documentation for the resolver
        object still applies.

        The resolver that we use is available as the :attr:`resolver` attribute
        of this object (typically ``gevent.get_hub().resolver.resolver``).

    .. caution::

        Many of the same caveats about DNS results apply here as are documented
        for :class:`gevent.resolver.ares.Resolver`. In addition, the handling of
        symbolic scope IDs in IPv6 addresses passed to ``getaddrinfo`` exhibits
        some differences.

        On PyPy, ``getnameinfo`` can produce results when CPython raises
        ``socket.error``, and gevent's DNSPython resolver also
        raises ``socket.error``.

    .. caution::

        This resolver is experimental. It may be removed or modified in
        the future. As always, feedback is welcome.

    .. versionadded:: 1.3a2

    .. versionchanged:: 20.5.0
       The errors raised are now much more consistent with those
       raised by the standard library resolvers.

       Handling of localhost and broadcast names is now more consistent.

    .. _dnspython: http://www.dnspython.org
    """

    def __init__(self, hub=None): # pylint: disable=unused-argument
        # The underlying dual resolver is stored on the patched dnspython
        # module and created (and configured from the gevent config) only
        # by the first Resolver to be instantiated.
        if resolver._resolver is None:
            _resolver = resolver._resolver = _DualResolver()
            if config.resolver_nameservers:
                _resolver.network_resolver.nameservers[:] = config.resolver_nameservers
            if config.resolver_timeout:
                _resolver.network_resolver.lifetime = config.resolver_timeout
        # Different hubs in different threads could be sharing the same
        # resolver.
        assert isinstance(resolver._resolver, _DualResolver)
        self._resolver = resolver._resolver

    @property
    def resolver(self):
        """
        The dnspython resolver object we use.

        This object has several useful attributes that can be used to
        adjust the behaviour of the DNS system:

        * ``cache`` is a :class:`dns.resolver.LRUCache`. Its maximum size
          can be configured by calling :meth:`resolver.cache.set_max_size`
        * ``nameservers`` controls which nameservers to talk to
        * ``lifetime`` configures a timeout for each individual query.
        """
        return self._resolver.network_resolver

    def close(self):
        """Do nothing; this resolver has nothing to release."""

    def _getaliases(self, hostname, family):
        """
        Return the aliases of *hostname*: those found in the hosts file
        followed by the targets of the CNAME chain reported by the
        network resolver.

        :raises socket.gaierror: With ``EAI_FAMILY`` if *family* is
            neither ``AF_INET`` nor ``AF_INET6``.
        :raises TypeError: If the network resolver fails with
            ``AttributeError`` because *hostname* is ``None`` or an int.
        """
        if not isinstance(hostname, str):
            if isinstance(hostname, bytes):
                hostname = hostname.decode("idna")
        aliases = self._resolver.hosts_resolver.getaliases(hostname)
        net_resolver = self._resolver.network_resolver
        # Only called to validate *family* (it raises for unsupported
        # families). The resulting record type must NOT be passed on to
        # ``query``: its third positional parameter is the record
        # *class*, and only by coincidence do A and IN share the value 1.
        _family_to_rdtype(family)
        while 1:
            try:
                ans = net_resolver.query(hostname, dns.rdatatype.CNAME, dns.rdataclass.IN)
            except (dns.resolver.NoAnswer, dns.resolver.NXDOMAIN,
                    dns.resolver.NoNameservers, dTimeout):
                # End of the CNAME chain (or nothing more we can learn).
                break
            except AttributeError as ex:
                if hostname is None or isinstance(hostname, int):
                    raise TypeError(ex)
                raise
            else:
                aliases.extend(str(rr.target) for rr in ans.rrset)
                # Follow the chain from the first target.
                hostname = ans[0].target
        return aliases

    def _getaddrinfo(self, host_bytes, port, family, socktype, proto, flags):
        """
        Resolve *host_bytes* using the module-level ``_getaddrinfo``
        wrapper.

        For ``AF_UNSPEC`` a failed lookup is retried for ``AF_INET6`` and
        then for ``AF_INET``. For any other family, an ``EAI_NONAME``
        error is rewritten to ``EAI_FAMILY`` when the family is not one
        of ``self._KNOWN_ADDR_FAMILIES``.
        """
        # dnspython really wants the host to be in native format.
        if not isinstance(host_bytes, str):
            host_bytes = host_bytes.decode(self.HOSTNAME_ENCODING)

        if host_bytes == 'ff02::1de:c0:face:8D':
            # This is essentially a hack to make stdlib
            # test_socket:GeneralModuleTests.test_getaddrinfo_ipv6_basic
            # pass. They expect to get back a lowercase ``D``, but
            # dnspython does not do that.
            # ``test_getaddrinfo_ipv6_scopeid_symbolic`` also expects
            # the scopeid to be dropped, but again, dnspython does not
            # do that; we can't fix that here so we skip that test.
            host_bytes = 'ff02::1de:c0:face:8d'

        if family == AF_UNSPEC:
            # This tends to raise in the case that a v6 address did not exist
            # but a v4 does. So we break it into two parts.

            # Note that if there is no ipv6 in the hosts file, but there *is*
            # an ipv4, and there *is* an ipv6 in the nameservers, we will return
            # both (from the first call). The system resolver on OS X only returns
            # the results from the hosts file. doubleclick.com is one example.

            # See also https://github.com/gevent/gevent/issues/1012
            try:
                return _getaddrinfo(host_bytes, port, family, socktype, proto, flags)
            except gaierror:
                try:
                    return _getaddrinfo(host_bytes, port, AF_INET6, socktype, proto, flags)
                except gaierror:
                    return _getaddrinfo(host_bytes, port, AF_INET, socktype, proto, flags)
        else:
            try:
                return _getaddrinfo(host_bytes, port, family, socktype, proto, flags)
            except gaierror as ex:
                if ex.args[0] == EAI_NONAME and family not in self._KNOWN_ADDR_FAMILIES:
                    # It's possible that we got sent an unsupported family. Check
                    # that.
                    ex.args = (EAI_FAMILY, self.EAI_FAMILY_MSG)
                    ex.errno = EAI_FAMILY
                raise

    def _getnameinfo(self, address_bytes, port, sockaddr, flags):
        """
        Delegate to dnspython's ``_getnameinfo``.

        If that raises ``socket.error`` and *flags* is zero, retry once
        with ``NI_NUMERICSERV``; with any other *flags* the error
        propagates to the caller.
        """
        try:
            return resolver._getnameinfo(sockaddr, flags)
        except error:
            if not flags:
                # dnspython doesn't like getting ports it can't resolve.
                # We have one test, test__socket_dns.py:Test_getnameinfo_geventorg.test_port_zero
                # that does this. We conservatively fix it here; this could be expanded later.
                return resolver._getnameinfo(sockaddr, NI_NUMERICSERV)
            # Never swallow the error: falling off the end here would
            # silently hand ``None`` back to the caller.
            raise

    def _gethostbyaddr(self, ip_address_bytes):
        """
        Delegate to dnspython's ``_gethostbyaddr``, converting a
        ``gaierror`` of ``EAI_NONAME`` into ``herror(1, "Unknown host")``.
        """
        try:
            return resolver._gethostbyaddr(ip_address_bytes)
        except gaierror as ex:
            if ex.args[0] == EAI_NONAME:
                # Note: The system doesn't *always* raise herror;
                # sometimes the original gaierror propagates through.
                # It's impossible to say ahead of time or just based
                # on the name which it should be. The herror seems to
                # be by far the most common, though.
                raise herror(1, "Unknown host")
            raise

    # Things that need proper error handling
    getnameinfo = AbstractResolver.fixup_gaierror(AbstractResolver.getnameinfo)
    gethostbyaddr = AbstractResolver.fixup_gaierror(AbstractResolver.gethostbyaddr)
    gethostbyname_ex = AbstractResolver.fixup_gaierror(AbstractResolver.gethostbyname_ex)
    getaddrinfo = AbstractResolver.fixup_gaierror(AbstractResolver.getaddrinfo)
510