1#!/usr/bin/env python
2#
3# Copyright 2011 Facebook
4#
5# Licensed under the Apache License, Version 2.0 (the "License"); you may
6# not use this file except in compliance with the License. You may obtain
7# a copy of the License at
8#
9#     http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14# License for the specific language governing permissions and limitations
15# under the License.
16
17"""Miscellaneous network utility code."""
18
19from __future__ import absolute_import, division, print_function
20
21import errno
22import os
23import sys
24import socket
25import stat
26
27from tornado.concurrent import dummy_executor, run_on_executor
28from tornado.ioloop import IOLoop
29from tornado.platform.auto import set_close_exec
30from tornado.util import PY3, Configurable, errno_from_exception
31
32try:
33    import ssl
34except ImportError:
35    # ssl is not available on Google App Engine
36    ssl = None
37
38try:
39    import certifi
40except ImportError:
41    # certifi is optional as long as we have ssl.create_default_context.
42    if ssl is None or hasattr(ssl, 'create_default_context'):
43        certifi = None
44    else:
45        raise
46
# Python 3 has no ``xrange``; alias it to ``range`` so the rest of this
# module can use a single name on both major versions.
if PY3:
    xrange = range

# Select the hostname-matching function and its error type.  Prefer the
# versions built into the ssl module; when ssl itself is unavailable both
# names are set to None; otherwise fall back to the
# ``backports.ssl_match_hostname`` package.
if hasattr(ssl, 'match_hostname') and hasattr(ssl, 'CertificateError'):  # python 3.2+
    ssl_match_hostname = ssl.match_hostname
    SSLCertificateError = ssl.CertificateError
elif ssl is None:
    ssl_match_hostname = SSLCertificateError = None  # type: ignore
else:
    import backports.ssl_match_hostname
    ssl_match_hostname = backports.ssl_match_hostname.match_hostname
    SSLCertificateError = backports.ssl_match_hostname.CertificateError  # type: ignore
59
# Build the module-level default SSL configuration for clients and servers.
# Depending on what the ssl module offers, each default is either an
# ``ssl.SSLContext`` object or a plain dict of ``ssl.wrap_socket`` keyword
# arguments.
if hasattr(ssl, 'SSLContext'):
    if hasattr(ssl, 'create_default_context'):
        # Python 2.7.9+, 3.4+
        # Note that the naming of ssl.Purpose is confusing; the purpose
        # of a context is to authenticate the opposite side of the
        # connection (so the client context uses SERVER_AUTH and the
        # server context uses CLIENT_AUTH).
        _client_ssl_defaults = ssl.create_default_context(
            ssl.Purpose.SERVER_AUTH)
        _server_ssl_defaults = ssl.create_default_context(
            ssl.Purpose.CLIENT_AUTH)
    else:
        # Python 3.2-3.3
        # No create_default_context here, so the client context is
        # assembled by hand: require certificates and trust the CA
        # bundle shipped with certifi.
        _client_ssl_defaults = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
        _client_ssl_defaults.verify_mode = ssl.CERT_REQUIRED
        _client_ssl_defaults.load_verify_locations(certifi.where())
        _server_ssl_defaults = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
        if hasattr(ssl, 'OP_NO_COMPRESSION'):
            # Disable TLS compression to avoid CRIME and related attacks.
            # This constant wasn't added until python 3.3.
            _client_ssl_defaults.options |= ssl.OP_NO_COMPRESSION
            _server_ssl_defaults.options |= ssl.OP_NO_COMPRESSION

elif ssl:
    # Python 2.6-2.7.8
    # No SSLContext at all: the defaults are keyword arguments for
    # ssl.wrap_socket rather than context objects.
    _client_ssl_defaults = dict(cert_reqs=ssl.CERT_REQUIRED,
                                ca_certs=certifi.where())
    _server_ssl_defaults = {}
else:
    # Google App Engine
    # The ssl module could not be imported, so there is nothing to
    # configure; the client keys are kept but set to None.
    _client_ssl_defaults = dict(cert_reqs=None,
                                ca_certs=None)
    _server_ssl_defaults = {}
91
# ThreadedResolver runs getaddrinfo on a thread. If the hostname is unicode,
# getaddrinfo attempts to import encodings.idna. If this is done at
# module-import time, the import lock is already held by the main thread,
# leading to deadlock. Avoid it by caching the idna encoder on the main
# thread now.  The encoded result is deliberately discarded; only the
# side effect of loading the codec matters.
u'foo'.encode('idna')

# For undiagnosed reasons, 'latin1' codec may also need to be preloaded.
u'foo'.encode('latin1')

# These errnos indicate that a non-blocking operation must be retried
# at a later time.  On most platforms they're the same value, but on
# some they differ.
_ERRNO_WOULDBLOCK = (errno.EWOULDBLOCK, errno.EAGAIN)

# WSAEWOULDBLOCK is not defined by every platform's errno module, so it is
# only added to the tuple when present.
if hasattr(errno, "WSAEWOULDBLOCK"):
    _ERRNO_WOULDBLOCK += (errno.WSAEWOULDBLOCK,)  # type: ignore

# Default backlog used when calling sock.listen().  It also caps how many
# connections add_accept_handler accepts per event-loop callback.
_DEFAULT_BACKLOG = 128
112
113
def bind_sockets(port, address=None, family=socket.AF_UNSPEC,
                 backlog=_DEFAULT_BACKLOG, flags=None, reuse_port=False):
    """Creates listening sockets bound to the given port and address.

    Returns a list of socket objects (multiple sockets are returned if
    the given address maps to multiple IP addresses, which is most common
    for mixed IPv4 and IPv6 use).

    Address may be either an IP address or hostname.  If it's a hostname,
    the server will listen on all IP addresses associated with the
    name.  Address may be an empty string or None to listen on all
    available interfaces.  Family may be set to either `socket.AF_INET`
    or `socket.AF_INET6` to restrict to IPv4 or IPv6 addresses, otherwise
    both will be used if available.

    The ``backlog`` argument has the same meaning as for
    `socket.listen() <socket.socket.listen>`.

    ``flags`` is a bitmask of AI_* flags to `~socket.getaddrinfo`, like
    ``socket.AI_PASSIVE | socket.AI_NUMERICHOST``.  It defaults to
    ``socket.AI_PASSIVE``.

    ``reuse_port`` option sets ``SO_REUSEPORT`` option for every socket
    in the list. If your platform doesn't support this option ValueError will
    be raised.

    If configuring, binding or listening fails for any address, every
    socket created by this call is closed before the error propagates,
    so a failed call does not leave file descriptors or listening
    sockets behind.
    """
    if reuse_port and not hasattr(socket, "SO_REUSEPORT"):
        raise ValueError("the platform doesn't support SO_REUSEPORT")

    sockets = []
    if address == "":
        address = None
    if not socket.has_ipv6 and family == socket.AF_UNSPEC:
        # Python can be compiled with --disable-ipv6, which causes
        # operations on AF_INET6 sockets to fail, but does not
        # automatically exclude those results from getaddrinfo
        # results.
        # http://bugs.python.org/issue16208
        family = socket.AF_INET
    if flags is None:
        flags = socket.AI_PASSIVE
    bound_port = None
    try:
        # getaddrinfo may return duplicate entries; set() collapses them.
        for res in set(socket.getaddrinfo(address, port, family,
                                          socket.SOCK_STREAM, 0, flags)):
            af, socktype, proto, canonname, sockaddr = res
            if (sys.platform == 'darwin' and address == 'localhost' and
                    af == socket.AF_INET6 and sockaddr[3] != 0):
                # Mac OS X includes a link-local address fe80::1%lo0 in the
                # getaddrinfo results for 'localhost'.  However, the firewall
                # doesn't understand that this is a local address and will
                # prompt for access (often repeatedly, due to an apparent
                # bug in its ability to remember granting access to an
                # application). Skip these addresses.
                continue
            try:
                sock = socket.socket(af, socktype, proto)
            except socket.error as e:
                if errno_from_exception(e) == errno.EAFNOSUPPORT:
                    continue
                raise
            # Track the socket immediately so that the cleanup handler
            # below closes it if any of the following steps fail.
            sockets.append(sock)
            set_close_exec(sock.fileno())
            if os.name != 'nt':
                sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            if reuse_port:
                sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
            if af == socket.AF_INET6:
                # On linux, ipv6 sockets accept ipv4 too by default,
                # but this makes it impossible to bind to both
                # 0.0.0.0 in ipv4 and :: in ipv6.  On other systems,
                # separate sockets *must* be used to listen for both ipv4
                # and ipv6.  For consistency, always disable ipv4 on our
                # ipv6 sockets and use a separate ipv4 socket when needed.
                #
                # Python 2.x on windows doesn't have IPPROTO_IPV6.
                if hasattr(socket, "IPPROTO_IPV6"):
                    sock.setsockopt(socket.IPPROTO_IPV6,
                                    socket.IPV6_V6ONLY, 1)

            # automatic port allocation with port=None
            # should bind on the same port on IPv4 and IPv6
            host, requested_port = sockaddr[:2]
            if requested_port == 0 and bound_port is not None:
                sockaddr = tuple([host, bound_port] + list(sockaddr[2:]))

            sock.setblocking(0)
            sock.bind(sockaddr)
            bound_port = sock.getsockname()[1]
            sock.listen(backlog)
    except Exception:
        # The caller never receives the list on failure, so nothing else
        # could close these sockets; release them before re-raising.
        for opened in sockets:
            opened.close()
        raise
    return sockets
202
203
if hasattr(socket, 'AF_UNIX'):
    def bind_unix_socket(file, mode=0o600, backlog=_DEFAULT_BACKLOG):
        """Creates a listening unix socket.

        If a socket with the given name already exists, it will be deleted.
        If any other file with that name exists, an exception will be
        raised.

        ``mode`` is applied to the socket file with `os.chmod` after
        binding, and ``backlog`` is passed to ``listen()``.

        Returns a socket object (not a list of socket objects like
        `bind_sockets`)

        :raises ValueError: if ``file`` exists and is not a socket.

        If any step fails, the newly created socket is closed before the
        exception propagates.
        """
        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        try:
            set_close_exec(sock.fileno())
            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            sock.setblocking(0)
            try:
                st = os.stat(file)
            except OSError as err:
                # A missing file is the normal case; anything else
                # (e.g. permission problems) is a real error.
                if errno_from_exception(err) != errno.ENOENT:
                    raise
            else:
                if stat.S_ISSOCK(st.st_mode):
                    # Stale socket left by a previous run: replace it.
                    os.remove(file)
                else:
                    # Interpolate the path into the message; passing it
                    # as a second argument would leave "%s" unformatted.
                    raise ValueError(
                        "File %s exists and is not a socket" % (file,))
            sock.bind(file)
            os.chmod(file, mode)
            sock.listen(backlog)
        except Exception:
            # Don't leak the descriptor when setup fails part-way.
            sock.close()
            raise
        return sock
233
234
def add_accept_handler(sock, callback, io_loop=None):
    """Registers ``sock`` with an `.IOLoop` so incoming connections are accepted.

    Each accepted connection is handed to ``callback(connection, address)``,
    where ``connection`` is the new socket object and ``address`` is the
    peer's address.  This differs from the ``callback(fd, events)``
    signature that `.IOLoop` handlers themselves receive.

    When ``io_loop`` is None, `.IOLoop.current` is used.

    .. versionchanged:: 4.1
       The ``io_loop`` argument is deprecated.
    """
    if io_loop is None:
        io_loop = IOLoop.current()

    def accept_handler(fd, events):
        # New connections can keep arriving while callbacks run, so the
        # number accepted per invocation is capped to avoid starving
        # other tasks on the loop.  The ideal cap would be the number of
        # connections pending on entry, but that isn't available, and
        # draining accept() before running any callback would hurt load
        # balancing in multiprocess setups.  The default listen backlog
        # serves as a rough stand-in for a reasonable batch size.
        for _ in xrange(_DEFAULT_BACKLOG):
            try:
                connection, address = sock.accept()
            except socket.error as e:
                code = errno_from_exception(e)
                if code in _ERRNO_WOULDBLOCK:
                    # Nothing left to accept right now.
                    return
                if code != errno.ECONNABORTED:
                    raise
                # ECONNABORTED: the peer went away while still queued
                # (observed on FreeBSD); move on to the next connection.
            else:
                set_close_exec(connection.fileno())
                callback(connection, address)

    io_loop.add_handler(sock, accept_handler, IOLoop.READ)
279
280
def is_valid_ip(ip):
    """Returns true if the given string is a well-formed IP address.

    Supports IPv4 and IPv6.

    The check is delegated to `socket.getaddrinfo` with
    ``AI_NUMERICHOST``, so no name resolution is attempted.  Empty
    values, strings containing a NUL byte, and strings the idna codec
    rejects all yield False.  Any `socket.gaierror` other than
    ``EAI_NONAME`` is re-raised.
    """
    if not ip or '\x00' in ip:
        # getaddrinfo resolves empty strings to localhost, and truncates
        # on zero bytes.
        return False
    try:
        res = socket.getaddrinfo(ip, 0, socket.AF_UNSPEC,
                                 socket.SOCK_STREAM,
                                 0, socket.AI_NUMERICHOST)
    except socket.gaierror as e:
        if e.args[0] == socket.EAI_NONAME:
            return False
        raise
    except UnicodeError:
        # getaddrinfo runs text hosts through the idna codec, which
        # raises UnicodeError for labels that are empty or too long,
        # even with AI_NUMERICHOST.  Such input is certainly not an
        # IP address.
        return False
    return bool(res)
300
301
class Resolver(Configurable):
    """Interface for configurable, asynchronous DNS resolution.

    Unless configured otherwise, instances are `BlockingResolver`
    objects, which simply call `socket.getaddrinfo` and therefore block.
    A different implementation can be selected with the
    `Resolver.configure <.Configurable.configure>` class method::

        Resolver.configure('tornado.netutil.ThreadedResolver')

    Tornado ships the following implementations of this interface:

    * `tornado.netutil.BlockingResolver`
    * `tornado.netutil.ThreadedResolver`
    * `tornado.netutil.OverrideResolver`
    * `tornado.platform.twisted.TwistedResolver`
    * `tornado.platform.caresresolver.CaresResolver`
    """
    @classmethod
    def configurable_base(cls):
        # Root class of this configurable hierarchy.
        return Resolver

    @classmethod
    def configurable_default(cls):
        # Implementation used when nothing has been configured.
        return BlockingResolver

    def resolve(self, host, port, family=socket.AF_UNSPEC, callback=None):
        """Resolves an address.

        ``host`` is a string holding either a hostname or a literal IP
        address.

        The return value is a `.Future` that resolves to a list of
        ``(family, address)`` pairs.  Each ``address`` is a tuple that can
        be passed to `socket.connect <socket.socket.connect>`: a
        ``(host, port)`` pair for IPv4, possibly with extra fields for
        IPv6.  When ``callback`` is given, it is invoked with the result
        once resolution completes.

        This base implementation always raises `NotImplementedError`;
        subclasses provide the actual behavior.

        :raises IOError: if the address cannot be resolved.

        .. versionchanged:: 4.4
           Standardized all implementations to raise `IOError`.
        """
        raise NotImplementedError()

    def close(self):
        """Closes the `Resolver`, freeing any resources used.

        The base implementation does nothing.

        .. versionadded:: 3.1

        """
355
356
class ExecutorResolver(Resolver):
    """Resolver implementation using a `concurrent.futures.Executor`.

    Use this instead of `ThreadedResolver` when you require additional
    control over the executor being used.

    The executor will be shut down when the resolver is closed unless
    ``close_executor=False``; use this if you want to reuse the same
    executor elsewhere.  When no executor is supplied, the module's
    ``dummy_executor`` is used and is never shut down by this class.

    .. versionchanged:: 4.1
       The ``io_loop`` argument is deprecated.
    """
    def initialize(self, io_loop=None, executor=None, close_executor=True):
        self.io_loop = io_loop or IOLoop.current()
        if executor is not None:
            self.executor = executor
            self.close_executor = close_executor
        else:
            # The fallback executor is not owned by this resolver, so
            # ``close_executor`` is ignored and forced to False.
            self.executor = dummy_executor
            self.close_executor = False

    def close(self):
        """Shuts down the executor if this resolver owns it, then drops it."""
        if self.close_executor:
            self.executor.shutdown()
        self.executor = None

    @run_on_executor
    def resolve(self, host, port, family=socket.AF_UNSPEC):
        """Looks up ``host``/``port`` with `socket.getaddrinfo`.

        Returns a list of ``(family, address)`` pairs, one per
        `socket.getaddrinfo` result.
        """
        # On Solaris, getaddrinfo fails if the given port is not found
        # in /etc/services and no socket type is given, so we must pass
        # one here.  The socket type used here doesn't seem to actually
        # matter (we discard the one we get back in the results),
        # so the addresses we return should still be usable with SOCK_DGRAM.
        addrinfo = socket.getaddrinfo(host, port, family, socket.SOCK_STREAM)
        # Distinct name for the per-result family so the ``family``
        # parameter is not shadowed.
        return [(res_family, address)
                for res_family, _socktype, _proto, _canonname, address
                in addrinfo]
396
397
class BlockingResolver(ExecutorResolver):
    """The default `Resolver`, backed directly by `socket.getaddrinfo`.

    Resolution blocks the `.IOLoop` while it runs, although the
    callback is deferred until the next `.IOLoop` iteration.
    """
    def initialize(self, io_loop=None):
        # No executor is supplied, so the parent class picks its own
        # fallback executor.
        parent = super(BlockingResolver, self)
        parent.initialize(io_loop=io_loop)
406
407
class ThreadedResolver(ExecutorResolver):
    """Non-blocking `Resolver` implementation backed by a thread pool.

    Depends on the `concurrent.futures` package (part of the standard
    library since Python 3.2; on older versions install it with
    ``pip install futures``).

    To choose the size of the thread pool::

        Resolver.configure('tornado.netutil.ThreadedResolver',
                           num_threads=10)

    .. versionchanged:: 3.1
       All ``ThreadedResolvers`` share a single thread pool, whose
       size is set by the first one to be created.
    """
    # Shared pool and the pid of the process that created it.
    _threadpool = None  # type: ignore
    _threadpool_pid = None  # type: int

    def initialize(self, io_loop=None, num_threads=10):
        shared_pool = ThreadedResolver._create_threadpool(num_threads)
        # The pool is shared between resolvers, so closing one resolver
        # must not shut it down.
        super(ThreadedResolver, self).initialize(
            io_loop=io_loop, executor=shared_pool, close_executor=False)

    @classmethod
    def _create_threadpool(cls, num_threads):
        """Returns the shared pool, (re)creating it for this process if needed."""
        current_pid = os.getpid()
        created_elsewhere = cls._threadpool_pid != current_pid
        if created_elsewhere:
            # Threads do not survive a fork, so a pool created under a
            # different pid is discarded.
            cls._threadpool = None
        if cls._threadpool is None:
            from concurrent.futures import ThreadPoolExecutor
            cls._threadpool = ThreadPoolExecutor(num_threads)
            cls._threadpool_pid = current_pid
        return cls._threadpool
444
445
class OverrideResolver(Resolver):
    """A resolver wrapper that applies a mapping of overrides first.

    Useful for making local DNS changes (for example in tests) without
    touching system-wide settings.

    Keys of the mapping may be plain host strings or ``(host, port)``
    pairs; a matching pair takes precedence over a matching host.
    """
    def initialize(self, resolver, mapping):
        self.mapping = mapping
        self.resolver = resolver

    def close(self):
        # Closing the wrapper closes the wrapped resolver.
        self.resolver.close()

    def resolve(self, host, port, *args, **kwargs):
        overrides = self.mapping
        endpoint = (host, port)
        if endpoint in overrides:
            # A pair entry replaces both host and port.
            host, port = overrides[endpoint]
        elif host in overrides:
            # A host-only entry keeps the requested port.
            host = overrides[host]
        return self.resolver.resolve(host, port, *args, **kwargs)
467
468
# Keyword arguments of ssl.wrap_socket that have to be translated into
# their SSLContext equivalents (all other arguments are still passed on
# to SSLContext.wrap_socket).
_SSL_CONTEXT_KEYWORDS = frozenset(['ssl_version', 'certfile', 'keyfile',
                                   'cert_reqs', 'ca_certs', 'ciphers'])


def ssl_options_to_context(ssl_options):
    """Try to convert an ``ssl_options`` dictionary to an
    `~ssl.SSLContext` object.

    ``ssl_options`` holds keyword arguments meant for `ssl.wrap_socket`.
    On Python 2.7.9+ an `ssl.SSLContext` can be used in their place; this
    function turns the dict form into the matching `~ssl.SSLContext`, so
    that a component accepting both forms can upgrade to the context
    version and use features such as SNI or NPN.

    The argument is returned unchanged when it already is an
    `~ssl.SSLContext`, or when the ssl module has no ``SSLContext``.
    """
    if isinstance(ssl_options, dict):
        assert _SSL_CONTEXT_KEYWORDS.issuperset(ssl_options), ssl_options
    context_available = hasattr(ssl, 'SSLContext')
    if not context_available or isinstance(ssl_options, ssl.SSLContext):
        return ssl_options

    protocol = ssl_options.get('ssl_version', ssl.PROTOCOL_SSLv23)
    context = ssl.SSLContext(protocol)
    if 'certfile' in ssl_options:
        certfile = ssl_options['certfile']
        keyfile = ssl_options.get('keyfile')
        context.load_cert_chain(certfile, keyfile)
    if 'cert_reqs' in ssl_options:
        context.verify_mode = ssl_options['cert_reqs']
    if 'ca_certs' in ssl_options:
        context.load_verify_locations(ssl_options['ca_certs'])
    if 'ciphers' in ssl_options:
        context.set_ciphers(ssl_options['ciphers'])
    if hasattr(ssl, 'OP_NO_COMPRESSION'):
        # Turn off TLS compression to guard against CRIME and related
        # attacks.  The constant only exists from python 3.3 on.
        context.options |= ssl.OP_NO_COMPRESSION
    return context
507
508
def ssl_wrap_socket(socket, ssl_options, server_hostname=None, **kwargs):
    """Returns an ``ssl.SSLSocket`` wrapping the given socket.

    ``ssl_options`` may be either an `ssl.SSLContext` object or a
    dictionary (as accepted by `ssl_options_to_context`).  Additional
    keyword arguments are passed to ``wrap_socket`` (either the
    `~ssl.SSLContext` method or the `ssl` module function as
    appropriate).

    ``server_hostname`` is forwarded to ``wrap_socket`` only when a
    context is in use and the ssl module reports SNI support
    (``ssl.HAS_SNI``); otherwise it is ignored.
    """
    context = ssl_options_to_context(ssl_options)
    if hasattr(ssl, 'SSLContext') and isinstance(context, ssl.SSLContext):
        # Default to False so an ssl module without the HAS_SNI
        # attribute falls back to the non-SNI path instead of raising
        # AttributeError.
        if server_hostname is not None and getattr(ssl, 'HAS_SNI', False):
            # Python doesn't have server-side SNI support so we can't
            # really unittest this, but it can be manually tested with
            # python3.2 -m tornado.httpclient https://sni.velox.ch
            return context.wrap_socket(socket, server_hostname=server_hostname,
                                       **kwargs)
        else:
            return context.wrap_socket(socket, **kwargs)
    else:
        # No SSLContext: ``context`` is still the options dict, so merge
        # it with the extra keyword arguments for ssl.wrap_socket.
        return ssl.wrap_socket(socket, **dict(context, **kwargs))  # type: ignore
530