1from __future__ import absolute_import
2
3import datetime
4import logging
5import os
6import re
7import socket
8import warnings
9from socket import error as SocketError
10from socket import timeout as SocketTimeout
11
12from .packages import six
13from .packages.six.moves.http_client import HTTPConnection as _HTTPConnection
14from .packages.six.moves.http_client import HTTPException  # noqa: F401
15from .util.proxy import create_proxy_ssl_context
16
# The ``ssl`` module is optional: fall back to placeholders when it is missing
# or does not expose ``SSLError``.
try:
    import ssl

    BaseSSLError = ssl.SSLError
except (ImportError, AttributeError):
    # Platform-specific: no SSL. ``ssl`` is rebound to None so the rest of the
    # module can test for SSL support with a simple truthiness check.
    ssl = None

    class BaseSSLError(BaseException):
        """Stand-in for ``ssl.SSLError`` when the ``ssl`` module is unusable."""
26
27
# Re-export ``ConnectionError`` from this module on every Python version.
try:
    # Python 3: the builtin exists. The self-assignment is deliberate; it
    # binds the name in this module's namespace so it can be imported from here.
    ConnectionError = ConnectionError
except NameError:
    # Python 2: no such builtin, so define a local substitute.
    class ConnectionError(Exception):
        """Substitute for the ``ConnectionError`` builtin on Python 2."""
35
36
# Re-export ``BrokenPipeError`` from this module on every Python version.
try:
    # Python 3: not a no-op; the self-assignment adds the builtin to this
    # module's namespace so it can be imported from here.
    BrokenPipeError = BrokenPipeError
except NameError:
    # Python 2: no such builtin, so define a local substitute.
    class BrokenPipeError(Exception):
        """Substitute for the ``BrokenPipeError`` builtin on Python 2."""
44
45
46from ._collections import HTTPHeaderDict  # noqa (historical, removed in v2)
47from ._version import __version__
48from .exceptions import (
49    ConnectTimeoutError,
50    NewConnectionError,
51    SubjectAltNameWarning,
52    SystemTimeWarning,
53)
54from .packages.ssl_match_hostname import CertificateError, match_hostname
55from .util import SKIP_HEADER, SKIPPABLE_HEADERS, connection
56from .util.ssl_ import (
57    assert_fingerprint,
58    create_urllib3_context,
59    resolve_cert_reqs,
60    resolve_ssl_version,
61    ssl_wrap_socket,
62)
63
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Port used for each URL scheme when the connection classes below are not
# given an explicit port.
port_by_scheme = {
    "http": 80,
    "https": 443,
}

# When it comes time to update this value as a part of regular maintenance
# (i.e. test_recent_date is failing) update it to ~6 months before the current date.
RECENT_DATE = datetime.date(year=2019, month=1, day=1)

# Despite the name, this matches any single character that is NOT in the
# allowed token character set listed below; ``HTTPConnection.putrequest``
# uses it to reject request methods containing such characters.
_CONTAINS_CONTROL_CHAR_RE = re.compile(r"[^-!#$%&'*+.^_`|~0-9a-zA-Z]")
73
74
class HTTPConnection(_HTTPConnection, object):
    """
    Based on :class:`http.client.HTTPConnection` but provides an extra constructor
    backwards-compatibility layer between older and newer Pythons.

    Additional keyword parameters are used to configure attributes of the connection.
    Accepted parameters include:

    - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool`
    - ``source_address``: Set the source address for the current connection.
    - ``socket_options``: Set specific options on the underlying socket. If not specified, then
      defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling
      Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy.

      For example, if you wish to enable TCP Keep Alive in addition to the defaults,
      you might pass:

      .. code-block:: python

         HTTPConnection.default_socket_options + [
             (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),
         ]

      Or you may want to disable the defaults by passing an empty list (e.g., ``[]``).
    """

    default_port = port_by_scheme["http"]

    #: Disable Nagle's algorithm by default.
    #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]``
    default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]

    #: Whether this connection verifies the host's certificate.
    is_verified = False

    def __init__(self, *args, **kw):
        """
        Consume the urllib3-specific keyword arguments (``socket_options``,
        ``proxy``, ``proxy_config``) and forward everything else to the base
        connection constructor.
        """
        if not six.PY2:
            # On Python 3 ``strict`` is discarded rather than forwarded below.
            kw.pop("strict", None)

        # Pre-set source_address. Read with ``get`` (not ``pop``) so it is
        # still forwarded to the base constructor.
        self.source_address = kw.get("source_address")

        #: The socket options provided by the user. If no options are
        #: provided, we use the default options.
        self.socket_options = kw.pop("socket_options", self.default_socket_options)

        # Proxy options provided by the user.
        self.proxy = kw.pop("proxy", None)
        self.proxy_config = kw.pop("proxy_config", None)

        _HTTPConnection.__init__(self, *args, **kw)

    @property
    def host(self):
        """
        Getter method to remove any trailing dots that indicate the hostname is an FQDN.

        In general, SSL certificates don't include the trailing dot indicating a
        fully-qualified domain name, and thus, they don't validate properly when
        checked against a domain name that includes the dot. In addition, some
        servers may not expect to receive the trailing dot when provided.

        However, the hostname with trailing dot is critical to DNS resolution; doing a
        lookup with the trailing dot will properly only resolve the appropriate FQDN,
        whereas a lookup without a trailing dot will search the system's search domain
        list. Thus, it's important to keep the original host around for use only in
        those cases where it's appropriate (i.e., when doing DNS lookup to establish the
        actual TCP connection across which we're going to send HTTP requests).
        """
        return self._dns_host.rstrip(".")

    @host.setter
    def host(self, value):
        """
        Setter for the `host` property.

        We assume that only urllib3 uses the _dns_host attribute; httplib itself
        only uses `host`, and it seems reasonable that other libraries follow suit.
        """
        self._dns_host = value

    def _new_conn(self):
        """Establish a socket connection and set nodelay settings on it.

        :return: New socket connection.
        :raises ConnectTimeoutError: if creating the connection times out.
        :raises NewConnectionError: if creating the connection fails with any
            other socket error.
        """
        extra_kw = {}
        if self.source_address:
            extra_kw["source_address"] = self.source_address

        if self.socket_options:
            extra_kw["socket_options"] = self.socket_options

        try:
            # Connect using the un-stripped DNS host (see the ``host`` property).
            conn = connection.create_connection(
                (self._dns_host, self.port), self.timeout, **extra_kw
            )

        except SocketTimeout:
            raise ConnectTimeoutError(
                self,
                "Connection to %s timed out. (connect timeout=%s)"
                % (self.host, self.timeout),
            )

        except SocketError as e:
            raise NewConnectionError(
                self, "Failed to establish a new connection: %s" % e
            )

        return conn

    def _is_using_tunnel(self):
        """Return the tunnel host if one is set, otherwise ``None`` (truthiness test)."""
        # Google App Engine's httplib does not define _tunnel_host
        return getattr(self, "_tunnel_host", None)

    def _prepare_conn(self, conn):
        """Adopt ``conn`` as this connection's socket and set up the tunnel, if any."""
        self.sock = conn
        if self._is_using_tunnel():
            # TODO: Fix tunnel so it doesn't depend on self.sock state.
            self._tunnel()
            # Mark this connection as not reusable
            self.auto_open = 0

    def connect(self):
        """Open a new socket connection and prepare it for use."""
        conn = self._new_conn()
        self._prepare_conn(conn)

    def putrequest(self, method, url, *args, **kwargs):
        """"""
        # Empty docstring because the indentation of CPython's implementation
        # is broken but we don't want this method in our documentation.
        match = _CONTAINS_CONTROL_CHAR_RE.search(method)
        if match:
            raise ValueError(
                "Method cannot contain non-token characters %r (found at least %r)"
                % (method, match.group())
            )

        return _HTTPConnection.putrequest(self, method, url, *args, **kwargs)

    def putheader(self, header, *values):
        """"""
        # Emit the header normally unless the caller passed the SKIP_HEADER
        # sentinel; in that case nothing is sent, and the sentinel is only
        # permitted for the headers listed in SKIPPABLE_HEADERS.
        if SKIP_HEADER not in values:
            _HTTPConnection.putheader(self, header, *values)
        elif six.ensure_str(header.lower()) not in SKIPPABLE_HEADERS:
            raise ValueError(
                "urllib3.util.SKIP_HEADER only supports '%s'"
                % ("', '".join(map(str.title, sorted(SKIPPABLE_HEADERS))),)
            )

    def request(self, method, url, body=None, headers=None):
        """
        Send a request, adding a default ``User-Agent`` header when the caller
        did not supply one (compared case-insensitively).
        """
        if headers is None:
            headers = {}
        else:
            # Avoid modifying the headers passed into .request()
            headers = headers.copy()
        if "user-agent" not in (six.ensure_str(k.lower()) for k in headers):
            headers["User-Agent"] = _get_default_user_agent()
        super(HTTPConnection, self).request(method, url, body=body, headers=headers)

    def request_chunked(self, method, url, body=None, headers=None):
        """
        Alternative to the common request method, which sends the
        body with chunked encoding and not as one block

        :param method: Request method, validated by :meth:`putrequest`.
        :param url: Request target passed to :meth:`putrequest`.
        :param body: ``None``, a single str/bytes object, or an iterable of
            str/bytes chunks. Empty chunks are skipped and non-bytes chunks
            are encoded as UTF-8.
        :param headers: Optional mapping of header names to values. Names are
            compared case-insensitively when deciding which default headers
            (``User-Agent``, ``Transfer-Encoding``) still need to be added.
        """
        headers = headers or {}
        header_keys = {six.ensure_str(k.lower()) for k in headers}
        skip_accept_encoding = "accept-encoding" in header_keys
        skip_host = "host" in header_keys
        self.putrequest(
            method, url, skip_accept_encoding=skip_accept_encoding, skip_host=skip_host
        )
        if "user-agent" not in header_keys:
            self.putheader("User-Agent", _get_default_user_agent())
        for header, value in headers.items():
            self.putheader(header, value)
        # Check the lower-cased names, like every other lookup above, so a
        # caller-supplied "Transfer-Encoding" in any casing is not duplicated.
        if "transfer-encoding" not in header_keys:
            self.putheader("Transfer-Encoding", "chunked")
        self.endheaders()

        if body is not None:
            stringish_types = six.string_types + (bytes,)
            if isinstance(body, stringish_types):
                # Treat a single string/bytes body as a one-chunk iterable.
                body = (body,)
            for chunk in body:
                if not chunk:
                    # A zero-length chunk would be read as the terminator.
                    continue
                if not isinstance(chunk, bytes):
                    chunk = chunk.encode("utf8")
                # Chunk framing: hex length, CRLF, data, CRLF.
                len_str = hex(len(chunk))[2:]
                to_send = bytearray(len_str.encode())
                to_send += b"\r\n"
                to_send += chunk
                to_send += b"\r\n"
                self.send(to_send)

        # After the if clause, to always have a closed body
        self.send(b"0\r\n\r\n")
274
275
class HTTPSConnection(HTTPConnection):
    """
    Many of the parameters to this constructor are passed to the underlying SSL
    socket by means of :py:func:`urllib3.util.ssl_wrap_socket`.
    """

    default_port = port_by_scheme["https"]

    # Class-level defaults so ``connect()`` works even when ``set_cert()`` was
    # never called on the instance.
    cert_reqs = None
    ca_certs = None
    ca_cert_dir = None
    ca_cert_data = None
    ssl_version = None
    assert_fingerprint = None
    # ``connect()`` reads this attribute; without a default here, connecting
    # before ``set_cert()`` raised AttributeError.
    assert_hostname = None
    tls_in_tls_required = False

    def __init__(
        self,
        host,
        port=None,
        key_file=None,
        cert_file=None,
        key_password=None,
        strict=None,
        timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
        ssl_context=None,
        server_hostname=None,
        **kw
    ):
        """
        Store the TLS-related settings on the instance and delegate the rest
        of the setup to :class:`HTTPConnection`.
        """

        HTTPConnection.__init__(self, host, port, strict=strict, timeout=timeout, **kw)

        self.key_file = key_file
        self.cert_file = cert_file
        self.key_password = key_password
        self.ssl_context = ssl_context
        self.server_hostname = server_hostname

        # Required property for Google AppEngine 1.9.0 which otherwise causes
        # HTTPS requests to go out as HTTP. (See Issue #356)
        self._protocol = "https"

    def set_cert(
        self,
        key_file=None,
        cert_file=None,
        cert_reqs=None,
        key_password=None,
        ca_certs=None,
        assert_hostname=None,
        assert_fingerprint=None,
        ca_cert_dir=None,
        ca_cert_data=None,
    ):
        """
        This method should only be called once, before the connection is used.

        Every certificate-related attribute is overwritten with the value
        given here (including ``None`` defaults). ``ca_certs`` and
        ``ca_cert_dir`` have a leading ``~`` expanded when they are set.
        """
        # If cert_reqs is not provided we'll assume CERT_REQUIRED unless we also
        # have an SSLContext object in which case we'll use its verify_mode.
        if cert_reqs is None:
            if self.ssl_context is not None:
                cert_reqs = self.ssl_context.verify_mode
            else:
                cert_reqs = resolve_cert_reqs(None)

        self.key_file = key_file
        self.cert_file = cert_file
        self.cert_reqs = cert_reqs
        self.key_password = key_password
        self.assert_hostname = assert_hostname
        self.assert_fingerprint = assert_fingerprint
        self.ca_certs = ca_certs and os.path.expanduser(ca_certs)
        self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir)
        self.ca_cert_data = ca_cert_data

    def connect(self):
        """
        Open the socket, set up the proxy tunnel if one is configured, wrap
        the socket with TLS and check the peer certificate (by fingerprint or
        by hostname). Sets ``self.sock`` and ``self.is_verified``.
        """
        # Add certificate verification
        conn = self._new_conn()
        hostname = self.host
        tls_in_tls = False

        if self._is_using_tunnel():
            if self.tls_in_tls_required:
                # The proxy itself is spoken to over TLS, so the connection to
                # the destination becomes TLS inside TLS.
                conn = self._connect_tls_proxy(hostname, conn)
                tls_in_tls = True

            self.sock = conn

            # Calls self._set_hostport(), so self.host is
            # self._tunnel_host below.
            self._tunnel()
            # Mark this connection as not reusable
            self.auto_open = 0

            # Override the host with the one we're requesting data from.
            hostname = self._tunnel_host

        # An explicitly configured server_hostname takes precedence.
        server_hostname = hostname
        if self.server_hostname is not None:
            server_hostname = self.server_hostname

        is_time_off = datetime.date.today() < RECENT_DATE
        if is_time_off:
            warnings.warn(
                (
                    "System time is way off (before {0}). This will probably "
                    "lead to SSL verification errors"
                ).format(RECENT_DATE),
                SystemTimeWarning,
            )

        # Wrap socket using verification with the root certs in
        # trusted_root_certs
        default_ssl_context = False
        if self.ssl_context is None:
            default_ssl_context = True
            self.ssl_context = create_urllib3_context(
                ssl_version=resolve_ssl_version(self.ssl_version),
                cert_reqs=resolve_cert_reqs(self.cert_reqs),
            )

        context = self.ssl_context
        context.verify_mode = resolve_cert_reqs(self.cert_reqs)

        # Try to load OS default certs if none are given.
        # Works well on Windows (requires Python3.4+)
        if (
            not self.ca_certs
            and not self.ca_cert_dir
            and not self.ca_cert_data
            and default_ssl_context
            and hasattr(context, "load_default_certs")
        ):
            context.load_default_certs()

        self.sock = ssl_wrap_socket(
            sock=conn,
            keyfile=self.key_file,
            certfile=self.cert_file,
            key_password=self.key_password,
            ca_certs=self.ca_certs,
            ca_cert_dir=self.ca_cert_dir,
            ca_cert_data=self.ca_cert_data,
            server_hostname=server_hostname,
            ssl_context=context,
            tls_in_tls=tls_in_tls,
        )

        # If we're using all defaults and the connection
        # is TLSv1 or TLSv1.1 we throw a DeprecationWarning
        # for the host.
        if (
            default_ssl_context
            and self.ssl_version is None
            and hasattr(self.sock, "version")
            and self.sock.version() in {"TLSv1", "TLSv1.1"}
        ):
            warnings.warn(
                "Negotiating TLSv1/TLSv1.1 by default is deprecated "
                "and will be disabled in urllib3 v2.0.0. Connecting to "
                "'%s' with '%s' can be enabled by explicitly opting-in "
                "with 'ssl_version'" % (self.host, self.sock.version()),
                DeprecationWarning,
            )

        if self.assert_fingerprint:
            assert_fingerprint(
                self.sock.getpeercert(binary_form=True), self.assert_fingerprint
            )
        elif (
            context.verify_mode != ssl.CERT_NONE
            and not getattr(context, "check_hostname", False)
            and self.assert_hostname is not False
        ):
            # While urllib3 attempts to always turn off hostname matching from
            # the TLS library, this cannot always be done. So we check whether
            # the TLS Library still thinks it's matching hostnames.
            cert = self.sock.getpeercert()
            if not cert.get("subjectAltName", ()):
                warnings.warn(
                    (
                        "Certificate for {0} has no `subjectAltName`, falling back to check for a "
                        "`commonName` for now. This feature is being removed by major browsers and "
                        "deprecated by RFC 2818. (See https://github.com/urllib3/urllib3/issues/497 "
                        "for details.)".format(hostname)
                    ),
                    SubjectAltNameWarning,
                )
            _match_hostname(cert, self.assert_hostname or server_hostname)

        self.is_verified = (
            context.verify_mode == ssl.CERT_REQUIRED
            or self.assert_fingerprint is not None
        )

    def _connect_tls_proxy(self, hostname, conn):
        """
        Establish a TLS connection to the proxy using the provided SSL context.

        :param hostname: Host name of the proxy; used as the TLS server name
            and, for contexts built here, matched against the certificate.
        :param conn: Already-connected socket to wrap.
        :return: The TLS-wrapped socket.
        """
        proxy_config = self.proxy_config
        ssl_context = proxy_config.ssl_context
        if ssl_context:
            # If the user provided a proxy context, we assume CA and client
            # certificates have already been set
            return ssl_wrap_socket(
                sock=conn,
                server_hostname=hostname,
                ssl_context=ssl_context,
            )

        ssl_context = create_proxy_ssl_context(
            self.ssl_version,
            self.cert_reqs,
            self.ca_certs,
            self.ca_cert_dir,
            self.ca_cert_data,
        )

        # If no cert was provided, use only the default options for server
        # certificate validation
        proxy_sock = ssl_wrap_socket(
            sock=conn,
            ca_certs=self.ca_certs,
            ca_cert_dir=self.ca_cert_dir,
            ca_cert_data=self.ca_cert_data,
            server_hostname=hostname,
            ssl_context=ssl_context,
        )

        if ssl_context.verify_mode != ssl.CERT_NONE and not getattr(
            ssl_context, "check_hostname", False
        ):
            # While urllib3 attempts to always turn off hostname matching from
            # the TLS library, this cannot always be done. So we check whether
            # the TLS Library still thinks it's matching hostnames, and if it
            # is not, match the proxy certificate ourselves; otherwise any
            # certificate trusted by the CA store would be accepted.
            cert = proxy_sock.getpeercert()
            if not cert.get("subjectAltName", ()):
                warnings.warn(
                    (
                        "Certificate for {0} has no `subjectAltName`, falling back to check for a "
                        "`commonName` for now. This feature is being removed by major browsers and "
                        "deprecated by RFC 2818. (See https://github.com/urllib3/urllib3/issues/497 "
                        "for details.)".format(hostname)
                    ),
                    SubjectAltNameWarning,
                )
            _match_hostname(cert, hostname)

        return proxy_sock
504
505
def _match_hostname(cert, asserted_hostname):
    """
    Check ``cert`` against ``asserted_hostname`` using ``match_hostname``.

    On a mismatch the failure is logged, the certificate is attached to the
    exception as ``_peer_cert`` and the same ``CertificateError`` is re-raised.
    """
    try:
        match_hostname(cert, asserted_hostname)
    except CertificateError as mismatch:
        log.warning(
            "Certificate did not match expected hostname: %s. Certificate: %s",
            asserted_hostname,
            cert,
        )
        # Expose the offending certificate on the exception so that client
        # code catching it can inspect the cert, then propagate unchanged.
        mismatch._peer_cert = cert
        raise
519
520
def _get_default_user_agent():
    """Return the default ``User-Agent`` value, which embeds ``__version__``."""
    default_agent = "python-urllib3/%s" % __version__
    return default_agent
523
524
class DummyConnection(object):
    """Used to detect a failed ConnectionCls import.

    An empty placeholder class with no behaviour of its own.
    """
529
530
# When the ``ssl`` module could not be imported (``ssl`` was rebound to None
# at the top of this module), replace HTTPSConnection with the dummy
# placeholder so that the missing SSL support can be detected.
if not ssl:
    HTTPSConnection = DummyConnection  # noqa: F811


# Second name for whatever HTTPSConnection resolved to above; it must stay
# after the fallback so both names always refer to the same class.
# NOTE(review): presumably kept for backwards compatibility - confirm.
VerifiedHTTPSConnection = HTTPSConnection
536