1from __future__ import absolute_import
2
3import datetime
4import logging
5import os
6import re
7import socket
8import warnings
9from socket import error as SocketError
10from socket import timeout as SocketTimeout
11
12from .packages import six
13from .packages.six.moves.http_client import HTTPConnection as _HTTPConnection
14from .packages.six.moves.http_client import HTTPException  # noqa: F401
15from .util.proxy import create_proxy_ssl_context
16
# Optional SSL support. After this block ``ssl`` is either the standard
# library module or ``None``, and ``BaseSSLError`` is always defined: it is
# ``ssl.SSLError`` when available, otherwise a local placeholder class.
try:  # Compiled with SSL?
    import ssl

    BaseSSLError = ssl.SSLError
except (ImportError, AttributeError):  # Platform-specific: No SSL.
    # ``ssl = None`` is what the ``if not ssl`` check at the bottom of this
    # module keys off.
    ssl = None

    # Placeholder so the name ``BaseSSLError`` still exists without ssl.
    class BaseSSLError(BaseException):
        pass
26
27
# Make ``ConnectionError`` a module-level name on every Python version.
# The builtin lookup raises NameError where the builtin does not exist, in
# which case a stand-in exception class is defined instead.
try:
    # Python 3: not a no-op, we're adding this to the namespace so it can be imported.
    ConnectionError = ConnectionError
except NameError:
    # Python 2
    class ConnectionError(Exception):
        pass
35
36
# Same approach as for ``ConnectionError``: bind the builtin to a
# module-level name when it exists, otherwise define a stand-in class.
try:  # Python 3:
    # Not a no-op, we're adding this to the namespace so it can be imported.
    BrokenPipeError = BrokenPipeError
except NameError:  # Python 2:

    class BrokenPipeError(Exception):
        pass
44
45
46from ._collections import HTTPHeaderDict  # noqa (historical, removed in v2)
47from ._version import __version__
48from .exceptions import (
49    ConnectTimeoutError,
50    NewConnectionError,
51    SubjectAltNameWarning,
52    SystemTimeWarning,
53)
54from .packages.ssl_match_hostname import CertificateError, match_hostname
55from .util import SKIP_HEADER, SKIPPABLE_HEADERS, connection
56from .util.ssl_ import (
57    assert_fingerprint,
58    create_urllib3_context,
59    is_ipaddress,
60    resolve_cert_reqs,
61    resolve_ssl_version,
62    ssl_wrap_socket,
63)
64
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Default TCP port for each URL scheme handled by the connection classes.
port_by_scheme = dict(http=80, https=443)

# Reference date compared against the system date before a TLS handshake to
# detect a badly-set clock.
# Maintenance note: when test_recent_date starts failing, move this value to
# roughly six months before the current date.
RECENT_DATE = datetime.date(year=2020, month=7, day=1)

# Despite the name, this matches any character outside the set of characters
# accepted in an HTTP method; it is used to validate methods in putrequest().
_CONTAINS_CONTROL_CHAR_RE = re.compile(r"[^-!#$%&'*+.^_`|~0-9a-zA-Z]")
74
75
class HTTPConnection(_HTTPConnection, object):
    """
    A :class:`http.client.HTTPConnection` subclass whose constructor smooths over
    differences between older and newer Pythons.

    Extra keyword arguments configure attributes of the connection:

    - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool`
    - ``source_address``: The source address to use for this connection.
    - ``socket_options``: Options to set on the underlying socket. When omitted,
      ``HTTPConnection.default_socket_options`` is used, which disables Nagle's
      algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy.

      To enable TCP Keep Alive on top of the defaults, for instance, pass:

      .. code-block:: python

         HTTPConnection.default_socket_options + [
             (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),
         ]

      Passing an empty list (``[]``) disables the defaults altogether.
    """

    default_port = port_by_scheme["http"]

    #: Socket options applied when none are supplied; disables Nagle's algorithm.
    #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]``
    default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]

    #: Whether this connection verifies the host's certificate.
    is_verified = False

    #: Whether this proxy connection (if used) verifies the proxy host's
    #: certificate.
    proxy_is_verified = None

    def __init__(self, *args, **kw):
        # ``strict`` is only forwarded to the base constructor on Python 2.
        if not six.PY2:
            kw.pop("strict", None)

        # Remember source_address up front; it is read (not popped), so the
        # base constructor still receives it.
        self.source_address = kw.get("source_address")

        #: User-supplied socket options, or the class defaults when absent.
        try:
            self.socket_options = kw.pop("socket_options")
        except KeyError:
            self.socket_options = self.default_socket_options

        # Proxy settings are consumed here and never reach the base class.
        for option in ("proxy", "proxy_config"):
            setattr(self, option, kw.pop(option, None))

        _HTTPConnection.__init__(self, *args, **kw)

    @property
    def host(self):
        """
        The hostname with any trailing dots (the FQDN marker) removed.

        SSL certificates generally omit the trailing dot of a fully-qualified
        domain name, so validation fails when the checked name carries it, and
        some servers do not expect to receive it either.

        The dotted form still matters for DNS: a lookup with the trailing dot
        resolves only that exact FQDN, while a lookup without it consults the
        system's search domain list. The original value is therefore preserved
        in ``_dns_host`` and used only where that is appropriate, i.e. when
        resolving the name to open the TCP connection that will carry the
        HTTP requests.
        """
        dns_name = self._dns_host
        return dns_name.rstrip(".")

    @host.setter
    def host(self, value):
        """
        Store ``value`` unmodified as ``_dns_host``.

        The assumption is that only urllib3 touches ``_dns_host``; httplib
        itself only uses ``host``, and other libraries are expected to do
        likewise.
        """
        self._dns_host = value

    def _new_conn(self):
        """Open a new socket to the target, applying the configured options.

        :return: New socket connection.
        """
        # Only pass the options that are actually set (truthy).
        options = {}
        for key in ("source_address", "socket_options"):
            configured = getattr(self, key)
            if configured:
                options[key] = configured

        try:
            # Connect using the un-stripped DNS host so a trailing dot is
            # honoured during resolution.
            return connection.create_connection(
                (self._dns_host, self.port), self.timeout, **options
            )
        except SocketTimeout:
            raise ConnectTimeoutError(
                self,
                "Connection to %s timed out. (connect timeout=%s)"
                % (self.host, self.timeout),
            )
        except SocketError as err:
            raise NewConnectionError(
                self, "Failed to establish a new connection: %s" % err
            )

    def _is_using_tunnel(self):
        # Google App Engine's httplib does not define _tunnel_host
        try:
            return self._tunnel_host
        except AttributeError:
            return None

    def _prepare_conn(self, conn):
        """Adopt ``conn`` as this connection's socket and tunnel if configured."""
        self.sock = conn
        if not self._is_using_tunnel():
            return
        # TODO: Fix tunnel so it doesn't depend on self.sock state.
        self._tunnel()
        # Mark this connection as not reusable
        self.auto_open = 0

    def connect(self):
        """Create the socket and prepare it (including any tunnel) for use."""
        self._prepare_conn(self._new_conn())

    def putrequest(self, method, url, *args, **kwargs):
        """ """
        # Empty docstring because the indentation of CPython's implementation
        # is broken but we don't want this method in our documentation.
        offending = _CONTAINS_CONTROL_CHAR_RE.search(method)
        if offending is not None:
            raise ValueError(
                "Method cannot contain non-token characters %r (found at least %r)"
                % (method, offending.group())
            )

        return _HTTPConnection.putrequest(self, method, url, *args, **kwargs)

    def putheader(self, header, *values):
        """ """
        skip_requested = any(
            isinstance(value, str) and value == SKIP_HEADER for value in values
        )
        if not skip_requested:
            _HTTPConnection.putheader(self, header, *values)
            return

        # The header is being skipped: that is only allowed for a fixed set.
        if six.ensure_str(header.lower()) in SKIPPABLE_HEADERS:
            return

        supported = "', '".join(
            str.title(name) for name in sorted(SKIPPABLE_HEADERS)
        )
        raise ValueError(
            "urllib3.util.SKIP_HEADER only supports '%s'" % (supported,)
        )

    def request(self, method, url, body=None, headers=None):
        """Send a request, adding a default ``User-Agent`` header when absent."""
        # Work on a copy so the mapping passed into .request() is not modified.
        headers = {} if headers is None else headers.copy()

        has_user_agent = any(
            six.ensure_str(name.lower()) == "user-agent" for name in headers
        )
        if not has_user_agent:
            headers["User-Agent"] = _get_default_user_agent()

        super(HTTPConnection, self).request(method, url, body=body, headers=headers)

    def request_chunked(self, method, url, body=None, headers=None):
        """
        Variant of the regular request method that transmits the body
        using chunked transfer encoding rather than as a single block.
        """
        if not headers:
            headers = {}
        lowered = {six.ensure_str(name.lower()) for name in headers}

        # Let caller-supplied Accept-Encoding / Host headers take precedence
        # over the ones putrequest() would otherwise add.
        self.putrequest(
            method,
            url,
            skip_accept_encoding="accept-encoding" in lowered,
            skip_host="host" in lowered,
        )
        if "user-agent" not in lowered:
            self.putheader("User-Agent", _get_default_user_agent())
        for name, value in headers.items():
            self.putheader(name, value)
        if "transfer-encoding" not in lowered:
            self.putheader("Transfer-Encoding", "chunked")
        self.endheaders()

        if body is not None:
            # A single str/bytes body is treated as a one-chunk iterable.
            if isinstance(body, six.string_types + (bytes,)):
                body = (body,)
            for piece in body:
                # Empty chunks are skipped.
                if not piece:
                    continue
                payload = piece if isinstance(piece, bytes) else piece.encode("utf8")
                size_line = hex(len(payload))[2:].encode()
                # Frame: <hex length> CRLF <data> CRLF
                frame = bytearray(size_line) + b"\r\n" + payload + b"\r\n"
                self.send(frame)

        # Sent unconditionally so the body is always terminated.
        self.send(b"0\r\n\r\n")
279
280
class HTTPSConnection(HTTPConnection):
    """
    Many of the parameters to this constructor are passed to the underlying SSL
    socket by means of :py:func:`urllib3.util.ssl_wrap_socket`.

    Certificate-verification settings (``cert_reqs``, ``ca_certs``,
    ``assert_hostname``, ``assert_fingerprint``, ...) are plain attributes.
    They can be set with :meth:`set_cert` before :meth:`connect` is called;
    otherwise the class-level defaults below apply.
    """

    default_port = port_by_scheme["https"]

    # Class-level defaults for the settings read by connect(); set_cert()
    # overrides most of them per instance.
    cert_reqs = None
    ca_certs = None
    ca_cert_dir = None
    ca_cert_data = None
    ssl_version = None
    assert_fingerprint = None
    # connect() reads ``self.assert_hostname``; without this default an
    # instance on which set_cert() was never called would raise
    # AttributeError during hostname verification.
    assert_hostname = None
    tls_in_tls_required = False

    def __init__(
        self,
        host,
        port=None,
        key_file=None,
        cert_file=None,
        key_password=None,
        strict=None,
        timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
        ssl_context=None,
        server_hostname=None,
        **kw
    ):
        """
        Initialise the connection and record the TLS client settings.

        :param host: Target host, forwarded to :class:`HTTPConnection`.
        :param port: Target port, forwarded to :class:`HTTPConnection`.
        :param key_file: Stored and later passed to ``ssl_wrap_socket`` as ``keyfile``.
        :param cert_file: Stored and later passed to ``ssl_wrap_socket`` as ``certfile``.
        :param key_password: Stored and later passed to ``ssl_wrap_socket``.
        :param strict: Forwarded to :class:`HTTPConnection`.
        :param timeout: Forwarded to :class:`HTTPConnection`.
        :param ssl_context: Context to use in :meth:`connect`; when ``None`` a
            default one is created there.
        :param server_hostname: When not ``None``, overrides the hostname
            passed to ``ssl_wrap_socket`` in :meth:`connect`.
        :param kw: Remaining keyword arguments, forwarded to :class:`HTTPConnection`.
        """

        HTTPConnection.__init__(self, host, port, strict=strict, timeout=timeout, **kw)

        self.key_file = key_file
        self.cert_file = cert_file
        self.key_password = key_password
        self.ssl_context = ssl_context
        self.server_hostname = server_hostname

        # Required property for Google AppEngine 1.9.0 which otherwise causes
        # HTTPS requests to go out as HTTP. (See Issue #356)
        self._protocol = "https"

    def set_cert(
        self,
        key_file=None,
        cert_file=None,
        cert_reqs=None,
        key_password=None,
        ca_certs=None,
        assert_hostname=None,
        assert_fingerprint=None,
        ca_cert_dir=None,
        ca_cert_data=None,
    ):
        """
        This method should only be called once, before the connection is used.

        Stores the given certificate and verification settings on the
        instance. Every listed attribute is overwritten, including with
        ``None`` for arguments that are omitted. ``ca_certs`` and
        ``ca_cert_dir`` are passed through ``os.path.expanduser`` when truthy.
        """
        # If cert_reqs is not provided we'll assume CERT_REQUIRED unless we also
        # have an SSLContext object in which case we'll use its verify_mode.
        if cert_reqs is None:
            if self.ssl_context is not None:
                cert_reqs = self.ssl_context.verify_mode
            else:
                cert_reqs = resolve_cert_reqs(None)

        self.key_file = key_file
        self.cert_file = cert_file
        self.cert_reqs = cert_reqs
        self.key_password = key_password
        self.assert_hostname = assert_hostname
        self.assert_fingerprint = assert_fingerprint
        self.ca_certs = ca_certs and os.path.expanduser(ca_certs)
        self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir)
        self.ca_cert_data = ca_cert_data

    def connect(self):
        """
        Open the socket, wrap it in TLS and verify the peer.

        Steps, in order: create the TCP socket; if a tunnel is configured,
        optionally establish TLS to the proxy first and then run the tunnel
        handshake; warn if the system date is before ``RECENT_DATE``; build a
        default SSL context when none was supplied; wrap the socket; then
        verify by fingerprint, or by hostname when the TLS library is not
        already checking it. Finally ``is_verified`` is updated.
        """
        # Add certificate verification
        conn = self._new_conn()
        hostname = self.host
        tls_in_tls = False

        if self._is_using_tunnel():
            if self.tls_in_tls_required:
                # TLS to the proxy itself; the target TLS session is then
                # layered on top of it.
                conn = self._connect_tls_proxy(hostname, conn)
                tls_in_tls = True

            self.sock = conn

            # Calls self._set_hostport(), so self.host is
            # self._tunnel_host below.
            self._tunnel()
            # Mark this connection as not reusable
            self.auto_open = 0

            # Override the host with the one we're requesting data from.
            hostname = self._tunnel_host

        # An explicitly configured server_hostname wins over the derived one.
        server_hostname = hostname
        if self.server_hostname is not None:
            server_hostname = self.server_hostname

        is_time_off = datetime.date.today() < RECENT_DATE
        if is_time_off:
            warnings.warn(
                (
                    "System time is way off (before {0}). This will probably "
                    "lead to SSL verification errors"
                ).format(RECENT_DATE),
                SystemTimeWarning,
            )

        # Wrap socket using verification with the root certs in
        # trusted_root_certs
        default_ssl_context = False
        if self.ssl_context is None:
            default_ssl_context = True
            self.ssl_context = create_urllib3_context(
                ssl_version=resolve_ssl_version(self.ssl_version),
                cert_reqs=resolve_cert_reqs(self.cert_reqs),
            )

        context = self.ssl_context
        # Applied even to a caller-supplied context.
        context.verify_mode = resolve_cert_reqs(self.cert_reqs)

        # Try to load OS default certs if none are given.
        # Works well on Windows (requires Python3.4+)
        if (
            not self.ca_certs
            and not self.ca_cert_dir
            and not self.ca_cert_data
            and default_ssl_context
            and hasattr(context, "load_default_certs")
        ):
            context.load_default_certs()

        self.sock = ssl_wrap_socket(
            sock=conn,
            keyfile=self.key_file,
            certfile=self.cert_file,
            key_password=self.key_password,
            ca_certs=self.ca_certs,
            ca_cert_dir=self.ca_cert_dir,
            ca_cert_data=self.ca_cert_data,
            server_hostname=server_hostname,
            ssl_context=context,
            tls_in_tls=tls_in_tls,
        )

        # If we're using all defaults and the connection
        # is TLSv1 or TLSv1.1 we throw a DeprecationWarning
        # for the host.
        if (
            default_ssl_context
            and self.ssl_version is None
            and hasattr(self.sock, "version")
            and self.sock.version() in {"TLSv1", "TLSv1.1"}
        ):
            warnings.warn(
                "Negotiating TLSv1/TLSv1.1 by default is deprecated "
                "and will be disabled in urllib3 v2.0.0. Connecting to "
                "'%s' with '%s' can be enabled by explicitly opting-in "
                "with 'ssl_version'" % (self.host, self.sock.version()),
                DeprecationWarning,
            )

        if self.assert_fingerprint:
            # Fingerprint pinning takes the place of hostname matching.
            assert_fingerprint(
                self.sock.getpeercert(binary_form=True), self.assert_fingerprint
            )
        elif (
            context.verify_mode != ssl.CERT_NONE
            and not getattr(context, "check_hostname", False)
            and self.assert_hostname is not False
        ):
            # While urllib3 attempts to always turn off hostname matching from
            # the TLS library, this cannot always be done. So we check whether
            # the TLS Library still thinks it's matching hostnames.
            cert = self.sock.getpeercert()
            if not cert.get("subjectAltName", ()):
                warnings.warn(
                    (
                        "Certificate for {0} has no `subjectAltName`, falling back to check for a "
                        "`commonName` for now. This feature is being removed by major browsers and "
                        "deprecated by RFC 2818. (See https://github.com/urllib3/urllib3/issues/497 "
                        "for details.)".format(hostname)
                    ),
                    SubjectAltNameWarning,
                )
            _match_hostname(cert, self.assert_hostname or server_hostname)

        self.is_verified = (
            context.verify_mode == ssl.CERT_REQUIRED
            or self.assert_fingerprint is not None
        )

    def _connect_tls_proxy(self, hostname, conn):
        """
        Establish a TLS connection to the proxy using the provided SSL context.

        :param hostname: Proxy hostname, used as ``server_hostname`` and for
            certificate hostname matching.
        :param conn: The already-connected plain socket to wrap.
        :return: The TLS-wrapped socket.
        """
        proxy_config = self.proxy_config
        ssl_context = proxy_config.ssl_context
        if ssl_context:
            # If the user provided a proxy context, we assume CA and client
            # certificates have already been set
            return ssl_wrap_socket(
                sock=conn,
                server_hostname=hostname,
                ssl_context=ssl_context,
            )

        ssl_context = create_proxy_ssl_context(
            self.ssl_version,
            self.cert_reqs,
            self.ca_certs,
            self.ca_cert_dir,
            self.ca_cert_data,
        )

        # If no cert was provided, use only the default options for server
        # certificate validation
        # (Local is deliberately not named ``socket`` so the imported module
        # is not shadowed.)
        tls_sock = ssl_wrap_socket(
            sock=conn,
            ca_certs=self.ca_certs,
            ca_cert_dir=self.ca_cert_dir,
            ca_cert_data=self.ca_cert_data,
            server_hostname=hostname,
            ssl_context=ssl_context,
        )

        if ssl_context.verify_mode != ssl.CERT_NONE and not getattr(
            ssl_context, "check_hostname", False
        ):
            # While urllib3 attempts to always turn off hostname matching from
            # the TLS library, this cannot always be done. So we check whether
            # the TLS Library still thinks it's matching hostnames.
            cert = tls_sock.getpeercert()
            if not cert.get("subjectAltName", ()):
                warnings.warn(
                    (
                        "Certificate for {0} has no `subjectAltName`, falling back to check for a "
                        "`commonName` for now. This feature is being removed by major browsers and "
                        "deprecated by RFC 2818. (See https://github.com/urllib3/urllib3/issues/497 "
                        "for details.)".format(hostname)
                    ),
                    SubjectAltNameWarning,
                )
            _match_hostname(cert, hostname)

        self.proxy_is_verified = ssl_context.verify_mode == ssl.CERT_REQUIRED
        return tls_sock
531
532
def _match_hostname(cert, asserted_hostname):
    """
    Check ``cert`` against ``asserted_hostname`` via ``match_hostname``.

    :param cert: Peer certificate, as passed on to ``match_hostname``.
    :param asserted_hostname: Hostname or IP literal the certificate must match.
        Surrounding square brackets are removed only when the remainder is
        an IP address.
    :raises CertificateError: Re-raised from ``match_hostname`` on mismatch,
        with the certificate attached as ``_peer_cert``.
    """
    # Our upstream implementation of ssl.match_hostname()
    # only applies this normalization to IP addresses so it doesn't
    # match DNS SANs so we do the same thing!
    #
    # Only the brackets are stripped. str.strip() treats its argument as a
    # set of characters, so also listing "u" would turn a name such as
    # "u10.0.0.1" into the IP address "10.0.0.1" and match the wrong host.
    stripped_hostname = asserted_hostname.strip("[]")
    if is_ipaddress(stripped_hostname):
        asserted_hostname = stripped_hostname

    try:
        match_hostname(cert, asserted_hostname)
    except CertificateError as e:
        log.warning(
            "Certificate did not match expected hostname: %s. Certificate: %s",
            asserted_hostname,
            cert,
        )
        # Add cert to exception and reraise so client code can inspect
        # the cert when catching the exception, if they want to
        e._peer_cert = cert
        raise
553
554
def _get_default_user_agent():
    """Return the ``User-Agent`` value used when a request does not set one."""
    version = __version__
    return "python-urllib3/%s" % (version,)
557
558
class DummyConnection(object):
    """Placeholder type used to detect a failed ConnectionCls import."""
563
564
# When the ``ssl`` module could not be imported (``ssl`` is None), replace
# HTTPSConnection with the DummyConnection placeholder.
if not ssl:
    HTTPSConnection = DummyConnection  # noqa: F811


# Second name for whatever HTTPSConnection is bound to at this point
# (presumably kept for backwards compatibility -- confirm before removing).
VerifiedHTTPSConnection = HTTPSConnection
570