1# coding: utf-8 2# Modified Work: Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved. 3# This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl or Apache License 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose either license. 4# Copyright 2008-2016 Andrey Petrov and contributors 5 6from __future__ import absolute_import 7 8import datetime 9import logging 10import os 11import re 12import socket 13import warnings 14from socket import error as SocketError 15from socket import timeout as SocketTimeout 16 17from .packages import six 18from .packages.six.moves.http_client import HTTPConnection as _HTTPConnection 19from .packages.six.moves.http_client import HTTPException # noqa: F401 20from .util.proxy import create_proxy_ssl_context 21 22try: # Compiled with SSL? 23 import ssl 24 25 BaseSSLError = ssl.SSLError 26except (ImportError, AttributeError): # Platform-specific: No SSL. 27 ssl = None 28 29 class BaseSSLError(BaseException): 30 pass 31 32 33try: 34 # Python 3: not a no-op, we're adding this to the namespace so it can be imported. 35 ConnectionError = ConnectionError 36except NameError: 37 # Python 2 38 class ConnectionError(Exception): 39 pass 40 41 42try: # Python 3: 43 # Not a no-op, we're adding this to the namespace so it can be imported. 44 BrokenPipeError = BrokenPipeError 45except NameError: # Python 2: 46 47 class BrokenPipeError(Exception): 48 pass 49 50 51from ._collections import HTTPHeaderDict # noqa (historical, removed in v2) 52from ._version import __version__ 53from .exceptions import ( 54 ConnectTimeoutError, 55 NewConnectionError, 56 SubjectAltNameWarning, 57 SystemTimeWarning, 58) 59from .packages.ssl_match_hostname import CertificateError, match_hostname 60from .util import SKIP_HEADER, SKIPPABLE_HEADERS, connection 61from .util.ssl_ import ( 62 assert_fingerprint, 63 create_urllib3_context, 64 resolve_cert_reqs, 65 resolve_ssl_version, 66 ssl_wrap_socket, 67) 68 69log = logging.getLogger(__name__) 70 71port_by_scheme = {"http": 80, "https": 443} 72 73# When it comes time to update this value as a part of regular maintenance 74# (ie test_recent_date is failing) update it to ~6 months before the current date. 75RECENT_DATE = datetime.date(2020, 7, 1) 76 77_CONTAINS_CONTROL_CHAR_RE = re.compile(r"[^-!#$%&'*+.^_`|~0-9a-zA-Z]") 78 79 80class HTTPConnection(_HTTPConnection, object): 81 """ 82 Based on :class:`http.client.HTTPConnection` but provides an extra constructor 83 backwards-compatibility layer between older and newer Pythons. 84 85 Additional keyword parameters are used to configure attributes of the connection. 86 Accepted parameters include: 87 88 - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool` 89 - ``source_address``: Set the source address for the current connection. 90 - ``socket_options``: Set specific options on the underlying socket. If not specified, then 91 defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling 92 Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy. 93 94 For example, if you wish to enable TCP Keep Alive in addition to the defaults, 95 you might pass: 96 97 .. code-block:: python 98 99 HTTPConnection.default_socket_options + [ 100 (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1), 101 ] 102 103 Or you may want to disable the defaults by passing an empty list (e.g., ``[]``). 104 """ 105 106 default_port = port_by_scheme["http"] 107 108 #: Disable Nagle's algorithm by default. 109 #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]`` 110 default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] 111 112 #: Whether this connection verifies the host's certificate. 113 is_verified = False 114 115 def __init__(self, *args, **kw): 116 if not six.PY2: 117 kw.pop("strict", None) 118 119 # Pre-set source_address. 120 self.source_address = kw.get("source_address") 121 122 #: The socket options provided by the user. If no options are 123 #: provided, we use the default options. 124 self.socket_options = kw.pop("socket_options", self.default_socket_options) 125 126 # Proxy options provided by the user. 127 self.proxy = kw.pop("proxy", None) 128 self.proxy_config = kw.pop("proxy_config", None) 129 130 _HTTPConnection.__init__(self, *args, **kw) 131 132 @property 133 def host(self): 134 """ 135 Getter method to remove any trailing dots that indicate the hostname is an FQDN. 136 137 In general, SSL certificates don't include the trailing dot indicating a 138 fully-qualified domain name, and thus, they don't validate properly when 139 checked against a domain name that includes the dot. In addition, some 140 servers may not expect to receive the trailing dot when provided. 141 142 However, the hostname with trailing dot is critical to DNS resolution; doing a 143 lookup with the trailing dot will properly only resolve the appropriate FQDN, 144 whereas a lookup without a trailing dot will search the system's search domain 145 list. Thus, it's important to keep the original host around for use only in 146 those cases where it's appropriate (i.e., when doing DNS lookup to establish the 147 actual TCP connection across which we're going to send HTTP requests). 148 """ 149 return self._dns_host.rstrip(".") 150 151 @host.setter 152 def host(self, value): 153 """ 154 Setter for the `host` property. 155 156 We assume that only urllib3 uses the _dns_host attribute; httplib itself 157 only uses `host`, and it seems reasonable that other libraries follow suit. 158 """ 159 self._dns_host = value 160 161 def _new_conn(self): 162 """Establish a socket connection and set nodelay settings on it. 163 164 :return: New socket connection. 165 """ 166 extra_kw = {} 167 if self.source_address: 168 extra_kw["source_address"] = self.source_address 169 170 if self.socket_options: 171 extra_kw["socket_options"] = self.socket_options 172 173 try: 174 conn = connection.create_connection( 175 (self._dns_host, self.port), self.timeout, **extra_kw 176 ) 177 178 except SocketTimeout: 179 raise ConnectTimeoutError( 180 self, 181 "Connection to %s timed out. (connect timeout=%s)" 182 % (self.host, self.timeout), 183 ) 184 185 except SocketError as e: 186 raise NewConnectionError( 187 self, "Failed to establish a new connection: %s" % e 188 ) 189 190 return conn 191 192 def _is_using_tunnel(self): 193 # Google App Engine's httplib does not define _tunnel_host 194 return getattr(self, "_tunnel_host", None) 195 196 def _prepare_conn(self, conn): 197 self.sock = conn 198 if self._is_using_tunnel(): 199 # TODO: Fix tunnel so it doesn't depend on self.sock state. 200 self._tunnel() 201 # Mark this connection as not reusable 202 self.auto_open = 0 203 204 def connect(self): 205 conn = self._new_conn() 206 self._prepare_conn(conn) 207 208 def putrequest(self, method, url, *args, **kwargs): 209 """""" 210 # Empty docstring because the indentation of CPython's implementation 211 # is broken but we don't want this method in our documentation. 212 match = _CONTAINS_CONTROL_CHAR_RE.search(method) 213 if match: 214 raise ValueError( 215 "Method cannot contain non-token characters %r (found at least %r)" 216 % (method, match.group()) 217 ) 218 219 return _HTTPConnection.putrequest(self, method, url, *args, **kwargs) 220 221 def putheader(self, header, *values): 222 """""" 223 if not any(isinstance(v, str) and v == SKIP_HEADER for v in values): 224 _HTTPConnection.putheader(self, header, *values) 225 elif six.ensure_str(header.lower()) not in SKIPPABLE_HEADERS: 226 raise ValueError( 227 "urllib3.util.SKIP_HEADER only supports '%s'" 228 % ("', '".join(map(str.title, sorted(SKIPPABLE_HEADERS))),) 229 ) 230 231 def request(self, method, url, body=None, headers=None): 232 if headers is None: 233 headers = {} 234 else: 235 # Avoid modifying the headers passed into .request() 236 headers = headers.copy() 237 if "user-agent" not in (six.ensure_str(k.lower()) for k in headers): 238 headers["User-Agent"] = _get_default_user_agent() 239 super(HTTPConnection, self).request(method, url, body=body, headers=headers) 240 241 def request_chunked(self, method, url, body=None, headers=None): 242 """ 243 Alternative to the common request method, which sends the 244 body with chunked encoding and not as one block 245 """ 246 headers = headers or {} 247 header_keys = set([six.ensure_str(k.lower()) for k in headers]) 248 skip_accept_encoding = "accept-encoding" in header_keys 249 skip_host = "host" in header_keys 250 self.putrequest( 251 method, url, skip_accept_encoding=skip_accept_encoding, skip_host=skip_host 252 ) 253 if "user-agent" not in header_keys: 254 self.putheader("User-Agent", _get_default_user_agent()) 255 for header, value in headers.items(): 256 self.putheader(header, value) 257 if "transfer-encoding" not in headers: 258 self.putheader("Transfer-Encoding", "chunked") 259 self.endheaders() 260 261 if body is not None: 262 stringish_types = six.string_types + (bytes,) 263 if isinstance(body, stringish_types): 264 body = (body,) 265 for chunk in body: 266 if not chunk: 267 continue 268 if not isinstance(chunk, bytes): 269 chunk = chunk.encode("utf8") 270 len_str = hex(len(chunk))[2:] 271 to_send = bytearray(len_str.encode()) 272 to_send += b"\r\n" 273 to_send += chunk 274 to_send += b"\r\n" 275 self.send(to_send) 276 277 # After the if clause, to always have a closed body 278 self.send(b"0\r\n\r\n") 279 280 281class HTTPSConnection(HTTPConnection): 282 """ 283 Many of the parameters to this constructor are passed to the underlying SSL 284 socket by means of :py:func:`urllib3.util.ssl_wrap_socket`. 285 """ 286 287 default_port = port_by_scheme["https"] 288 289 cert_reqs = None 290 ca_certs = None 291 ca_cert_dir = None 292 ca_cert_data = None 293 ssl_version = None 294 assert_fingerprint = None 295 tls_in_tls_required = False 296 297 def __init__( 298 self, 299 host, 300 port=None, 301 key_file=None, 302 cert_file=None, 303 key_password=None, 304 strict=None, 305 timeout=socket._GLOBAL_DEFAULT_TIMEOUT, 306 ssl_context=None, 307 server_hostname=None, 308 **kw 309 ): 310 311 HTTPConnection.__init__(self, host, port, strict=strict, timeout=timeout, **kw) 312 313 self.key_file = key_file 314 self.cert_file = cert_file 315 self.key_password = key_password 316 self.ssl_context = ssl_context 317 self.server_hostname = server_hostname 318 319 # Required property for Google AppEngine 1.9.0 which otherwise causes 320 # HTTPS requests to go out as HTTP. (See Issue #356) 321 self._protocol = "https" 322 323 def set_cert( 324 self, 325 key_file=None, 326 cert_file=None, 327 cert_reqs=None, 328 key_password=None, 329 ca_certs=None, 330 assert_hostname=None, 331 assert_fingerprint=None, 332 ca_cert_dir=None, 333 ca_cert_data=None, 334 ): 335 """ 336 This method should only be called once, before the connection is used. 337 """ 338 # If cert_reqs is not provided we'll assume CERT_REQUIRED unless we also 339 # have an SSLContext object in which case we'll use its verify_mode. 340 if cert_reqs is None: 341 if self.ssl_context is not None: 342 cert_reqs = self.ssl_context.verify_mode 343 else: 344 cert_reqs = resolve_cert_reqs(None) 345 346 self.key_file = key_file 347 self.cert_file = cert_file 348 self.cert_reqs = cert_reqs 349 self.key_password = key_password 350 self.assert_hostname = assert_hostname 351 self.assert_fingerprint = assert_fingerprint 352 self.ca_certs = ca_certs and os.path.expanduser(ca_certs) 353 self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir) 354 self.ca_cert_data = ca_cert_data 355 356 def connect(self): 357 # Add certificate verification 358 conn = self._new_conn() 359 hostname = self.host 360 tls_in_tls = False 361 362 if self._is_using_tunnel(): 363 if self.tls_in_tls_required: 364 conn = self._connect_tls_proxy(hostname, conn) 365 tls_in_tls = True 366 367 self.sock = conn 368 369 # Calls self._set_hostport(), so self.host is 370 # self._tunnel_host below. 371 self._tunnel() 372 # Mark this connection as not reusable 373 self.auto_open = 0 374 375 # Override the host with the one we're requesting data from. 376 hostname = self._tunnel_host 377 378 server_hostname = hostname 379 if self.server_hostname is not None: 380 server_hostname = self.server_hostname 381 382 is_time_off = datetime.date.today() < RECENT_DATE 383 if is_time_off: 384 warnings.warn( 385 ( 386 "System time is way off (before {0}). This will probably " 387 "lead to SSL verification errors" 388 ).format(RECENT_DATE), 389 SystemTimeWarning, 390 ) 391 392 # Wrap socket using verification with the root certs in 393 # trusted_root_certs 394 default_ssl_context = False 395 if self.ssl_context is None: 396 default_ssl_context = True 397 self.ssl_context = create_urllib3_context( 398 ssl_version=resolve_ssl_version(self.ssl_version), 399 cert_reqs=resolve_cert_reqs(self.cert_reqs), 400 ) 401 402 context = self.ssl_context 403 context.verify_mode = resolve_cert_reqs(self.cert_reqs) 404 405 # Try to load OS default certs if none are given. 406 # Works well on Windows (requires Python3.4+) 407 if ( 408 not self.ca_certs 409 and not self.ca_cert_dir 410 and not self.ca_cert_data 411 and default_ssl_context 412 and hasattr(context, "load_default_certs") 413 ): 414 context.load_default_certs() 415 416 self.sock = ssl_wrap_socket( 417 sock=conn, 418 keyfile=self.key_file, 419 certfile=self.cert_file, 420 key_password=self.key_password, 421 ca_certs=self.ca_certs, 422 ca_cert_dir=self.ca_cert_dir, 423 ca_cert_data=self.ca_cert_data, 424 server_hostname=server_hostname, 425 ssl_context=context, 426 tls_in_tls=tls_in_tls, 427 ) 428 429 # If we're using all defaults and the connection 430 # is TLSv1 or TLSv1.1 we throw a DeprecationWarning 431 # for the host. 432 if ( 433 default_ssl_context 434 and self.ssl_version is None 435 and hasattr(self.sock, "version") 436 and self.sock.version() in {"TLSv1", "TLSv1.1"} 437 ): 438 warnings.warn( 439 "Negotiating TLSv1/TLSv1.1 by default is deprecated " 440 "and will be disabled in urllib3 v2.0.0. Connecting to " 441 "'%s' with '%s' can be enabled by explicitly opting-in " 442 "with 'ssl_version'" % (self.host, self.sock.version()), 443 DeprecationWarning, 444 ) 445 446 if self.assert_fingerprint: 447 assert_fingerprint( 448 self.sock.getpeercert(binary_form=True), self.assert_fingerprint 449 ) 450 elif ( 451 context.verify_mode != ssl.CERT_NONE 452 and not getattr(context, "check_hostname", False) 453 and self.assert_hostname is not False 454 ): 455 # While urllib3 attempts to always turn off hostname matching from 456 # the TLS library, this cannot always be done. So we check whether 457 # the TLS Library still thinks it's matching hostnames. 458 cert = self.sock.getpeercert() 459 if not cert.get("subjectAltName", ()): 460 warnings.warn( 461 ( 462 "Certificate for {0} has no `subjectAltName`, falling back to check for a " 463 "`commonName` for now. This feature is being removed by major browsers and " 464 "deprecated by RFC 2818. (See https://github.com/urllib3/urllib3/issues/497 " 465 "for details.)".format(hostname) 466 ), 467 SubjectAltNameWarning, 468 ) 469 _match_hostname(cert, self.assert_hostname or server_hostname) 470 471 self.is_verified = ( 472 context.verify_mode == ssl.CERT_REQUIRED 473 or self.assert_fingerprint is not None 474 ) 475 476 def _connect_tls_proxy(self, hostname, conn): 477 """ 478 Establish a TLS connection to the proxy using the provided SSL context. 479 """ 480 proxy_config = self.proxy_config 481 ssl_context = proxy_config.ssl_context 482 if ssl_context: 483 # If the user provided a proxy context, we assume CA and client 484 # certificates have already been set 485 return ssl_wrap_socket( 486 sock=conn, 487 server_hostname=hostname, 488 ssl_context=ssl_context, 489 ) 490 491 ssl_context = create_proxy_ssl_context( 492 self.ssl_version, 493 self.cert_reqs, 494 self.ca_certs, 495 self.ca_cert_dir, 496 self.ca_cert_data, 497 ) 498 # By default urllib3's SSLContext disables `check_hostname` and uses 499 # a custom check. For proxies we're good with relying on the default 500 # verification. 501 ssl_context.check_hostname = True 502 503 # If no cert was provided, use only the default options for server 504 # certificate validation 505 return ssl_wrap_socket( 506 sock=conn, 507 ca_certs=self.ca_certs, 508 ca_cert_dir=self.ca_cert_dir, 509 ca_cert_data=self.ca_cert_data, 510 server_hostname=hostname, 511 ssl_context=ssl_context, 512 ) 513 514 515def _match_hostname(cert, asserted_hostname): 516 try: 517 match_hostname(cert, asserted_hostname) 518 except CertificateError as e: 519 log.warning( 520 "Certificate did not match expected hostname: %s. Certificate: %s", 521 asserted_hostname, 522 cert, 523 ) 524 # Add cert to exception and reraise so client code can inspect 525 # the cert when catching the exception, if they want to 526 e._peer_cert = cert 527 raise 528 529 530def _get_default_user_agent(): 531 return "python-urllib3/%s" % __version__ 532 533 534class DummyConnection(object): 535 """Used to detect a failed ConnectionCls import.""" 536 537 pass 538 539 540if not ssl: 541 HTTPSConnection = DummyConnection # noqa: F811 542 543 544VerifiedHTTPSConnection = HTTPSConnection 545