1from __future__ import absolute_import 2 3import datetime 4import logging 5import os 6import re 7import socket 8import warnings 9from socket import error as SocketError 10from socket import timeout as SocketTimeout 11 12from .packages import six 13from .packages.six.moves.http_client import HTTPConnection as _HTTPConnection 14from .packages.six.moves.http_client import HTTPException # noqa: F401 15from .util.proxy import create_proxy_ssl_context 16 17try: # Compiled with SSL? 18 import ssl 19 20 BaseSSLError = ssl.SSLError 21except (ImportError, AttributeError): # Platform-specific: No SSL. 22 ssl = None 23 24 class BaseSSLError(BaseException): 25 pass 26 27 28try: 29 # Python 3: not a no-op, we're adding this to the namespace so it can be imported. 30 ConnectionError = ConnectionError 31except NameError: 32 # Python 2 33 class ConnectionError(Exception): 34 pass 35 36 37try: # Python 3: 38 # Not a no-op, we're adding this to the namespace so it can be imported. 39 BrokenPipeError = BrokenPipeError 40except NameError: # Python 2: 41 42 class BrokenPipeError(Exception): 43 pass 44 45 46from ._collections import HTTPHeaderDict # noqa (historical, removed in v2) 47from ._version import __version__ 48from .exceptions import ( 49 ConnectTimeoutError, 50 NewConnectionError, 51 SubjectAltNameWarning, 52 SystemTimeWarning, 53) 54from .packages.ssl_match_hostname import CertificateError, match_hostname 55from .util import SKIP_HEADER, SKIPPABLE_HEADERS, connection 56from .util.ssl_ import ( 57 assert_fingerprint, 58 create_urllib3_context, 59 is_ipaddress, 60 resolve_cert_reqs, 61 resolve_ssl_version, 62 ssl_wrap_socket, 63) 64 65log = logging.getLogger(__name__) 66 67port_by_scheme = {"http": 80, "https": 443} 68 69# When it comes time to update this value as a part of regular maintenance 70# (ie test_recent_date is failing) update it to ~6 months before the current date. 
RECENT_DATE = datetime.date(2020, 7, 1)

# Matches any single character outside the set allowed in a request method;
# used by ``HTTPConnection.putrequest`` to reject invalid methods.
_CONTAINS_CONTROL_CHAR_RE = re.compile(r"[^-!#$%&'*+.^_`|~0-9a-zA-Z]")


class HTTPConnection(_HTTPConnection, object):
    """
    Based on :class:`http.client.HTTPConnection` but provides an extra constructor
    backwards-compatibility layer between older and newer Pythons.

    Additional keyword parameters are used to configure attributes of the connection.
    Accepted parameters include:

    - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool`
    - ``source_address``: Set the source address for the current connection.
    - ``socket_options``: Set specific options on the underlying socket. If not specified, then
      defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling
      Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy.

      For example, if you wish to enable TCP Keep Alive in addition to the defaults,
      you might pass:

      .. code-block:: python

         HTTPConnection.default_socket_options + [
             (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),
         ]

      Or you may want to disable the defaults by passing an empty list (e.g., ``[]``).
    """

    default_port = port_by_scheme["http"]

    #: Disable Nagle's algorithm by default.
    #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]``
    default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]

    #: Whether this connection verifies the host's certificate.
    is_verified = False

    #: Whether this proxy connection (if used) verifies the proxy host's
    #: certificate.
    proxy_is_verified = None

    def __init__(self, *args, **kw):
        """
        Pop the urllib3-specific keyword arguments (``socket_options``,
        ``proxy``, ``proxy_config`` and, on Python 3, ``strict``) and pass
        everything else through to the base ``HTTPConnection`` constructor.
        """
        if not six.PY2:
            kw.pop("strict", None)

        # Pre-set source_address.
        self.source_address = kw.get("source_address")

        #: The socket options provided by the user. If no options are
        #: provided, we use the default options.
        self.socket_options = kw.pop("socket_options", self.default_socket_options)

        # Proxy options provided by the user.
        self.proxy = kw.pop("proxy", None)
        self.proxy_config = kw.pop("proxy_config", None)

        _HTTPConnection.__init__(self, *args, **kw)

    @property
    def host(self):
        """
        Getter method to remove any trailing dots that indicate the hostname is an FQDN.

        In general, SSL certificates don't include the trailing dot indicating a
        fully-qualified domain name, and thus, they don't validate properly when
        checked against a domain name that includes the dot. In addition, some
        servers may not expect to receive the trailing dot when provided.

        However, the hostname with trailing dot is critical to DNS resolution; doing a
        lookup with the trailing dot will properly only resolve the appropriate FQDN,
        whereas a lookup without a trailing dot will search the system's search domain
        list. Thus, it's important to keep the original host around for use only in
        those cases where it's appropriate (i.e., when doing DNS lookup to establish the
        actual TCP connection across which we're going to send HTTP requests).
        """
        return self._dns_host.rstrip(".")

    @host.setter
    def host(self, value):
        """
        Setter for the `host` property.

        We assume that only urllib3 uses the _dns_host attribute; httplib itself
        only uses `host`, and it seems reasonable that other libraries follow suit.
        """
        self._dns_host = value

    def _new_conn(self):
        """Establish a socket connection and set nodelay settings on it.

        :return: New socket connection.
        :raises ConnectTimeoutError: if creating the connection times out.
        :raises NewConnectionError: on any other socket error.
        """
        extra_kw = {}
        if self.source_address:
            extra_kw["source_address"] = self.source_address

        if self.socket_options:
            extra_kw["socket_options"] = self.socket_options

        try:
            # Use the un-stripped host (``_dns_host``) for the actual lookup.
            conn = connection.create_connection(
                (self._dns_host, self.port), self.timeout, **extra_kw
            )

        except SocketTimeout:
            raise ConnectTimeoutError(
                self,
                "Connection to %s timed out. (connect timeout=%s)"
                % (self.host, self.timeout),
            )

        except SocketError as e:
            raise NewConnectionError(
                self, "Failed to establish a new connection: %s" % e
            )

        return conn

    def _is_using_tunnel(self):
        """Return the tunnel host (truthy) if set, otherwise ``None``."""
        # Google App Engine's httplib does not define _tunnel_host
        return getattr(self, "_tunnel_host", None)

    def _prepare_conn(self, conn):
        """Store ``conn`` as ``self.sock`` and set up the tunnel if one is used."""
        self.sock = conn
        if self._is_using_tunnel():
            # TODO: Fix tunnel so it doesn't depend on self.sock state.
            self._tunnel()
            # Mark this connection as not reusable
            self.auto_open = 0

    def connect(self):
        """Open a new socket connection and prepare it for use."""
        conn = self._new_conn()
        self._prepare_conn(conn)

    def putrequest(self, method, url, *args, **kwargs):
        """ """
        # Empty docstring because the indentation of CPython's implementation
        # is broken but we don't want this method in our documentation.
        match = _CONTAINS_CONTROL_CHAR_RE.search(method)
        if match:
            raise ValueError(
                "Method cannot contain non-token characters %r (found at least %r)"
                % (method, match.group())
            )

        return _HTTPConnection.putrequest(self, method, url, *args, **kwargs)

    def putheader(self, header, *values):
        """ """
        # A header is sent normally unless one of its values is SKIP_HEADER.
        # In that case it is silently dropped, but only headers listed in
        # SKIPPABLE_HEADERS may be skipped this way.
        if not any(isinstance(v, str) and v == SKIP_HEADER for v in values):
            _HTTPConnection.putheader(self, header, *values)
        elif six.ensure_str(header.lower()) not in SKIPPABLE_HEADERS:
            raise ValueError(
                "urllib3.util.SKIP_HEADER only supports '%s'"
                % ("', '".join(map(str.title, sorted(SKIPPABLE_HEADERS))),)
            )

    def request(self, method, url, body=None, headers=None):
        """
        Send a request, adding a default ``User-Agent`` header when the
        caller did not supply one. The caller's ``headers`` mapping is copied
        rather than modified.
        """
        if headers is None:
            headers = {}
        else:
            # Avoid modifying the headers passed into .request()
            headers = headers.copy()
        if "user-agent" not in (six.ensure_str(k.lower()) for k in headers):
            headers["User-Agent"] = _get_default_user_agent()
        super(HTTPConnection, self).request(method, url, body=body, headers=headers)

    def request_chunked(self, method, url, body=None, headers=None):
        """
        Alternative to the common request method, which sends the
        body with chunked encoding and not as one block
        """
        headers = headers or {}
        header_keys = {six.ensure_str(k.lower()) for k in headers}
        skip_accept_encoding = "accept-encoding" in header_keys
        skip_host = "host" in header_keys
        self.putrequest(
            method, url, skip_accept_encoding=skip_accept_encoding, skip_host=skip_host
        )
        if "user-agent" not in header_keys:
            self.putheader("User-Agent", _get_default_user_agent())
        for header, value in headers.items():
            self.putheader(header, value)
        if "transfer-encoding" not in header_keys:
            self.putheader("Transfer-Encoding", "chunked")
        self.endheaders()

        if body is not None:
            stringish_types = six.string_types + (bytes,)
            if isinstance(body, stringish_types):
                # Treat a single string/bytes body as a one-chunk iterable.
                body = (body,)
            for chunk in body:
                if not chunk:
                    # Skip empty chunks: a zero-length chunk would be read
                    # as the terminating chunk sent below.
                    continue
                if not isinstance(chunk, bytes):
                    chunk = chunk.encode("utf8")
                # Chunk framing: hex length, CRLF, data, CRLF.
                len_str = hex(len(chunk))[2:]
                to_send = bytearray(len_str.encode())
                to_send += b"\r\n"
                to_send += chunk
                to_send += b"\r\n"
                self.send(to_send)

        # After the if clause, to always have a closed body
        self.send(b"0\r\n\r\n")


class HTTPSConnection(HTTPConnection):
    """
    Many of the parameters to this constructor are passed to the underlying SSL
    socket by means of :py:func:`urllib3.util.ssl_wrap_socket`.
    """

    default_port = port_by_scheme["https"]

    cert_reqs = None
    ca_certs = None
    ca_cert_dir = None
    ca_cert_data = None
    ssl_version = None
    assert_fingerprint = None
    # Class-level default so connect() can read ``self.assert_hostname`` even
    # when set_cert() was never called (previously an AttributeError).
    assert_hostname = None
    tls_in_tls_required = False

    def __init__(
        self,
        host,
        port=None,
        key_file=None,
        cert_file=None,
        key_password=None,
        strict=None,
        timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
        ssl_context=None,
        server_hostname=None,
        **kw
    ):
        """
        Initialise the underlying :class:`HTTPConnection` and record the
        TLS-related settings (client key/cert, key password, SSL context and
        server hostname override) on the instance.
        """
        HTTPConnection.__init__(self, host, port, strict=strict, timeout=timeout, **kw)

        self.key_file = key_file
        self.cert_file = cert_file
        self.key_password = key_password
        self.ssl_context = ssl_context
        self.server_hostname = server_hostname

        # Required property for Google AppEngine 1.9.0 which otherwise causes
        # HTTPS requests to go out as HTTP. (See Issue #356)
        self._protocol = "https"

    def set_cert(
        self,
        key_file=None,
        cert_file=None,
        cert_reqs=None,
        key_password=None,
        ca_certs=None,
        assert_hostname=None,
        assert_fingerprint=None,
        ca_cert_dir=None,
        ca_cert_data=None,
    ):
        """
        This method should only be called once, before the connection is used.
        """
        # If cert_reqs is not provided we'll assume CERT_REQUIRED unless we also
        # have an SSLContext object in which case we'll use its verify_mode.
        if cert_reqs is None:
            if self.ssl_context is not None:
                cert_reqs = self.ssl_context.verify_mode
            else:
                cert_reqs = resolve_cert_reqs(None)

        self.key_file = key_file
        self.cert_file = cert_file
        self.cert_reqs = cert_reqs
        self.key_password = key_password
        self.assert_hostname = assert_hostname
        self.assert_fingerprint = assert_fingerprint
        # ``x and f(x)`` leaves falsy values (None, "") untouched.
        self.ca_certs = ca_certs and os.path.expanduser(ca_certs)
        self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir)
        self.ca_cert_data = ca_cert_data

    def connect(self):
        """
        Open the socket, set up any tunnel, wrap the socket with TLS and
        verify the peer (by fingerprint or hostname), then record the result
        in ``self.is_verified``.
        """
        # Add certificate verification
        conn = self._new_conn()
        hostname = self.host
        tls_in_tls = False

        if self._is_using_tunnel():
            if self.tls_in_tls_required:
                conn = self._connect_tls_proxy(hostname, conn)
                tls_in_tls = True

            self.sock = conn

            # Calls self._set_hostport(), so self.host is
            # self._tunnel_host below.
            self._tunnel()
            # Mark this connection as not reusable
            self.auto_open = 0

            # Override the host with the one we're requesting data from.
            hostname = self._tunnel_host

        server_hostname = hostname
        if self.server_hostname is not None:
            server_hostname = self.server_hostname

        is_time_off = datetime.date.today() < RECENT_DATE
        if is_time_off:
            warnings.warn(
                (
                    "System time is way off (before {0}). This will probably "
                    "lead to SSL verification errors"
                ).format(RECENT_DATE),
                SystemTimeWarning,
            )

        # Wrap socket using verification with the root certs in
        # trusted_root_certs
        default_ssl_context = False
        if self.ssl_context is None:
            default_ssl_context = True
            self.ssl_context = create_urllib3_context(
                ssl_version=resolve_ssl_version(self.ssl_version),
                cert_reqs=resolve_cert_reqs(self.cert_reqs),
            )

        context = self.ssl_context
        context.verify_mode = resolve_cert_reqs(self.cert_reqs)

        # Try to load OS default certs if none are given.
        # Works well on Windows (requires Python3.4+)
        if (
            not self.ca_certs
            and not self.ca_cert_dir
            and not self.ca_cert_data
            and default_ssl_context
            and hasattr(context, "load_default_certs")
        ):
            context.load_default_certs()

        self.sock = ssl_wrap_socket(
            sock=conn,
            keyfile=self.key_file,
            certfile=self.cert_file,
            key_password=self.key_password,
            ca_certs=self.ca_certs,
            ca_cert_dir=self.ca_cert_dir,
            ca_cert_data=self.ca_cert_data,
            server_hostname=server_hostname,
            ssl_context=context,
            tls_in_tls=tls_in_tls,
        )

        # If we're using all defaults and the connection
        # is TLSv1 or TLSv1.1 we throw a DeprecationWarning
        # for the host.
        if (
            default_ssl_context
            and self.ssl_version is None
            and hasattr(self.sock, "version")
            and self.sock.version() in {"TLSv1", "TLSv1.1"}
        ):
            warnings.warn(
                "Negotiating TLSv1/TLSv1.1 by default is deprecated "
                "and will be disabled in urllib3 v2.0.0. Connecting to "
                "'%s' with '%s' can be enabled by explicitly opting-in "
                "with 'ssl_version'" % (self.host, self.sock.version()),
                DeprecationWarning,
            )

        if self.assert_fingerprint:
            assert_fingerprint(
                self.sock.getpeercert(binary_form=True), self.assert_fingerprint
            )
        elif (
            context.verify_mode != ssl.CERT_NONE
            and not getattr(context, "check_hostname", False)
            and self.assert_hostname is not False
        ):
            # While urllib3 attempts to always turn off hostname matching from
            # the TLS library, this cannot always be done. So we check whether
            # the TLS Library still thinks it's matching hostnames.
            cert = self.sock.getpeercert()
            if not cert.get("subjectAltName", ()):
                warnings.warn(
                    (
                        "Certificate for {0} has no `subjectAltName`, falling back to check for a "
                        "`commonName` for now. This feature is being removed by major browsers and "
                        "deprecated by RFC 2818. (See https://github.com/urllib3/urllib3/issues/497 "
                        "for details.)".format(hostname)
                    ),
                    SubjectAltNameWarning,
                )
            _match_hostname(cert, self.assert_hostname or server_hostname)

        self.is_verified = (
            context.verify_mode == ssl.CERT_REQUIRED
            or self.assert_fingerprint is not None
        )

    def _connect_tls_proxy(self, hostname, conn):
        """
        Establish a TLS connection to the proxy using the provided SSL context.

        :param hostname: Proxy hostname, used as the TLS server hostname and
            for certificate hostname matching.
        :param conn: Already-connected socket to wrap.
        :return: The TLS-wrapped socket.
        """
        proxy_config = self.proxy_config
        ssl_context = proxy_config.ssl_context
        if ssl_context:
            # If the user provided a proxy context, we assume CA and client
            # certificates have already been set
            return ssl_wrap_socket(
                sock=conn,
                server_hostname=hostname,
                ssl_context=ssl_context,
            )

        ssl_context = create_proxy_ssl_context(
            self.ssl_version,
            self.cert_reqs,
            self.ca_certs,
            self.ca_cert_dir,
            self.ca_cert_data,
        )

        # If no cert was provided, use only the default options for server
        # certificate validation
        # (Local is named ``tls_sock`` so it does not shadow the ``socket`` module.)
        tls_sock = ssl_wrap_socket(
            sock=conn,
            ca_certs=self.ca_certs,
            ca_cert_dir=self.ca_cert_dir,
            ca_cert_data=self.ca_cert_data,
            server_hostname=hostname,
            ssl_context=ssl_context,
        )

        if ssl_context.verify_mode != ssl.CERT_NONE and not getattr(
            ssl_context, "check_hostname", False
        ):
            # While urllib3 attempts to always turn off hostname matching from
            # the TLS library, this cannot always be done. So we check whether
            # the TLS Library still thinks it's matching hostnames.
            cert = tls_sock.getpeercert()
            if not cert.get("subjectAltName", ()):
                warnings.warn(
                    (
                        "Certificate for {0} has no `subjectAltName`, falling back to check for a "
                        "`commonName` for now. This feature is being removed by major browsers and "
                        "deprecated by RFC 2818. (See https://github.com/urllib3/urllib3/issues/497 "
                        "for details.)".format(hostname)
                    ),
                    SubjectAltNameWarning,
                )
            _match_hostname(cert, hostname)

        self.proxy_is_verified = ssl_context.verify_mode == ssl.CERT_REQUIRED
        return tls_sock


def _match_hostname(cert, asserted_hostname):
    """
    Check ``cert`` against ``asserted_hostname`` via ``match_hostname``.

    On mismatch the failure is logged, the certificate is attached to the
    exception as ``_peer_cert`` and the ``CertificateError`` is re-raised.
    """
    # Our upstream implementation of ssl.match_hostname()
    # only applies this normalization to IP addresses so it doesn't
    # match DNS SANs so we do the same thing!
    # Only the square brackets are stripped: stripping "u" as well would turn
    # a hostname such as "u1.2.3.4" into the IP address "1.2.3.4".
    stripped_hostname = asserted_hostname.strip("[]")
    if is_ipaddress(stripped_hostname):
        asserted_hostname = stripped_hostname

    try:
        match_hostname(cert, asserted_hostname)
    except CertificateError as e:
        log.warning(
            "Certificate did not match expected hostname: %s. Certificate: %s",
            asserted_hostname,
            cert,
        )
        # Add cert to exception and reraise so client code can inspect
        # the cert when catching the exception, if they want to
        e._peer_cert = cert
        raise


def _get_default_user_agent():
    """Return the default ``User-Agent`` value, ``python-urllib3/<version>``."""
    return "python-urllib3/%s" % __version__


class DummyConnection(object):
    """Used to detect a failed ConnectionCls import."""

    pass


# Without the ssl module, HTTPS connections cannot be made; expose the dummy
# class under the same name instead.
if not ssl:
    HTTPSConnection = DummyConnection  # noqa: F811


VerifiedHTTPSConnection = HTTPSConnection