1from __future__ import absolute_import 2 3import datetime 4import logging 5import os 6import re 7import socket 8import warnings 9from socket import error as SocketError 10from socket import timeout as SocketTimeout 11 12from .packages import six 13from .packages.six.moves.http_client import HTTPConnection as _HTTPConnection 14from .packages.six.moves.http_client import HTTPException # noqa: F401 15from .util.proxy import create_proxy_ssl_context 16 17try: # Compiled with SSL? 18 import ssl 19 20 BaseSSLError = ssl.SSLError 21except (ImportError, AttributeError): # Platform-specific: No SSL. 22 ssl = None 23 24 class BaseSSLError(BaseException): 25 pass 26 27 28try: 29 # Python 3: not a no-op, we're adding this to the namespace so it can be imported. 30 ConnectionError = ConnectionError 31except NameError: 32 # Python 2 33 class ConnectionError(Exception): 34 pass 35 36 37try: # Python 3: 38 # Not a no-op, we're adding this to the namespace so it can be imported. 39 BrokenPipeError = BrokenPipeError 40except NameError: # Python 2: 41 42 class BrokenPipeError(Exception): 43 pass 44 45 46from ._collections import HTTPHeaderDict # noqa (historical, removed in v2) 47from ._version import __version__ 48from .exceptions import ( 49 ConnectTimeoutError, 50 NewConnectionError, 51 SubjectAltNameWarning, 52 SystemTimeWarning, 53) 54from .packages.ssl_match_hostname import CertificateError, match_hostname 55from .util import SKIP_HEADER, SKIPPABLE_HEADERS, connection 56from .util.ssl_ import ( 57 assert_fingerprint, 58 create_urllib3_context, 59 resolve_cert_reqs, 60 resolve_ssl_version, 61 ssl_wrap_socket, 62) 63 64log = logging.getLogger(__name__) 65 66port_by_scheme = {"http": 80, "https": 443} 67 68# When it comes time to update this value as a part of regular maintenance 69# (ie test_recent_date is failing) update it to ~6 months before the current date. 70RECENT_DATE = datetime.date(2019, 1, 1) 71 72_CONTAINS_CONTROL_CHAR_RE = re.compile(r"[^-!#$%&'*+.^_`|~0-9a-zA-Z]") 73 74 75class HTTPConnection(_HTTPConnection, object): 76 """ 77 Based on :class:`http.client.HTTPConnection` but provides an extra constructor 78 backwards-compatibility layer between older and newer Pythons. 79 80 Additional keyword parameters are used to configure attributes of the connection. 81 Accepted parameters include: 82 83 - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool` 84 - ``source_address``: Set the source address for the current connection. 85 - ``socket_options``: Set specific options on the underlying socket. If not specified, then 86 defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling 87 Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy. 88 89 For example, if you wish to enable TCP Keep Alive in addition to the defaults, 90 you might pass: 91 92 .. code-block:: python 93 94 HTTPConnection.default_socket_options + [ 95 (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1), 96 ] 97 98 Or you may want to disable the defaults by passing an empty list (e.g., ``[]``). 99 """ 100 101 default_port = port_by_scheme["http"] 102 103 #: Disable Nagle's algorithm by default. 104 #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]`` 105 default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] 106 107 #: Whether this connection verifies the host's certificate. 108 is_verified = False 109 110 def __init__(self, *args, **kw): 111 if not six.PY2: 112 kw.pop("strict", None) 113 114 # Pre-set source_address. 115 self.source_address = kw.get("source_address") 116 117 #: The socket options provided by the user. If no options are 118 #: provided, we use the default options. 119 self.socket_options = kw.pop("socket_options", self.default_socket_options) 120 121 # Proxy options provided by the user. 122 self.proxy = kw.pop("proxy", None) 123 self.proxy_config = kw.pop("proxy_config", None) 124 125 _HTTPConnection.__init__(self, *args, **kw) 126 127 @property 128 def host(self): 129 """ 130 Getter method to remove any trailing dots that indicate the hostname is an FQDN. 131 132 In general, SSL certificates don't include the trailing dot indicating a 133 fully-qualified domain name, and thus, they don't validate properly when 134 checked against a domain name that includes the dot. In addition, some 135 servers may not expect to receive the trailing dot when provided. 136 137 However, the hostname with trailing dot is critical to DNS resolution; doing a 138 lookup with the trailing dot will properly only resolve the appropriate FQDN, 139 whereas a lookup without a trailing dot will search the system's search domain 140 list. Thus, it's important to keep the original host around for use only in 141 those cases where it's appropriate (i.e., when doing DNS lookup to establish the 142 actual TCP connection across which we're going to send HTTP requests). 143 """ 144 return self._dns_host.rstrip(".") 145 146 @host.setter 147 def host(self, value): 148 """ 149 Setter for the `host` property. 150 151 We assume that only urllib3 uses the _dns_host attribute; httplib itself 152 only uses `host`, and it seems reasonable that other libraries follow suit. 153 """ 154 self._dns_host = value 155 156 def _new_conn(self): 157 """Establish a socket connection and set nodelay settings on it. 158 159 :return: New socket connection. 160 """ 161 extra_kw = {} 162 if self.source_address: 163 extra_kw["source_address"] = self.source_address 164 165 if self.socket_options: 166 extra_kw["socket_options"] = self.socket_options 167 168 try: 169 conn = connection.create_connection( 170 (self._dns_host, self.port), self.timeout, **extra_kw 171 ) 172 173 except SocketTimeout: 174 raise ConnectTimeoutError( 175 self, 176 "Connection to %s timed out. (connect timeout=%s)" 177 % (self.host, self.timeout), 178 ) 179 180 except SocketError as e: 181 raise NewConnectionError( 182 self, "Failed to establish a new connection: %s" % e 183 ) 184 185 return conn 186 187 def _is_using_tunnel(self): 188 # Google App Engine's httplib does not define _tunnel_host 189 return getattr(self, "_tunnel_host", None) 190 191 def _prepare_conn(self, conn): 192 self.sock = conn 193 if self._is_using_tunnel(): 194 # TODO: Fix tunnel so it doesn't depend on self.sock state. 195 self._tunnel() 196 # Mark this connection as not reusable 197 self.auto_open = 0 198 199 def connect(self): 200 conn = self._new_conn() 201 self._prepare_conn(conn) 202 203 def putrequest(self, method, url, *args, **kwargs): 204 """""" 205 # Empty docstring because the indentation of CPython's implementation 206 # is broken but we don't want this method in our documentation. 207 match = _CONTAINS_CONTROL_CHAR_RE.search(method) 208 if match: 209 raise ValueError( 210 "Method cannot contain non-token characters %r (found at least %r)" 211 % (method, match.group()) 212 ) 213 214 return _HTTPConnection.putrequest(self, method, url, *args, **kwargs) 215 216 def putheader(self, header, *values): 217 """""" 218 if SKIP_HEADER not in values: 219 _HTTPConnection.putheader(self, header, *values) 220 elif six.ensure_str(header.lower()) not in SKIPPABLE_HEADERS: 221 raise ValueError( 222 "urllib3.util.SKIP_HEADER only supports '%s'" 223 % ("', '".join(map(str.title, sorted(SKIPPABLE_HEADERS))),) 224 ) 225 226 def request(self, method, url, body=None, headers=None): 227 if headers is None: 228 headers = {} 229 else: 230 # Avoid modifying the headers passed into .request() 231 headers = headers.copy() 232 if "user-agent" not in (six.ensure_str(k.lower()) for k in headers): 233 headers["User-Agent"] = _get_default_user_agent() 234 super(HTTPConnection, self).request(method, url, body=body, headers=headers) 235 236 def request_chunked(self, method, url, body=None, headers=None): 237 """ 238 Alternative to the common request method, which sends the 239 body with chunked encoding and not as one block 240 """ 241 headers = headers or {} 242 header_keys = set([six.ensure_str(k.lower()) for k in headers]) 243 skip_accept_encoding = "accept-encoding" in header_keys 244 skip_host = "host" in header_keys 245 self.putrequest( 246 method, url, skip_accept_encoding=skip_accept_encoding, skip_host=skip_host 247 ) 248 if "user-agent" not in header_keys: 249 self.putheader("User-Agent", _get_default_user_agent()) 250 for header, value in headers.items(): 251 self.putheader(header, value) 252 if "transfer-encoding" not in headers: 253 self.putheader("Transfer-Encoding", "chunked") 254 self.endheaders() 255 256 if body is not None: 257 stringish_types = six.string_types + (bytes,) 258 if isinstance(body, stringish_types): 259 body = (body,) 260 for chunk in body: 261 if not chunk: 262 continue 263 if not isinstance(chunk, bytes): 264 chunk = chunk.encode("utf8") 265 len_str = hex(len(chunk))[2:] 266 to_send = bytearray(len_str.encode()) 267 to_send += b"\r\n" 268 to_send += chunk 269 to_send += b"\r\n" 270 self.send(to_send) 271 272 # After the if clause, to always have a closed body 273 self.send(b"0\r\n\r\n") 274 275 276class HTTPSConnection(HTTPConnection): 277 """ 278 Many of the parameters to this constructor are passed to the underlying SSL 279 socket by means of :py:func:`urllib3.util.ssl_wrap_socket`. 280 """ 281 282 default_port = port_by_scheme["https"] 283 284 cert_reqs = None 285 ca_certs = None 286 ca_cert_dir = None 287 ca_cert_data = None 288 ssl_version = None 289 assert_fingerprint = None 290 tls_in_tls_required = False 291 292 def __init__( 293 self, 294 host, 295 port=None, 296 key_file=None, 297 cert_file=None, 298 key_password=None, 299 strict=None, 300 timeout=socket._GLOBAL_DEFAULT_TIMEOUT, 301 ssl_context=None, 302 server_hostname=None, 303 **kw 304 ): 305 306 HTTPConnection.__init__(self, host, port, strict=strict, timeout=timeout, **kw) 307 308 self.key_file = key_file 309 self.cert_file = cert_file 310 self.key_password = key_password 311 self.ssl_context = ssl_context 312 self.server_hostname = server_hostname 313 314 # Required property for Google AppEngine 1.9.0 which otherwise causes 315 # HTTPS requests to go out as HTTP. (See Issue #356) 316 self._protocol = "https" 317 318 def set_cert( 319 self, 320 key_file=None, 321 cert_file=None, 322 cert_reqs=None, 323 key_password=None, 324 ca_certs=None, 325 assert_hostname=None, 326 assert_fingerprint=None, 327 ca_cert_dir=None, 328 ca_cert_data=None, 329 ): 330 """ 331 This method should only be called once, before the connection is used. 332 """ 333 # If cert_reqs is not provided we'll assume CERT_REQUIRED unless we also 334 # have an SSLContext object in which case we'll use its verify_mode. 335 if cert_reqs is None: 336 if self.ssl_context is not None: 337 cert_reqs = self.ssl_context.verify_mode 338 else: 339 cert_reqs = resolve_cert_reqs(None) 340 341 self.key_file = key_file 342 self.cert_file = cert_file 343 self.cert_reqs = cert_reqs 344 self.key_password = key_password 345 self.assert_hostname = assert_hostname 346 self.assert_fingerprint = assert_fingerprint 347 self.ca_certs = ca_certs and os.path.expanduser(ca_certs) 348 self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir) 349 self.ca_cert_data = ca_cert_data 350 351 def connect(self): 352 # Add certificate verification 353 conn = self._new_conn() 354 hostname = self.host 355 tls_in_tls = False 356 357 if self._is_using_tunnel(): 358 if self.tls_in_tls_required: 359 conn = self._connect_tls_proxy(hostname, conn) 360 tls_in_tls = True 361 362 self.sock = conn 363 364 # Calls self._set_hostport(), so self.host is 365 # self._tunnel_host below. 366 self._tunnel() 367 # Mark this connection as not reusable 368 self.auto_open = 0 369 370 # Override the host with the one we're requesting data from. 371 hostname = self._tunnel_host 372 373 server_hostname = hostname 374 if self.server_hostname is not None: 375 server_hostname = self.server_hostname 376 377 is_time_off = datetime.date.today() < RECENT_DATE 378 if is_time_off: 379 warnings.warn( 380 ( 381 "System time is way off (before {0}). This will probably " 382 "lead to SSL verification errors" 383 ).format(RECENT_DATE), 384 SystemTimeWarning, 385 ) 386 387 # Wrap socket using verification with the root certs in 388 # trusted_root_certs 389 default_ssl_context = False 390 if self.ssl_context is None: 391 default_ssl_context = True 392 self.ssl_context = create_urllib3_context( 393 ssl_version=resolve_ssl_version(self.ssl_version), 394 cert_reqs=resolve_cert_reqs(self.cert_reqs), 395 ) 396 397 context = self.ssl_context 398 context.verify_mode = resolve_cert_reqs(self.cert_reqs) 399 400 # Try to load OS default certs if none are given. 401 # Works well on Windows (requires Python3.4+) 402 if ( 403 not self.ca_certs 404 and not self.ca_cert_dir 405 and not self.ca_cert_data 406 and default_ssl_context 407 and hasattr(context, "load_default_certs") 408 ): 409 context.load_default_certs() 410 411 self.sock = ssl_wrap_socket( 412 sock=conn, 413 keyfile=self.key_file, 414 certfile=self.cert_file, 415 key_password=self.key_password, 416 ca_certs=self.ca_certs, 417 ca_cert_dir=self.ca_cert_dir, 418 ca_cert_data=self.ca_cert_data, 419 server_hostname=server_hostname, 420 ssl_context=context, 421 tls_in_tls=tls_in_tls, 422 ) 423 424 # If we're using all defaults and the connection 425 # is TLSv1 or TLSv1.1 we throw a DeprecationWarning 426 # for the host. 427 if ( 428 default_ssl_context 429 and self.ssl_version is None 430 and hasattr(self.sock, "version") 431 and self.sock.version() in {"TLSv1", "TLSv1.1"} 432 ): 433 warnings.warn( 434 "Negotiating TLSv1/TLSv1.1 by default is deprecated " 435 "and will be disabled in urllib3 v2.0.0. Connecting to " 436 "'%s' with '%s' can be enabled by explicitly opting-in " 437 "with 'ssl_version'" % (self.host, self.sock.version()), 438 DeprecationWarning, 439 ) 440 441 if self.assert_fingerprint: 442 assert_fingerprint( 443 self.sock.getpeercert(binary_form=True), self.assert_fingerprint 444 ) 445 elif ( 446 context.verify_mode != ssl.CERT_NONE 447 and not getattr(context, "check_hostname", False) 448 and self.assert_hostname is not False 449 ): 450 # While urllib3 attempts to always turn off hostname matching from 451 # the TLS library, this cannot always be done. So we check whether 452 # the TLS Library still thinks it's matching hostnames. 453 cert = self.sock.getpeercert() 454 if not cert.get("subjectAltName", ()): 455 warnings.warn( 456 ( 457 "Certificate for {0} has no `subjectAltName`, falling back to check for a " 458 "`commonName` for now. This feature is being removed by major browsers and " 459 "deprecated by RFC 2818. (See https://github.com/urllib3/urllib3/issues/497 " 460 "for details.)".format(hostname) 461 ), 462 SubjectAltNameWarning, 463 ) 464 _match_hostname(cert, self.assert_hostname or server_hostname) 465 466 self.is_verified = ( 467 context.verify_mode == ssl.CERT_REQUIRED 468 or self.assert_fingerprint is not None 469 ) 470 471 def _connect_tls_proxy(self, hostname, conn): 472 """ 473 Establish a TLS connection to the proxy using the provided SSL context. 474 """ 475 proxy_config = self.proxy_config 476 ssl_context = proxy_config.ssl_context 477 if ssl_context: 478 # If the user provided a proxy context, we assume CA and client 479 # certificates have already been set 480 return ssl_wrap_socket( 481 sock=conn, 482 server_hostname=hostname, 483 ssl_context=ssl_context, 484 ) 485 486 ssl_context = create_proxy_ssl_context( 487 self.ssl_version, 488 self.cert_reqs, 489 self.ca_certs, 490 self.ca_cert_dir, 491 self.ca_cert_data, 492 ) 493 494 # If no cert was provided, use only the default options for server 495 # certificate validation 496 return ssl_wrap_socket( 497 sock=conn, 498 ca_certs=self.ca_certs, 499 ca_cert_dir=self.ca_cert_dir, 500 ca_cert_data=self.ca_cert_data, 501 server_hostname=hostname, 502 ssl_context=ssl_context, 503 ) 504 505 506def _match_hostname(cert, asserted_hostname): 507 try: 508 match_hostname(cert, asserted_hostname) 509 except CertificateError as e: 510 log.warning( 511 "Certificate did not match expected hostname: %s. Certificate: %s", 512 asserted_hostname, 513 cert, 514 ) 515 # Add cert to exception and reraise so client code can inspect 516 # the cert when catching the exception, if they want to 517 e._peer_cert = cert 518 raise 519 520 521def _get_default_user_agent(): 522 return "python-urllib3/%s" % __version__ 523 524 525class DummyConnection(object): 526 """Used to detect a failed ConnectionCls import.""" 527 528 pass 529 530 531if not ssl: 532 HTTPSConnection = DummyConnection # noqa: F811 533 534 535VerifiedHTTPSConnection = HTTPSConnection 536