1from __future__ import absolute_import 2 3import collections 4import functools 5import logging 6 7from ._collections import RecentlyUsedContainer 8from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, port_by_scheme 9from .exceptions import ( 10 LocationValueError, 11 MaxRetryError, 12 ProxySchemeUnknown, 13 ProxySchemeUnsupported, 14 URLSchemeUnknown, 15) 16from .packages import six 17from .packages.six.moves.urllib.parse import urljoin 18from .request import RequestMethods 19from .util.proxy import connection_requires_http_tunnel 20from .util.retry import Retry 21from .util.url import parse_url 22 23__all__ = ["PoolManager", "ProxyManager", "proxy_from_url"] 24 25 26log = logging.getLogger(__name__) 27 28SSL_KEYWORDS = ( 29 "key_file", 30 "cert_file", 31 "cert_reqs", 32 "ca_certs", 33 "ssl_version", 34 "ca_cert_dir", 35 "ssl_context", 36 "key_password", 37) 38 39# All known keyword arguments that could be provided to the pool manager, its 40# pools, or the underlying connections. This is used to construct a pool key. 41_key_fields = ( 42 "key_scheme", # str 43 "key_host", # str 44 "key_port", # int 45 "key_timeout", # int or float or Timeout 46 "key_retries", # int or Retry 47 "key_strict", # bool 48 "key_block", # bool 49 "key_source_address", # str 50 "key_key_file", # str 51 "key_key_password", # str 52 "key_cert_file", # str 53 "key_cert_reqs", # str 54 "key_ca_certs", # str 55 "key_ssl_version", # str 56 "key_ca_cert_dir", # str 57 "key_ssl_context", # instance of ssl.SSLContext or urllib3.util.ssl_.SSLContext 58 "key_maxsize", # int 59 "key_headers", # dict 60 "key__proxy", # parsed proxy url 61 "key__proxy_headers", # dict 62 "key__proxy_config", # class 63 "key_socket_options", # list of (level (int), optname (int), value (int or str)) tuples 64 "key__socks_options", # dict 65 "key_assert_hostname", # bool or string 66 "key_assert_fingerprint", # str 67 "key_server_hostname", # str 68) 69 70#: The namedtuple class used to construct keys for the connection pool. 71#: All custom key schemes should include the fields in this key at a minimum. 72PoolKey = collections.namedtuple("PoolKey", _key_fields) 73 74_proxy_config_fields = ("ssl_context", "use_forwarding_for_https") 75ProxyConfig = collections.namedtuple("ProxyConfig", _proxy_config_fields) 76 77 78def _default_key_normalizer(key_class, request_context): 79 """ 80 Create a pool key out of a request context dictionary. 81 82 According to RFC 3986, both the scheme and host are case-insensitive. 83 Therefore, this function normalizes both before constructing the pool 84 key for an HTTPS request. If you wish to change this behaviour, provide 85 alternate callables to ``key_fn_by_scheme``. 86 87 :param key_class: 88 The class to use when constructing the key. This should be a namedtuple 89 with the ``scheme`` and ``host`` keys at a minimum. 90 :type key_class: namedtuple 91 :param request_context: 92 A dictionary-like object that contain the context for a request. 93 :type request_context: dict 94 95 :return: A namedtuple that can be used as a connection pool key. 96 :rtype: PoolKey 97 """ 98 # Since we mutate the dictionary, make a copy first 99 context = request_context.copy() 100 context["scheme"] = context["scheme"].lower() 101 context["host"] = context["host"].lower() 102 103 # These are both dictionaries and need to be transformed into frozensets 104 for key in ("headers", "_proxy_headers", "_socks_options"): 105 if key in context and context[key] is not None: 106 context[key] = frozenset(context[key].items()) 107 108 # The socket_options key may be a list and needs to be transformed into a 109 # tuple. 110 socket_opts = context.get("socket_options") 111 if socket_opts is not None: 112 context["socket_options"] = tuple(socket_opts) 113 114 # Map the kwargs to the names in the namedtuple - this is necessary since 115 # namedtuples can't have fields starting with '_'. 116 for key in list(context.keys()): 117 context["key_" + key] = context.pop(key) 118 119 # Default to ``None`` for keys missing from the context 120 for field in key_class._fields: 121 if field not in context: 122 context[field] = None 123 124 return key_class(**context) 125 126 127#: A dictionary that maps a scheme to a callable that creates a pool key. 128#: This can be used to alter the way pool keys are constructed, if desired. 129#: Each PoolManager makes a copy of this dictionary so they can be configured 130#: globally here, or individually on the instance. 131key_fn_by_scheme = { 132 "http": functools.partial(_default_key_normalizer, PoolKey), 133 "https": functools.partial(_default_key_normalizer, PoolKey), 134} 135 136pool_classes_by_scheme = {"http": HTTPConnectionPool, "https": HTTPSConnectionPool} 137 138 139class PoolManager(RequestMethods): 140 """ 141 Allows for arbitrary requests while transparently keeping track of 142 necessary connection pools for you. 143 144 :param num_pools: 145 Number of connection pools to cache before discarding the least 146 recently used pool. 147 148 :param headers: 149 Headers to include with all requests, unless other headers are given 150 explicitly. 151 152 :param \\**connection_pool_kw: 153 Additional parameters are used to create fresh 154 :class:`urllib3.connectionpool.ConnectionPool` instances. 155 156 Example:: 157 158 >>> manager = PoolManager(num_pools=2) 159 >>> r = manager.request('GET', 'http://google.com/') 160 >>> r = manager.request('GET', 'http://google.com/mail') 161 >>> r = manager.request('GET', 'http://yahoo.com/') 162 >>> len(manager.pools) 163 2 164 165 """ 166 167 proxy = None 168 proxy_config = None 169 170 def __init__(self, num_pools=10, headers=None, **connection_pool_kw): 171 RequestMethods.__init__(self, headers) 172 self.connection_pool_kw = connection_pool_kw 173 self.pools = RecentlyUsedContainer(num_pools, dispose_func=lambda p: p.close()) 174 175 # Locally set the pool classes and keys so other PoolManagers can 176 # override them. 177 self.pool_classes_by_scheme = pool_classes_by_scheme 178 self.key_fn_by_scheme = key_fn_by_scheme.copy() 179 180 def __enter__(self): 181 return self 182 183 def __exit__(self, exc_type, exc_val, exc_tb): 184 self.clear() 185 # Return False to re-raise any potential exceptions 186 return False 187 188 def _new_pool(self, scheme, host, port, request_context=None): 189 """ 190 Create a new :class:`urllib3.connectionpool.ConnectionPool` based on host, port, scheme, and 191 any additional pool keyword arguments. 192 193 If ``request_context`` is provided, it is provided as keyword arguments 194 to the pool class used. This method is used to actually create the 195 connection pools handed out by :meth:`connection_from_url` and 196 companion methods. It is intended to be overridden for customization. 197 """ 198 pool_cls = self.pool_classes_by_scheme[scheme] 199 if request_context is None: 200 request_context = self.connection_pool_kw.copy() 201 202 # Although the context has everything necessary to create the pool, 203 # this function has historically only used the scheme, host, and port 204 # in the positional args. When an API change is acceptable these can 205 # be removed. 206 for key in ("scheme", "host", "port"): 207 request_context.pop(key, None) 208 209 if scheme == "http": 210 for kw in SSL_KEYWORDS: 211 request_context.pop(kw, None) 212 213 return pool_cls(host, port, **request_context) 214 215 def clear(self): 216 """ 217 Empty our store of pools and direct them all to close. 218 219 This will not affect in-flight connections, but they will not be 220 re-used after completion. 221 """ 222 self.pools.clear() 223 224 def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None): 225 """ 226 Get a :class:`urllib3.connectionpool.ConnectionPool` based on the host, port, and scheme. 227 228 If ``port`` isn't given, it will be derived from the ``scheme`` using 229 ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is 230 provided, it is merged with the instance's ``connection_pool_kw`` 231 variable and used to create the new connection pool, if one is 232 needed. 233 """ 234 235 if not host: 236 raise LocationValueError("No host specified.") 237 238 request_context = self._merge_pool_kwargs(pool_kwargs) 239 request_context["scheme"] = scheme or "http" 240 if not port: 241 port = port_by_scheme.get(request_context["scheme"].lower(), 80) 242 request_context["port"] = port 243 request_context["host"] = host 244 245 return self.connection_from_context(request_context) 246 247 def connection_from_context(self, request_context): 248 """ 249 Get a :class:`urllib3.connectionpool.ConnectionPool` based on the request context. 250 251 ``request_context`` must at least contain the ``scheme`` key and its 252 value must be a key in ``key_fn_by_scheme`` instance variable. 253 """ 254 scheme = request_context["scheme"].lower() 255 pool_key_constructor = self.key_fn_by_scheme.get(scheme) 256 if not pool_key_constructor: 257 raise URLSchemeUnknown(scheme) 258 pool_key = pool_key_constructor(request_context) 259 260 return self.connection_from_pool_key(pool_key, request_context=request_context) 261 262 def connection_from_pool_key(self, pool_key, request_context=None): 263 """ 264 Get a :class:`urllib3.connectionpool.ConnectionPool` based on the provided pool key. 265 266 ``pool_key`` should be a namedtuple that only contains immutable 267 objects. At a minimum it must have the ``scheme``, ``host``, and 268 ``port`` fields. 269 """ 270 with self.pools.lock: 271 # If the scheme, host, or port doesn't match existing open 272 # connections, open a new ConnectionPool. 273 pool = self.pools.get(pool_key) 274 if pool: 275 return pool 276 277 # Make a fresh ConnectionPool of the desired type 278 scheme = request_context["scheme"] 279 host = request_context["host"] 280 port = request_context["port"] 281 pool = self._new_pool(scheme, host, port, request_context=request_context) 282 self.pools[pool_key] = pool 283 284 return pool 285 286 def connection_from_url(self, url, pool_kwargs=None): 287 """ 288 Similar to :func:`urllib3.connectionpool.connection_from_url`. 289 290 If ``pool_kwargs`` is not provided and a new pool needs to be 291 constructed, ``self.connection_pool_kw`` is used to initialize 292 the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs`` 293 is provided, it is used instead. Note that if a new pool does not 294 need to be created for the request, the provided ``pool_kwargs`` are 295 not used. 296 """ 297 u = parse_url(url) 298 return self.connection_from_host( 299 u.host, port=u.port, scheme=u.scheme, pool_kwargs=pool_kwargs 300 ) 301 302 def _merge_pool_kwargs(self, override): 303 """ 304 Merge a dictionary of override values for self.connection_pool_kw. 305 306 This does not modify self.connection_pool_kw and returns a new dict. 307 Any keys in the override dictionary with a value of ``None`` are 308 removed from the merged dictionary. 309 """ 310 base_pool_kwargs = self.connection_pool_kw.copy() 311 if override: 312 for key, value in override.items(): 313 if value is None: 314 try: 315 del base_pool_kwargs[key] 316 except KeyError: 317 pass 318 else: 319 base_pool_kwargs[key] = value 320 return base_pool_kwargs 321 322 def _proxy_requires_url_absolute_form(self, parsed_url): 323 """ 324 Indicates if the proxy requires the complete destination URL in the 325 request. Normally this is only needed when not using an HTTP CONNECT 326 tunnel. 327 """ 328 if self.proxy is None: 329 return False 330 331 return not connection_requires_http_tunnel( 332 self.proxy, self.proxy_config, parsed_url.scheme 333 ) 334 335 def _validate_proxy_scheme_url_selection(self, url_scheme): 336 """ 337 Validates that were not attempting to do TLS in TLS connections on 338 Python2 or with unsupported SSL implementations. 339 """ 340 if self.proxy is None or url_scheme != "https": 341 return 342 343 if self.proxy.scheme != "https": 344 return 345 346 if six.PY2 and not self.proxy_config.use_forwarding_for_https: 347 raise ProxySchemeUnsupported( 348 "Contacting HTTPS destinations through HTTPS proxies " 349 "'via CONNECT tunnels' is not supported in Python 2" 350 ) 351 352 def urlopen(self, method, url, redirect=True, **kw): 353 """ 354 Same as :meth:`urllib3.HTTPConnectionPool.urlopen` 355 with custom cross-host redirect logic and only sends the request-uri 356 portion of the ``url``. 357 358 The given ``url`` parameter must be absolute, such that an appropriate 359 :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it. 360 """ 361 u = parse_url(url) 362 self._validate_proxy_scheme_url_selection(u.scheme) 363 364 conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme) 365 366 kw["assert_same_host"] = False 367 kw["redirect"] = False 368 369 if "headers" not in kw: 370 kw["headers"] = self.headers.copy() 371 372 if self._proxy_requires_url_absolute_form(u): 373 response = conn.urlopen(method, url, **kw) 374 else: 375 response = conn.urlopen(method, u.request_uri, **kw) 376 377 redirect_location = redirect and response.get_redirect_location() 378 if not redirect_location: 379 return response 380 381 # Support relative URLs for redirecting. 382 redirect_location = urljoin(url, redirect_location) 383 384 # RFC 7231, Section 6.4.4 385 if response.status == 303: 386 method = "GET" 387 388 retries = kw.get("retries") 389 if not isinstance(retries, Retry): 390 retries = Retry.from_int(retries, redirect=redirect) 391 392 # Strip headers marked as unsafe to forward to the redirected location. 393 # Check remove_headers_on_redirect to avoid a potential network call within 394 # conn.is_same_host() which may use socket.gethostbyname() in the future. 395 if retries.remove_headers_on_redirect and not conn.is_same_host( 396 redirect_location 397 ): 398 headers = list(six.iterkeys(kw["headers"])) 399 for header in headers: 400 if header.lower() in retries.remove_headers_on_redirect: 401 kw["headers"].pop(header, None) 402 403 try: 404 retries = retries.increment(method, url, response=response, _pool=conn) 405 except MaxRetryError: 406 if retries.raise_on_redirect: 407 response.drain_conn() 408 raise 409 return response 410 411 kw["retries"] = retries 412 kw["redirect"] = redirect 413 414 log.info("Redirecting %s -> %s", url, redirect_location) 415 416 response.drain_conn() 417 return self.urlopen(method, redirect_location, **kw) 418 419 420class ProxyManager(PoolManager): 421 """ 422 Behaves just like :class:`PoolManager`, but sends all requests through 423 the defined proxy, using the CONNECT method for HTTPS URLs. 424 425 :param proxy_url: 426 The URL of the proxy to be used. 427 428 :param proxy_headers: 429 A dictionary containing headers that will be sent to the proxy. In case 430 of HTTP they are being sent with each request, while in the 431 HTTPS/CONNECT case they are sent only once. Could be used for proxy 432 authentication. 433 434 :param proxy_ssl_context: 435 The proxy SSL context is used to establish the TLS connection to the 436 proxy when using HTTPS proxies. 437 438 :param use_forwarding_for_https: 439 (Defaults to False) If set to True will forward requests to the HTTPS 440 proxy to be made on behalf of the client instead of creating a TLS 441 tunnel via the CONNECT method. **Enabling this flag means that request 442 and response headers and content will be visible from the HTTPS proxy** 443 whereas tunneling keeps request and response headers and content 444 private. IP address, target hostname, SNI, and port are always visible 445 to an HTTPS proxy even when this flag is disabled. 446 447 Example: 448 >>> proxy = urllib3.ProxyManager('http://localhost:3128/') 449 >>> r1 = proxy.request('GET', 'http://google.com/') 450 >>> r2 = proxy.request('GET', 'http://httpbin.org/') 451 >>> len(proxy.pools) 452 1 453 >>> r3 = proxy.request('GET', 'https://httpbin.org/') 454 >>> r4 = proxy.request('GET', 'https://twitter.com/') 455 >>> len(proxy.pools) 456 3 457 458 """ 459 460 def __init__( 461 self, 462 proxy_url, 463 num_pools=10, 464 headers=None, 465 proxy_headers=None, 466 proxy_ssl_context=None, 467 use_forwarding_for_https=False, 468 **connection_pool_kw 469 ): 470 471 if isinstance(proxy_url, HTTPConnectionPool): 472 proxy_url = "%s://%s:%i" % ( 473 proxy_url.scheme, 474 proxy_url.host, 475 proxy_url.port, 476 ) 477 proxy = parse_url(proxy_url) 478 479 if proxy.scheme not in ("http", "https"): 480 raise ProxySchemeUnknown(proxy.scheme) 481 482 if not proxy.port: 483 port = port_by_scheme.get(proxy.scheme, 80) 484 proxy = proxy._replace(port=port) 485 486 self.proxy = proxy 487 self.proxy_headers = proxy_headers or {} 488 self.proxy_ssl_context = proxy_ssl_context 489 self.proxy_config = ProxyConfig(proxy_ssl_context, use_forwarding_for_https) 490 491 connection_pool_kw["_proxy"] = self.proxy 492 connection_pool_kw["_proxy_headers"] = self.proxy_headers 493 connection_pool_kw["_proxy_config"] = self.proxy_config 494 495 super(ProxyManager, self).__init__(num_pools, headers, **connection_pool_kw) 496 497 def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None): 498 if scheme == "https": 499 return super(ProxyManager, self).connection_from_host( 500 host, port, scheme, pool_kwargs=pool_kwargs 501 ) 502 503 return super(ProxyManager, self).connection_from_host( 504 self.proxy.host, self.proxy.port, self.proxy.scheme, pool_kwargs=pool_kwargs 505 ) 506 507 def _set_proxy_headers(self, url, headers=None): 508 """ 509 Sets headers needed by proxies: specifically, the Accept and Host 510 headers. Only sets headers not provided by the user. 511 """ 512 headers_ = {"Accept": "*/*"} 513 514 netloc = parse_url(url).netloc 515 if netloc: 516 headers_["Host"] = netloc 517 518 if headers: 519 headers_.update(headers) 520 return headers_ 521 522 def urlopen(self, method, url, redirect=True, **kw): 523 "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute." 524 u = parse_url(url) 525 if not connection_requires_http_tunnel(self.proxy, self.proxy_config, u.scheme): 526 # For connections using HTTP CONNECT, httplib sets the necessary 527 # headers on the CONNECT to the proxy. If we're not using CONNECT, 528 # we'll definitely need to set 'Host' at the very least. 529 headers = kw.get("headers", self.headers) 530 kw["headers"] = self._set_proxy_headers(url, headers) 531 532 return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw) 533 534 535def proxy_from_url(url, **kw): 536 return ProxyManager(proxy_url=url, **kw) 537