1from __future__ import absolute_import 2import collections 3import functools 4import logging 5import warnings 6 7from ._collections import RecentlyUsedContainer 8from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool 9from .connectionpool import port_by_scheme 10from .exceptions import ( 11 LocationValueError, 12 MaxRetryError, 13 ProxySchemeUnknown, 14 InvalidProxyConfigurationWarning, 15) 16from .packages import six 17from .packages.six.moves.urllib.parse import urljoin 18from .request import RequestMethods 19from .util.url import parse_url 20from .util.retry import Retry 21 22 23__all__ = ["PoolManager", "ProxyManager", "proxy_from_url"] 24 25 26log = logging.getLogger(__name__) 27 28SSL_KEYWORDS = ( 29 "key_file", 30 "cert_file", 31 "cert_reqs", 32 "ca_certs", 33 "ssl_version", 34 "ca_cert_dir", 35 "ssl_context", 36 "key_password", 37) 38 39# All known keyword arguments that could be provided to the pool manager, its 40# pools, or the underlying connections. This is used to construct a pool key. 41_key_fields = ( 42 "key_scheme", # str 43 "key_host", # str 44 "key_port", # int 45 "key_timeout", # int or float or Timeout 46 "key_retries", # int or Retry 47 "key_strict", # bool 48 "key_block", # bool 49 "key_source_address", # str 50 "key_key_file", # str 51 "key_key_password", # str 52 "key_cert_file", # str 53 "key_cert_reqs", # str 54 "key_ca_certs", # str 55 "key_ssl_version", # str 56 "key_ca_cert_dir", # str 57 "key_ssl_context", # instance of ssl.SSLContext or urllib3.util.ssl_.SSLContext 58 "key_maxsize", # int 59 "key_headers", # dict 60 "key__proxy", # parsed proxy url 61 "key__proxy_headers", # dict 62 "key_socket_options", # list of (level (int), optname (int), value (int or str)) tuples 63 "key__socks_options", # dict 64 "key_assert_hostname", # bool or string 65 "key_assert_fingerprint", # str 66 "key_server_hostname", # str 67) 68 69#: The namedtuple class used to construct keys for the connection pool. 70#: All custom key schemes should include the fields in this key at a minimum. 71PoolKey = collections.namedtuple("PoolKey", _key_fields) 72 73 74def _default_key_normalizer(key_class, request_context): 75 """ 76 Create a pool key out of a request context dictionary. 77 78 According to RFC 3986, both the scheme and host are case-insensitive. 79 Therefore, this function normalizes both before constructing the pool 80 key for an HTTPS request. If you wish to change this behaviour, provide 81 alternate callables to ``key_fn_by_scheme``. 82 83 :param key_class: 84 The class to use when constructing the key. This should be a namedtuple 85 with the ``scheme`` and ``host`` keys at a minimum. 86 :type key_class: namedtuple 87 :param request_context: 88 A dictionary-like object that contain the context for a request. 89 :type request_context: dict 90 91 :return: A namedtuple that can be used as a connection pool key. 92 :rtype: PoolKey 93 """ 94 # Since we mutate the dictionary, make a copy first 95 context = request_context.copy() 96 context["scheme"] = context["scheme"].lower() 97 context["host"] = context["host"].lower() 98 99 # These are both dictionaries and need to be transformed into frozensets 100 for key in ("headers", "_proxy_headers", "_socks_options"): 101 if key in context and context[key] is not None: 102 context[key] = frozenset(context[key].items()) 103 104 # The socket_options key may be a list and needs to be transformed into a 105 # tuple. 106 socket_opts = context.get("socket_options") 107 if socket_opts is not None: 108 context["socket_options"] = tuple(socket_opts) 109 110 # Map the kwargs to the names in the namedtuple - this is necessary since 111 # namedtuples can't have fields starting with '_'. 112 for key in list(context.keys()): 113 context["key_" + key] = context.pop(key) 114 115 # Default to ``None`` for keys missing from the context 116 for field in key_class._fields: 117 if field not in context: 118 context[field] = None 119 120 return key_class(**context) 121 122 123#: A dictionary that maps a scheme to a callable that creates a pool key. 124#: This can be used to alter the way pool keys are constructed, if desired. 125#: Each PoolManager makes a copy of this dictionary so they can be configured 126#: globally here, or individually on the instance. 127key_fn_by_scheme = { 128 "http": functools.partial(_default_key_normalizer, PoolKey), 129 "https": functools.partial(_default_key_normalizer, PoolKey), 130} 131 132pool_classes_by_scheme = {"http": HTTPConnectionPool, "https": HTTPSConnectionPool} 133 134 135class PoolManager(RequestMethods): 136 """ 137 Allows for arbitrary requests while transparently keeping track of 138 necessary connection pools for you. 139 140 :param num_pools: 141 Number of connection pools to cache before discarding the least 142 recently used pool. 143 144 :param headers: 145 Headers to include with all requests, unless other headers are given 146 explicitly. 147 148 :param \\**connection_pool_kw: 149 Additional parameters are used to create fresh 150 :class:`urllib3.connectionpool.ConnectionPool` instances. 151 152 Example:: 153 154 >>> manager = PoolManager(num_pools=2) 155 >>> r = manager.request('GET', 'http://google.com/') 156 >>> r = manager.request('GET', 'http://google.com/mail') 157 >>> r = manager.request('GET', 'http://yahoo.com/') 158 >>> len(manager.pools) 159 2 160 161 """ 162 163 proxy = None 164 165 def __init__(self, num_pools=10, headers=None, **connection_pool_kw): 166 RequestMethods.__init__(self, headers) 167 self.connection_pool_kw = connection_pool_kw 168 self.pools = RecentlyUsedContainer(num_pools, dispose_func=lambda p: p.close()) 169 170 # Locally set the pool classes and keys so other PoolManagers can 171 # override them. 172 self.pool_classes_by_scheme = pool_classes_by_scheme 173 self.key_fn_by_scheme = key_fn_by_scheme.copy() 174 175 def __enter__(self): 176 return self 177 178 def __exit__(self, exc_type, exc_val, exc_tb): 179 self.clear() 180 # Return False to re-raise any potential exceptions 181 return False 182 183 def _new_pool(self, scheme, host, port, request_context=None): 184 """ 185 Create a new :class:`ConnectionPool` based on host, port, scheme, and 186 any additional pool keyword arguments. 187 188 If ``request_context`` is provided, it is provided as keyword arguments 189 to the pool class used. This method is used to actually create the 190 connection pools handed out by :meth:`connection_from_url` and 191 companion methods. It is intended to be overridden for customization. 192 """ 193 pool_cls = self.pool_classes_by_scheme[scheme] 194 if request_context is None: 195 request_context = self.connection_pool_kw.copy() 196 197 # Although the context has everything necessary to create the pool, 198 # this function has historically only used the scheme, host, and port 199 # in the positional args. When an API change is acceptable these can 200 # be removed. 201 for key in ("scheme", "host", "port"): 202 request_context.pop(key, None) 203 204 if scheme == "http": 205 for kw in SSL_KEYWORDS: 206 request_context.pop(kw, None) 207 208 return pool_cls(host, port, **request_context) 209 210 def clear(self): 211 """ 212 Empty our store of pools and direct them all to close. 213 214 This will not affect in-flight connections, but they will not be 215 re-used after completion. 216 """ 217 self.pools.clear() 218 219 def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None): 220 """ 221 Get a :class:`ConnectionPool` based on the host, port, and scheme. 222 223 If ``port`` isn't given, it will be derived from the ``scheme`` using 224 ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is 225 provided, it is merged with the instance's ``connection_pool_kw`` 226 variable and used to create the new connection pool, if one is 227 needed. 228 """ 229 230 if not host: 231 raise LocationValueError("No host specified.") 232 233 request_context = self._merge_pool_kwargs(pool_kwargs) 234 request_context["scheme"] = scheme or "http" 235 if not port: 236 port = port_by_scheme.get(request_context["scheme"].lower(), 80) 237 request_context["port"] = port 238 request_context["host"] = host 239 240 return self.connection_from_context(request_context) 241 242 def connection_from_context(self, request_context): 243 """ 244 Get a :class:`ConnectionPool` based on the request context. 245 246 ``request_context`` must at least contain the ``scheme`` key and its 247 value must be a key in ``key_fn_by_scheme`` instance variable. 248 """ 249 scheme = request_context["scheme"].lower() 250 pool_key_constructor = self.key_fn_by_scheme[scheme] 251 pool_key = pool_key_constructor(request_context) 252 253 return self.connection_from_pool_key(pool_key, request_context=request_context) 254 255 def connection_from_pool_key(self, pool_key, request_context=None): 256 """ 257 Get a :class:`ConnectionPool` based on the provided pool key. 258 259 ``pool_key`` should be a namedtuple that only contains immutable 260 objects. At a minimum it must have the ``scheme``, ``host``, and 261 ``port`` fields. 262 """ 263 with self.pools.lock: 264 # If the scheme, host, or port doesn't match existing open 265 # connections, open a new ConnectionPool. 266 pool = self.pools.get(pool_key) 267 if pool: 268 return pool 269 270 # Make a fresh ConnectionPool of the desired type 271 scheme = request_context["scheme"] 272 host = request_context["host"] 273 port = request_context["port"] 274 pool = self._new_pool(scheme, host, port, request_context=request_context) 275 self.pools[pool_key] = pool 276 277 return pool 278 279 def connection_from_url(self, url, pool_kwargs=None): 280 """ 281 Similar to :func:`urllib3.connectionpool.connection_from_url`. 282 283 If ``pool_kwargs`` is not provided and a new pool needs to be 284 constructed, ``self.connection_pool_kw`` is used to initialize 285 the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs`` 286 is provided, it is used instead. Note that if a new pool does not 287 need to be created for the request, the provided ``pool_kwargs`` are 288 not used. 289 """ 290 u = parse_url(url) 291 return self.connection_from_host( 292 u.host, port=u.port, scheme=u.scheme, pool_kwargs=pool_kwargs 293 ) 294 295 def _merge_pool_kwargs(self, override): 296 """ 297 Merge a dictionary of override values for self.connection_pool_kw. 298 299 This does not modify self.connection_pool_kw and returns a new dict. 300 Any keys in the override dictionary with a value of ``None`` are 301 removed from the merged dictionary. 302 """ 303 base_pool_kwargs = self.connection_pool_kw.copy() 304 if override: 305 for key, value in override.items(): 306 if value is None: 307 try: 308 del base_pool_kwargs[key] 309 except KeyError: 310 pass 311 else: 312 base_pool_kwargs[key] = value 313 return base_pool_kwargs 314 315 def urlopen(self, method, url, redirect=True, **kw): 316 """ 317 Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen` 318 with custom cross-host redirect logic and only sends the request-uri 319 portion of the ``url``. 320 321 The given ``url`` parameter must be absolute, such that an appropriate 322 :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it. 323 """ 324 u = parse_url(url) 325 conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme) 326 327 kw["assert_same_host"] = False 328 kw["redirect"] = False 329 330 if "headers" not in kw: 331 kw["headers"] = self.headers.copy() 332 333 if self.proxy is not None and u.scheme == "http": 334 response = conn.urlopen(method, url, **kw) 335 else: 336 response = conn.urlopen(method, u.request_uri, **kw) 337 338 redirect_location = redirect and response.get_redirect_location() 339 if not redirect_location: 340 return response 341 342 # Support relative URLs for redirecting. 343 redirect_location = urljoin(url, redirect_location) 344 345 # RFC 7231, Section 6.4.4 346 if response.status == 303: 347 method = "GET" 348 349 retries = kw.get("retries") 350 if not isinstance(retries, Retry): 351 retries = Retry.from_int(retries, redirect=redirect) 352 353 # Strip headers marked as unsafe to forward to the redirected location. 354 # Check remove_headers_on_redirect to avoid a potential network call within 355 # conn.is_same_host() which may use socket.gethostbyname() in the future. 356 if retries.remove_headers_on_redirect and not conn.is_same_host( 357 redirect_location 358 ): 359 headers = list(six.iterkeys(kw["headers"])) 360 for header in headers: 361 if header.lower() in retries.remove_headers_on_redirect: 362 kw["headers"].pop(header, None) 363 364 try: 365 retries = retries.increment(method, url, response=response, _pool=conn) 366 except MaxRetryError: 367 if retries.raise_on_redirect: 368 response.drain_conn() 369 raise 370 return response 371 372 kw["retries"] = retries 373 kw["redirect"] = redirect 374 375 log.info("Redirecting %s -> %s", url, redirect_location) 376 377 response.drain_conn() 378 return self.urlopen(method, redirect_location, **kw) 379 380 381class ProxyManager(PoolManager): 382 """ 383 Behaves just like :class:`PoolManager`, but sends all requests through 384 the defined proxy, using the CONNECT method for HTTPS URLs. 385 386 :param proxy_url: 387 The URL of the proxy to be used. 388 389 :param proxy_headers: 390 A dictionary containing headers that will be sent to the proxy. In case 391 of HTTP they are being sent with each request, while in the 392 HTTPS/CONNECT case they are sent only once. Could be used for proxy 393 authentication. 394 395 Example: 396 >>> proxy = urllib3.ProxyManager('http://localhost:3128/') 397 >>> r1 = proxy.request('GET', 'http://google.com/') 398 >>> r2 = proxy.request('GET', 'http://httpbin.org/') 399 >>> len(proxy.pools) 400 1 401 >>> r3 = proxy.request('GET', 'https://httpbin.org/') 402 >>> r4 = proxy.request('GET', 'https://twitter.com/') 403 >>> len(proxy.pools) 404 3 405 406 """ 407 408 def __init__( 409 self, 410 proxy_url, 411 num_pools=10, 412 headers=None, 413 proxy_headers=None, 414 **connection_pool_kw 415 ): 416 417 if isinstance(proxy_url, HTTPConnectionPool): 418 proxy_url = "%s://%s:%i" % ( 419 proxy_url.scheme, 420 proxy_url.host, 421 proxy_url.port, 422 ) 423 proxy = parse_url(proxy_url) 424 if not proxy.port: 425 port = port_by_scheme.get(proxy.scheme, 80) 426 proxy = proxy._replace(port=port) 427 428 if proxy.scheme not in ("http", "https"): 429 raise ProxySchemeUnknown(proxy.scheme) 430 431 self.proxy = proxy 432 self.proxy_headers = proxy_headers or {} 433 434 connection_pool_kw["_proxy"] = self.proxy 435 connection_pool_kw["_proxy_headers"] = self.proxy_headers 436 437 super(ProxyManager, self).__init__(num_pools, headers, **connection_pool_kw) 438 439 def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None): 440 if scheme == "https": 441 return super(ProxyManager, self).connection_from_host( 442 host, port, scheme, pool_kwargs=pool_kwargs 443 ) 444 445 return super(ProxyManager, self).connection_from_host( 446 self.proxy.host, self.proxy.port, self.proxy.scheme, pool_kwargs=pool_kwargs 447 ) 448 449 def _set_proxy_headers(self, url, headers=None): 450 """ 451 Sets headers needed by proxies: specifically, the Accept and Host 452 headers. Only sets headers not provided by the user. 453 """ 454 headers_ = {"Accept": "*/*"} 455 456 netloc = parse_url(url).netloc 457 if netloc: 458 headers_["Host"] = netloc 459 460 if headers: 461 headers_.update(headers) 462 return headers_ 463 464 def _validate_proxy_scheme_url_selection(self, url_scheme): 465 if url_scheme == "https" and self.proxy.scheme == "https": 466 warnings.warn( 467 "Your proxy configuration specified an HTTPS scheme for the proxy. " 468 "Are you sure you want to use HTTPS to contact the proxy? " 469 "This most likely indicates an error in your configuration. " 470 "Read this issue for more info: " 471 "https://github.com/urllib3/urllib3/issues/1850", 472 InvalidProxyConfigurationWarning, 473 stacklevel=3, 474 ) 475 476 def urlopen(self, method, url, redirect=True, **kw): 477 "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute." 478 u = parse_url(url) 479 self._validate_proxy_scheme_url_selection(u.scheme) 480 481 if u.scheme == "http": 482 # For proxied HTTPS requests, httplib sets the necessary headers 483 # on the CONNECT to the proxy. For HTTP, we'll definitely 484 # need to set 'Host' at the very least. 485 headers = kw.get("headers", self.headers) 486 kw["headers"] = self._set_proxy_headers(url, headers) 487 488 return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw) 489 490 491def proxy_from_url(url, **kw): 492 return ProxyManager(proxy_url=url, **kw) 493