from __future__ import absolute_import
import collections
import functools
import logging

from ._collections import RecentlyUsedContainer
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
from .connectionpool import port_by_scheme
from .exceptions import LocationValueError, MaxRetryError, ProxySchemeUnknown
from .packages import six
from .packages.six.moves.urllib.parse import urljoin
from .request import RequestMethods
from .util.url import parse_url
from .util.retry import Retry


__all__ = ["PoolManager", "ProxyManager", "proxy_from_url"]


log = logging.getLogger(__name__)

# Pool keyword arguments that are removed from the request context by
# ``PoolManager._new_pool`` when the pool being created is for plain "http".
SSL_KEYWORDS = (
    "key_file",
    "cert_file",
    "cert_reqs",
    "ca_certs",
    "ssl_version",
    "ca_cert_dir",
    "ssl_context",
    "key_password",
)

# All known keyword arguments that could be provided to the pool manager, its
# pools, or the underlying connections. This is used to construct a pool key.
# Every name is the keyword argument prefixed with "key_" (so the private
# ``_proxy`` keyword becomes ``key__proxy``); ``_default_key_normalizer`` adds
# the same prefix to the request context before building the key. The order
# of this tuple is the field order of ``PoolKey``.
_key_fields = (
    "key_scheme",  # str
    "key_host",  # str
    "key_port",  # int
    "key_timeout",  # int or float or Timeout
    "key_retries",  # int or Retry
    "key_strict",  # bool
    "key_block",  # bool
    "key_source_address",  # str
    "key_key_file",  # str
    "key_key_password",  # str
    "key_cert_file",  # str
    "key_cert_reqs",  # str
    "key_ca_certs",  # str
    "key_ssl_version",  # str
    "key_ca_cert_dir",  # str
    "key_ssl_context",  # instance of ssl.SSLContext or urllib3.util.ssl_.SSLContext
    "key_maxsize",  # int
    "key_headers",  # dict
    "key__proxy",  # parsed proxy url
    "key__proxy_headers",  # dict
    "key_socket_options",  # list of (level (int), optname (int), value (int or str)) tuples
    "key__socks_options",  # dict
    "key_assert_hostname",  # bool or string
    "key_assert_fingerprint",  # str
    "key_server_hostname",  # str
)

#: The namedtuple class used to construct keys for the connection pool.
#: All custom key schemes should include the fields in this key at a minimum.
PoolKey = collections.namedtuple("PoolKey", _key_fields)

# Lower-cased names of headers that describe a request body. They are dropped
# together with the body when a 303 response turns the request into a GET.
_CONTENT_HEADERS = frozenset(
    [
        "content-encoding",
        "content-language",
        "content-location",
        "content-type",
        "content-length",
        "digest",
        "last-modified",
    ]
)


def _without_headers(headers, names):
    """
    Return a copy of ``headers`` without the headers listed in ``names``.

    The mapping passed in is never modified, so a headers dictionary supplied
    by the caller of ``urlopen`` survives a redirect untouched.

    :param headers:
        A mapping of header names to values, or ``None``.
    :param names:
        A container of lower-cased header names to leave out.

    :return: A new mapping, or ``None`` if ``headers`` is ``None``.
    """
    if headers is None:
        return None

    remaining = headers.copy()
    for header in list(six.iterkeys(headers)):
        if header.lower() in names:
            remaining.pop(header, None)
    return remaining


def _default_key_normalizer(key_class, request_context):
    """
    Create a pool key out of a request context dictionary.

    According to RFC 3986, both the scheme and host are case-insensitive.
    Therefore, this function normalizes both before constructing the pool
    key for an HTTPS request. If you wish to change this behaviour, provide
    alternate callables to ``key_fn_by_scheme``.

    :param key_class:
        The class to use when constructing the key. This should be a namedtuple
        whose fields are the request context keys prefixed with ``key_``
        (``key_scheme`` and ``key_host`` at a minimum).
    :type  key_class: namedtuple
    :param request_context:
        A dictionary-like object that contain the context for a request.
        It must contain the ``scheme`` and ``host`` keys and is not modified.
    :type  request_context: dict

    :return: A namedtuple that can be used as a connection pool key.
    :rtype:  PoolKey
    """
    # Since we mutate the dictionary, make a copy first
    context = request_context.copy()
    context["scheme"] = context["scheme"].lower()
    context["host"] = context["host"].lower()

    # These are dictionaries and need to be transformed into frozensets so
    # that the resulting key is hashable
    for key in ("headers", "_proxy_headers", "_socks_options"):
        if key in context and context[key] is not None:
            context[key] = frozenset(context[key].items())

    # The socket_options key may be a list and needs to be transformed into a
    # tuple.
    socket_opts = context.get("socket_options")
    if socket_opts is not None:
        context["socket_options"] = tuple(socket_opts)

    # Map the kwargs to the names in the namedtuple - this is necessary since
    # namedtuples can't have fields starting with '_'.
    for key in list(context.keys()):
        context["key_" + key] = context.pop(key)

    # Default to ``None`` for keys missing from the context
    for field in key_class._fields:
        if field not in context:
            context[field] = None

    return key_class(**context)


#: A dictionary that maps a scheme to a callable that creates a pool key.
#: This can be used to alter the way pool keys are constructed, if desired.
#: Each PoolManager makes a copy of this dictionary so they can be configured
#: globally here, or individually on the instance.
key_fn_by_scheme = {
    "http": functools.partial(_default_key_normalizer, PoolKey),
    "https": functools.partial(_default_key_normalizer, PoolKey),
}

pool_classes_by_scheme = {"http": HTTPConnectionPool, "https": HTTPSConnectionPool}


class PoolManager(RequestMethods):
    """
    Allows for arbitrary requests while transparently keeping track of
    necessary connection pools for you.

    :param num_pools:
        Number of connection pools to cache before discarding the least
        recently used pool.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param \\**connection_pool_kw:
        Additional parameters are used to create fresh
        :class:`urllib3.connectionpool.ConnectionPool` instances.

    Example::

        >>> manager = PoolManager(num_pools=2)
        >>> r = manager.request('GET', 'http://google.com/')
        >>> r = manager.request('GET', 'http://google.com/mail')
        >>> r = manager.request('GET', 'http://yahoo.com/')
        >>> len(manager.pools)
        2

    """

    proxy = None

    def __init__(self, num_pools=10, headers=None, **connection_pool_kw):
        RequestMethods.__init__(self, headers)
        self.connection_pool_kw = connection_pool_kw
        self.pools = RecentlyUsedContainer(num_pools, dispose_func=lambda p: p.close())

        # Locally set the pool classes and keys so other PoolManagers can
        # override them.
        # NOTE(review): unlike ``key_fn_by_scheme`` this is the module-level
        # dictionary itself, not a copy, so mutating it in place affects every
        # manager; rebind the attribute to customize a single instance.
        self.pool_classes_by_scheme = pool_classes_by_scheme
        self.key_fn_by_scheme = key_fn_by_scheme.copy()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.clear()
        # Return False to re-raise any potential exceptions
        return False

    def _new_pool(self, scheme, host, port, request_context=None):
        """
        Create a new :class:`ConnectionPool` based on host, port, scheme, and
        any additional pool keyword arguments.

        If ``request_context`` is provided, it is provided as keyword arguments
        to the pool class used. This method is used to actually create the
        connection pools handed out by :meth:`connection_from_url` and
        companion methods. It is intended to be overridden for customization.

        :param scheme: Key into ``self.pool_classes_by_scheme``.
        :param host: Passed to the pool class as first positional argument.
        :param port: Passed to the pool class as second positional argument.
        :param request_context:
            Keyword arguments for the pool class; defaults to
            ``self.connection_pool_kw``. The mapping is not modified.

        :raises KeyError: If ``scheme`` has no registered pool class.
        """
        pool_cls = self.pool_classes_by_scheme[scheme]
        if request_context is None:
            request_context = self.connection_pool_kw

        # Keys are removed below, so work on a copy: neither the caller's
        # context nor ``self.connection_pool_kw`` may be altered.
        request_context = request_context.copy()

        # Although the context has everything necessary to create the pool,
        # this function has historically only used the scheme, host, and port
        # in the positional args. When an API change is acceptable these can
        # be removed.
        for key in ("scheme", "host", "port"):
            request_context.pop(key, None)

        if scheme == "http":
            for kw in SSL_KEYWORDS:
                request_context.pop(kw, None)

        return pool_cls(host, port, **request_context)

    def clear(self):
        """
        Empty our store of pools and direct them all to close.

        This will not affect in-flight connections, but they will not be
        re-used after completion.
        """
        self.pools.clear()

    def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None):
        """
        Get a :class:`ConnectionPool` based on the host, port, and scheme.

        If ``port`` isn't given, it will be derived from the ``scheme`` using
        ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is
        provided, it is merged with the instance's ``connection_pool_kw``
        variable and used to create the new connection pool, if one is
        needed.

        :raises LocationValueError: If ``host`` is empty or ``None``.
        """

        if not host:
            raise LocationValueError("No host specified.")

        request_context = self._merge_pool_kwargs(pool_kwargs)
        request_context["scheme"] = scheme or "http"
        if not port:
            # Unknown schemes fall back to port 80
            port = port_by_scheme.get(request_context["scheme"].lower(), 80)
        request_context["port"] = port
        request_context["host"] = host

        return self.connection_from_context(request_context)

    def connection_from_context(self, request_context):
        """
        Get a :class:`ConnectionPool` based on the request context.

        ``request_context`` must at least contain the ``scheme`` key and its
        value must be a key in ``key_fn_by_scheme`` instance variable.

        :raises KeyError:
            If the lower-cased scheme is not a key of ``key_fn_by_scheme``.
        """
        scheme = request_context["scheme"].lower()
        pool_key_constructor = self.key_fn_by_scheme[scheme]
        pool_key = pool_key_constructor(request_context)

        return self.connection_from_pool_key(pool_key, request_context=request_context)

    def connection_from_pool_key(self, pool_key, request_context=None):
        """
        Get a :class:`ConnectionPool` based on the provided pool key.

        ``pool_key`` should be a namedtuple that only contains immutable
        objects. At a minimum it must have the ``key_scheme``, ``key_host``,
        and ``key_port`` fields.

        ``request_context`` is only consulted when no pool is stored under
        ``pool_key``; in that case it must contain the ``scheme``, ``host``
        and ``port`` keys, from which the new pool is created.
        """
        with self.pools.lock:
            # If the scheme, host, or port doesn't match existing open
            # connections, open a new ConnectionPool.
            pool = self.pools.get(pool_key)
            if pool:
                return pool

            # Make a fresh ConnectionPool of the desired type
            scheme = request_context["scheme"]
            host = request_context["host"]
            port = request_context["port"]
            pool = self._new_pool(scheme, host, port, request_context=request_context)
            self.pools[pool_key] = pool

        return pool

    def connection_from_url(self, url, pool_kwargs=None):
        """
        Similar to :func:`urllib3.connectionpool.connection_from_url`.

        If ``pool_kwargs`` is not provided and a new pool needs to be
        constructed, ``self.connection_pool_kw`` is used to initialize
        the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs``
        is provided, it is used instead. Note that if a new pool does not
        need to be created for the request, the provided ``pool_kwargs`` are
        not used.
        """
        u = parse_url(url)
        return self.connection_from_host(
            u.host, port=u.port, scheme=u.scheme, pool_kwargs=pool_kwargs
        )

    def _merge_pool_kwargs(self, override):
        """
        Merge a dictionary of override values for self.connection_pool_kw.

        This does not modify self.connection_pool_kw and returns a new dict.
        Any keys in the override dictionary with a value of ``None`` are
        removed from the merged dictionary.
        """
        base_pool_kwargs = self.connection_pool_kw.copy()
        if override:
            for key, value in override.items():
                if value is None:
                    # A ``None`` override means "drop this keyword"
                    base_pool_kwargs.pop(key, None)
                else:
                    base_pool_kwargs[key] = value
        return base_pool_kwargs

    def urlopen(self, method, url, redirect=True, **kw):
        """
        Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`
        with custom cross-host redirect logic and only sends the request-uri
        portion of the ``url``.

        The given ``url`` parameter must be absolute, such that an appropriate
        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.

        A headers mapping passed in ``kw`` is never modified: headers that
        have to be removed while following a redirect are removed from a copy.
        """
        u = parse_url(url)
        conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

        # Redirects are followed here rather than by the pool, because the
        # next hop may need a different pool.
        kw["assert_same_host"] = False
        kw["redirect"] = False

        if "headers" not in kw:
            kw["headers"] = self.headers.copy()

        if self.proxy is not None and u.scheme == "http":
            # Plain HTTP through a proxy is requested with the absolute URL
            response = conn.urlopen(method, url, **kw)
        else:
            response = conn.urlopen(method, u.request_uri, **kw)

        redirect_location = redirect and response.get_redirect_location()
        if not redirect_location:
            return response

        # Support relative URLs for redirecting.
        redirect_location = urljoin(url, redirect_location)

        # RFC 7231, Section 6.4.4
        if response.status == 303:
            method = "GET"
            # The follow-up GET must not carry the original request body (or
            # the headers describing it) to the redirect target.
            kw["body"] = None
            kw["headers"] = _without_headers(kw["headers"], _CONTENT_HEADERS)

        retries = kw.get("retries")
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect)

        # Strip headers marked as unsafe to forward to the redirected location.
        # Check remove_headers_on_redirect to avoid a potential network call within
        # conn.is_same_host() which may use socket.gethostbyname() in the future.
        if retries.remove_headers_on_redirect and not conn.is_same_host(
            redirect_location
        ):
            kw["headers"] = _without_headers(
                kw["headers"], retries.remove_headers_on_redirect
            )

        try:
            retries = retries.increment(method, url, response=response, _pool=conn)
        except MaxRetryError:
            if retries.raise_on_redirect:
                raise
            return response

        kw["retries"] = retries
        kw["redirect"] = redirect

        log.info("Redirecting %s -> %s", url, redirect_location)
        return self.urlopen(method, redirect_location, **kw)


class ProxyManager(PoolManager):
    """
    Behaves just like :class:`PoolManager`, but sends all requests through
    the defined proxy, using the CONNECT method for HTTPS URLs.

    :param proxy_url:
        The URL of the proxy to be used.

    :param proxy_headers:
        A dictionary containing headers that will be sent to the proxy. In case
        of HTTP they are being sent with each request, while in the
        HTTPS/CONNECT case they are sent only once. Could be used for proxy
        authentication.

    Example:
        >>> proxy = urllib3.ProxyManager('http://localhost:3128/')
        >>> r1 = proxy.request('GET', 'http://google.com/')
        >>> r2 = proxy.request('GET', 'http://httpbin.org/')
        >>> len(proxy.pools)
        1
        >>> r3 = proxy.request('GET', 'https://httpbin.org/')
        >>> r4 = proxy.request('GET', 'https://twitter.com/')
        >>> len(proxy.pools)
        3

    """

    def __init__(
        self,
        proxy_url,
        num_pools=10,
        headers=None,
        proxy_headers=None,
        **connection_pool_kw
    ):

        # A connection pool may be given in place of a URL string
        if isinstance(proxy_url, HTTPConnectionPool):
            proxy_url = "%s://%s:%i" % (
                proxy_url.scheme,
                proxy_url.host,
                proxy_url.port,
            )
        proxy = parse_url(proxy_url)
        if not proxy.port:
            port = port_by_scheme.get(proxy.scheme, 80)
            proxy = proxy._replace(port=port)

        if proxy.scheme not in ("http", "https"):
            raise ProxySchemeUnknown(proxy.scheme)

        self.proxy = proxy
        self.proxy_headers = proxy_headers or {}

        # Handed to every pool created by this manager
        connection_pool_kw["_proxy"] = self.proxy
        connection_pool_kw["_proxy_headers"] = self.proxy_headers

        super(ProxyManager, self).__init__(num_pools, headers, **connection_pool_kw)

    def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None):
        """
        Get a :class:`ConnectionPool` for the request.

        For the "https" scheme the pool is looked up by the destination host,
        port and scheme. For every other scheme the pool for the proxy itself
        is returned, whatever ``host`` and ``port`` are.
        """
        if scheme == "https":
            return super(ProxyManager, self).connection_from_host(
                host, port, scheme, pool_kwargs=pool_kwargs
            )

        return super(ProxyManager, self).connection_from_host(
            self.proxy.host, self.proxy.port, self.proxy.scheme, pool_kwargs=pool_kwargs
        )

    def _set_proxy_headers(self, url, headers=None):
        """
        Sets headers needed by proxies: specifically, the Accept and Host
        headers. Only sets headers not provided by the user.

        Returns a new dictionary; ``headers`` is not modified.
        """
        headers_ = {"Accept": "*/*"}

        netloc = parse_url(url).netloc
        if netloc:
            headers_["Host"] = netloc

        if headers:
            # User-provided values win over the defaults above
            headers_.update(headers)
        return headers_

    def urlopen(self, method, url, redirect=True, **kw):
        "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
        u = parse_url(url)

        if u.scheme == "http":
            # For proxied HTTPS requests, httplib sets the necessary headers
            # on the CONNECT to the proxy. For HTTP, we'll definitely
            # need to set 'Host' at the very least.
            headers = kw.get("headers", self.headers)
            kw["headers"] = self._set_proxy_headers(url, headers)

        return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw)


def proxy_from_url(url, **kw):
    """
    Create a :class:`ProxyManager` for the proxy at ``url``.

    :param url:
        Passed to :class:`ProxyManager` as ``proxy_url``.
    :param \\**kw:
        Passed through to :class:`ProxyManager` unchanged.

    :return: The new :class:`ProxyManager`.
    """
    return ProxyManager(proxy_url=url, **kw)