1from __future__ import absolute_import 2import collections 3import functools 4import logging 5 6from ._collections import RecentlyUsedContainer 7from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool 8from .connectionpool import port_by_scheme 9from .exceptions import LocationValueError, MaxRetryError, ProxySchemeUnknown 10from .packages import six 11from .packages.six.moves.urllib.parse import urljoin 12from .request import RequestMethods 13from .util.url import parse_url 14from .util.retry import Retry 15 16 17__all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url'] 18 19 20log = logging.getLogger(__name__) 21 22SSL_KEYWORDS = ('key_file', 'cert_file', 'cert_reqs', 'ca_certs', 23 'ssl_version', 'ca_cert_dir', 'ssl_context', 24 'key_password') 25 26# All known keyword arguments that could be provided to the pool manager, its 27# pools, or the underlying connections. This is used to construct a pool key. 28_key_fields = ( 29 'key_scheme', # str 30 'key_host', # str 31 'key_port', # int 32 'key_timeout', # int or float or Timeout 33 'key_retries', # int or Retry 34 'key_strict', # bool 35 'key_block', # bool 36 'key_source_address', # str 37 'key_key_file', # str 38 'key_key_password', # str 39 'key_cert_file', # str 40 'key_cert_reqs', # str 41 'key_ca_certs', # str 42 'key_ssl_version', # str 43 'key_ca_cert_dir', # str 44 'key_ssl_context', # instance of ssl.SSLContext or urllib3.util.ssl_.SSLContext 45 'key_maxsize', # int 46 'key_headers', # dict 47 'key__proxy', # parsed proxy url 48 'key__proxy_headers', # dict 49 'key_socket_options', # list of (level (int), optname (int), value (int or str)) tuples 50 'key__socks_options', # dict 51 'key_assert_hostname', # bool or string 52 'key_assert_fingerprint', # str 53 'key_server_hostname', # str 54) 55 56#: The namedtuple class used to construct keys for the connection pool. 57#: All custom key schemes should include the fields in this key at a minimum. 
PoolKey = collections.namedtuple('PoolKey', _key_fields)


def _default_key_normalizer(key_class, request_context):
    """
    Create a pool key out of a request context dictionary.

    According to RFC 3986, both the scheme and host are case-insensitive.
    Therefore, this function normalizes both before constructing the pool
    key for an HTTP or HTTPS request. If you wish to change this behaviour,
    provide alternate callables to ``key_fn_by_scheme``.

    :param key_class:
        The class to use when constructing the key. This should be a namedtuple
        with the ``key_scheme`` and ``key_host`` fields at a minimum; every
        key of ``request_context`` is passed to it with a ``key_`` prefix.
    :type  key_class: namedtuple
    :param request_context:
        A dictionary-like object that contains the context for a request.
        It must contain the ``scheme`` and ``host`` keys. It is not modified.
    :type  request_context: dict

    :return: A namedtuple that can be used as a connection pool key.
    :rtype:  PoolKey
    """
    # Since we mutate the dictionary, make a copy first
    context = request_context.copy()
    context['scheme'] = context['scheme'].lower()
    context['host'] = context['host'].lower()

    # These are dictionaries and need to be transformed into frozensets so
    # that the resulting key is hashable.
    for key in ('headers', '_proxy_headers', '_socks_options'):
        if key in context and context[key] is not None:
            context[key] = frozenset(context[key].items())

    # The socket_options key may be a list and needs to be transformed into a
    # tuple.
    socket_opts = context.get('socket_options')
    if socket_opts is not None:
        context['socket_options'] = tuple(socket_opts)

    # Map the kwargs to the names in the namedtuple - this is necessary since
    # namedtuples can't have fields starting with '_'.
    for key in list(context.keys()):
        context['key_' + key] = context.pop(key)

    # Default to ``None`` for keys missing from the context
    for field in key_class._fields:
        if field not in context:
            context[field] = None

    return key_class(**context)


#: A dictionary that maps a scheme to a callable that creates a pool key.
#: This can be used to alter the way pool keys are constructed, if desired.
#: Each PoolManager makes a copy of this dictionary so they can be configured
#: globally here, or individually on the instance.
key_fn_by_scheme = {
    'http': functools.partial(_default_key_normalizer, PoolKey),
    'https': functools.partial(_default_key_normalizer, PoolKey),
}

#: A dictionary that maps a scheme to the connection pool class used for it.
pool_classes_by_scheme = {
    'http': HTTPConnectionPool,
    'https': HTTPSConnectionPool,
}


class PoolManager(RequestMethods):
    """
    Allows for arbitrary requests while transparently keeping track of
    necessary connection pools for you.

    :param num_pools:
        Number of connection pools to cache before discarding the least
        recently used pool.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param \\**connection_pool_kw:
        Additional parameters are used to create fresh
        :class:`urllib3.connectionpool.ConnectionPool` instances.

    Example::

        >>> manager = PoolManager(num_pools=2)
        >>> r = manager.request('GET', 'http://google.com/')
        >>> r = manager.request('GET', 'http://google.com/mail')
        >>> r = manager.request('GET', 'http://yahoo.com/')
        >>> len(manager.pools)
        2

    """

    # No proxy by default; ProxyManager sets this on the instance.
    proxy = None

    def __init__(self, num_pools=10, headers=None, **connection_pool_kw):
        RequestMethods.__init__(self, headers)
        self.connection_pool_kw = connection_pool_kw
        # Pools evicted from the container are closed via dispose_func.
        self.pools = RecentlyUsedContainer(num_pools,
                                           dispose_func=lambda p: p.close())

        # Locally set the pool classes and keys so other PoolManagers can
        # override them.
        # NOTE: only key_fn_by_scheme is copied; pool_classes_by_scheme is the
        # module-level dictionary itself, so mutating it in place affects
        # every manager that has not rebound the attribute.
        self.pool_classes_by_scheme = pool_classes_by_scheme
        self.key_fn_by_scheme = key_fn_by_scheme.copy()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.clear()
        # Return False to re-raise any potential exceptions
        return False

    def _new_pool(self, scheme, host, port, request_context=None):
        """
        Create a new :class:`ConnectionPool` based on host, port, scheme, and
        any additional pool keyword arguments.

        If ``request_context`` is provided, it is provided as keyword arguments
        to the pool class used. This method is used to actually create the
        connection pools handed out by :meth:`connection_from_url` and
        companion methods. It is intended to be overridden for customization.

        Neither ``request_context`` nor ``self.connection_pool_kw`` is
        modified; the keys that must not reach the pool constructor are
        removed from a private copy.

        :raises KeyError:
            If ``scheme`` is not a key of ``self.pool_classes_by_scheme``.
        """
        pool_cls = self.pool_classes_by_scheme[scheme]
        if request_context is None:
            request_context = self.connection_pool_kw

        # Work on a copy: the pops below must not leak into the dictionary
        # owned by the caller (or into self.connection_pool_kw).
        pool_kwargs = dict(request_context)

        # Although the context has everything necessary to create the pool,
        # this function has historically only used the scheme, host, and port
        # in the positional args. When an API change is acceptable these can
        # be removed.
        for key in ('scheme', 'host', 'port'):
            pool_kwargs.pop(key, None)

        # Plain HTTP pools do not accept the TLS-related keyword arguments.
        if scheme == 'http':
            for kw in SSL_KEYWORDS:
                pool_kwargs.pop(kw, None)

        return pool_cls(host, port, **pool_kwargs)

    def clear(self):
        """
        Empty our store of pools and direct them all to close.

        This will not affect in-flight connections, but they will not be
        re-used after completion.
        """
        self.pools.clear()

    def connection_from_host(self, host, port=None, scheme='http', pool_kwargs=None):
        """
        Get a :class:`ConnectionPool` based on the host, port, and scheme.

        If ``port`` isn't given, it will be derived from the ``scheme`` using
        ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is
        provided, it is merged with the instance's ``connection_pool_kw``
        variable and used to create the new connection pool, if one is
        needed.

        :raises LocationValueError: If ``host`` is empty or ``None``.
        """

        if not host:
            raise LocationValueError("No host specified.")

        request_context = self._merge_pool_kwargs(pool_kwargs)
        request_context['scheme'] = scheme or 'http'
        if not port:
            # Unknown schemes fall back to port 80.
            port = port_by_scheme.get(request_context['scheme'].lower(), 80)
        request_context['port'] = port
        request_context['host'] = host

        return self.connection_from_context(request_context)

    def connection_from_context(self, request_context):
        """
        Get a :class:`ConnectionPool` based on the request context.

        ``request_context`` must at least contain the ``scheme`` key and its
        value must be a key in ``key_fn_by_scheme`` instance variable.
        """
        scheme = request_context['scheme'].lower()
        pool_key_constructor = self.key_fn_by_scheme[scheme]
        pool_key = pool_key_constructor(request_context)

        return self.connection_from_pool_key(pool_key, request_context=request_context)

    def connection_from_pool_key(self, pool_key, request_context=None):
        """
        Get a :class:`ConnectionPool` based on the provided pool key.

        ``pool_key`` should be a namedtuple that only contains immutable
        objects. At a minimum it must have the ``scheme``, ``host``, and
        ``port`` fields.

        ``request_context`` is only consulted when no pool is cached for
        ``pool_key``; in that case it must contain the ``scheme``, ``host``
        and ``port`` keys, which are used to build the new pool.
        """
        with self.pools.lock:
            # If the scheme, host, or port doesn't match existing open
            # connections, open a new ConnectionPool.
            pool = self.pools.get(pool_key)
            if pool:
                return pool

            # Make a fresh ConnectionPool of the desired type
            scheme = request_context['scheme']
            host = request_context['host']
            port = request_context['port']
            pool = self._new_pool(scheme, host, port, request_context=request_context)
            self.pools[pool_key] = pool

        return pool

    def connection_from_url(self, url, pool_kwargs=None):
        """
        Similar to :func:`urllib3.connectionpool.connection_from_url`.

        If ``pool_kwargs`` is not provided and a new pool needs to be
        constructed, ``self.connection_pool_kw`` is used to initialize
        the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs``
        is provided, it is used instead. Note that if a new pool does not
        need to be created for the request, the provided ``pool_kwargs`` are
        not used.
        """
        u = parse_url(url)
        return self.connection_from_host(u.host, port=u.port, scheme=u.scheme,
                                         pool_kwargs=pool_kwargs)

    def _merge_pool_kwargs(self, override):
        """
        Merge a dictionary of override values for self.connection_pool_kw.

        This does not modify self.connection_pool_kw and returns a new dict.
        Any keys in the override dictionary with a value of ``None`` are
        removed from the merged dictionary.
        """
        base_pool_kwargs = self.connection_pool_kw.copy()
        if override:
            for key, value in override.items():
                if value is None:
                    # ``None`` means "drop this setting"; a missing key is
                    # not an error.
                    base_pool_kwargs.pop(key, None)
                else:
                    base_pool_kwargs[key] = value
        return base_pool_kwargs

    def urlopen(self, method, url, redirect=True, **kw):
        """
        Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`
        with custom cross-host redirect logic and only sends the request-uri
        portion of the ``url``.

        The given ``url`` parameter must be absolute, such that an appropriate
        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.

        A ``headers`` mapping supplied by the caller is never modified: when
        headers have to be stripped for a cross-host redirect, the stripping
        is done on a copy.

        :raises MaxRetryError:
            If the redirect limit is exhausted and the effective
            :class:`Retry` object has ``raise_on_redirect`` set.
        """
        u = parse_url(url)
        conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

        # Redirects are followed here (possibly across hosts), not by the
        # individual pool.
        kw['assert_same_host'] = False
        kw['redirect'] = False

        if 'headers' not in kw:
            kw['headers'] = self.headers.copy()

        if self.proxy is not None and u.scheme == "http":
            # Plain HTTP through a proxy needs the absolute URL.
            response = conn.urlopen(method, url, **kw)
        else:
            response = conn.urlopen(method, u.request_uri, **kw)

        redirect_location = redirect and response.get_redirect_location()
        if not redirect_location:
            return response

        # Support relative URLs for redirecting.
        redirect_location = urljoin(url, redirect_location)

        # RFC 7231, Section 6.4.4
        if response.status == 303:
            method = 'GET'

        retries = kw.get('retries')
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect)

        # Strip headers marked as unsafe to forward to the redirected location.
        # Check remove_headers_on_redirect to avoid a potential network call within
        # conn.is_same_host() which may use socket.gethostbyname() in the future.
        # An explicit ``headers=None`` leaves nothing to strip here.
        if (retries.remove_headers_on_redirect
                and kw['headers'] is not None
                and not conn.is_same_host(redirect_location)):
            # Strip from a copy so the mapping passed in by the caller keeps
            # its sensitive headers for any later use.
            forwarded_headers = kw['headers'].copy()
            for header in list(six.iterkeys(kw['headers'])):
                if header.lower() in retries.remove_headers_on_redirect:
                    forwarded_headers.pop(header, None)
            kw['headers'] = forwarded_headers

        try:
            retries = retries.increment(method, url, response=response, _pool=conn)
        except MaxRetryError:
            if retries.raise_on_redirect:
                raise
            return response

        kw['retries'] = retries
        kw['redirect'] = redirect

        log.info("Redirecting %s -> %s", url, redirect_location)
        return self.urlopen(method, redirect_location, **kw)


class ProxyManager(PoolManager):
    """
    Behaves just like :class:`PoolManager`, but sends all requests through
    the defined proxy, using the CONNECT method for HTTPS URLs.

    :param proxy_url:
        The URL of the proxy to be used.

    :param proxy_headers:
        A dictionary containing headers that will be sent to the proxy. In case
        of HTTP they are being sent with each request, while in the
        HTTPS/CONNECT case they are sent only once. Could be used for proxy
        authentication.

    Example:
        >>> proxy = urllib3.ProxyManager('http://localhost:3128/')
        >>> r1 = proxy.request('GET', 'http://google.com/')
        >>> r2 = proxy.request('GET', 'http://httpbin.org/')
        >>> len(proxy.pools)
        1
        >>> r3 = proxy.request('GET', 'https://httpbin.org/')
        >>> r4 = proxy.request('GET', 'https://twitter.com/')
        >>> len(proxy.pools)
        3

    """

    def __init__(self, proxy_url, num_pools=10, headers=None,
                 proxy_headers=None, **connection_pool_kw):
        """
        Parse ``proxy_url`` and set up the manager to route through it.

        ``proxy_url`` may also be an :class:`HTTPConnectionPool`, in which
        case its scheme, host and port are used to build the proxy URL.

        :raises ProxySchemeUnknown:
            If the proxy scheme is neither ``http`` nor ``https``.
        """

        if isinstance(proxy_url, HTTPConnectionPool):
            proxy_url = '%s://%s:%i' % (proxy_url.scheme, proxy_url.host,
                                        proxy_url.port)
        proxy = parse_url(proxy_url)
        if not proxy.port:
            # Fill in the default port for the scheme (80 if unknown).
            port = port_by_scheme.get(proxy.scheme, 80)
            proxy = proxy._replace(port=port)

        if proxy.scheme not in ("http", "https"):
            raise ProxySchemeUnknown(proxy.scheme)

        self.proxy = proxy
        self.proxy_headers = proxy_headers or {}

        # The proxy settings travel with the pool keyword arguments so that
        # they become part of every pool key and reach every new pool.
        connection_pool_kw['_proxy'] = self.proxy
        connection_pool_kw['_proxy_headers'] = self.proxy_headers

        super(ProxyManager, self).__init__(
            num_pools, headers, **connection_pool_kw)

    def connection_from_host(self, host, port=None, scheme='http', pool_kwargs=None):
        """
        Get a :class:`ConnectionPool` for the request.

        For ``https`` the pool is keyed on the target ``host``/``port``.
        For any other scheme the pool for the proxy itself is returned, so
        all such requests share it.
        """
        if scheme == "https":
            return super(ProxyManager, self).connection_from_host(
                host, port, scheme, pool_kwargs=pool_kwargs)

        return super(ProxyManager, self).connection_from_host(
            self.proxy.host, self.proxy.port, self.proxy.scheme, pool_kwargs=pool_kwargs)

    def _set_proxy_headers(self, url, headers=None):
        """
        Sets headers needed by proxies: specifically, the Accept and Host
        headers. Only sets headers not provided by the user.

        Returns a new dictionary; ``headers`` is not modified.
        """
        headers_ = {'Accept': '*/*'}

        netloc = parse_url(url).netloc
        if netloc:
            headers_['Host'] = netloc

        # User-supplied values win over the defaults set above.
        if headers:
            headers_.update(headers)
        return headers_

    def urlopen(self, method, url, redirect=True, **kw):
        """Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."""
        u = parse_url(url)

        if u.scheme == "http":
            # For proxied HTTPS requests, httplib sets the necessary headers
            # on the CONNECT to the proxy. For HTTP, we'll definitely
            # need to set 'Host' at the very least.
            headers = kw.get('headers', self.headers)
            kw['headers'] = self._set_proxy_headers(url, headers)

        return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw)


def proxy_from_url(url, **kw):
    """
    Create a :class:`ProxyManager` for the proxy at ``url``.

    All keyword arguments are passed through to :class:`ProxyManager`.
    """
    return ProxyManager(proxy_url=url, **kw)