1from __future__ import absolute_import 2import collections 3import functools 4import logging 5 6from ._collections import RecentlyUsedContainer 7from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool 8from .connectionpool import port_by_scheme 9from .exceptions import LocationValueError, MaxRetryError, ProxySchemeUnknown 10from .packages.six.moves.urllib.parse import urljoin 11from .request import RequestMethods 12from .util.url import parse_url 13from .util.retry import Retry 14 15 16__all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url'] 17 18 19log = logging.getLogger(__name__) 20 21SSL_KEYWORDS = ('key_file', 'cert_file', 'cert_reqs', 'ca_certs', 22 'ssl_version', 'ca_cert_dir', 'ssl_context') 23 24# The base fields to use when determining what pool to get a connection from; 25# these do not rely on the ``connection_pool_kw`` and can be determined by the 26# URL and potentially the ``urllib3.connection.port_by_scheme`` dictionary. 27# 28# All custom key schemes should include the fields in this key at a minimum. 29BasePoolKey = collections.namedtuple('BasePoolKey', ('scheme', 'host', 'port')) 30 31# The fields to use when determining what pool to get a HTTP and HTTPS 32# connection from. All additional fields must be present in the PoolManager's 33# ``connection_pool_kw`` instance variable. 34HTTPPoolKey = collections.namedtuple( 35 'HTTPPoolKey', BasePoolKey._fields + ('timeout', 'retries', 'strict', 36 'block', 'source_address') 37) 38HTTPSPoolKey = collections.namedtuple( 39 'HTTPSPoolKey', HTTPPoolKey._fields + SSL_KEYWORDS 40) 41 42 43def _default_key_normalizer(key_class, request_context): 44 """ 45 Create a pool key of type ``key_class`` for a request. 46 47 According to RFC 3986, both the scheme and host are case-insensitive. 48 Therefore, this function normalizes both before constructing the pool 49 key for an HTTPS request. If you wish to change this behaviour, provide 50 alternate callables to ``key_fn_by_scheme``. 51 52 :param key_class: 53 The class to use when constructing the key. This should be a namedtuple 54 with the ``scheme`` and ``host`` keys at a minimum. 55 56 :param request_context: 57 A dictionary-like object that contain the context for a request. 58 It should contain a key for each field in the :class:`HTTPPoolKey` 59 """ 60 context = {} 61 for key in key_class._fields: 62 context[key] = request_context.get(key) 63 context['scheme'] = context['scheme'].lower() 64 context['host'] = context['host'].lower() 65 return key_class(**context) 66 67 68# A dictionary that maps a scheme to a callable that creates a pool key. 69# This can be used to alter the way pool keys are constructed, if desired. 70# Each PoolManager makes a copy of this dictionary so they can be configured 71# globally here, or individually on the instance. 72key_fn_by_scheme = { 73 'http': functools.partial(_default_key_normalizer, HTTPPoolKey), 74 'https': functools.partial(_default_key_normalizer, HTTPSPoolKey), 75} 76 77pool_classes_by_scheme = { 78 'http': HTTPConnectionPool, 79 'https': HTTPSConnectionPool, 80} 81 82 83class PoolManager(RequestMethods): 84 """ 85 Allows for arbitrary requests while transparently keeping track of 86 necessary connection pools for you. 87 88 :param num_pools: 89 Number of connection pools to cache before discarding the least 90 recently used pool. 91 92 :param headers: 93 Headers to include with all requests, unless other headers are given 94 explicitly. 95 96 :param \\**connection_pool_kw: 97 Additional parameters are used to create fresh 98 :class:`urllib3.connectionpool.ConnectionPool` instances. 99 100 Example:: 101 102 >>> manager = PoolManager(num_pools=2) 103 >>> r = manager.request('GET', 'http://google.com/') 104 >>> r = manager.request('GET', 'http://google.com/mail') 105 >>> r = manager.request('GET', 'http://yahoo.com/') 106 >>> len(manager.pools) 107 2 108 109 """ 110 111 proxy = None 112 113 def __init__(self, num_pools=10, headers=None, **connection_pool_kw): 114 RequestMethods.__init__(self, headers) 115 self.connection_pool_kw = connection_pool_kw 116 self.pools = RecentlyUsedContainer(num_pools, 117 dispose_func=lambda p: p.close()) 118 119 # Locally set the pool classes and keys so other PoolManagers can 120 # override them. 121 self.pool_classes_by_scheme = pool_classes_by_scheme 122 self.key_fn_by_scheme = key_fn_by_scheme.copy() 123 124 def __enter__(self): 125 return self 126 127 def __exit__(self, exc_type, exc_val, exc_tb): 128 self.clear() 129 # Return False to re-raise any potential exceptions 130 return False 131 132 def _new_pool(self, scheme, host, port): 133 """ 134 Create a new :class:`ConnectionPool` based on host, port and scheme. 135 136 This method is used to actually create the connection pools handed out 137 by :meth:`connection_from_url` and companion methods. It is intended 138 to be overridden for customization. 139 """ 140 pool_cls = self.pool_classes_by_scheme[scheme] 141 kwargs = self.connection_pool_kw 142 if scheme == 'http': 143 kwargs = self.connection_pool_kw.copy() 144 for kw in SSL_KEYWORDS: 145 kwargs.pop(kw, None) 146 147 return pool_cls(host, port, **kwargs) 148 149 def clear(self): 150 """ 151 Empty our store of pools and direct them all to close. 152 153 This will not affect in-flight connections, but they will not be 154 re-used after completion. 155 """ 156 self.pools.clear() 157 158 def connection_from_host(self, host, port=None, scheme='http'): 159 """ 160 Get a :class:`ConnectionPool` based on the host, port, and scheme. 161 162 If ``port`` isn't given, it will be derived from the ``scheme`` using 163 ``urllib3.connectionpool.port_by_scheme``. 164 """ 165 166 if not host: 167 raise LocationValueError("No host specified.") 168 169 request_context = self.connection_pool_kw.copy() 170 request_context['scheme'] = scheme or 'http' 171 if not port: 172 port = port_by_scheme.get(request_context['scheme'].lower(), 80) 173 request_context['port'] = port 174 request_context['host'] = host 175 176 return self.connection_from_context(request_context) 177 178 def connection_from_context(self, request_context): 179 """ 180 Get a :class:`ConnectionPool` based on the request context. 181 182 ``request_context`` must at least contain the ``scheme`` key and its 183 value must be a key in ``key_fn_by_scheme`` instance variable. 184 """ 185 scheme = request_context['scheme'].lower() 186 pool_key_constructor = self.key_fn_by_scheme[scheme] 187 pool_key = pool_key_constructor(request_context) 188 189 return self.connection_from_pool_key(pool_key) 190 191 def connection_from_pool_key(self, pool_key): 192 """ 193 Get a :class:`ConnectionPool` based on the provided pool key. 194 195 ``pool_key`` should be a namedtuple that only contains immutable 196 objects. At a minimum it must have the ``scheme``, ``host``, and 197 ``port`` fields. 198 """ 199 with self.pools.lock: 200 # If the scheme, host, or port doesn't match existing open 201 # connections, open a new ConnectionPool. 202 pool = self.pools.get(pool_key) 203 if pool: 204 return pool 205 206 # Make a fresh ConnectionPool of the desired type 207 pool = self._new_pool(pool_key.scheme, pool_key.host, pool_key.port) 208 self.pools[pool_key] = pool 209 210 return pool 211 212 def connection_from_url(self, url): 213 """ 214 Similar to :func:`urllib3.connectionpool.connection_from_url` but 215 doesn't pass any additional parameters to the 216 :class:`urllib3.connectionpool.ConnectionPool` constructor. 217 218 Additional parameters are taken from the :class:`.PoolManager` 219 constructor. 220 """ 221 u = parse_url(url) 222 return self.connection_from_host(u.host, port=u.port, scheme=u.scheme) 223 224 def urlopen(self, method, url, redirect=True, **kw): 225 """ 226 Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen` 227 with custom cross-host redirect logic and only sends the request-uri 228 portion of the ``url``. 229 230 The given ``url`` parameter must be absolute, such that an appropriate 231 :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it. 232 """ 233 u = parse_url(url) 234 conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme) 235 236 kw['assert_same_host'] = False 237 kw['redirect'] = False 238 if 'headers' not in kw: 239 kw['headers'] = self.headers 240 241 if self.proxy is not None and u.scheme == "http": 242 response = conn.urlopen(method, url, **kw) 243 else: 244 response = conn.urlopen(method, u.request_uri, **kw) 245 246 redirect_location = redirect and response.get_redirect_location() 247 if not redirect_location: 248 return response 249 250 # Support relative URLs for redirecting. 251 redirect_location = urljoin(url, redirect_location) 252 253 # RFC 7231, Section 6.4.4 254 if response.status == 303: 255 method = 'GET' 256 257 retries = kw.get('retries') 258 if not isinstance(retries, Retry): 259 retries = Retry.from_int(retries, redirect=redirect) 260 261 try: 262 retries = retries.increment(method, url, response=response, _pool=conn) 263 except MaxRetryError: 264 if retries.raise_on_redirect: 265 raise 266 return response 267 268 kw['retries'] = retries 269 kw['redirect'] = redirect 270 271 log.info("Redirecting %s -> %s", url, redirect_location) 272 return self.urlopen(method, redirect_location, **kw) 273 274 275class ProxyManager(PoolManager): 276 """ 277 Behaves just like :class:`PoolManager`, but sends all requests through 278 the defined proxy, using the CONNECT method for HTTPS URLs. 279 280 :param proxy_url: 281 The URL of the proxy to be used. 282 283 :param proxy_headers: 284 A dictionary contaning headers that will be sent to the proxy. In case 285 of HTTP they are being sent with each request, while in the 286 HTTPS/CONNECT case they are sent only once. Could be used for proxy 287 authentication. 288 289 Example: 290 >>> proxy = urllib3.ProxyManager('http://localhost:3128/') 291 >>> r1 = proxy.request('GET', 'http://google.com/') 292 >>> r2 = proxy.request('GET', 'http://httpbin.org/') 293 >>> len(proxy.pools) 294 1 295 >>> r3 = proxy.request('GET', 'https://httpbin.org/') 296 >>> r4 = proxy.request('GET', 'https://twitter.com/') 297 >>> len(proxy.pools) 298 3 299 300 """ 301 302 def __init__(self, proxy_url, num_pools=10, headers=None, 303 proxy_headers=None, **connection_pool_kw): 304 305 if isinstance(proxy_url, HTTPConnectionPool): 306 proxy_url = '%s://%s:%i' % (proxy_url.scheme, proxy_url.host, 307 proxy_url.port) 308 proxy = parse_url(proxy_url) 309 if not proxy.port: 310 port = port_by_scheme.get(proxy.scheme, 80) 311 proxy = proxy._replace(port=port) 312 313 if proxy.scheme not in ("http", "https"): 314 raise ProxySchemeUnknown(proxy.scheme) 315 316 self.proxy = proxy 317 self.proxy_headers = proxy_headers or {} 318 319 connection_pool_kw['_proxy'] = self.proxy 320 connection_pool_kw['_proxy_headers'] = self.proxy_headers 321 322 super(ProxyManager, self).__init__( 323 num_pools, headers, **connection_pool_kw) 324 325 def connection_from_host(self, host, port=None, scheme='http'): 326 if scheme == "https": 327 return super(ProxyManager, self).connection_from_host( 328 host, port, scheme) 329 330 return super(ProxyManager, self).connection_from_host( 331 self.proxy.host, self.proxy.port, self.proxy.scheme) 332 333 def _set_proxy_headers(self, url, headers=None): 334 """ 335 Sets headers needed by proxies: specifically, the Accept and Host 336 headers. Only sets headers not provided by the user. 337 """ 338 headers_ = {'Accept': '*/*'} 339 340 netloc = parse_url(url).netloc 341 if netloc: 342 headers_['Host'] = netloc 343 344 if headers: 345 headers_.update(headers) 346 return headers_ 347 348 def urlopen(self, method, url, redirect=True, **kw): 349 "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute." 350 u = parse_url(url) 351 352 if u.scheme == "http": 353 # For proxied HTTPS requests, httplib sets the necessary headers 354 # on the CONNECT to the proxy. For HTTP, we'll definitely 355 # need to set 'Host' at the very least. 356 headers = kw.get('headers', self.headers) 357 kw['headers'] = self._set_proxy_headers(url, headers) 358 359 return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw) 360 361 362def proxy_from_url(url, **kw): 363 return ProxyManager(proxy_url=url, **kw) 364