1from __future__ import absolute_import
2import collections
3import functools
4import logging
5
6from ._collections import RecentlyUsedContainer
7from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
8from .connectionpool import port_by_scheme
9from .exceptions import LocationValueError, MaxRetryError, ProxySchemeUnknown
10from .packages import six
11from .packages.six.moves.urllib.parse import urljoin
12from .request import RequestMethods
13from .util.url import parse_url
14from .util.retry import Retry
15
16
# Public names of this module; this is what ``from <module> import *`` binds.
__all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url']


# Module-level logger, named after this module.
log = logging.getLogger(__name__)
21
# Pool keyword arguments that are TLS-specific. They are dropped from the
# keyword arguments when a pool for the plain ``http`` scheme is created.
SSL_KEYWORDS = (
    'key_file',
    'cert_file',
    'cert_reqs',
    'ca_certs',
    'ssl_version',
    'ca_cert_dir',
    'ssl_context',
    'key_password',
)

# Every keyword argument that may reach the pool manager, its pools, or the
# underlying connections, each prefixed with ``key_``. Together they make up
# the fields of the pool key.
_key_fields = (
    # -- target of the connection
    'key_scheme',              # str
    'key_host',                # str
    'key_port',                # int
    # -- general pool behaviour
    'key_timeout',             # int or float or Timeout
    'key_retries',             # int or Retry
    'key_strict',              # bool
    'key_block',               # bool
    'key_source_address',      # str
    # -- TLS configuration
    'key_key_file',            # str
    'key_key_password',        # str
    'key_cert_file',           # str
    'key_cert_reqs',           # str
    'key_ca_certs',            # str
    'key_ssl_version',         # str
    'key_ca_cert_dir',         # str
    'key_ssl_context',         # ssl.SSLContext or urllib3.util.ssl_.SSLContext
    # -- sizing, headers and proxying
    'key_maxsize',             # int
    'key_headers',             # dict
    'key__proxy',              # parsed proxy url
    'key__proxy_headers',      # dict
    'key_socket_options',      # list of (level, optname, value) tuples
    'key__socks_options',      # dict
    # -- certificate / hostname verification
    'key_assert_hostname',     # bool or string
    'key_assert_fingerprint',  # str
    'key_server_hostname',     # str
)

#: The namedtuple class used to construct keys for the connection pool.
#: Custom key schemes should provide at least the fields of this key.
PoolKey = collections.namedtuple('PoolKey', _key_fields)
59
60
def _default_key_normalizer(key_class, request_context):
    """
    Build a hashable pool key from a request context dictionary.

    RFC 3986 treats both the scheme and the host as case-insensitive, so
    both are lower-cased before the key is constructed. Unhashable values
    (the header/option dictionaries and the ``socket_options`` list) are
    converted to immutable equivalents. To change this behaviour, register
    alternate callables in ``key_fn_by_scheme``.

    :param key_class:
        The namedtuple class used to construct the key. Every entry of the
        context is passed to it under its name prefixed with ``key_`` (for
        example ``key_scheme`` and ``key_host``), so it must declare a field
        for each of them.
    :type  key_class: namedtuple
    :param request_context:
        A dictionary-like object holding the context for a request. It must
        contain ``scheme`` and ``host``. It is copied, not modified.
    :type  request_context: dict

    :return: A namedtuple that can be used as a connection pool key; fields
        absent from the context are set to ``None``.
    :rtype:  PoolKey
    """
    # Work on a shallow copy so the caller's dictionary is left untouched.
    normalized = request_context.copy()
    for name in ('scheme', 'host'):
        normalized[name] = normalized[name].lower()

    # Dictionaries cannot be hashed; freeze their items instead.
    for name in ('headers', '_proxy_headers', '_socks_options'):
        mapping = normalized.get(name)
        if mapping is not None:
            normalized[name] = frozenset(mapping.items())

    # ``socket_options`` may arrive as a list; a tuple is hashable.
    if normalized.get('socket_options') is not None:
        normalized['socket_options'] = tuple(normalized['socket_options'])

    # Rename every entry to its ``key_``-prefixed form: namedtuple fields
    # cannot begin with an underscore, which some context names do.
    for name in list(normalized):
        normalized['key_' + name] = normalized.pop(name)

    # Any field the context did not supply defaults to ``None``.
    for field in key_class._fields:
        normalized.setdefault(field, None)

    return key_class(**normalized)
108
109
#: Maps a URL scheme to the callable that builds a pool key from a request
#: context. Replace or add entries to alter how pool keys are constructed.
#: Each PoolManager takes a copy of this dictionary, so it can be configured
#: globally here or individually on an instance.
key_fn_by_scheme = dict(
    (scheme, functools.partial(_default_key_normalizer, PoolKey))
    for scheme in ('http', 'https')
)

#: Maps a URL scheme to the connection pool class created for it.
pool_classes_by_scheme = dict(
    http=HTTPConnectionPool,
    https=HTTPSConnectionPool,
)
123
124
class PoolManager(RequestMethods):
    """
    Allows for arbitrary requests while transparently keeping track of
    necessary connection pools for you.

    :param num_pools:
        Number of connection pools to cache before discarding the least
        recently used pool.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param \\**connection_pool_kw:
        Additional parameters are used to create fresh
        :class:`urllib3.connectionpool.ConnectionPool` instances.

    Example::

        >>> manager = PoolManager(num_pools=2)
        >>> r = manager.request('GET', 'http://google.com/')
        >>> r = manager.request('GET', 'http://google.com/mail')
        >>> r = manager.request('GET', 'http://yahoo.com/')
        >>> len(manager.pools)
        2

    """

    # No proxy by default. ``urlopen`` sends the absolute URL (instead of the
    # request-uri) for plain-http requests when this is not ``None``.
    proxy = None

    def __init__(self, num_pools=10, headers=None, **connection_pool_kw):
        RequestMethods.__init__(self, headers)
        self.connection_pool_kw = connection_pool_kw
        # Evicted pools are closed as they fall out of the container.
        self.pools = RecentlyUsedContainer(num_pools,
                                           dispose_func=lambda p: p.close())

        # Keep per-instance references so the lookups can be customised on
        # an instance. Note that ``key_fn_by_scheme`` is copied while
        # ``pool_classes_by_scheme`` is the module-level mapping itself.
        self.pool_classes_by_scheme = pool_classes_by_scheme
        self.key_fn_by_scheme = key_fn_by_scheme.copy()

    def __enter__(self):
        """Enter a ``with`` block; the manager itself is the context value."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Clear all cached pools when the ``with`` block is left."""
        self.clear()
        # Return False to re-raise any potential exceptions
        return False

    def _new_pool(self, scheme, host, port, request_context=None):
        """
        Create a new :class:`ConnectionPool` based on host, port, scheme, and
        any additional pool keyword arguments.

        If ``request_context`` is provided, it is provided as keyword arguments
        to the pool class used; otherwise ``self.connection_pool_kw`` is used.
        The mapping that is passed in is never modified. This method is used
        to actually create the connection pools handed out by
        :meth:`connection_from_url` and companion methods. It is intended to
        be overridden for customization.

        :raises KeyError: if ``scheme`` has no entry in
            ``self.pool_classes_by_scheme``.
        """
        pool_cls = self.pool_classes_by_scheme[scheme]
        if request_context is None:
            request_context = self.connection_pool_kw

        # Always work on a copy: keys are stripped below, and stripping them
        # from the caller's dictionary would make that dictionary unusable
        # for a later ``connection_from_context`` call.
        request_context = request_context.copy()

        # Although the context has everything necessary to create the pool,
        # this function has historically only used the scheme, host, and port
        # in the positional args. When an API change is acceptable these can
        # be removed.
        for key in ('scheme', 'host', 'port'):
            request_context.pop(key, None)

        # Plain-HTTP pools are not given the TLS-only keyword arguments.
        if scheme == 'http':
            for kw in SSL_KEYWORDS:
                request_context.pop(kw, None)

        return pool_cls(host, port, **request_context)

    def clear(self):
        """
        Empty our store of pools and direct them all to close.

        This will not affect in-flight connections, but they will not be
        re-used after completion.
        """
        self.pools.clear()

    def connection_from_host(self, host, port=None, scheme='http', pool_kwargs=None):
        """
        Get a :class:`ConnectionPool` based on the host, port, and scheme.

        If ``port`` isn't given, it will be derived from the ``scheme`` using
        ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is
        provided, it is merged with the instance's ``connection_pool_kw``
        variable and used to create the new connection pool, if one is
        needed.

        :raises LocationValueError: if ``host`` is empty or ``None``.
        """

        if not host:
            raise LocationValueError("No host specified.")

        request_context = self._merge_pool_kwargs(pool_kwargs)
        # A missing scheme is treated as plain http.
        request_context['scheme'] = scheme or 'http'
        if not port:
            port = port_by_scheme.get(request_context['scheme'].lower(), 80)
        request_context['port'] = port
        request_context['host'] = host

        return self.connection_from_context(request_context)

    def connection_from_context(self, request_context):
        """
        Get a :class:`ConnectionPool` based on the request context.

        ``request_context`` must at least contain the ``scheme`` key and its
        value must be a key in ``key_fn_by_scheme`` instance variable. The
        scheme is matched case-insensitively.
        """
        scheme = request_context['scheme'].lower()
        pool_key_constructor = self.key_fn_by_scheme[scheme]
        pool_key = pool_key_constructor(request_context)

        return self.connection_from_pool_key(pool_key, request_context=request_context)

    def connection_from_pool_key(self, pool_key, request_context=None):
        """
        Get a :class:`ConnectionPool` based on the provided pool key.

        ``pool_key`` should be a namedtuple that only contains immutable
        objects. At a minimum it must have the ``scheme``, ``host``, and
        ``port`` fields.

        ``request_context`` is only consulted when no pool is cached under
        ``pool_key``; in that case it must contain ``scheme``, ``host`` and
        ``port`` and is used to build the new pool.
        """
        with self.pools.lock:
            # If the scheme, host, or port doesn't match existing open
            # connections, open a new ConnectionPool.
            pool = self.pools.get(pool_key)
            if pool:
                return pool

            # Make a fresh ConnectionPool of the desired type. The scheme is
            # lower-cased so that the pool class lookup agrees with the
            # case-insensitive key lookup done in connection_from_context.
            scheme = request_context['scheme'].lower()
            host = request_context['host']
            port = request_context['port']
            pool = self._new_pool(scheme, host, port, request_context=request_context)
            self.pools[pool_key] = pool

        return pool

    def connection_from_url(self, url, pool_kwargs=None):
        """
        Similar to :func:`urllib3.connectionpool.connection_from_url`.

        If ``pool_kwargs`` is not provided and a new pool needs to be
        constructed, ``self.connection_pool_kw`` is used to initialize
        the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs``
        is provided, it is merged over ``self.connection_pool_kw`` first.
        Note that if a new pool does not need to be created for the request,
        the provided ``pool_kwargs`` are not used.
        """
        u = parse_url(url)
        return self.connection_from_host(u.host, port=u.port, scheme=u.scheme,
                                         pool_kwargs=pool_kwargs)

    def _merge_pool_kwargs(self, override):
        """
        Merge a dictionary of override values for self.connection_pool_kw.

        This does not modify self.connection_pool_kw and returns a new dict.
        Any keys in the override dictionary with a value of ``None`` are
        removed from the merged dictionary.
        """
        base_pool_kwargs = self.connection_pool_kw.copy()
        if override:
            for key, value in override.items():
                if value is None:
                    # ``None`` means "unset this option"; a key that was
                    # never set is simply ignored.
                    base_pool_kwargs.pop(key, None)
                else:
                    base_pool_kwargs[key] = value
        return base_pool_kwargs

    def urlopen(self, method, url, redirect=True, **kw):
        """
        Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`
        with custom cross-host redirect logic and only sends the request-uri
        portion of the ``url``.

        The given ``url`` parameter must be absolute, such that an appropriate
        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.

        A ``headers`` mapping passed by the caller is not modified, even when
        headers have to be stripped for a cross-host redirect.
        """
        u = parse_url(url)
        conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

        # Redirects are followed here (they may cross hosts, hence pools),
        # so the pool itself must neither follow them nor check the host.
        kw['assert_same_host'] = False
        kw['redirect'] = False

        if 'headers' not in kw:
            kw['headers'] = self.headers.copy()

        if self.proxy is not None and u.scheme == "http":
            response = conn.urlopen(method, url, **kw)
        else:
            response = conn.urlopen(method, u.request_uri, **kw)

        redirect_location = redirect and response.get_redirect_location()
        if not redirect_location:
            return response

        # Support relative URLs for redirecting.
        redirect_location = urljoin(url, redirect_location)

        # RFC 7231, Section 6.4.4
        if response.status == 303:
            method = 'GET'

        retries = kw.get('retries')
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect)

        # Strip headers marked as unsafe to forward to the redirected location.
        # Check remove_headers_on_redirect to avoid a potential network call within
        # conn.is_same_host() which may use socket.gethostbyname() in the future.
        if (retries.remove_headers_on_redirect
                and not conn.is_same_host(redirect_location)):
            # Filter a copy rather than popping in place: kw['headers'] may be
            # the very mapping the caller passed in, and it must keep its
            # entries (e.g. credentials) for the caller's later requests.
            forwarded = kw['headers'].copy()
            for header in list(six.iterkeys(kw['headers'])):
                if header.lower() in retries.remove_headers_on_redirect:
                    forwarded.pop(header, None)
            kw['headers'] = forwarded

        try:
            retries = retries.increment(method, url, response=response, _pool=conn)
        except MaxRetryError:
            # Redirect budget exhausted: either propagate or hand back the
            # last redirect response, as configured on the Retry object.
            if retries.raise_on_redirect:
                raise
            return response

        kw['retries'] = retries
        kw['redirect'] = redirect

        log.info("Redirecting %s -> %s", url, redirect_location)
        return self.urlopen(method, redirect_location, **kw)
365
366
class ProxyManager(PoolManager):
    """
    Behaves just like :class:`PoolManager`, but sends all requests through
    the defined proxy, using the CONNECT method for HTTPS URLs.

    :param proxy_url:
        The URL of the proxy to be used. An ``HTTPConnectionPool`` instance
        is also accepted; its scheme, host and port are used to form the URL.

    :param proxy_headers:
        A dictionary containing headers that will be sent to the proxy. In case
        of HTTP they are being sent with each request, while in the
        HTTPS/CONNECT case they are sent only once. Could be used for proxy
        authentication.

    Example:
        >>> proxy = urllib3.ProxyManager('http://localhost:3128/')
        >>> r1 = proxy.request('GET', 'http://google.com/')
        >>> r2 = proxy.request('GET', 'http://httpbin.org/')
        >>> len(proxy.pools)
        1
        >>> r3 = proxy.request('GET', 'https://httpbin.org/')
        >>> r4 = proxy.request('GET', 'https://twitter.com/')
        >>> len(proxy.pools)
        3

    """

    def __init__(self, proxy_url, num_pools=10, headers=None,
                 proxy_headers=None, **connection_pool_kw):

        # A pool object may stand in for the proxy URL; rebuild the URL
        # from its scheme, host and port.
        if isinstance(proxy_url, HTTPConnectionPool):
            proxy_url = '%s://%s:%i' % (proxy_url.scheme, proxy_url.host,
                                        proxy_url.port)

        parsed = parse_url(proxy_url)
        if not parsed.port:
            # No explicit port: use the scheme's default, or 80 if unknown.
            default_port = port_by_scheme.get(parsed.scheme, 80)
            parsed = parsed._replace(port=default_port)

        if parsed.scheme not in ("http", "https"):
            raise ProxySchemeUnknown(parsed.scheme)

        self.proxy = parsed
        self.proxy_headers = proxy_headers or {}

        # Hand the proxy settings to every pool this manager creates.
        connection_pool_kw.update(
            _proxy=self.proxy,
            _proxy_headers=self.proxy_headers,
        )

        super(ProxyManager, self).__init__(
            num_pools, headers, **connection_pool_kw)

    def connection_from_host(self, host, port=None, scheme='http', pool_kwargs=None):
        """
        Get the pool to use for a request to ``host``.

        For the ``https`` scheme the pool is looked up for the target host
        itself. For any other scheme the pool for the proxy's own host, port
        and scheme is returned, regardless of the target.
        """
        if scheme != "https":
            host = self.proxy.host
            port = self.proxy.port
            scheme = self.proxy.scheme

        return super(ProxyManager, self).connection_from_host(
            host, port, scheme, pool_kwargs=pool_kwargs)

    def _set_proxy_headers(self, url, headers=None):
        """
        Sets headers needed by proxies: specifically, the Accept and Host
        headers. Only sets headers not provided by the user.

        Returns a new dictionary; ``headers`` itself is not modified.
        """
        merged = {'Accept': '*/*'}

        target_netloc = parse_url(url).netloc
        if target_netloc:
            merged['Host'] = target_netloc

        # User-supplied values are applied last so that they take precedence
        # over the defaults above.
        merged.update(headers or {})
        return merged

    def urlopen(self, method, url, redirect=True, **kw):
        """
        Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute.

        For plain ``http`` URLs the request headers (``kw['headers']``, or
        the manager's default headers when none are given) are completed
        with the headers a proxy needs before the request is delegated.
        """
        if parse_url(url).scheme == "http":
            # For proxied HTTPS requests, httplib sets the necessary headers
            # on the CONNECT to the proxy. For HTTP, we'll definitely
            # need to set 'Host' at the very least.
            base_headers = kw.get('headers', self.headers)
            kw['headers'] = self._set_proxy_headers(url, base_headers)

        return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw)
452
453
def proxy_from_url(url, **kw):
    """
    Create a :class:`ProxyManager` for the proxy located at ``url``.

    :param url:
        The proxy URL, passed to :class:`ProxyManager` as ``proxy_url``.
    :param \\**kw:
        Any further keyword arguments are forwarded to
        :class:`ProxyManager` unchanged.
    :return: The newly created :class:`ProxyManager`.
    """
    manager = ProxyManager(proxy_url=url, **kw)
    return manager
456