1from __future__ import absolute_import
2import collections
3import functools
4import logging
5
6from ._collections import RecentlyUsedContainer
7from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
8from .connectionpool import port_by_scheme
9from .exceptions import LocationValueError, MaxRetryError, ProxySchemeUnknown
10from .packages.six.moves.urllib.parse import urljoin
11from .request import RequestMethods
12from .util.url import parse_url
13from .util.retry import Retry
14
15
# Public names of this module (what ``from <module> import *`` exposes).
__all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url']


# Module-level logger, named after this module.
log = logging.getLogger(__name__)
20
# Connection-pool keyword arguments that are specific to TLS. They are part of
# the HTTPS pool key and are stripped from the keyword arguments before a
# plain-HTTP pool is created.
SSL_KEYWORDS = (
    'key_file',
    'cert_file',
    'cert_reqs',
    'ca_certs',
    'ssl_version',
    'ca_cert_dir',
    'ssl_context',
)

# Minimal set of fields used to decide which pool a connection comes from.
# These do not depend on ``connection_pool_kw``; they can be derived from the
# URL alone (plus, for the port, the ``port_by_scheme`` mapping).
#
# Every custom key scheme should contain at least these fields.
BasePoolKey = collections.namedtuple(
    'BasePoolKey',
    ('scheme', 'host', 'port'),
)

# Pool keys for HTTP and HTTPS connections. Every field beyond the base ones
# must be present in the PoolManager's ``connection_pool_kw`` instance
# variable.
HTTPPoolKey = collections.namedtuple(
    'HTTPPoolKey',
    BasePoolKey._fields + (
        'timeout',
        'retries',
        'strict',
        'block',
        'source_address',
    ),
)
HTTPSPoolKey = collections.namedtuple(
    'HTTPSPoolKey',
    HTTPPoolKey._fields + SSL_KEYWORDS,
)
41
42
43def _default_key_normalizer(key_class, request_context):
44    """
45    Create a pool key of type ``key_class`` for a request.
46
47    According to RFC 3986, both the scheme and host are case-insensitive.
48    Therefore, this function normalizes both before constructing the pool
49    key for an HTTPS request. If you wish to change this behaviour, provide
50    alternate callables to ``key_fn_by_scheme``.
51
52    :param key_class:
53        The class to use when constructing the key. This should be a namedtuple
54        with the ``scheme`` and ``host`` keys at a minimum.
55
56    :param request_context:
57        A dictionary-like object that contain the context for a request.
58        It should contain a key for each field in the :class:`HTTPPoolKey`
59    """
60    context = {}
61    for key in key_class._fields:
62        context[key] = request_context.get(key)
63    context['scheme'] = context['scheme'].lower()
64    context['host'] = context['host'].lower()
65    return key_class(**context)
66
67
# Maps a URL scheme to the callable that builds the pool key for a request
# context. Replace or add entries to change how pool keys are constructed.
# Every PoolManager takes its own copy of this dictionary, so it can be
# configured globally here or per instance on the manager.
key_fn_by_scheme = dict(
    http=functools.partial(_default_key_normalizer, HTTPPoolKey),
    https=functools.partial(_default_key_normalizer, HTTPSPoolKey),
)

# Maps a URL scheme to the connection pool class used for it. PoolManager
# instances keep a reference to this same dictionary rather than a copy.
pool_classes_by_scheme = dict(
    http=HTTPConnectionPool,
    https=HTTPSConnectionPool,
)
81
82
class PoolManager(RequestMethods):
    """
    Allows for arbitrary requests while transparently keeping track of
    necessary connection pools for you.

    :param num_pools:
        Number of connection pools to cache before discarding the least
        recently used pool.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param \\**connection_pool_kw:
        Additional parameters are used to create fresh
        :class:`urllib3.connectionpool.ConnectionPool` instances.

    Example::

        >>> manager = PoolManager(num_pools=2)
        >>> r = manager.request('GET', 'http://google.com/')
        >>> r = manager.request('GET', 'http://google.com/mail')
        >>> r = manager.request('GET', 'http://yahoo.com/')
        >>> len(manager.pools)
        2

    """

    # No proxy by default; subclasses that route through a proxy set this.
    proxy = None

    def __init__(self, num_pools=10, headers=None, **connection_pool_kw):
        RequestMethods.__init__(self, headers)
        self.connection_pool_kw = connection_pool_kw

        # Pools handed to ``dispose_func`` by the container are closed.
        self.pools = RecentlyUsedContainer(
            num_pools, dispose_func=lambda p: p.close())

        # Per-instance handles on the scheme tables so that they can be
        # overridden on a manager. The key functions are copied; the pool
        # classes mapping is the shared module-level dictionary.
        self.key_fn_by_scheme = key_fn_by_scheme.copy()
        self.pool_classes_by_scheme = pool_classes_by_scheme

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.clear()
        # A false return value lets any in-flight exception propagate.
        return False

    def _new_pool(self, scheme, host, port):
        """
        Create a new :class:`ConnectionPool` based on host, port and scheme.

        This is the factory behind :meth:`connection_from_url` and its
        companion methods; override it to customize how pools are built.
        """
        pool_cls = self.pool_classes_by_scheme[scheme]

        if scheme == 'http':
            # Plain HTTP pools must not receive the TLS-only options.
            pool_kw = dict(
                (name, value)
                for name, value in self.connection_pool_kw.items()
                if name not in SSL_KEYWORDS
            )
        else:
            pool_kw = self.connection_pool_kw

        return pool_cls(host, port, **pool_kw)

    def clear(self):
        """
        Empty our store of pools and direct them all to close.

        This will not affect in-flight connections, but they will not be
        re-used after completion.
        """
        self.pools.clear()

    def connection_from_host(self, host, port=None, scheme='http'):
        """
        Get a :class:`ConnectionPool` based on the host, port, and scheme.

        If ``port`` isn't given, it will be derived from the ``scheme`` using
        ``urllib3.connectionpool.port_by_scheme``. A missing ``scheme``
        defaults to ``'http'``.

        :raises LocationValueError: if ``host`` is empty.
        """
        if not host:
            raise LocationValueError("No host specified.")

        scheme = scheme or 'http'
        # Unknown schemes fall back to port 80.
        port = port or port_by_scheme.get(scheme.lower(), 80)

        context = self.connection_pool_kw.copy()
        context.update(scheme=scheme, port=port, host=host)
        return self.connection_from_context(context)

    def connection_from_context(self, request_context):
        """
        Get a :class:`ConnectionPool` based on the request context.

        ``request_context`` must at least contain the ``scheme`` key and its
        value must be a key in ``key_fn_by_scheme`` instance variable.
        """
        make_key = self.key_fn_by_scheme[request_context['scheme'].lower()]
        return self.connection_from_pool_key(make_key(request_context))

    def connection_from_pool_key(self, pool_key):
        """
        Get a :class:`ConnectionPool` based on the provided pool key.

        ``pool_key`` should be a namedtuple that only contains immutable
        objects. At a minimum it must have the ``scheme``, ``host``, and
        ``port`` fields.
        """
        with self.pools.lock:
            cached = self.pools.get(pool_key)
            if not cached:
                # Nothing usable is stored under this key: build a fresh
                # pool of the right type and remember it.
                cached = self._new_pool(
                    pool_key.scheme, pool_key.host, pool_key.port)
                self.pools[pool_key] = cached
            return cached

    def connection_from_url(self, url):
        """
        Similar to :func:`urllib3.connectionpool.connection_from_url` but
        doesn't pass any additional parameters to the
        :class:`urllib3.connectionpool.ConnectionPool` constructor.

        Additional parameters are taken from the :class:`.PoolManager`
        constructor.
        """
        parsed = parse_url(url)
        return self.connection_from_host(
            parsed.host, port=parsed.port, scheme=parsed.scheme)

    def urlopen(self, method, url, redirect=True, **kw):
        """
        Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`
        with custom cross-host redirect logic and only sends the request-uri
        portion of the ``url``.

        The given ``url`` parameter must be absolute, such that an appropriate
        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.
        """
        parsed = parse_url(url)
        pool = self.connection_from_host(
            parsed.host, port=parsed.port, scheme=parsed.scheme)

        # Redirects are followed here (possibly onto another host), so the
        # pool itself must neither follow them nor insist on its own host.
        kw.update(assert_same_host=False, redirect=False)
        kw.setdefault('headers', self.headers)

        # With a proxy configured, plain-HTTP requests carry the full URL;
        # everything else only gets the request-uri part.
        through_http_proxy = self.proxy is not None and parsed.scheme == "http"
        target = url if through_http_proxy else parsed.request_uri
        response = pool.urlopen(method, target, **kw)

        if not redirect:
            return response
        location = response.get_redirect_location()
        if not location:
            return response

        # Support relative URLs for redirecting.
        location = urljoin(url, location)

        # RFC 7231, Section 6.4.4
        if response.status == 303:
            method = 'GET'

        retries = kw.get('retries')
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect)

        try:
            retries = retries.increment(
                method, url, response=response, _pool=pool)
        except MaxRetryError:
            # ``retries`` is still the pre-increment object here.
            if not retries.raise_on_redirect:
                return response
            raise

        kw.update(retries=retries, redirect=redirect)

        log.info("Redirecting %s -> %s", url, location)
        return self.urlopen(method, location, **kw)
273
274
class ProxyManager(PoolManager):
    """
    Behaves just like :class:`PoolManager`, but sends all requests through
    the defined proxy, using the CONNECT method for HTTPS URLs.

    :param proxy_url:
        The URL of the proxy to be used.

    :param proxy_headers:
        A dictionary containing headers that will be sent to the proxy. In
        case of HTTP they are being sent with each request, while in the
        HTTPS/CONNECT case they are sent only once. Could be used for proxy
        authentication.

    Example:
        >>> proxy = urllib3.ProxyManager('http://localhost:3128/')
        >>> r1 = proxy.request('GET', 'http://google.com/')
        >>> r2 = proxy.request('GET', 'http://httpbin.org/')
        >>> len(proxy.pools)
        1
        >>> r3 = proxy.request('GET', 'https://httpbin.org/')
        >>> r4 = proxy.request('GET', 'https://twitter.com/')
        >>> len(proxy.pools)
        3

    """

    def __init__(self, proxy_url, num_pools=10, headers=None,
                 proxy_headers=None, **connection_pool_kw):

        # A connection pool may be passed in place of a URL; turn it into
        # the equivalent URL string first.
        if isinstance(proxy_url, HTTPConnectionPool):
            proxy_url = '%s://%s:%i' % (
                proxy_url.scheme, proxy_url.host, proxy_url.port)

        parsed = parse_url(proxy_url)
        if not parsed.port:
            # Fill in the scheme's default port (80 for unknown schemes).
            parsed = parsed._replace(
                port=port_by_scheme.get(parsed.scheme, 80))

        if parsed.scheme not in ("http", "https"):
            raise ProxySchemeUnknown(parsed.scheme)

        self.proxy = parsed
        self.proxy_headers = proxy_headers or {}

        # Every pool created by this manager is told about the proxy.
        connection_pool_kw.update(
            _proxy=self.proxy, _proxy_headers=self.proxy_headers)

        super(ProxyManager, self).__init__(
            num_pools, headers, **connection_pool_kw)

    def connection_from_host(self, host, port=None, scheme='http'):
        """
        Get a pool for the target host (HTTPS) or for the proxy (otherwise).

        Only ``scheme == "https"`` yields a pool keyed on the requested
        host; every other scheme is served by the pool for the proxy itself.
        """
        if scheme != "https":
            host = self.proxy.host
            port = self.proxy.port
            scheme = self.proxy.scheme

        return super(ProxyManager, self).connection_from_host(
            host, port, scheme)

    def _set_proxy_headers(self, url, headers=None):
        """
        Sets headers needed by proxies: specifically, the Accept and Host
        headers. Only sets headers not provided by the user.

        Returns a new dictionary; ``headers`` itself is not modified.
        """
        merged = {'Accept': '*/*'}

        host_header = parse_url(url).netloc
        if host_header:
            merged['Host'] = host_header

        # User-supplied values win over the defaults above.
        if headers:
            merged.update(headers)
        return merged

    def urlopen(self, method, url, redirect=True, **kw):
        """
        Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute.
        """
        if parse_url(url).scheme == "http":
            # For proxied HTTPS requests, httplib sets the necessary headers
            # on the CONNECT to the proxy. For HTTP, we'll definitely
            # need to set 'Host' at the very least.
            kw['headers'] = self._set_proxy_headers(
                url, kw.get('headers', self.headers))

        return super(ProxyManager, self).urlopen(
            method, url, redirect=redirect, **kw)
360
361
def proxy_from_url(url, **kw):
    """
    Create a :class:`ProxyManager` for the proxy at ``url``.

    :param url:
        The URL of the proxy; passed on as ``proxy_url``.

    :param \\**kw:
        Passed through unchanged to the :class:`ProxyManager` constructor.
    """
    manager = ProxyManager(proxy_url=url, **kw)
    return manager
364