1from __future__ import absolute_import
2import collections
3import functools
4import logging
5import warnings
6
7from ._collections import RecentlyUsedContainer
8from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
9from .connectionpool import port_by_scheme
10from .exceptions import (
11    LocationValueError,
12    MaxRetryError,
13    ProxySchemeUnknown,
14    InvalidProxyConfigurationWarning,
15)
16from .packages import six
17from .packages.six.moves.urllib.parse import urljoin
18from .request import RequestMethods
19from .util.url import parse_url
20from .util.retry import Retry
21
22
# Names exported by a star-import of this module.
__all__ = ["PoolManager", "ProxyManager", "proxy_from_url"]


# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# TLS-related pool keyword arguments. ``PoolManager._new_pool`` removes every
# one of these from the pool keyword arguments when the scheme is "http".
SSL_KEYWORDS = (
    "key_file",
    "cert_file",
    "cert_reqs",
    "ca_certs",
    "ssl_version",
    "ca_cert_dir",
    "ssl_context",
    "key_password",
)
38
# Every keyword argument that could be handed to the pool manager, its pools,
# or the underlying connections takes part in the pool key. The names are
# listed here exactly as they are passed around; each one is stored under a
# "key_" prefix because namedtuple fields may not start with an underscore
# (and some of these keywords, such as ``_proxy``, do).
_key_fields = tuple(
    "key_" + keyword
    for keyword in (
        "scheme",  # str
        "host",  # str
        "port",  # int
        "timeout",  # int or float or Timeout
        "retries",  # int or Retry
        "strict",  # bool
        "block",  # bool
        "source_address",  # str
        "key_file",  # str
        "key_password",  # str
        "cert_file",  # str
        "cert_reqs",  # str
        "ca_certs",  # str
        "ssl_version",  # str
        "ca_cert_dir",  # str
        "ssl_context",  # instance of ssl.SSLContext or urllib3.util.ssl_.SSLContext
        "maxsize",  # int
        "headers",  # dict
        "_proxy",  # parsed proxy url
        "_proxy_headers",  # dict
        "socket_options",  # list of (level (int), optname (int), value (int or str)) tuples
        "_socks_options",  # dict
        "assert_hostname",  # bool or string
        "assert_fingerprint",  # str
        "server_hostname",  # str
    )
)

#: The namedtuple class used to construct keys for the connection pool.
#: All custom key schemes should include the fields in this key at a minimum.
PoolKey = collections.namedtuple("PoolKey", _key_fields)
72
73
74def _default_key_normalizer(key_class, request_context):
75    """
76    Create a pool key out of a request context dictionary.
77
78    According to RFC 3986, both the scheme and host are case-insensitive.
79    Therefore, this function normalizes both before constructing the pool
80    key for an HTTPS request. If you wish to change this behaviour, provide
81    alternate callables to ``key_fn_by_scheme``.
82
83    :param key_class:
84        The class to use when constructing the key. This should be a namedtuple
85        with the ``scheme`` and ``host`` keys at a minimum.
86    :type  key_class: namedtuple
87    :param request_context:
88        A dictionary-like object that contain the context for a request.
89    :type  request_context: dict
90
91    :return: A namedtuple that can be used as a connection pool key.
92    :rtype:  PoolKey
93    """
94    # Since we mutate the dictionary, make a copy first
95    context = request_context.copy()
96    context["scheme"] = context["scheme"].lower()
97    context["host"] = context["host"].lower()
98
99    # These are both dictionaries and need to be transformed into frozensets
100    for key in ("headers", "_proxy_headers", "_socks_options"):
101        if key in context and context[key] is not None:
102            context[key] = frozenset(context[key].items())
103
104    # The socket_options key may be a list and needs to be transformed into a
105    # tuple.
106    socket_opts = context.get("socket_options")
107    if socket_opts is not None:
108        context["socket_options"] = tuple(socket_opts)
109
110    # Map the kwargs to the names in the namedtuple - this is necessary since
111    # namedtuples can't have fields starting with '_'.
112    for key in list(context.keys()):
113        context["key_" + key] = context.pop(key)
114
115    # Default to ``None`` for keys missing from the context
116    for field in key_class._fields:
117        if field not in context:
118            context[field] = None
119
120    return key_class(**context)
121
122
#: A dictionary that maps a scheme to a callable that creates a pool key.
#: This can be used to alter the way pool keys are constructed, if desired.
#: Each PoolManager makes a copy of this dictionary so they can be configured
#: globally here, or individually on the instance.
#: Each callable takes the request context as its only argument; both default
#: entries bind ``PoolKey`` as the key class of ``_default_key_normalizer``.
key_fn_by_scheme = {
    "http": functools.partial(_default_key_normalizer, PoolKey),
    "https": functools.partial(_default_key_normalizer, PoolKey),
}

#: Maps a scheme to the connection pool class that ``PoolManager._new_pool``
#: instantiates for it. Unlike ``key_fn_by_scheme``, ``PoolManager.__init__``
#: stores a reference to this dictionary rather than a copy.
pool_classes_by_scheme = {"http": HTTPConnectionPool, "https": HTTPSConnectionPool}
133
134
class PoolManager(RequestMethods):
    """
    Allows for arbitrary requests while transparently keeping track of
    necessary connection pools for you.

    :param num_pools:
        Number of connection pools to cache before discarding the least
        recently used pool.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param \\**connection_pool_kw:
        Additional parameters are used to create fresh
        :class:`urllib3.connectionpool.ConnectionPool` instances.

    Example::

        >>> manager = PoolManager(num_pools=2)
        >>> r = manager.request('GET', 'http://google.com/')
        >>> r = manager.request('GET', 'http://google.com/mail')
        >>> r = manager.request('GET', 'http://yahoo.com/')
        >>> len(manager.pools)
        2

    """

    # Parsed proxy URL; ``None`` means requests are not proxied. Subclasses
    # that talk through a proxy set this on the instance.
    proxy = None

    # Lower-cased names of the header fields that describe request content.
    # They are removed when a 303 redirect turns the request into a
    # body-less GET (RFC 9110, Section 15.4, step 5).
    _redirect_content_headers = frozenset(
        [
            "content-encoding",
            "content-language",
            "content-location",
            "content-type",
            "content-length",
            "digest",
            "last-modified",
        ]
    )

    def __init__(self, num_pools=10, headers=None, **connection_pool_kw):
        RequestMethods.__init__(self, headers)
        self.connection_pool_kw = connection_pool_kw
        # Pools evicted from the container are closed via ``dispose_func``.
        self.pools = RecentlyUsedContainer(num_pools, dispose_func=lambda p: p.close())

        # Locally set the pool classes and keys so other PoolManagers can
        # override them.
        # NOTE: ``pool_classes_by_scheme`` is the shared module-level dict
        # (not a copy); rebind the attribute rather than mutating it if only
        # this instance should be affected.
        self.pool_classes_by_scheme = pool_classes_by_scheme
        self.key_fn_by_scheme = key_fn_by_scheme.copy()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.clear()
        # Return False to re-raise any potential exceptions
        return False

    def _new_pool(self, scheme, host, port, request_context=None):
        """
        Create a new :class:`ConnectionPool` based on host, port, scheme, and
        any additional pool keyword arguments.

        If ``request_context`` is provided, it is provided as keyword arguments
        to the pool class used. This method is used to actually create the
        connection pools handed out by :meth:`connection_from_url` and
        companion methods. It is intended to be overridden for customization.

        :param scheme:
            Key into ``self.pool_classes_by_scheme`` selecting the pool class.
        :param host: Passed to the pool class as first positional argument.
        :param port: Passed to the pool class as second positional argument.
        :param request_context:
            Keyword arguments for the pool class; defaults to
            ``self.connection_pool_kw``. The mapping is not modified.
        :return: A new instance of the pool class registered for ``scheme``.
        :raises KeyError: If no pool class is registered for ``scheme``.
        """
        pool_cls = self.pool_classes_by_scheme[scheme]
        if request_context is None:
            request_context = self.connection_pool_kw

        # Keys are stripped below, so always work on a private copy; the
        # mapping may belong to the caller (e.g. the dictionary handed to
        # ``connection_from_context``) and must stay reusable.
        request_context = request_context.copy()

        # Although the context has everything necessary to create the pool,
        # this function has historically only used the scheme, host, and port
        # in the positional args. When an API change is acceptable these can
        # be removed.
        for key in ("scheme", "host", "port"):
            request_context.pop(key, None)

        # Plain HTTP pools are not given the TLS-related keyword arguments.
        if scheme == "http":
            for kw in SSL_KEYWORDS:
                request_context.pop(kw, None)

        return pool_cls(host, port, **request_context)

    def clear(self):
        """
        Empty our store of pools and direct them all to close.

        This will not affect in-flight connections, but they will not be
        re-used after completion.
        """
        self.pools.clear()

    def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None):
        """
        Get a :class:`ConnectionPool` based on the host, port, and scheme.

        If ``port`` isn't given, it will be derived from the ``scheme`` using
        ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is
        provided, it is merged with the instance's ``connection_pool_kw``
        variable and used to create the new connection pool, if one is
        needed.

        :raises LocationValueError: If ``host`` is empty or ``None``.
        """

        if not host:
            raise LocationValueError("No host specified.")

        request_context = self._merge_pool_kwargs(pool_kwargs)
        request_context["scheme"] = scheme or "http"
        if not port:
            # Unknown schemes fall back to port 80.
            port = port_by_scheme.get(request_context["scheme"].lower(), 80)
        request_context["port"] = port
        request_context["host"] = host

        return self.connection_from_context(request_context)

    def connection_from_context(self, request_context):
        """
        Get a :class:`ConnectionPool` based on the request context.

        ``request_context`` must at least contain the ``scheme`` key and its
        value must be a key in ``key_fn_by_scheme`` instance variable.

        :raises KeyError:
            If ``scheme`` is missing or its lower-cased value has no entry in
            ``self.key_fn_by_scheme``.
        """
        scheme = request_context["scheme"].lower()
        pool_key_constructor = self.key_fn_by_scheme[scheme]
        pool_key = pool_key_constructor(request_context)

        return self.connection_from_pool_key(pool_key, request_context=request_context)

    def connection_from_pool_key(self, pool_key, request_context=None):
        """
        Get a :class:`ConnectionPool` based on the provided pool key.

        ``pool_key`` should be a namedtuple that only contains immutable
        objects. At a minimum it must have the ``scheme``, ``host``, and
        ``port`` fields.

        ``request_context`` is only consulted when no pool is cached under
        ``pool_key``; in that case it must contain the ``scheme``, ``host``
        and ``port`` keys used to create the new pool.
        """
        # Hold the container lock across lookup and insertion so the pool for
        # a given key is looked up and created as one step.
        with self.pools.lock:
            # If the scheme, host, or port doesn't match existing open
            # connections, open a new ConnectionPool.
            pool = self.pools.get(pool_key)
            if pool:
                return pool

            # Make a fresh ConnectionPool of the desired type
            scheme = request_context["scheme"]
            host = request_context["host"]
            port = request_context["port"]
            pool = self._new_pool(scheme, host, port, request_context=request_context)
            self.pools[pool_key] = pool

        return pool

    def connection_from_url(self, url, pool_kwargs=None):
        """
        Similar to :func:`urllib3.connectionpool.connection_from_url`.

        If ``pool_kwargs`` is not provided and a new pool needs to be
        constructed, ``self.connection_pool_kw`` is used to initialize
        the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs``
        is provided, it is merged on top of ``self.connection_pool_kw`` (see
        :meth:`_merge_pool_kwargs`). Note that if a new pool does not
        need to be created for the request, the provided ``pool_kwargs`` are
        not used.
        """
        u = parse_url(url)
        return self.connection_from_host(
            u.host, port=u.port, scheme=u.scheme, pool_kwargs=pool_kwargs
        )

    def _merge_pool_kwargs(self, override):
        """
        Merge a dictionary of override values for self.connection_pool_kw.

        This does not modify self.connection_pool_kw and returns a new dict.
        Any keys in the override dictionary with a value of ``None`` are
        removed from the merged dictionary.
        """
        base_pool_kwargs = self.connection_pool_kw.copy()
        if override:
            for key, value in override.items():
                if value is None:
                    # ``None`` means "unset": drop the key if it is present.
                    base_pool_kwargs.pop(key, None)
                else:
                    base_pool_kwargs[key] = value
        return base_pool_kwargs

    @staticmethod
    def _drop_headers(headers, unwanted):
        """
        Return a copy of ``headers`` without the fields named in ``unwanted``.

        ``unwanted`` holds lower-cased header names; matching is
        case-insensitive. The ``headers`` object itself is never modified,
        and ``None`` is passed through unchanged.
        """
        if headers is None:
            return None
        kept = headers.copy()
        for header in list(six.iterkeys(headers)):
            if header.lower() in unwanted:
                kept.pop(header, None)
        return kept

    def urlopen(self, method, url, redirect=True, **kw):
        """
        Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`
        with custom cross-host redirect logic and only sends the request-uri
        portion of the ``url``.

        The given ``url`` parameter must be absolute, such that an appropriate
        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.

        :param method: HTTP method of the request.
        :param url: Absolute URL to request.
        :param redirect:
            If true, redirect responses are followed by this method (the
            pool itself is always called with ``redirect=False``).
        :param \\**kw:
            Passed on to the pool's ``urlopen``. A ``headers`` mapping
            supplied here is never modified in place.
        :return: The response of the last request that was made.
        :raises MaxRetryError:
            If the redirect budget is exhausted and the retry configuration
            has ``raise_on_redirect`` set.
        """
        u = parse_url(url)
        conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

        # Redirects are handled here because they may cross hosts (and hence
        # pools), so the pool must neither follow them nor check the host.
        kw["assert_same_host"] = False
        kw["redirect"] = False

        if "headers" not in kw:
            kw["headers"] = self.headers.copy()

        # Plain HTTP through a proxy needs the absolute URL on the request
        # line; everything else only sends the request-uri.
        if self.proxy is not None and u.scheme == "http":
            response = conn.urlopen(method, url, **kw)
        else:
            response = conn.urlopen(method, u.request_uri, **kw)

        redirect_location = redirect and response.get_redirect_location()
        if not redirect_location:
            return response

        # Support relative URLs for redirecting.
        redirect_location = urljoin(url, redirect_location)

        # RFC 7231, Section 6.4.4
        if response.status == 303:
            method = "GET"
            # The follow-up GET carries no content: drop the body so it is
            # not sent to the redirect target, together with the header
            # fields that described it (RFC 9110, Section 15.4).
            kw["body"] = None
            kw["headers"] = self._drop_headers(
                kw["headers"], self._redirect_content_headers
            )

        retries = kw.get("retries")
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect)

        # Strip headers marked as unsafe to forward to the redirected location.
        # Check remove_headers_on_redirect to avoid a potential network call within
        # conn.is_same_host() which may use socket.gethostbyname() in the future.
        if retries.remove_headers_on_redirect and not conn.is_same_host(
            redirect_location
        ):
            # Filter into a copy: ``kw["headers"]`` may be the very mapping
            # the caller passed in, which must not lose entries.
            kw["headers"] = self._drop_headers(
                kw["headers"], retries.remove_headers_on_redirect
            )

        try:
            retries = retries.increment(method, url, response=response, _pool=conn)
        except MaxRetryError:
            if retries.raise_on_redirect:
                response.drain_conn()
                raise
            # Out of redirects but not configured to raise: hand back the
            # redirect response itself.
            return response

        kw["retries"] = retries
        kw["redirect"] = redirect

        log.info("Redirecting %s -> %s", url, redirect_location)

        response.drain_conn()
        return self.urlopen(method, redirect_location, **kw)
379
380
class ProxyManager(PoolManager):
    """
    A :class:`PoolManager` that routes every request through a proxy,
    using the CONNECT method for HTTPS URLs.

    :param proxy_url:
        The URL of the proxy to be used. An :class:`HTTPConnectionPool`
        instance is also accepted; its scheme, host and port are used.

    :param proxy_headers:
        A dictionary containing headers that will be sent to the proxy. In case
        of HTTP they are being sent with each request, while in the
        HTTPS/CONNECT case they are sent only once. Could be used for proxy
        authentication.

    Example:
        >>> proxy = urllib3.ProxyManager('http://localhost:3128/')
        >>> r1 = proxy.request('GET', 'http://google.com/')
        >>> r2 = proxy.request('GET', 'http://httpbin.org/')
        >>> len(proxy.pools)
        1
        >>> r3 = proxy.request('GET', 'https://httpbin.org/')
        >>> r4 = proxy.request('GET', 'https://twitter.com/')
        >>> len(proxy.pools)
        3

    """

    def __init__(
        self,
        proxy_url,
        num_pools=10,
        headers=None,
        proxy_headers=None,
        **connection_pool_kw
    ):
        # A pool object may stand in for the URL; rebuild the URL from it.
        if isinstance(proxy_url, HTTPConnectionPool):
            pool = proxy_url
            proxy_url = "%s://%s:%i" % (pool.scheme, pool.host, pool.port)

        parsed = parse_url(proxy_url)
        if not parsed.port:
            # No explicit port: use the scheme's default, or 80 if unknown.
            parsed = parsed._replace(port=port_by_scheme.get(parsed.scheme, 80))

        if parsed.scheme not in ("http", "https"):
            raise ProxySchemeUnknown(parsed.scheme)

        self.proxy = parsed
        self.proxy_headers = proxy_headers or {}

        # Every pool created by this manager is told about the proxy.
        connection_pool_kw.update(
            _proxy=self.proxy, _proxy_headers=self.proxy_headers
        )

        super(ProxyManager, self).__init__(num_pools, headers, **connection_pool_kw)

    def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None):
        """
        Get the pool for an HTTPS target host, or the pool for the proxy
        itself for any other scheme.
        """
        if scheme != "https":
            # Non-HTTPS traffic is sent to the proxy, so all such requests
            # share the proxy's pool regardless of the target host.
            host, port, scheme = self.proxy.host, self.proxy.port, self.proxy.scheme

        return super(ProxyManager, self).connection_from_host(
            host, port, scheme, pool_kwargs=pool_kwargs
        )

    def _set_proxy_headers(self, url, headers=None):
        """
        Sets headers needed by proxies: specifically, the Accept and Host
        headers. Only sets headers not provided by the user.

        Returns a new dictionary; ``headers`` is not modified.
        """
        merged = {"Accept": "*/*"}

        target = parse_url(url).netloc
        if target:
            merged["Host"] = target

        # Entries supplied by the caller win over the defaults above.
        if headers:
            merged.update(headers)
        return merged

    def _validate_proxy_scheme_url_selection(self, url_scheme):
        """
        Warn when an HTTPS URL is requested through a proxy whose own scheme
        is HTTPS.
        """
        if url_scheme != "https" or self.proxy.scheme != "https":
            return

        warnings.warn(
            "Your proxy configuration specified an HTTPS scheme for the proxy. "
            "Are you sure you want to use HTTPS to contact the proxy? "
            "This most likely indicates an error in your configuration. "
            "Read this issue for more info: "
            "https://github.com/urllib3/urllib3/issues/1850",
            InvalidProxyConfigurationWarning,
            stacklevel=3,
        )

    def urlopen(self, method, url, redirect=True, **kw):
        "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
        target = parse_url(url)
        self._validate_proxy_scheme_url_selection(target.scheme)

        if target.scheme == "http":
            # For proxied HTTPS requests, httplib sets the necessary headers
            # on the CONNECT to the proxy. For HTTP, we'll definitely
            # need to set 'Host' at the very least.
            kw["headers"] = self._set_proxy_headers(
                url, kw.get("headers", self.headers)
            )

        return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw)
489
490
def proxy_from_url(url, **kw):
    """
    Create a :class:`ProxyManager` for the proxy located at ``url``.

    :param url: Passed to :class:`ProxyManager` as ``proxy_url``.
    :param \\**kw: Passed through to the :class:`ProxyManager` constructor.
    :return: The new :class:`ProxyManager` instance.
    """
    return ProxyManager(proxy_url=url, **kw)
493