1from __future__ import absolute_import
2
3import collections
4import functools
5import logging
6
7from ._collections import RecentlyUsedContainer
8from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, port_by_scheme
9from .exceptions import (
10    LocationValueError,
11    MaxRetryError,
12    ProxySchemeUnknown,
13    ProxySchemeUnsupported,
14    URLSchemeUnknown,
15)
16from .packages import six
17from .packages.six.moves.urllib.parse import urljoin
18from .request import RequestMethods
19from .util.proxy import connection_requires_http_tunnel
20from .util.retry import Retry
21from .util.url import parse_url
22
# Public names exported by ``from <module> import *``.
__all__ = ["PoolManager", "ProxyManager", "proxy_from_url"]


# Module-level logger; PoolManager.urlopen uses it to record followed redirects.
log = logging.getLogger(__name__)

# Pool keyword arguments that only make sense for TLS connections.
# PoolManager._new_pool removes every one of these from the request context
# before it instantiates a pool for the plain "http" scheme.
SSL_KEYWORDS = (
    "key_file",
    "cert_file",
    "cert_reqs",
    "ca_certs",
    "ssl_version",
    "ca_cert_dir",
    "ssl_context",
    "key_password",
)
38
# All known keyword arguments that could be provided to the pool manager, its
# pools, or the underlying connections. This is used to construct a pool key.
#
# Every name is the request-context key with a "key_" prefix added by
# _default_key_normalizer: namedtuple fields cannot start with an underscore,
# so a context key such as "_proxy" becomes the field "key__proxy".
_key_fields = (
    "key_scheme",  # str
    "key_host",  # str
    "key_port",  # int
    "key_timeout",  # int or float or Timeout
    "key_retries",  # int or Retry
    "key_strict",  # bool
    "key_block",  # bool
    "key_source_address",  # str
    "key_key_file",  # str
    "key_key_password",  # str
    "key_cert_file",  # str
    "key_cert_reqs",  # str
    "key_ca_certs",  # str
    "key_ssl_version",  # str
    "key_ca_cert_dir",  # str
    "key_ssl_context",  # instance of ssl.SSLContext or urllib3.util.ssl_.SSLContext
    "key_maxsize",  # int
    "key_headers",  # dict
    "key__proxy",  # parsed proxy url
    "key__proxy_headers",  # dict
    "key__proxy_config",  # class
    "key_socket_options",  # list of (level (int), optname (int), value (int or str)) tuples
    "key__socks_options",  # dict
    "key_assert_hostname",  # bool or string
    "key_assert_fingerprint",  # str
    "key_server_hostname",  # str
)

#: The namedtuple class used to construct keys for the connection pool.
#: All custom key schemes should include the fields in this key at a minimum.
PoolKey = collections.namedtuple("PoolKey", _key_fields)

# Proxy settings that ProxyManager bundles and hands to its pools through the
# ``_proxy_config`` keyword argument:
#   ssl_context              -- the ``proxy_ssl_context`` given to ProxyManager
#   use_forwarding_for_https -- the ProxyManager flag of the same name
_proxy_config_fields = ("ssl_context", "use_forwarding_for_https")
ProxyConfig = collections.namedtuple("ProxyConfig", _proxy_config_fields)
76
77
78def _default_key_normalizer(key_class, request_context):
79    """
80    Create a pool key out of a request context dictionary.
81
82    According to RFC 3986, both the scheme and host are case-insensitive.
83    Therefore, this function normalizes both before constructing the pool
84    key for an HTTPS request. If you wish to change this behaviour, provide
85    alternate callables to ``key_fn_by_scheme``.
86
87    :param key_class:
88        The class to use when constructing the key. This should be a namedtuple
89        with the ``scheme`` and ``host`` keys at a minimum.
90    :type  key_class: namedtuple
91    :param request_context:
92        A dictionary-like object that contain the context for a request.
93    :type  request_context: dict
94
95    :return: A namedtuple that can be used as a connection pool key.
96    :rtype:  PoolKey
97    """
98    # Since we mutate the dictionary, make a copy first
99    context = request_context.copy()
100    context["scheme"] = context["scheme"].lower()
101    context["host"] = context["host"].lower()
102
103    # These are both dictionaries and need to be transformed into frozensets
104    for key in ("headers", "_proxy_headers", "_socks_options"):
105        if key in context and context[key] is not None:
106            context[key] = frozenset(context[key].items())
107
108    # The socket_options key may be a list and needs to be transformed into a
109    # tuple.
110    socket_opts = context.get("socket_options")
111    if socket_opts is not None:
112        context["socket_options"] = tuple(socket_opts)
113
114    # Map the kwargs to the names in the namedtuple - this is necessary since
115    # namedtuples can't have fields starting with '_'.
116    for key in list(context.keys()):
117        context["key_" + key] = context.pop(key)
118
119    # Default to ``None`` for keys missing from the context
120    for field in key_class._fields:
121        if field not in context:
122            context[field] = None
123
124    return key_class(**context)
125
126
#: A dictionary that maps a scheme to a callable that creates a pool key.
#: This can be used to alter the way pool keys are constructed, if desired.
#: Each PoolManager makes a copy of this dictionary so they can be configured
#: globally here, or individually on the instance.
key_fn_by_scheme = {
    "http": functools.partial(_default_key_normalizer, PoolKey),
    "https": functools.partial(_default_key_normalizer, PoolKey),
}

#: A dictionary that maps a scheme to the connection pool class instantiated
#: for it. Unlike ``key_fn_by_scheme``, PoolManager stores a reference to this
#: dictionary rather than a copy, so changing an entry is visible to every
#: manager that still refers to it.
pool_classes_by_scheme = {"http": HTTPConnectionPool, "https": HTTPSConnectionPool}
137
138
class PoolManager(RequestMethods):
    """
    Allows for arbitrary requests while transparently keeping track of
    necessary connection pools for you.

    :param num_pools:
        Number of connection pools to cache before discarding the least
        recently used pool.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param \\**connection_pool_kw:
        Additional parameters are used to create fresh
        :class:`urllib3.connectionpool.ConnectionPool` instances.

    Example::

        >>> manager = PoolManager(num_pools=2)
        >>> r = manager.request('GET', 'http://google.com/')
        >>> r = manager.request('GET', 'http://google.com/mail')
        >>> r = manager.request('GET', 'http://yahoo.com/')
        >>> len(manager.pools)
        2

    """

    proxy = None
    proxy_config = None

    # Lower-cased names of the headers that describe a request body. They are
    # dropped together with the body when a 303 response turns the request
    # into a GET.
    _body_headers_lower = frozenset(
        (
            "content-encoding",
            "content-language",
            "content-location",
            "content-type",
            "content-length",
            "digest",
            "last-modified",
        )
    )

    def __init__(self, num_pools=10, headers=None, **connection_pool_kw):
        RequestMethods.__init__(self, headers)
        self.connection_pool_kw = connection_pool_kw
        # Pools evicted from the container are closed through ``dispose_func``.
        self.pools = RecentlyUsedContainer(num_pools, dispose_func=lambda p: p.close())

        # Locally set the pool classes and keys so other PoolManagers can
        # override them. Only the key functions are copied; the pool class
        # mapping is the shared module-level dictionary.
        self.pool_classes_by_scheme = pool_classes_by_scheme
        self.key_fn_by_scheme = key_fn_by_scheme.copy()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.clear()
        # Return False to re-raise any potential exceptions
        return False

    @staticmethod
    def _headers_without(headers, unwanted):
        """
        Return a copy of ``headers`` that lacks the ``unwanted`` header names.

        :param headers:
            The header mapping to filter, or ``None``. It is never modified.
        :param unwanted:
            Container of lower-cased header names; names in ``headers`` are
            compared case-insensitively against it.
        :return:
            A new mapping without the unwanted headers, or ``None`` when
            ``headers`` is ``None``.
        """
        if headers is None:
            return None

        try:
            kept = headers.copy()
        except AttributeError:
            # Mapping types that offer no ``copy()`` are flattened to a dict.
            kept = dict(headers)

        for name in list(six.iterkeys(headers)):
            if name.lower() in unwanted:
                kept.pop(name, None)
        return kept

    def _new_pool(self, scheme, host, port, request_context=None):
        """
        Create a new :class:`urllib3.connectionpool.ConnectionPool` based on host, port, scheme, and
        any additional pool keyword arguments.

        If ``request_context`` is provided, it is provided as keyword arguments
        to the pool class used. This method is used to actually create the
        connection pools handed out by :meth:`connection_from_url` and
        companion methods. It is intended to be overridden for customization.

        The ``request_context`` passed in is left untouched; a copy is
        trimmed and handed to the pool class.

        :raises KeyError: if ``scheme`` has no entry in
            ``self.pool_classes_by_scheme``.
        """
        pool_cls = self.pool_classes_by_scheme[scheme]
        if request_context is None:
            request_context = self.connection_pool_kw.copy()
        else:
            # Keys are removed below. Doing that on the caller's dictionary
            # would strip "scheme"/"host"/"port" from a context the caller may
            # want to use again (a second connection_from_context() call with
            # the same dictionary would then fail with KeyError).
            request_context = request_context.copy()

        # Although the context has everything necessary to create the pool,
        # this function has historically only used the scheme, host, and port
        # in the positional args. When an API change is acceptable these can
        # be removed.
        for key in ("scheme", "host", "port"):
            request_context.pop(key, None)

        # Plain HTTP pools are not given TLS-only arguments.
        if scheme == "http":
            for kw in SSL_KEYWORDS:
                request_context.pop(kw, None)

        return pool_cls(host, port, **request_context)

    def clear(self):
        """
        Empty our store of pools and direct them all to close.

        This will not affect in-flight connections, but they will not be
        re-used after completion.
        """
        self.pools.clear()

    def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None):
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the host, port, and scheme.

        If ``port`` isn't given, it will be derived from the ``scheme`` using
        ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is
        provided, it is merged with the instance's ``connection_pool_kw``
        variable and used to create the new connection pool, if one is
        needed.

        :raises LocationValueError: if ``host`` is empty or ``None``.
        """

        if not host:
            raise LocationValueError("No host specified.")

        request_context = self._merge_pool_kwargs(pool_kwargs)
        request_context["scheme"] = scheme or "http"
        if not port:
            # Unknown schemes fall back to port 80 here; they are rejected
            # later when the pool key is built.
            port = port_by_scheme.get(request_context["scheme"].lower(), 80)
        request_context["port"] = port
        request_context["host"] = host

        return self.connection_from_context(request_context)

    def connection_from_context(self, request_context):
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the request context.

        ``request_context`` must at least contain the ``scheme`` key and its
        value must be a key in ``key_fn_by_scheme`` instance variable.

        :raises URLSchemeUnknown: if no key function is registered for the
            (lower-cased) scheme.
        """
        scheme = request_context["scheme"].lower()
        pool_key_constructor = self.key_fn_by_scheme.get(scheme)
        if not pool_key_constructor:
            raise URLSchemeUnknown(scheme)
        pool_key = pool_key_constructor(request_context)

        return self.connection_from_pool_key(pool_key, request_context=request_context)

    def connection_from_pool_key(self, pool_key, request_context=None):
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the provided pool key.

        ``pool_key`` should be a namedtuple that only contains immutable
        objects. At a minimum it must have the ``scheme``, ``host``, and
        ``port`` fields.

        ``request_context`` is only consulted when no pool is cached under
        ``pool_key``; in that case it must contain ``scheme``, ``host`` and
        ``port``.
        """
        # Lookup and insertion happen under the container lock so that two
        # callers cannot both create a pool for the same key.
        with self.pools.lock:
            # If the scheme, host, or port doesn't match existing open
            # connections, open a new ConnectionPool.
            pool = self.pools.get(pool_key)
            if pool:
                return pool

            # Make a fresh ConnectionPool of the desired type
            scheme = request_context["scheme"]
            host = request_context["host"]
            port = request_context["port"]
            pool = self._new_pool(scheme, host, port, request_context=request_context)
            self.pools[pool_key] = pool

        return pool

    def connection_from_url(self, url, pool_kwargs=None):
        """
        Similar to :func:`urllib3.connectionpool.connection_from_url`.

        If ``pool_kwargs`` is not provided and a new pool needs to be
        constructed, ``self.connection_pool_kw`` is used to initialize
        the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs``
        is provided, it is used instead. Note that if a new pool does not
        need to be created for the request, the provided ``pool_kwargs`` are
        not used.
        """
        u = parse_url(url)
        return self.connection_from_host(
            u.host, port=u.port, scheme=u.scheme, pool_kwargs=pool_kwargs
        )

    def _merge_pool_kwargs(self, override):
        """
        Merge a dictionary of override values for self.connection_pool_kw.

        This does not modify self.connection_pool_kw and returns a new dict.
        Any keys in the override dictionary with a value of ``None`` are
        removed from the merged dictionary.
        """
        base_pool_kwargs = self.connection_pool_kw.copy()
        if override:
            for key, value in override.items():
                if value is None:
                    # ``None`` means "drop this setting", not "set to None".
                    base_pool_kwargs.pop(key, None)
                else:
                    base_pool_kwargs[key] = value
        return base_pool_kwargs

    def _proxy_requires_url_absolute_form(self, parsed_url):
        """
        Indicates if the proxy requires the complete destination URL in the
        request.  Normally this is only needed when not using an HTTP CONNECT
        tunnel.
        """
        if self.proxy is None:
            return False

        return not connection_requires_http_tunnel(
            self.proxy, self.proxy_config, parsed_url.scheme
        )

    def _validate_proxy_scheme_url_selection(self, url_scheme):
        """
        Validates that we're not attempting to do TLS in TLS connections on
        Python2 or with unsupported SSL implementations.

        :raises ProxySchemeUnsupported: on Python 2, when an ``https`` URL
            would be tunnelled through an ``https`` proxy.
        """
        if self.proxy is None or url_scheme != "https":
            return

        if self.proxy.scheme != "https":
            return

        if six.PY2 and not self.proxy_config.use_forwarding_for_https:
            raise ProxySchemeUnsupported(
                "Contacting HTTPS destinations through HTTPS proxies "
                "'via CONNECT tunnels' is not supported in Python 2"
            )

    def urlopen(self, method, url, redirect=True, **kw):
        """
        Same as :meth:`urllib3.HTTPConnectionPool.urlopen`
        with custom cross-host redirect logic and only sends the request-uri
        portion of the ``url``.

        The given ``url`` parameter must be absolute, such that an appropriate
        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.

        :param method: HTTP request method.
        :param url: Absolute URL to request.
        :param redirect:
            If true, a redirect response is followed by issuing a new request
            through the pool that matches the redirect location.
        :param kw:
            Passed on to the pool's ``urlopen``. ``headers`` defaults to a
            copy of the manager's headers. A header mapping supplied by the
            caller is never modified; when headers have to be removed for a
            redirect, a filtered copy is sent instead.
        :return: The response to the last request that was made.
        :raises MaxRetryError:
            when the redirect budget is exhausted and the retry
            configuration has ``raise_on_redirect`` set.
        """
        u = parse_url(url)
        self._validate_proxy_scheme_url_selection(u.scheme)

        conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

        # Redirects are handled here rather than inside the pool, because the
        # next hop may belong to a different pool.
        kw["assert_same_host"] = False
        kw["redirect"] = False

        if "headers" not in kw:
            kw["headers"] = self.headers.copy()

        if self._proxy_requires_url_absolute_form(u):
            response = conn.urlopen(method, url, **kw)
        else:
            response = conn.urlopen(method, u.request_uri, **kw)

        redirect_location = redirect and response.get_redirect_location()
        if not redirect_location:
            return response

        # Support relative URLs for redirecting.
        redirect_location = urljoin(url, redirect_location)

        if response.status == 303:
            # Change the method according to RFC 9110, Section 15.4.4.
            method = "GET"
            # The follow-up GET carries no payload: drop the body, which may
            # hold sensitive data, together with the headers describing it.
            kw["body"] = None
            kw["headers"] = self._headers_without(
                kw["headers"], self._body_headers_lower
            )

        retries = kw.get("retries")
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect)

        # Strip headers marked as unsafe to forward to the redirected location.
        # Check remove_headers_on_redirect to avoid a potential network call within
        # conn.is_same_host() which may use socket.gethostbyname() in the future.
        if retries.remove_headers_on_redirect and not conn.is_same_host(
            redirect_location
        ):
            # Filter a copy so the caller's header mapping stays intact.
            kw["headers"] = self._headers_without(
                kw["headers"], retries.remove_headers_on_redirect
            )

        try:
            retries = retries.increment(method, url, response=response, _pool=conn)
        except MaxRetryError:
            if retries.raise_on_redirect:
                response.drain_conn()
                raise
            return response

        kw["retries"] = retries
        kw["redirect"] = redirect

        log.info("Redirecting %s -> %s", url, redirect_location)

        response.drain_conn()
        return self.urlopen(method, redirect_location, **kw)
418
419
class ProxyManager(PoolManager):
    """
    A :class:`PoolManager` that sends every request through one proxy,
    using the CONNECT method for HTTPS URLs.

    :param proxy_url:
        The URL of the proxy to be used. An ``HTTPConnectionPool`` instance
        is accepted as well; its scheme, host and port are used to build the
        URL.

    :param proxy_headers:
        A dictionary containing headers that will be sent to the proxy. In case
        of HTTP they are being sent with each request, while in the
        HTTPS/CONNECT case they are sent only once. Could be used for proxy
        authentication.

    :param proxy_ssl_context:
        The proxy SSL context is used to establish the TLS connection to the
        proxy when using HTTPS proxies.

    :param use_forwarding_for_https:
        (Defaults to False) If set to True will forward requests to the HTTPS
        proxy to be made on behalf of the client instead of creating a TLS
        tunnel via the CONNECT method. **Enabling this flag means that request
        and response headers and content will be visible from the HTTPS proxy**
        whereas tunneling keeps request and response headers and content
        private.  IP address, target hostname, SNI, and port are always visible
        to an HTTPS proxy even when this flag is disabled.

    Example:
        >>> proxy = urllib3.ProxyManager('http://localhost:3128/')
        >>> r1 = proxy.request('GET', 'http://google.com/')
        >>> r2 = proxy.request('GET', 'http://httpbin.org/')
        >>> len(proxy.pools)
        1
        >>> r3 = proxy.request('GET', 'https://httpbin.org/')
        >>> r4 = proxy.request('GET', 'https://twitter.com/')
        >>> len(proxy.pools)
        3

    """

    def __init__(
        self,
        proxy_url,
        num_pools=10,
        headers=None,
        proxy_headers=None,
        proxy_ssl_context=None,
        use_forwarding_for_https=False,
        **connection_pool_kw
    ):
        # A connection pool may stand in for the proxy URL; rebuild the URL
        # from the pool's own coordinates.
        if isinstance(proxy_url, HTTPConnectionPool):
            pool = proxy_url
            proxy_url = "%s://%s:%i" % (pool.scheme, pool.host, pool.port)

        parsed = parse_url(proxy_url)
        if parsed.scheme not in ("http", "https"):
            raise ProxySchemeUnknown(parsed.scheme)

        # Fill in the scheme's default port when the URL names none.
        if not parsed.port:
            parsed = parsed._replace(port=port_by_scheme.get(parsed.scheme, 80))

        self.proxy = parsed
        self.proxy_headers = proxy_headers or {}
        self.proxy_ssl_context = proxy_ssl_context
        self.proxy_config = ProxyConfig(proxy_ssl_context, use_forwarding_for_https)

        # Every pool created by this manager is told about the proxy.
        connection_pool_kw.update(
            _proxy=self.proxy,
            _proxy_headers=self.proxy_headers,
            _proxy_config=self.proxy_config,
        )

        super(ProxyManager, self).__init__(num_pools, headers, **connection_pool_kw)

    def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None):
        """
        Return the pool to use for ``host``.

        ``https`` destinations get a pool keyed on the destination itself;
        every other scheme shares the pool that is keyed on the proxy.
        """
        if scheme == "https":
            target = (host, port, scheme)
        else:
            target = (self.proxy.host, self.proxy.port, self.proxy.scheme)

        return super(ProxyManager, self).connection_from_host(
            *target, pool_kwargs=pool_kwargs
        )

    def _set_proxy_headers(self, url, headers=None):
        """
        Build the headers for a request that is sent through the proxy.

        Starts from ``Accept: */*`` plus a ``Host`` header taken from the
        network location of ``url`` (if it has one), then lets the entries
        of ``headers`` override those defaults. Returns a new dictionary.
        """
        merged = {"Accept": "*/*"}

        host_header = parse_url(url).netloc
        if host_header:
            merged["Host"] = host_header

        if headers:
            merged.update(headers)
        return merged

    def urlopen(self, method, url, redirect=True, **kw):
        "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
        target_scheme = parse_url(url).scheme
        tunnelled = connection_requires_http_tunnel(
            self.proxy, self.proxy_config, target_scheme
        )
        if not tunnelled:
            # For connections using HTTP CONNECT, httplib sets the necessary
            # headers on the CONNECT to the proxy. If we're not using CONNECT,
            # we'll definitely need to set 'Host' at the very least.
            kw["headers"] = self._set_proxy_headers(
                url, kw.get("headers", self.headers)
            )

        return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw)
533
534
def proxy_from_url(url, **kw):
    """
    Create a :class:`ProxyManager` for the proxy located at ``url``.

    :param url: The URL of the proxy; passed as ``proxy_url``.
    :param kw: Further keyword arguments, forwarded to :class:`ProxyManager`.
    :return: The newly created :class:`ProxyManager`.
    """
    manager = ProxyManager(proxy_url=url, **kw)
    return manager
537