1"""
2This module provides a pool manager that uses Google App Engine's
3`URLFetch Service <https://cloud.google.com/appengine/docs/python/urlfetch>`_.
4
5Example usage::
6
7    from urllib3 import PoolManager
8    from urllib3.contrib.appengine import AppEngineManager, is_appengine_sandbox
9
10    if is_appengine_sandbox():
11        # AppEngineManager uses AppEngine's URLFetch API behind the scenes
12        http = AppEngineManager()
13    else:
14        # PoolManager uses a socket-level API behind the scenes
15        http = PoolManager()
16
17    r = http.request('GET', 'https://google.com/')
18
19There are `limitations <https://cloud.google.com/appengine/docs/python/\
20urlfetch/#Python_Quotas_and_limits>`_ to the URLFetch service and it may not be
21the best choice for your application. There are three options for using
22urllib3 on Google App Engine:
23
241. You can use :class:`AppEngineManager` with URLFetch. URLFetch is
25   cost-effective in many circumstances as long as your usage is within the
26   limitations.
272. You can use a normal :class:`~urllib3.PoolManager` by enabling sockets.
28   Sockets also have `limitations and restrictions
29   <https://cloud.google.com/appengine/docs/python/sockets/\
30   #limitations-and-restrictions>`_ and have a lower free quota than URLFetch.
31   To use sockets, be sure to specify the following in your ``app.yaml``::
32
33        env_variables:
34            GAE_USE_SOCKETS_HTTPLIB : 'true'
35
363. If you are using `App Engine Flexible
37<https://cloud.google.com/appengine/docs/flexible/>`_, you can use the standard
38:class:`PoolManager` without any configuration or special environment variables.
39"""

from __future__ import absolute_import
import io
import logging
import warnings
from ..packages.six.moves.urllib.parse import urljoin

from ..exceptions import (
    HTTPError,
    HTTPWarning,
    MaxRetryError,
    ProtocolError,
    TimeoutError,
    SSLError
)

from ..request import RequestMethods
from ..response import HTTPResponse
from ..util.timeout import Timeout
from ..util.retry import Retry
from . import _appengine_environ

try:
    from google.appengine.api import urlfetch
except ImportError:
    urlfetch = None


log = logging.getLogger(__name__)


class AppEnginePlatformWarning(HTTPWarning):
    pass


class AppEnginePlatformError(HTTPError):
    pass


class AppEngineManager(RequestMethods):
    """
    Connection manager for Google App Engine sandbox applications.

    This manager uses the URLFetch service directly instead of using the
    emulated httplib, and is subject to URLFetch limitations as described in
    the App Engine documentation `here
    <https://cloud.google.com/appengine/docs/python/urlfetch>`_.

    Notably it will raise an :class:`AppEnginePlatformError` if:
        * URLFetch is not available.
        * You attempt to use it on App Engine Flexible, as full socket
          support is available there.
        * A request's size is more than 10 megabytes.
        * A response's size is more than 32 megabytes.
        * You use an unsupported request method such as OPTIONS.

    Beyond those cases, it will raise normal urllib3 errors.
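
    For example, a minimal usage sketch that guards against these
    platform-specific errors (assuming the URLFetch sandbox is available)::

        from urllib3.contrib.appengine import (
            AppEngineManager, AppEnginePlatformError)

        http = AppEngineManager()
        try:
            r = http.request('GET', 'https://www.google.com/')
        except AppEnginePlatformError:
            # The request hit a URLFetch limitation, for example an
            # oversized payload or an unsupported method.
            r = None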
97    """
98
99    def __init__(self, headers=None, retries=None, validate_certificate=True,
100                 urlfetch_retries=True):
101        if not urlfetch:
102            raise AppEnginePlatformError(
103                "URLFetch is not available in this environment.")
104
105        if is_prod_appengine_mvms():
106            raise AppEnginePlatformError(
107                "Use normal urllib3.PoolManager instead of AppEngineManager"
108                "on Managed VMs, as using URLFetch is not necessary in "
109                "this environment.")

        warnings.warn(
            "urllib3 is using URLFetch on Google App Engine sandbox instead "
            "of sockets. To use sockets directly instead of URLFetch see "
            "https://urllib3.readthedocs.io/en/latest/reference/urllib3.contrib.html.",
            AppEnginePlatformWarning)

        RequestMethods.__init__(self, headers)
        self.validate_certificate = validate_certificate
        self.urlfetch_retries = urlfetch_retries

        self.retries = retries or Retry.DEFAULT

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Return False to re-raise any potential exceptions
        return False

    def urlopen(self, method, url, body=None, headers=None,
                retries=None, redirect=True, timeout=Timeout.DEFAULT_TIMEOUT,
                **response_kw):

        retries = self._get_retries(retries, redirect)

        try:
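            # Ask URLFetch to follow redirects only when the caller requested
            # them and the Retry configuration still allows redirects and
            # total attempts.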
            follow_redirects = (
                    redirect and
                    retries.redirect != 0 and
                    retries.total)
            response = urlfetch.fetch(
                url,
                payload=body,
                method=method,
                headers=headers or {},
                allow_truncated=False,
                follow_redirects=self.urlfetch_retries and follow_redirects,
                deadline=self._get_absolute_timeout(timeout),
                validate_certificate=self.validate_certificate,
            )
        except urlfetch.DeadlineExceededError as e:
            raise TimeoutError(self, e)

        except urlfetch.InvalidURLError as e:
            if 'too large' in str(e):
                raise AppEnginePlatformError(
                    "URLFetch request too large, URLFetch only "
                    "supports requests up to 10mb in size.", e)
            raise ProtocolError(e)

        except urlfetch.DownloadError as e:
            if 'Too many redirects' in str(e):
                raise MaxRetryError(self, url, reason=e)
            raise ProtocolError(e)

        except urlfetch.ResponseTooLargeError as e:
            raise AppEnginePlatformError(
                "URLFetch response too large, URLFetch only supports "
                "responses up to 32mb in size.", e)

        except urlfetch.SSLCertificateError as e:
            raise SSLError(e)

        except urlfetch.InvalidMethodError as e:
            raise AppEnginePlatformError(
                "URLFetch does not support method: %s" % method, e)

        http_response = self._urlfetch_response_to_http_response(
            response, retries=retries, **response_kw)

        # Handle redirect?
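        # If URLFetch was asked to follow redirects itself (urlfetch_retries),
        # a redirect response that still reaches this point is raised as an
        # error when the Retry configuration says to; otherwise the redirect
        # is followed manually below.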
        redirect_location = redirect and http_response.get_redirect_location()
        if redirect_location:
            # Check for redirect response
            if self.urlfetch_retries and retries.raise_on_redirect:
                raise MaxRetryError(self, url, "too many redirects")
            else:
                if http_response.status == 303:
                    method = 'GET'

                try:
                    retries = retries.increment(method, url, response=http_response, _pool=self)
                except MaxRetryError:
                    if retries.raise_on_redirect:
                        raise MaxRetryError(self, url, "too many redirects")
                    return http_response

                retries.sleep_for_retry(http_response)
                log.debug("Redirecting %s -> %s", url, redirect_location)
                redirect_url = urljoin(url, redirect_location)
                return self.urlopen(
                    method, redirect_url, body, headers,
                    retries=retries, redirect=redirect,
                    timeout=timeout, **response_kw)

        # Check if we should retry the HTTP response.
        has_retry_after = bool(http_response.getheader('Retry-After'))
        if retries.is_retry(method, http_response.status, has_retry_after):
            retries = retries.increment(
                method, url, response=http_response, _pool=self)
            log.debug("Retry: %s", url)
            retries.sleep(http_response)
            return self.urlopen(
                method, url,
                body=body, headers=headers,
                retries=retries, redirect=redirect,
                timeout=timeout, **response_kw)

        return http_response

    def _urlfetch_response_to_http_response(self, urlfetch_resp, **response_kw):

        if is_prod_appengine():
            # Production GAE handles deflate encoding automatically, but does
            # not remove the encoding header.
            content_encoding = urlfetch_resp.headers.get('content-encoding')

            if content_encoding == 'deflate':
                del urlfetch_resp.headers['content-encoding']

        transfer_encoding = urlfetch_resp.headers.get('transfer-encoding')
        # We have a full response's content,
        # so let's make sure we don't report ourselves as chunked data.
        if transfer_encoding == 'chunked':
            encodings = transfer_encoding.split(",")
            encodings.remove('chunked')
            urlfetch_resp.headers['transfer-encoding'] = ','.join(encodings)

        original_response = HTTPResponse(
            # In order for decoding to work, we must present the content as
            # a file-like object.
            body=io.BytesIO(urlfetch_resp.content),
            msg=urlfetch_resp.header_msg,
            headers=urlfetch_resp.headers,
            status=urlfetch_resp.status_code,
            **response_kw
        )

        return HTTPResponse(
            body=io.BytesIO(urlfetch_resp.content),
            headers=urlfetch_resp.headers,
            status=urlfetch_resp.status_code,
            original_response=original_response,
            **response_kw
        )

    def _get_absolute_timeout(self, timeout):
        if timeout is Timeout.DEFAULT_TIMEOUT:
            return None  # Defer to URLFetch's default.
        if isinstance(timeout, Timeout):
            if timeout._read is not None or timeout._connect is not None:
                warnings.warn(
                    "URLFetch does not support granular timeout settings, "
                    "reverting to total or default URLFetch timeout.",
                    AppEnginePlatformWarning)
            return timeout.total
        return timeout

    def _get_retries(self, retries, redirect):
        if not isinstance(retries, Retry):
            retries = Retry.from_int(
                retries, redirect=redirect, default=self.retries)

        if retries.connect or retries.read or retries.redirect:
            warnings.warn(
                "URLFetch only supports total retries and does not "
                "recognize connect, read, or redirect retry parameters.",
                AppEnginePlatformWarning)

        return retries


# Alias methods from _appengine_environ to maintain public API interface.

is_appengine = _appengine_environ.is_appengine
is_appengine_sandbox = _appengine_environ.is_appengine_sandbox
is_local_appengine = _appengine_environ.is_local_appengine
is_prod_appengine = _appengine_environ.is_prod_appengine
is_prod_appengine_mvms = _appengine_environ.is_prod_appengine_mvms
