1"""WSGI interface (see PEP 333 and 3333).
2
3Note that WSGI environ keys and values are 'native strings'; that is,
4whatever the type of "" is. For Python 2, that's a byte string; for Python 3,
5it's a unicode string. But PEP 3333 says: "even if Python's str type is
6actually Unicode "under the hood", the content of native strings must
7still be translatable to bytes via the Latin-1 encoding!"
8"""
9
10import sys as _sys
11import io
12
13import cherrypy as _cherrypy
14from cherrypy._cpcompat import ntou
15from cherrypy import _cperror
16from cherrypy.lib import httputil
17from cherrypy.lib import is_closable_iterator
18
19
def downgrade_wsgi_ux_to_1x(environ):
    """Build a WSGI 1.x (byte-string) environ from a WSGI u.x environ.

    Every key is encoded as ISO-8859-1. The URL-related values
    (``PATH_INFO``, ``SCRIPT_NAME`` and ``QUERY_STRING``) are encoded with
    the encoding named by the ``wsgi.url_encoding`` entry of ``environ``;
    any other ``str`` value is encoded as ISO-8859-1 and non-string values
    are carried over untouched. The given environ is left unmodified.
    """
    url_encoding = environ[ntou('wsgi.url_encoding')]
    url_keys = (ntou('PATH_INFO'), ntou('SCRIPT_NAME'), ntou('QUERY_STRING'))

    downgraded = {}
    for key, value in environ.copy().items():
        if key in url_keys:
            # URL parts use the encoding the server declared for them.
            value = value.encode(url_encoding)
        elif isinstance(value, str):
            value = value.encode('ISO-8859-1')
        downgraded[key.encode('ISO-8859-1')] = value
    return downgraded
34
35
class VirtualHost(object):

    """Dispatch to one of several WSGI applications by Host header.

    Handy when a single CP server hosts multiple sites: each domain can be
    routed to its own application. For example::

        root = Root()
        RootApp = cherrypy.Application(root)
        Domain2App = cherrypy.Application(root)
        SecureApp = cherrypy.Application(Secure())

        vhost = cherrypy._cpwsgi.VirtualHost(
            RootApp,
            domains={
                'www.domain2.example': Domain2App,
                'www.domain2.example:443': SecureApp,
            },
        )

        cherrypy.tree.graft(vhost)
    """

    default = None
    """Required. The WSGI application used when no domain matches."""

    use_x_forwarded_host = True
    """If True (the default), an "X-Forwarded-Host" request header, when
    present, takes precedence over the "Host" header. Proxying HTTP
    servers (such as Apache) commonly add it."""

    domains = {}
    """A dict of {host header value: application} pairs.
    The incoming host value is looked up verbatim in this dict; on a
    match the corresponding WSGI application is called instead of the
    default. Separate entries are usually needed for "example.com" and
    "www.example.com", and the header value may include a port number.
    """

    def __init__(self, default, domains=None, use_x_forwarded_host=True):
        """Store the default app, the domain map and the proxy-header flag.

        A falsy ``domains`` argument results in a fresh empty dict.
        """
        self.use_x_forwarded_host = use_x_forwarded_host
        self.domains = domains or {}
        self.default = default

    def __call__(self, environ, start_response):
        """Call the application registered for the request's host."""
        host = environ.get('HTTP_HOST', '')
        if self.use_x_forwarded_host:
            # Fall back to the plain Host value when the proxy header
            # is absent.
            host = environ.get('HTTP_X_FORWARDED_HOST', host)

        app = self.domains.get(host)
        chosen = self.default if app is None else app
        return chosen(environ, start_response)
89
90
class InternalRedirector(object):

    """WSGI middleware that handles raised cherrypy.InternalRedirect.

    When the wrapped application raises ``InternalRedirect``, the request
    is replayed against the redirect target as a body-less GET. Unless
    ``recursive`` is true, redirecting to a URL that was already visited
    during this call raises ``RuntimeError``.
    """

    def __init__(self, nextapp, recursive=False):
        self.recursive = recursive
        self.nextapp = nextapp

    def __call__(self, environ, start_response):
        """Call the wrapped app, re-running it for each internal redirect."""
        visited = []
        while True:
            # Work on a private copy so the caller's environ is never
            # modified by the wrapped app or by the redirect rewrite.
            environ = environ.copy()
            try:
                return self.nextapp(environ, start_response)
            except _cherrypy.InternalRedirect as ir:
                script_name = environ.get('SCRIPT_NAME', '')
                path_info = environ.get('PATH_INFO', '')
                query = environ.get('QUERY_STRING', '')

                # Record the URI we are being redirected *from*.
                previous = script_name + path_info
                if query:
                    previous += '?' + query
                visited.append(previous)

                if not self.recursive:
                    # Refuse to go to a URI that was already visited.
                    target = script_name + ir.path
                    if ir.query_string:
                        target += '?' + ir.query_string
                    if target in visited:
                        ir.request.close()
                        tmpl = (
                            'InternalRedirector visited the same URL twice: %r'
                        )
                        raise RuntimeError(tmpl % target)

                # Rewrite the environ for the redirect target and loop.
                environ['REQUEST_METHOD'] = 'GET'
                environ['PATH_INFO'] = ir.path
                environ['QUERY_STRING'] = ir.query_string
                environ['wsgi.input'] = io.BytesIO()
                environ['CONTENT_LENGTH'] = '0'
                environ['cherrypy.previous_request'] = ir.request
137
138
class ExceptionTrapper(object):

    """WSGI middleware that traps exceptions.

    Each call hands the wrapped application, the environ, the
    ``start_response`` callable and the ``throws`` tuple to a new
    ``_TrappedResponse``, which is returned as the response iterable.
    """

    def __init__(self, nextapp, throws=(KeyboardInterrupt, SystemExit)):
        self.throws = throws
        self.nextapp = nextapp

    def __call__(self, environ, start_response):
        """Return a ``_TrappedResponse`` wrapping the next application."""
        trapped = _TrappedResponse(
            self.nextapp, environ, start_response, self.throws,
        )
        return trapped
154
155
156class _TrappedResponse(object):
157
158    response = iter([])
159
160    def __init__(self, nextapp, environ, start_response, throws):
161        self.nextapp = nextapp
162        self.environ = environ
163        self.start_response = start_response
164        self.throws = throws
165        self.started_response = False
166        self.response = self.trap(
167            self.nextapp, self.environ, self.start_response,
168        )
169        self.iter_response = iter(self.response)
170
171    def __iter__(self):
172        self.started_response = True
173        return self
174
175    def __next__(self):
176        return self.trap(next, self.iter_response)
177
178    def close(self):
179        if hasattr(self.response, 'close'):
180            self.response.close()
181
182    def trap(self, func, *args, **kwargs):
183        try:
184            return func(*args, **kwargs)
185        except self.throws:
186            raise
187        except StopIteration:
188            raise
189        except Exception:
190            tb = _cperror.format_exc()
191            _cherrypy.log(tb, severity=40)
192            if not _cherrypy.request.show_tracebacks:
193                tb = ''
194            s, h, b = _cperror.bare_error(tb)
195            if True:
196                # What fun.
197                s = s.decode('ISO-8859-1')
198                h = [
199                    (k.decode('ISO-8859-1'), v.decode('ISO-8859-1'))
200                    for k, v in h
201                ]
202            if self.started_response:
203                # Empty our iterable (so future calls raise StopIteration)
204                self.iter_response = iter([])
205            else:
206                self.iter_response = iter(b)
207
208            try:
209                self.start_response(s, h, _sys.exc_info())
210            except Exception:
211                # "The application must not trap any exceptions raised by
212                # start_response, if it called start_response with exc_info.
213                # Instead, it should allow such exceptions to propagate
214                # back to the server or gateway."
215                # But we still log and call close() to clean up ourselves.
216                _cherrypy.log(traceback=True, severity=40)
217                raise
218
219            if self.started_response:
220                return b''.join(b)
221            else:
222                return b
223
224
225#                           WSGI-to-CP Adapter                           #
226
227
class AppResponse(object):

    """WSGI response iterable for CherryPy applications."""

    def __init__(self, environ, start_response, cpapp):
        """Run the request for ``environ`` and start the WSGI response.

        The response status and headers produced by CherryPy must be byte
        strings (``TypeError`` is raised otherwise); they are decoded as
        ISO-8859-1 before being handed to ``start_response``. On any
        exception, ``close()`` is called and the exception is re-raised.
        """
        self.cpapp = cpapp
        try:
            self.environ = environ
            self.run()

            r = _cherrypy.serving.response

            outstatus = r.output_status
            if not isinstance(outstatus, bytes):
                raise TypeError('response.output_status is not a byte string.')

            # According to PEP 3333, when using Python 3, the response
            # status and headers must be bytes masquerading as unicode;
            # that is, they must be of type "str" but are restricted to
            # code points in the "latin-1" set.
            outheaders = []
            for k, v in r.header_list:
                if not isinstance(k, bytes):
                    tmpl = 'response.header_list key %r is not a byte string.'
                    raise TypeError(tmpl % (k,))
                if not isinstance(v, bytes):
                    tmpl = (
                        'response.header_list value %r is not a byte string.'
                    )
                    raise TypeError(tmpl % (v,))
                outheaders.append(
                    (k.decode('ISO-8859-1'), v.decode('ISO-8859-1'))
                )
            outstatus = outstatus.decode('ISO-8859-1')

            self.iter_response = iter(r.body)
            self.write = start_response(outstatus, outheaders)
        except BaseException:
            self.close()
            raise

    def __iter__(self):
        return self

    def __next__(self):
        return next(self.iter_response)

    def close(self):
        """Close and de-reference the current request and response. (Core)"""
        streaming = _cherrypy.serving.response.stream
        self.cpapp.release_serving()

        # __init__ calls close() from its error path, which may run before
        # iter_response has been assigned. In that case there is nothing to
        # close, and raising AttributeError here would mask the exception
        # that is being propagated.
        iter_response = getattr(self, 'iter_response', None)
        if iter_response is None:
            return

        # We avoid the expense of examining the iterator to see if it's
        # closable unless we are streaming the response, as that's the
        # only situation where we are going to have an iterator which
        # may not have been exhausted yet.
        if streaming and is_closable_iterator(iter_response):
            iter_close = iter_response.close
            try:
                iter_close()
            except Exception:
                _cherrypy.log(traceback=True, severity=40)

    def run(self):
        """Create a Request object using environ.

        Builds the local and remote ``httputil.Host`` descriptions from
        the environ, obtains the request/response pair from
        ``cpapp.get_serving``, copies the WSGI-provided attributes onto
        the request and finally calls ``request.run``.
        """
        env = self.environ.get

        # A missing SERVER_PORT defaults to 80; a present-but-falsy port
        # (for example an empty string) becomes -1.
        local = httputil.Host(
            '',
            int(env('SERVER_PORT', 80) or -1),
            env('SERVER_NAME', ''),
        )
        remote = httputil.Host(
            env('REMOTE_ADDR', ''),
            int(env('REMOTE_PORT', -1) or -1),
            env('REMOTE_HOST', ''),
        )
        scheme = env('wsgi.url_scheme')
        sproto = env('ACTUAL_SERVER_PROTOCOL', 'HTTP/1.1')
        request, resp = self.cpapp.get_serving(local, remote, scheme, sproto)

        # LOGON_USER is served by IIS, and is the name of the
        # user after having been mapped to a local account.
        # Both IIS and Apache set REMOTE_USER, when possible.
        request.login = env('LOGON_USER') or env('REMOTE_USER') or None
        request.multithread = self.environ['wsgi.multithread']
        request.multiprocess = self.environ['wsgi.multiprocess']
        request.wsgi_environ = self.environ
        request.prev = env('cherrypy.previous_request', None)

        meth = self.environ['REQUEST_METHOD']

        path = httputil.urljoin(
            self.environ.get('SCRIPT_NAME', ''),
            self.environ.get('PATH_INFO', ''),
        )
        qs = self.environ.get('QUERY_STRING', '')

        # recode_path_qs returns None when no transcoding was done.
        path, qs = self.recode_path_qs(path, qs) or (path, qs)

        rproto = self.environ.get('SERVER_PROTOCOL')
        headers = self.translate_headers(self.environ)
        rfile = self.environ['wsgi.input']
        request.run(meth, path, qs, rproto, headers, rfile)

    # CGI environ keys whose HTTP header name cannot be derived by simply
    # stripping an "HTTP_" prefix.
    headerNames = {
        'HTTP_CGI_AUTHORIZATION': 'Authorization',
        'CONTENT_LENGTH': 'Content-Length',
        'CONTENT_TYPE': 'Content-Type',
        'REMOTE_HOST': 'Remote-Host',
        'REMOTE_ADDR': 'Remote-Addr',
    }

    def recode_path_qs(self, path, qs):
        """Transcode ``path`` and ``qs`` to the configured URI encoding.

        Returns a ``(path, qs)`` tuple re-decoded with the application's
        ``request.uri_encoding`` (default ``utf-8``). Returns ``None`` when
        that encoding already matches the environ's ``wsgi.url_encoding``
        (default ``ISO-8859-1``) or when transcoding fails.
        """
        # This isn't perfect; if the given PATH_INFO is in the
        # wrong encoding, it may fail to match the appropriate config
        # section URI. But meh.
        old_enc = self.environ.get('wsgi.url_encoding', 'ISO-8859-1')
        new_enc = self.cpapp.find_config(
            self.environ.get('PATH_INFO', ''),
            'request.uri_encoding', 'utf-8',
        )
        if new_enc.lower() == old_enc.lower():
            return None

        # Even though the path and qs are unicode, the WSGI server
        # is required by PEP 3333 to coerce them to ISO-8859-1
        # masquerading as unicode. So we have to encode back to
        # bytes and then decode again using the "correct" encoding.
        try:
            return (
                path.encode(old_enc).decode(new_enc),
                qs.encode(old_enc).decode(new_enc),
            )
        except (UnicodeEncodeError, UnicodeDecodeError):
            # Just pass them through without transcoding and hope.
            return None

    def translate_headers(self, environ):
        """Translate CGI-environ header names to HTTP header names.

        Yields ``(header name, value)`` pairs for the keys listed in
        ``headerNames`` and for every key starting with ``HTTP_``; all
        other environ entries are skipped.
        """
        for cgiName in environ:
            # We assume all incoming header keys are uppercase already.
            if cgiName in self.headerNames:
                yield self.headerNames[cgiName], environ[cgiName]
            elif cgiName.startswith('HTTP_'):
                # Hackish attempt at recovering original header names.
                translatedHeader = cgiName[5:].replace('_', '-')
                yield translatedHeader, environ[cgiName]
380
381
class CPWSGIApp(object):

    """A WSGI application object for a CherryPy Application."""

    pipeline = [
        ('ExceptionTrapper', ExceptionTrapper),
        ('InternalRedirector', InternalRedirector),
    ]
    """A list of (name, wsgiapp) pairs. Each 'wsgiapp' MUST be a
    constructor that takes an initial, positional 'nextapp' argument,
    plus optional keyword arguments, and returns a WSGI application
    (that takes environ and start_response arguments). The 'name' can
    be any you choose, and will correspond to keys in self.config."""

    head = None
    """The memoized outermost app of the nested pipeline. It is built on
    the first call only; set this to None again if you change
    self.pipeline after calling self."""

    config = {}
    """A dict whose keys match names listed in the pipeline. Each
    value is a further dict which will be passed to the corresponding
    named WSGI callable (from the pipeline) as keyword arguments."""

    response_class = AppResponse
    """The class to instantiate and return as the next app in the WSGI chain.
    """

    def __init__(self, cpapp, pipeline=None):
        """Bind to ``cpapp`` and take private copies of pipeline and config.

        Entries of the optional ``pipeline`` argument are appended after
        the class-level defaults.
        """
        self.cpapp = cpapp
        self.config = self.config.copy()
        self.pipeline = self.pipeline[:]
        if pipeline:
            self.pipeline.extend(pipeline)

    def tail(self, environ, start_response):
        """WSGI application callable for the actual CherryPy application.

        You probably shouldn't call this; call self.__call__ instead,
        so that any WSGI middleware in self.pipeline can run first.
        """
        return self.response_class(environ, start_response, self.cpapp)

    def __call__(self, environ, start_response):
        """Run the request through the (lazily built) middleware chain."""
        app = self.head
        if app is None:
            # Wrap the tail with each pipeline entry, last entry first, so
            # the first entry ends up outermost; then memoize the result.
            app = self.tail
            for name, factory in reversed(self.pipeline):
                kwargs = self.config.get(name, {})
                app = factory(app, **kwargs)
            self.head = app
        return app(environ, start_response)

    def namespace_handler(self, k, v):
        """Config handler for the 'wsgi' namespace."""
        if k == 'pipeline':
            # Note this allows multiple 'wsgi.pipeline' config entries
            # (but each entry will be processed in a 'random' order).
            # It should also allow developers to set default middleware
            # in code (passed to self.__init__) that deployers can add to
            # (but not remove) via config.
            self.pipeline.extend(v)
            return

        if k == 'response_class':
            self.response_class = v
            return

        # Anything else is "<pipeline name>.<keyword argument>".
        name, arg = k.split('.', 1)
        self.config.setdefault(name, {})[arg] = v
452