1# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
2# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
3"""WSGI Wrappers for a Request and Response
4
5The WSGIRequest and WSGIResponse objects are light wrappers to make it easier
6to deal with an incoming request and sending a response.
7"""
8import re
9import warnings
10from pprint import pformat
11try:
12    # Python 3
13    from http.cookies import SimpleCookie
14except ImportError:
15    # Python 2
16    from Cookie import SimpleCookie
17import six
18
19from paste.request import EnvironHeaders, get_cookie_dict, \
20    parse_dict_querystring, parse_formvars
21from paste.util.multidict import MultiDict, UnicodeMultiDict
22from paste.registry import StackedObjectProxy
23from paste.response import HeaderDict
24from paste.wsgilib import encode_unicode_app_iter
25from paste.httpheaders import ACCEPT_LANGUAGE
26from paste.util.mimeparse import desired_matches
27
__all__ = ['WSGIRequest', 'WSGIResponse']

# Finds a ``charset`` parameter inside a Content-Type header value
# (case-insensitive). Group 1 captures everything after ``charset=`` up to the
# next ``;`` or the end of the value.
_CHARSET_RE = re.compile(r';\s*charset=([^;]*)', re.I)
31
class DeprecatedSettings(StackedObjectProxy):
    """Proxy behind the deprecated module-level ``settings`` name.

    Pushing an object onto this proxy issues a ``DeprecationWarning`` and
    also pushes the same object onto ``WSGIResponse.defaults``, so code that
    still uses ``settings`` keeps configuring response defaults.
    """
    def _push_object(self, obj):
        # The message names WSGIResponse.defaults because that is the proxy
        # the object is actually forwarded to below.
        warnings.warn('paste.wsgiwrappers.settings is deprecated: Please use '
                      'paste.wsgiwrappers.WSGIResponse.defaults instead',
                      DeprecationWarning, 3)
        WSGIResponse.defaults._push_object(obj)
        StackedObjectProxy._push_object(self, obj)
39
# settings is deprecated: use WSGIResponse.defaults instead.
# Kept as a module-level proxy (defaulting to an empty dict) so existing code
# that references ``settings`` continues to work.
settings = DeprecatedSettings(default=dict())
42
class environ_getter(object):
    """Descriptor delegating an attribute to a key in ``self.environ``.

    ``key`` is the environ key to read.  When the key is missing,
    ``default_factory`` (if given) is called, its result is stored in the
    environ under ``key`` and returned; otherwise ``default`` is returned
    and the environ is left untouched.
    """
    # @@: Also __set__?  Should setting be allowed?
    def __init__(self, key, default='', default_factory=None):
        self.key = key
        self.default = default
        self.default_factory = default_factory

    def __get__(self, obj, type=None):
        # Per the descriptor protocol, class-level access passes obj=None
        # (and the owner class as ``type``); hand back the descriptor itself
        # instead of trying to read ``None.environ``.
        if obj is None:
            return self
        environ = obj.environ
        if self.key in environ:
            return environ[self.key]
        if self.default_factory:
            # Remember the generated value so later reads see the same object
            value = environ[self.key] = self.default_factory()
            return value
        return self.default

    def __repr__(self):
        return '<Proxy for WSGI environ %r key>' % self.key
63
class WSGIRequest(object):
    """WSGI Request API Object

    This object represents a WSGI request with a more friendly interface.
    This does not expose every detail of the WSGI environment, and attempts
    to express nothing beyond what is available in the environment
    dictionary.

    The only state maintained in this object is the desired ``charset``,
    its associated ``errors`` handler, and the ``decode_param_names``
    option.

    The incoming parameter values will be automatically coerced to unicode
    objects of the ``charset`` encoding when ``charset`` is set. The
    incoming parameter names are not decoded to unicode unless the
    ``decode_param_names`` option is enabled.

    When unicode is expected, ``charset`` will be overridden by the
    value of the ``Content-Type`` header's charset parameter if one was
    specified by the client.

    The class variable ``defaults`` specifies default values for
    ``charset``, ``errors``, ``decode_param_names`` and ``language``. These
    can be overridden for the current request via the registry.

    The ``language`` default value is considered the fallback during i18n
    translations to ensure in odd cases that mixed languages don't occur should
    the ``language`` file contain the string but not another language in the
    accepted languages list. The ``language`` value only applies when getting
    a list of accepted languages from the HTTP Accept header.

    This behavior is duplicated from Aquarium, and may seem strange but is
    very useful. Normally, everything in the code is in "en-us".  However,
    the "en-us" translation catalog is usually empty.  If the user requests
    ``["en-us", "zh-cn"]`` and a translation isn't found for a string in
    "en-us", you don't want gettext to fallback to "zh-cn".  You want it to
    just use the string itself.  Hence, if a string isn't found in the
    ``language`` catalog, the string in the source code will be used.

    *All* other state is kept in the environment dictionary; this is
    essential for interoperability.

    You are free to subclass this object.

    """
    defaults = StackedObjectProxy(default=dict(charset=None, errors='replace',
                                               decode_param_names=False,
                                               language='en-us'))

    def __init__(self, environ):
        """Wrap the WSGI ``environ`` dictionary of the current request."""
        self.environ = environ
        # This isn't "state" really, since the object is derivative:
        self.headers = EnvironHeaders(environ)

        defaults = self.defaults._current_obj()
        self.charset = defaults.get('charset')
        if self.charset:
            # There's a charset: params will be coerced to unicode. In that
            # case, attempt to use the charset specified by the browser
            browser_charset = self.determine_browser_charset()
            if browser_charset:
                self.charset = browser_charset
        self.errors = defaults.get('errors', 'strict')
        self.decode_param_names = defaults.get('decode_param_names', False)
        # Lazily computed by the ``languages`` property
        self._languages = None

    body = environ_getter('wsgi.input')
    scheme = environ_getter('wsgi.url_scheme')
    method = environ_getter('REQUEST_METHOD')
    script_name = environ_getter('SCRIPT_NAME')
    path_info = environ_getter('PATH_INFO')

    @property
    def urlvars(self):
        """
        Return any variables matched in the URL (e.g.,
        ``wsgiorg.routing_args``).
        """
        if 'paste.urlvars' in self.environ:
            return self.environ['paste.urlvars']
        elif 'wsgiorg.routing_args' in self.environ:
            # routing_args is a (positional, named) pair; return the named part
            return self.environ['wsgiorg.routing_args'][1]
        else:
            return {}

    @property
    def is_xhr(self):
        """Returns a boolean if X-Requested-With is present and a XMLHttpRequest"""
        return self.environ.get('HTTP_X_REQUESTED_WITH', '') == 'XMLHttpRequest'

    @property
    def host(self):
        """Host name provided in HTTP_HOST, with fall-back to SERVER_NAME"""
        return self.environ.get('HTTP_HOST', self.environ.get('SERVER_NAME'))

    @property
    def languages(self):
        """Return a list of preferred languages, most preferred first.

        The list is computed once per request object and then cached.  When a
        fallback ``language`` is configured in ``defaults`` it is appended if
        missing, and every language less preferred than it is dropped.

        The list may be empty.
        """
        if self._languages is not None:
            return self._languages
        langs = ACCEPT_LANGUAGE.parse(self.environ)
        fallback = self.defaults.get('language', 'en-us')
        if fallback:
            if fallback not in langs:
                langs.append(fallback)
            # Nothing after the fallback may be used (see the class docstring)
            del langs[langs.index(fallback) + 1:]
        self._languages = langs
        return langs

    def _decode_params(self, params):
        """Wrap ``params`` in a ``UnicodeMultiDict`` when ``charset`` is set.

        Without a charset the container is returned unchanged.
        """
        if self.charset:
            return UnicodeMultiDict(params, encoding=self.charset,
                                    errors=self.errors,
                                    decode_keys=self.decode_param_names)
        return params

    def _GET(self):
        """Return the raw (undecoded) query-string parameters."""
        return parse_dict_querystring(self.environ)

    @property
    def GET(self):
        """
        Dictionary-like object representing the QUERY_STRING
        parameters. Always present, if possibly empty.

        If the same key is present in the query string multiple times, a
        list of its values can be retrieved from the ``MultiDict`` via
        the ``getall`` method.

        Returns a ``MultiDict`` container or a ``UnicodeMultiDict`` when
        ``charset`` is set.
        """
        return self._decode_params(self._GET())

    def _POST(self):
        """Return the raw form variables of the request body (no GET vars)."""
        return parse_formvars(self.environ, include_get_vars=False,
                              encoding=self.charset, errors=self.errors)

    @property
    def POST(self):
        """Dictionary-like object representing the POST body.

        Most values are encoded strings, or unicode strings when
        ``charset`` is set. There may also be FieldStorage objects
        representing file uploads. If this is not a POST request, or the
        body is not encoded fields (e.g., an XMLRPC request) then this
        will be empty.

        This will consume wsgi.input when first accessed if applicable,
        but the raw version will be put in
        environ['paste.parsed_formvars'].

        Returns a ``MultiDict`` container or a ``UnicodeMultiDict`` when
        ``charset`` is set.
        """
        return self._decode_params(self._POST())

    @property
    def params(self):
        """Dictionary-like object of keys from the POST and GET dicts

        The POST parameters are added first, followed by the GET
        parameters.

        If the same key is present multiple times, a list of all its
        values (collected from both POST and GET) can be retrieved via
        the ``getall`` method.

        Returns a ``MultiDict`` container or a ``UnicodeMultiDict`` when
        ``charset`` is set.
        """
        merged = MultiDict()
        merged.update(self._POST())
        merged.update(self._GET())
        return self._decode_params(merged)

    @property
    def cookies(self):
        """Dictionary of cookies keyed by cookie name.

        Just a plain dictionary, may be empty but not None.

        """
        return get_cookie_dict(self.environ)

    def determine_browser_charset(self):
        """
        Determine the encoding as specified by the browser via the
        Content-Type's charset parameter, if one is set

        Surrounding whitespace and double quotes are removed from the
        value.  Returns ``None`` when no (non-empty) charset is given.
        """
        charset_match = _CHARSET_RE.search(self.headers.get('Content-Type', ''))
        if charset_match:
            # Header parameter values may be quoted (charset="utf-8") or
            # padded; hand back only the bare codec name.
            return charset_match.group(1).strip().strip('"').strip() or None
        return None

    def match_accept(self, mimetypes):
        """Return a list of specified mime-types that the browser's HTTP Accept
        header allows in the order provided."""
        return desired_matches(mimetypes,
                               self.environ.get('HTTP_ACCEPT', '*/*'))

    def __repr__(self):
        """Show important attributes of the WSGIRequest

        Note that this reads ``self.POST``, which parses the request body.
        """
        pf = pformat
        msg = '<%s.%s object at 0x%x method=%s,' % \
            (self.__class__.__module__, self.__class__.__name__,
             id(self), pf(self.method))
        msg += '\nscheme=%s, host=%s, script_name=%s, path_info=%s,' % \
            (pf(self.scheme), pf(self.host), pf(self.script_name),
             pf(self.path_info))
        msg += '\nlanguages=%s,' % pf(self.languages)
        if self.charset:
            msg += ' charset=%s, errors=%s,' % (pf(self.charset),
                                                pf(self.errors))
        msg += '\nGET=%s,' % pf(self.GET)
        msg += '\nPOST=%s,' % pf(self.POST)
        msg += '\ncookies=%s>' % pf(self.cookies)
        return msg
295
class WSGIResponse(object):
    """A basic HTTP response with content, headers, and out-bound cookies

    The class variable ``defaults`` specifies default values for
    ``content_type``, ``charset``, ``errors`` and ``headers``. These can be
    overridden for the current request via the registry.

    """
    defaults = StackedObjectProxy(
        default=dict(content_type='text/html', charset='utf-8',
                     errors='strict', headers={'Cache-Control':'no-cache'})
        )

    def __init__(self, content=b'', mimetype=None, code=200):
        """Create a response.

        ``content`` may be a string or an iterable of strings (see the
        ``content`` property).  ``mimetype``, when given, is used verbatim
        as the Content-Type header; otherwise the header is built from the
        ``content_type`` and ``charset`` defaults.  ``code`` is the numeric
        HTTP status code.
        """
        self._iter = None
        self._is_str_iter = True

        self.content = content
        self.headers = HeaderDict()
        self.cookies = SimpleCookie()
        self.status_code = code

        defaults = self.defaults._current_obj()
        if not mimetype:
            mimetype = defaults.get('content_type', 'text/html')
            charset = defaults.get('charset')
            if charset:
                mimetype = '%s; charset=%s' % (mimetype, charset)
        self.headers.update(defaults.get('headers', {}))
        self.headers['Content-Type'] = mimetype
        self.errors = defaults.get('errors', 'strict')

    def __str__(self):
        """Returns a rendition of the full HTTP message, including headers.

        When the content is an iterator, the actual content is replaced with the
        output of str(iterator) (to avoid exhausting the iterator).
        """
        if self._is_str_iter:
            chunks = self.get_content()
            if str is not six.binary_type:
                # Native strings are text here, but chunks may be bytes (for
                # example the default ``b''`` content); decode them so they
                # can be joined.  'replace' keeps this display-only method
                # from raising on undecodable data.
                charset = self.determine_charset() or 'latin-1'
                chunks = [chunk.decode(charset, 'replace')
                          if isinstance(chunk, six.binary_type) else chunk
                          for chunk in chunks]
            content = ''.join(chunks)
        else:
            content = str(self.content)
        header_lines = ['%s: %s' % (key, value)
                        for key, value in self.headers.headeritems()]
        return '\n'.join(header_lines) + '\n\n' + content

    def _status_and_headers(self):
        """Build the ``(status line, header list)`` pair for this response.

        Raises ``KeyError`` when ``status_code`` is not in
        ``STATUS_CODE_TEXT``.
        """
        status_text = STATUS_CODE_TEXT[self.status_code]
        status = '%s %s' % (self.status_code, status_text)
        response_headers = self.headers.headeritems()
        for c in self.cookies.values():
            # OutputString() is the bare cookie value; output(header='')
            # would prefix it with a stray space.
            response_headers.append(('Set-Cookie', c.OutputString()))
        return status, response_headers

    @staticmethod
    def _read_in_chunks(fileobj, chunk_size=65536):
        """Yield successive ``read(chunk_size)`` results until one is empty."""
        while True:
            chunk = fileobj.read(chunk_size)
            if not chunk:
                # Covers both b'' and '' so text-mode files terminate too
                break
            yield chunk

    def __call__(self, environ, start_response):
        """Convenience call to return output and set status information

        Conforms to the WSGI interface for calling purposes only.

        Example usage:

        .. code-block:: python

            def wsgi_app(environ, start_response):
                response = WSGIResponse()
                response.write("Hello world")
                response.charset = 'latin1'
                return response(environ, start_response)

        """
        status, response_headers = self._status_and_headers()
        start_response(status, response_headers)
        if hasattr(self.content, 'read'):
            # File-like content: prefer the server's optimized wrapper
            if 'wsgi.file_wrapper' in environ:
                return environ['wsgi.file_wrapper'](self.content)
            return self._read_in_chunks(self.content)
        return self.get_content()

    def determine_charset(self):
        """
        Determine the encoding as specified by the Content-Type's charset
        parameter, if one is set
        """
        charset_match = _CHARSET_RE.search(self.headers.get('Content-Type', ''))
        if charset_match:
            return charset_match.group(1)

    def has_header(self, header):
        """
        Case-insensitive check for a header
        """
        warnings.warn('WSGIResponse.has_header is deprecated, use '
                      'WSGIResponse.headers.has_key instead', DeprecationWarning,
                      2)
        return self.headers.has_key(header)

    def set_cookie(self, key, value='', max_age=None, expires=None, path='/',
                   domain=None, secure=None, httponly=None):
        """
        Define a cookie to be sent via the outgoing HTTP headers

        Attributes whose value is ``None`` or ``False`` are not set on the
        cookie.
        """
        self.cookies[key] = value
        for var_name, var_value in [
            ('max_age', max_age), ('path', path), ('domain', domain),
            ('secure', secure), ('expires', expires), ('httponly', httponly)]:
            if var_value is not None and var_value is not False:
                # Cookie attribute names use dashes ('max-age')
                self.cookies[key][var_name.replace('_', '-')] = var_value

    def delete_cookie(self, key, path='/', domain=None):
        """
        Notify the browser the specified cookie has expired and should be
        deleted (via the outgoing HTTP headers)
        """
        self.cookies[key] = ''
        if path is not None:
            self.cookies[key]['path'] = path
        if domain is not None:
            self.cookies[key]['domain'] = domain
        self.cookies[key]['expires'] = 0
        self.cookies[key]['max-age'] = 0

    def _set_content(self, content):
        if isinstance(content, (six.binary_type, six.text_type)):
            # A single string is stored as a one-element, writable list
            self._iter = [content]
            self._is_str_iter = True
        else:
            self._iter = content
            # Only lists are treated as writable via write()/tell()
            self._is_str_iter = isinstance(content, list)
    content = property(lambda self: self._iter, _set_content,
                       doc='Get/set the specified content, where content can '
                       'be: a string, a list of strings, a generator function '
                       'that yields strings, or an iterable object that '
                       'produces strings.')

    def get_content(self):
        """
        Returns the content as an iterable of strings, encoding each element of
        the iterator from a Unicode object if necessary.
        """
        charset = self.determine_charset()
        if charset:
            return encode_unicode_app_iter(self.content, charset, self.errors)
        else:
            return self.content

    def wsgi_response(self):
        """
        Return this WSGIResponse as a tuple of WSGI formatted data, including:
        (status, headers, iterable)
        """
        status, response_headers = self._status_and_headers()
        return status, response_headers, self.get_content()

    # The remaining methods partially implement the file-like object interface.
    # See http://docs.python.org/lib/bltin-file-objects.html
    def write(self, content):
        """Append ``content`` to the response body.

        Raises ``IOError`` when the current content is not a writable list.
        """
        if not self._is_str_iter:
            raise IOError("This %s instance's content is not writable: (content "
                'is an iterator)' % self.__class__.__name__)
        self.content.append(content)

    def flush(self):
        """Do nothing; present for file-like compatibility."""
        pass

    def tell(self):
        """Return the total length of the chunks written so far.

        Raises ``IOError`` when the current content is not a writable list.
        """
        if not self._is_str_iter:
            raise IOError('This %s instance cannot tell its position: (content '
                'is an iterator)' % self.__class__.__name__)
        return sum(len(chunk) for chunk in self._iter)

    ########################################
    ## Content-type and charset

    def charset__get(self):
        """
        Get/set the charset (in the Content-Type)
        """
        header = self.headers.get('content-type')
        if not header:
            return None
        match = _CHARSET_RE.search(header)
        if match:
            return match.group(1)
        return None

    def charset__set(self, charset):
        if charset is None:
            # Assigning None is the same as deleting the charset
            del self.charset
            return
        try:
            header = self.headers.pop('content-type')
        except KeyError:
            raise AttributeError(
                "You cannot set the charset when no content-type is defined")
        match = _CHARSET_RE.search(header)
        if match:
            # Cut out the existing charset parameter before appending the new
            header = header[:match.start()] + header[match.end():]
        header += '; charset=%s' % charset
        self.headers['content-type'] = header

    def charset__del(self):
        try:
            header = self.headers.pop('content-type')
        except KeyError:
            # Don't need to remove anything
            return
        match = _CHARSET_RE.search(header)
        if match:
            header = header[:match.start()] + header[match.end():]
        self.headers['content-type'] = header

    charset = property(charset__get, charset__set, charset__del, doc=charset__get.__doc__)

    def content_type__get(self):
        """
        Get/set the Content-Type header (or None), *without* the
        charset or any parameters.

        If you include parameters (or ``;`` at all) when setting the
        content_type, any existing parameters will be deleted;
        otherwise they will be preserved.
        """
        header = self.headers.get('content-type')
        if not header:
            return None
        return header.split(';', 1)[0]

    def content_type__set(self, value):
        if ';' not in value:
            # Bare type given: carry over the parameters of the old header
            header = self.headers.get('content-type', '')
            if ';' in header:
                params = header.split(';', 1)[1]
                value += ';' + params
        self.headers['content-type'] = value

    def content_type__del(self):
        try:
            del self.headers['content-type']
        except KeyError:
            pass

    content_type = property(content_type__get, content_type__set,
                            content_type__del, doc=content_type__get.__doc__)
542
543## @@ I'd love to remove this, but paste.httpexceptions.get_exception
544##    doesn't seem to work...
545# See http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
# Maps numeric HTTP status codes to their (upper-cased) reason phrases.
# Entries beyond RFC 2616 follow the IANA HTTP Status Code Registry.
STATUS_CODE_TEXT = {
    100: 'CONTINUE',
    101: 'SWITCHING PROTOCOLS',
    102: 'PROCESSING',
    200: 'OK',
    201: 'CREATED',
    202: 'ACCEPTED',
    203: 'NON-AUTHORITATIVE INFORMATION',
    204: 'NO CONTENT',
    205: 'RESET CONTENT',
    206: 'PARTIAL CONTENT',
    207: 'MULTI-STATUS',
    226: 'IM USED',
    300: 'MULTIPLE CHOICES',
    301: 'MOVED PERMANENTLY',
    302: 'FOUND',
    303: 'SEE OTHER',
    304: 'NOT MODIFIED',
    305: 'USE PROXY',
    306: 'RESERVED',
    307: 'TEMPORARY REDIRECT',
    308: 'PERMANENT REDIRECT',
    400: 'BAD REQUEST',
    401: 'UNAUTHORIZED',
    402: 'PAYMENT REQUIRED',
    403: 'FORBIDDEN',
    404: 'NOT FOUND',
    405: 'METHOD NOT ALLOWED',
    406: 'NOT ACCEPTABLE',
    407: 'PROXY AUTHENTICATION REQUIRED',
    408: 'REQUEST TIMEOUT',
    409: 'CONFLICT',
    410: 'GONE',
    411: 'LENGTH REQUIRED',
    412: 'PRECONDITION FAILED',
    413: 'REQUEST ENTITY TOO LARGE',
    414: 'REQUEST-URI TOO LONG',
    415: 'UNSUPPORTED MEDIA TYPE',
    416: 'REQUESTED RANGE NOT SATISFIABLE',
    417: 'EXPECTATION FAILED',
    422: 'UNPROCESSABLE ENTITY',
    423: 'LOCKED',
    424: 'FAILED DEPENDENCY',
    426: 'UPGRADE REQUIRED',
    428: 'PRECONDITION REQUIRED',
    429: 'TOO MANY REQUESTS',
    431: 'REQUEST HEADER FIELDS TOO LARGE',
    451: 'UNAVAILABLE FOR LEGAL REASONS',
    500: 'INTERNAL SERVER ERROR',
    501: 'NOT IMPLEMENTED',
    502: 'BAD GATEWAY',
    503: 'SERVICE UNAVAILABLE',
    504: 'GATEWAY TIMEOUT',
    505: 'HTTP VERSION NOT SUPPORTED',
    507: 'INSUFFICIENT STORAGE',
    511: 'NETWORK AUTHENTICATION REQUIRED',
}
591