1"""Functions for builtin CherryPy tools."""
2
3import logging
4import re
5from hashlib import md5
6
7import six
8from six.moves import urllib
9
10import cherrypy
11from cherrypy._cpcompat import text_or_bytes
12from cherrypy.lib import httputil as _httputil
13from cherrypy.lib import is_iterator
14
15
16#                     Conditional HTTP request support                     #
17
def validate_etags(autotags=False, debug=False):
    """Check the response ETag against If-Match and If-None-Match.

    autotags
        When True and no ETag response header has been set, an ETag is
        generated from the MD5 hex digest of the collapsed response body,
        provided the response status is 200. When False (the default),
        no ETag is generated.

    debug
        When True, progress messages are written to the CherryPy log
        under the 'TOOLS.ETAGS' context.

    Raises HTTPError 412 if an If-Match condition fails, or if an
    If-None-Match condition matches for a method other than GET or HEAD.
    Raises HTTPRedirect with status 304 if an If-None-Match condition
    matches for GET or HEAD. The conditions are only evaluated when the
    response status is in the 2xx range.

    WARNING: the autotags feature is not designed for URLs which allow
    methods other than GET. For example, if a POST to the same URL returns
    no content, the automatic ETag will be incorrect, breaking a fundamental
    use for entity tags in a possibly destructive fashion. Likewise, if you
    raise 304 Not Modified, the response body will be empty, the ETag hash
    will be incorrect, and your application will break.
    See :rfc:`2616` Section 14.24.
    """
    response = cherrypy.serving.response

    # An earlier run stores its result on the response; do not run twice.
    if hasattr(response, 'ETag'):
        return

    def trace(template, *args):
        # Emit a debug line only when the caller asked for it.
        if debug:
            cherrypy.log(template % args, 'TOOLS.ETAGS')

    status = _httputil.valid_status(response.status)[0]
    etag = response.headers.get('ETag')

    # Automatic ETag generation. See warning in docstring.
    if etag:
        trace('ETag already set: %s', etag)
    elif not autotags:
        trace('Autotags off')
    elif status != 200:
        trace('Status not 200')
    else:
        body = response.collapse_body()
        etag = '"%s"' % md5(body).hexdigest()
        trace('Setting ETag: %s', etag)
        response.headers['ETag'] = etag

    response.ETag = etag

    # "If the request would, without the If-Match header field, result in
    # anything other than a 2xx or 412 status, then the If-Match header
    # MUST be ignored."
    trace('Status: %s', status)
    if not 200 <= status <= 299:
        return

    request = cherrypy.serving.request

    def conditions_for(header):
        # Stringified elements of a request header; [] when absent.
        return [str(x) for x in request.headers.elements(header) or []]

    if_match = conditions_for('If-Match')
    trace('If-Match conditions: %s', repr(if_match))
    if if_match and if_match != ['*'] and etag not in if_match:
        raise cherrypy.HTTPError(412, 'If-Match failed: ETag %r did '
                                 'not match %r' % (etag, if_match))

    if_none_match = conditions_for('If-None-Match')
    trace('If-None-Match conditions: %s', repr(if_none_match))
    if if_none_match == ['*'] or etag in if_none_match:
        trace('request.method: %s', request.method)
        if request.method in ('GET', 'HEAD'):
            raise cherrypy.HTTPRedirect([], 304)
        raise cherrypy.HTTPError(412, 'If-None-Match failed: ETag %r '
                                 'matched %r' % (etag, if_none_match))
94
95
def validate_since():
    """Check Last-Modified against the If-(Un)Modified-Since headers.

    Nothing is validated unless a Last-Modified response header has
    already been set. The request header values are compared to it as
    plain strings (equality only, no date parsing).

    Raises HTTPError 412 when If-Unmodified-Since differs from
    Last-Modified and the status is 2xx or 412. When If-Modified-Since
    equals Last-Modified and the status is 2xx or 304, raises
    HTTPRedirect with status 304 for GET and HEAD, or HTTPError 412 for
    any other method.
    """
    response = cherrypy.serving.response
    lastmod = response.headers.get('Last-Modified')
    if not lastmod:
        return

    status = _httputil.valid_status(response.status)[0]
    succeeded = 200 <= status <= 299
    request = cherrypy.serving.request

    unmodified_since = request.headers.get('If-Unmodified-Since')
    if (unmodified_since and unmodified_since != lastmod and
            (succeeded or status == 412)):
        raise cherrypy.HTTPError(412)

    modified_since = request.headers.get('If-Modified-Since')
    if (modified_since and modified_since == lastmod and
            (succeeded or status == 304)):
        if request.method in ('GET', 'HEAD'):
            raise cherrypy.HTTPRedirect([], 304)
        raise cherrypy.HTTPError(412)
121
122
123#                                Tool code                                #
124
def allow(methods=None, debug=False):
    """Raise 405 if request.method not in methods (default ['GET', 'HEAD']).

    methods
        A single method name, or a list/tuple of method names. Names are
        upper-cased before use and falsy entries are dropped. If nothing
        remains, ['GET', 'HEAD'] is used; if 'GET' is given without
        'HEAD', 'HEAD' is added.

    debug
        When True, the outcome is written to the CherryPy log under the
        'TOOLS.ALLOW' context.

    The 'Allow' response header is always set to the resulting methods,
    whether or not the current request method is among them.
    """
    if isinstance(methods, (tuple, list)):
        given = methods
    else:
        given = [methods]

    allowed = [name.upper() for name in given if name] or ['GET', 'HEAD']
    if 'GET' in allowed and 'HEAD' not in allowed:
        allowed.append('HEAD')

    cherrypy.response.headers['Allow'] = ', '.join(allowed)

    permitted = cherrypy.request.method in allowed
    if debug:
        if permitted:
            cherrypy.log('request.method %r in methods %r' %
                         (cherrypy.request.method, allowed), 'TOOLS.ALLOW')
        else:
            cherrypy.log('request.method %r not in methods %r' %
                         (cherrypy.request.method, allowed), 'TOOLS.ALLOW')
    if not permitted:
        raise cherrypy.HTTPError(405)
153
154
def proxy(base=None, local='X-Forwarded-Host', remote='X-Forwarded-For',
          scheme='X-Forwarded-Proto', debug=False):
    """Change the base URL (scheme://host[:port][/path]).

    For running a CP server behind Apache, lighttpd, or other HTTP server.

    base
        The new request.base. Used when the 'local' header is disabled or
        absent; if still empty, the Host request header (or the netloc of
        the current request.base) is used. A scheme is prepended when the
        value does not contain '://'. MUST NOT end in a slash.

    local
        Name of the request header carrying the forwarded host. For Apache
        and lighttpd, leave it at the default 'X-Forwarded-Host'. For
        Squid, you probably want tools.proxy.local = 'Origin'. Only the
        part before the first comma is used. To make request.base include
        path info, set base to the full base path and ALSO set 'local' to
        '', so that the forwarded-host header (which never includes path
        info) does not override it.

    remote
        Name of the request header carrying the client IP; when that
        header has a value, request.remote.ip is rewritten to it (for
        'X-Forwarded-For', to the first comma-separated entry). Set it to
        an empty string to leave remote.ip alone.

    scheme
        Name of the request header carrying the scheme. A header whose
        name contains 'ssl' and whose value is 'on' selects 'https';
        otherwise the header value itself is used. If no scheme results,
        the scheme of the current request.base is used.

    debug
        When True, each header test is written to the CherryPy log under
        the 'TOOLS.PROXY' context.
    """
    request = cherrypy.serving.request

    if scheme:
        forwarded_scheme = request.headers.get(scheme, None)
        if debug:
            cherrypy.log('Testing scheme %r:%r' % (scheme, forwarded_scheme),
                         'TOOLS.PROXY')
        if forwarded_scheme == 'on' and 'ssl' in scheme.lower():
            # This handles e.g. webfaction's 'X-Forwarded-Ssl: on' header
            scheme = 'https'
        else:
            # This is for lighttpd/pound/Mongrel's 'X-Forwarded-Proto: https'
            scheme = forwarded_scheme
    if not scheme:
        current_base = request.base
        scheme = current_base[:current_base.find('://')]

    if local:
        forwarded_host = request.headers.get(local, None)
        if debug:
            cherrypy.log('Testing local %r:%r' % (local, forwarded_host),
                         'TOOLS.PROXY')
        if forwarded_host is not None:
            base = forwarded_host.split(',')[0]
    if not base:
        fallback = urllib.parse.urlparse(request.base).netloc
        base = request.headers.get('Host', fallback)

    if '://' not in base:
        # add http:// or https:// if needed
        base = scheme + '://' + base

    request.base = base

    if not remote:
        return
    client = request.headers.get(remote)
    if debug:
        cherrypy.log('Testing remote %r:%r' % (remote, client), 'TOOLS.PROXY')
    if client:
        if remote == 'X-Forwarded-For':
            # Grab the first IP in a comma-separated list. Ref #1268.
            client = client.split(',')[0].strip()
        request.remote.ip = client
217
218
def ignore_headers(headers=('Range',), debug=False):
    """Remove the named fields from the request headers.

    This is a useful tool for working behind certain HTTP servers;
    for example, Apache duplicates the work that CP does for 'Range'
    headers, and will doubly-truncate the response.

    headers
        Iterable of request header field names to delete when present.

    debug
        When True, each deletion is written to the CherryPy log under the
        'TOOLS.IGNORE_HEADERS' context.
    """
    request_headers = cherrypy.serving.request.headers
    for field in headers:
        if field not in request_headers:
            continue
        if debug:
            cherrypy.log('Ignoring request header %r' % field,
                         'TOOLS.IGNORE_HEADERS')
        del request_headers[field]
233
234
def response_headers(headers=None, debug=False):
    """Set headers on the response.

    headers
        Iterable of (name, value) pairs to assign to the response
        headers; None or an empty value assigns nothing.

    debug
        When True, the headers are written to the CherryPy log under the
        'TOOLS.RESPONSE_HEADERS' context.
    """
    if debug:
        cherrypy.log('Setting response headers: %s' % repr(headers),
                     'TOOLS.RESPONSE_HEADERS')
    pairs = headers if headers else []
    for field, content in pairs:
        cherrypy.serving.response.headers[field] = content


# Marks the function with the 'failsafe' flag.
response_headers.failsafe = True
245
246
def referer(pattern, accept=True, accept_missing=False, error=403,
            message='Forbidden Referer header.', debug=False):
    """Raise HTTPError if Referer header does/does not match the given pattern.

    pattern
        A regular expression pattern to test against the Referer
        (applied with re.match, i.e. anchored at the start).

    accept
        If True, the Referer must match the pattern; if False,
        the Referer must NOT match the pattern.

    accept_missing
        If True, permit requests with no Referer header.

    error
        The HTTP error code to return to the client on failure.

    message
        A string to include in the response body on failure.

    debug
        If True, write the outcome of the test to the CherryPy log under
        the 'TOOLS.REFERER' context.

    """
    try:
        # Only the header lookup can raise KeyError; keep the try minimal.
        ref = cherrypy.serving.request.headers['Referer']
    except KeyError:
        if debug:
            cherrypy.log('No Referer header', 'TOOLS.REFERER')
        if accept_missing:
            return
    else:
        match = bool(re.match(pattern, ref))
        if debug:
            # Report the actual outcome instead of always claiming a match.
            if match:
                cherrypy.log('Referer %r matches %r' % (ref, pattern),
                             'TOOLS.REFERER')
            else:
                cherrypy.log('Referer %r does not match %r' % (ref, pattern),
                             'TOOLS.REFERER')
        if accept == match:
            return

    raise cherrypy.HTTPError(error, message)
283
284
class SessionAuth(object):

    """Assert that the user is logged in.

    run() dispatches on the end of request.path_info: 'login_screen',
    'do_login' and 'do_logout' are handled specially, anything else goes
    through do_check(). The no-op methods below are extension points meant
    to be overridden (by subclassing or by assigning attributes).
    """

    # Session key under which the logged-in user name is stored.
    session_key = 'username'
    # When True, _debug_message writes to the CherryPy log.
    debug = False

    def check_username_and_password(self, username, password):
        """Validate credentials.

        do_login treats a truthy return value as an error message and a
        falsy one as success. This default returns None, so it accepts
        every username/password pair until overridden.
        """
        pass

    def anonymous(self):
        """Provide a temporary user name for anonymous users."""
        pass

    def on_login(self, username):
        """Hook called by do_login after the session has been updated."""
        pass

    def on_logout(self, username):
        """Hook called by do_logout when a user name was in the session."""
        pass

    def on_check(self, username):
        """Hook called by do_check once request.login has been set."""
        pass

    def login_screen(self, from_page='..', username='', error_msg='',
                     **kwargs):
        """Return the login form as UTF-8 encoded HTML bytes.

        from_page, username and error_msg usually originate from request
        parameters, so they are HTML-escaped (including double quotes,
        since two of them land inside attribute values) before being
        placed in the page. Extra keyword arguments are ignored.
        """
        # Function-scope import: available on both Python 2 and 3.
        from xml.sax.saxutils import escape

        def as_html(value):
            # Render the value the way '%s' would, then escape it.
            return escape(u'%s' % (value,), {'"': '&quot;'})

        return (u"""<html><body>
Message: %(error_msg)s
<form method="post" action="do_login">
    Login: <input type="text" name="username" value="%(username)s" size="10" />
    <br />
    Password: <input type="password" name="password" size="10" />
    <br />
    <input type="hidden" name="from_page" value="%(from_page)s" />
    <br />
    <input type="submit" />
</form>
</body></html>""" % {
            'error_msg': as_html(error_msg),
            'username': as_html(username),
            'from_page': as_html(from_page),
        }).encode('utf-8')

    def do_login(self, username, password, from_page='..', **kwargs):
        """Login. May raise redirect, or return True if request handled."""
        response = cherrypy.serving.response
        error_msg = self.check_username_and_password(username, password)
        if error_msg:
            body = self.login_screen(from_page, username, error_msg)
            response.body = body
            if 'Content-Length' in response.headers:
                # Delete Content-Length header so finalize() recalcs it.
                del response.headers['Content-Length']
            return True
        else:
            cherrypy.serving.request.login = username
            cherrypy.session[self.session_key] = username
            self.on_login(username)
            raise cherrypy.HTTPRedirect(from_page or '/')

    def do_logout(self, from_page='..', **kwargs):
        """Logout. May raise redirect, or return True if request handled."""
        sess = cherrypy.session
        username = sess.get(self.session_key)
        sess[self.session_key] = None
        if username:
            cherrypy.serving.request.login = None
            self.on_logout(username)
        raise cherrypy.HTTPRedirect(from_page)

    def do_check(self):
        """Assert username. Raise redirect, or return True if request handled.
        """
        sess = cherrypy.session
        request = cherrypy.serving.request
        response = cherrypy.serving.response

        username = sess.get(self.session_key)
        if not username:
            sess[self.session_key] = username = self.anonymous()
            self._debug_message('No session[username], trying anonymous')
        if not username:
            url = cherrypy.url(qs=request.query_string)
            self._debug_message(
                'No username, routing to login_screen with from_page %(url)r',
                {'url': url},
            )
            response.body = self.login_screen(url)
            if 'Content-Length' in response.headers:
                # Delete Content-Length header so finalize() recalcs it.
                del response.headers['Content-Length']
            return True
        self._debug_message('Setting request.login to %(username)r',
                            {'username': username})
        request.login = username
        self.on_check(username)

    def _debug_message(self, template, context=None):
        """Log ``template % context`` when self.debug is true.

        context defaults to an empty mapping; None is used as the default
        value to avoid a shared mutable default argument.
        """
        if not self.debug:
            return
        if context is None:
            context = {}
        cherrypy.log(template % context, 'TOOLS.SESSAUTH')

    def run(self):
        """Dispatch the current request; return True if it was handled.

        'do_login' and 'do_logout' paths require POST; other methods get
        an 'Allow: POST' header and HTTPError 405.
        """
        request = cherrypy.serving.request
        response = cherrypy.serving.response

        path = request.path_info
        if path.endswith('login_screen'):
            self._debug_message('routing %(path)r to login_screen',
                                {'path': path})
            response.body = self.login_screen()
            return True
        elif path.endswith('do_login'):
            if request.method != 'POST':
                response.headers['Allow'] = 'POST'
                self._debug_message('do_login requires POST')
                raise cherrypy.HTTPError(405)
            self._debug_message('routing %(path)r to do_login',
                                {'path': path})
            return self.do_login(**request.params)
        elif path.endswith('do_logout'):
            if request.method != 'POST':
                response.headers['Allow'] = 'POST'
                raise cherrypy.HTTPError(405)
            self._debug_message('routing %(path)r to do_logout',
                                {'path': path})
            return self.do_logout(**request.params)
        else:
            self._debug_message('No special path, running do_check')
            return self.do_check()
406
407
def session_auth(**kwargs):
    # The docstring is assembled and assigned right after this definition.
    auth = SessionAuth()
    for attr_name, attr_value in kwargs.items():
        setattr(auth, attr_name, attr_value)
    return auth.run()


# Build the docstring dynamically so it lists every non-dunder attribute
# of SessionAuth together with the type name of its class-level value.
session_auth.__doc__ = (
    """Session authentication hook.

    Any attribute of the SessionAuth class may be overridden via a keyword arg
    to this function:

    """ + '\n'.join(
        '%s: %s' % (attr, type(getattr(SessionAuth, attr)).__name__)
        for attr in dir(SessionAuth)
        if not attr.startswith('__')
    )
)
424
425
def log_traceback(severity=logging.ERROR, debug=False):
    """Log the last error's traceback to the cherrypy error log.

    severity
        Logging level passed on to cherrypy.log (default logging.ERROR).

    debug
        Accepted but not used by this function.
    """
    cherrypy.log('', 'HTTP', traceback=True, severity=severity)
429
430
def log_request_headers(debug=False):
    """Log every entry of request.header_list to the cherrypy error log.

    Each header is written on its own indented 'name: value' line under
    the 'HTTP' context. The debug argument is accepted but not used.
    """
    header_lines = '\n'.join(
        '  %s: %s' % (name, value)
        for name, value in cherrypy.serving.request.header_list
    )
    cherrypy.log('\nRequest Headers:\n' + header_lines, 'HTTP')
435
436
def log_hooks(debug=False):
    """Write request.hooks to the cherrypy error log.

    Hook points are listed in the order of ``_cprequest.hookpoints``,
    followed by any extra points found only in request.hooks. The hooks
    of each point are listed in sorted order. Logging does not modify
    ``_cprequest.hookpoints`` or the request's hook lists. The debug
    argument is accepted but not used.
    """
    request = cherrypy.serving.request

    # Sort by the standard points if possible.
    from cherrypy import _cprequest
    # Copy the list: hookpoints is module-level state, and appending
    # request-specific points to it would leak them into the module.
    points = list(_cprequest.hookpoints)
    for k in request.hooks.keys():
        if k not in points:
            points.append(k)

    msg = []
    for k in points:
        msg.append('    %s:' % k)
        # sorted() leaves the request's own hook list untouched.
        for h in sorted(request.hooks.get(k, [])):
            msg.append('        %r' % h)
    cherrypy.log('\nRequest Hooks for ' + cherrypy.url() +
                 ':\n' + '\n'.join(msg), 'HTTP')
457
458
def redirect(url='', internal=True, debug=False):
    """Raise InternalRedirect or HTTPRedirect to the given url.

    url
        Target passed to the raised redirect exception.

    internal
        If true, raise cherrypy.InternalRedirect; otherwise raise
        cherrypy.HTTPRedirect.

    debug
        When True, the redirect is written to the CherryPy log under the
        'TOOLS.REDIRECT' context first.
    """
    if debug:
        kind = {True: 'internal ', False: ''}[internal]
        cherrypy.log('Redirecting %sto: %s' % (kind, url), 'TOOLS.REDIRECT')

    if internal:
        exc_class = cherrypy.InternalRedirect
    else:
        exc_class = cherrypy.HTTPRedirect
    raise exc_class(url)
469
470
def trailing_slash(missing=True, extra=False, status=None, debug=False):
    """Redirect if path_info has (missing|extra) trailing slash.

    missing
        If true and request.is_index is True, redirect to the path with
        a trailing slash appended when it lacks one.

    extra
        If true and request.is_index is False, redirect to the path with
        its trailing slash removed (never for the bare '/' path).

    status
        Status for the HTTPRedirect; 301 is used when it is falsy.

    debug
        When True, the inputs are written to the CherryPy log under the
        'TOOLS.TRAILING_SLASH' context.

    Nothing happens when request.is_index is neither True nor False.
    """
    request = cherrypy.serving.request
    path = request.path_info
    is_index = request.is_index

    if debug:
        cherrypy.log('is_index: %r, missing: %r, extra: %r, path_info: %r' %
                     (is_index, missing, extra, path),
                     'TOOLS.TRAILING_SLASH')

    corrected = None
    if is_index is True:
        if missing and not path.endswith('/'):
            corrected = path + '/'
    elif is_index is False:
        # If path == '/', don't redirect to ''!
        if extra and path.endswith('/') and path != '/':
            corrected = path[:-1]

    if corrected is not None:
        new_url = cherrypy.url(corrected, request.query_string)
        raise cherrypy.HTTPRedirect(new_url, status=status or 301)
491
492
def flatten(debug=False):
    """Replace response.body with a generator that flattens nested iterators.

    This allows cherrypy.response.body to consist of 'nested generators';
    that is, a set of generators that yield generators. Items for which
    is_iterator() is true are expanded recursively; all other items are
    yielded unchanged.

    debug
        When True, each (possibly nested) flattening pass logs its chunk
        count under the 'TOOLS.FLATTEN' context once it is exhausted.
    """
    def flattener(chunks):
        emitted = 0
        for item in chunks:
            if is_iterator(item):
                for leaf in flattener(item):
                    emitted += 1
                    yield leaf
            else:
                emitted += 1
                yield item
        if debug:
            cherrypy.log('Flattened %d chunks' % emitted, 'TOOLS.FLATTEN')

    response = cherrypy.serving.response
    response.body = flattener(response.body)
513
514
def accept(media=None, debug=False):
    """Return the client's preferred media-type (from the given Content-Types).

    If 'media' is None (the default) or otherwise empty, no test is
    performed and None is returned.

    If 'media' is provided, it should be the Content-Type value (as a string)
    or values (as a list or tuple of strings) which the current resource
    can emit. The client's acceptable media ranges (as declared in the
    Accept request header) will be matched in order to these Content-Type
    values; the first such string is returned. That is, the return value
    will always be one of the strings provided in the 'media' arg.

    If no match is found, then HTTPError 406 (Not Acceptable) is raised.
    Note that most web browsers send */* as a (low-quality) acceptable
    media range, which should match any Content-Type. In addition, "...if
    no Accept header field is present, then it is assumed that the client
    accepts all media types."

    Matching types are checked in order of client preference first,
    and then in the order of the given 'media' values. Ranges whose
    qvalue is not greater than zero are skipped.

    Note that this function does not honor accept-params (other than "q").

    When 'debug' is True, the reason for a match is written to the
    CherryPy log under the 'TOOLS.ACCEPT' context.
    """
    if not media:
        return
    if isinstance(media, text_or_bytes):
        media = [media]
    request = cherrypy.serving.request

    def note(template, *args):
        # Emit a debug line only when the caller asked for it.
        if debug:
            cherrypy.log(template % args, 'TOOLS.ACCEPT')

    # Parse the Accept request header, and try to match one
    # of the requested media-ranges (in order of preference).
    ranges = request.headers.elements('Accept')
    if not ranges:
        # Any media type is acceptable.
        note('No Accept header elements')
        return media[0]

    # Note that 'ranges' is sorted in order of preference
    for element in ranges:
        if not element.qvalue > 0:
            continue
        wanted = element.value
        if wanted == '*/*':
            # Matches any type or subtype
            note('Match due to */*')
            return media[0]
        if wanted.endswith('/*'):
            # Matches any subtype
            type_prefix = wanted[:-1]  # Keep the slash
            for candidate in media:
                if candidate.startswith(type_prefix):
                    note('Match due to %s', wanted)
                    return candidate
        elif wanted in media:
            # Matches exact value
            note('Match due to %s', wanted)
            return wanted

    # No suitable media-range found.
    accept_header = request.headers.get('Accept')
    if accept_header is None:
        msg = 'Your client did not send an Accept header.'
    else:
        msg = 'Your client sent this Accept header: %s.' % accept_header
    msg += (' But this resource only emits these media types: %s.' %
            ', '.join(media))
    raise cherrypy.HTTPError(406, msg)
588
589
class MonitoredHeaderMap(_httputil.HeaderMap):

    """HeaderMap that records every key passed to transform_key.

    The recorded keys are collected, untransformed, in the
    ``accessed_headers`` set.
    """

    def __init__(self):
        # Create the set first so transform_key can use it if the parent
        # initializer happens to call it.
        self.accessed_headers = set()
        super(MonitoredHeaderMap, self).__init__()

    def transform_key(self, key):
        """Remember ``key``, then defer to the parent's transform_key."""
        self.accessed_headers.add(key)
        return super(MonitoredHeaderMap, self).transform_key(key)
599
600
def autovary(ignore=None, debug=False):
    """Auto-populate the Vary response header based on request.header access.

    request.headers is replaced with a MonitoredHeaderMap holding the same
    entries, and a 'before_finalize' hook (priority 95) is attached that
    sets the Vary response header to the sorted union of its existing
    values and the recorded header names, minus those in 'ignore'.

    ignore
        Header names to leave out of Vary. Defaults to
        'Content-Disposition', 'Content-Length' and 'Content-Type'.

    debug
        When True, the recorded header names are written to the CherryPy
        log under the 'TOOLS.AUTOVARY' context.
    """
    request = cherrypy.serving.request

    original_headers = request.headers
    request.headers = MonitoredHeaderMap()
    request.headers.update(original_headers)
    if ignore is None:
        ignore = {'Content-Disposition', 'Content-Length', 'Content-Type'}

    def set_response_header():
        response_headers = cherrypy.serving.response.headers
        existing = {e.value for e in response_headers.elements('Vary')}
        if debug:
            cherrypy.log(
                'Accessed headers: %s' % request.headers.accessed_headers,
                'TOOLS.AUTOVARY')
        names = existing.union(request.headers.accessed_headers)
        response_headers['Vary'] = ', '.join(sorted(names.difference(ignore)))

    request.hooks.attach('before_finalize', set_response_header, 95)
625
626
def convert_params(exception=ValueError, error=400):
    """Convert request params based on function annotations, with error handling.

    Every request parameter whose name has an annotation on the handler's
    callable is replaced by the result of calling that annotation with
    the parameter's current value.

    exception
        Exception class to catch.

    error
        The HTTP error code to return to the client on failure.

    If the current handler has no ``callable`` attribute, or the callable
    carries no annotations, the request params are left untouched.
    """
    request = cherrypy.serving.request
    # Look the annotations up defensively: the handler may lack a
    # 'callable' attribute, and the callable may lack '__annotations__'.
    handler_callable = getattr(request.handler, 'callable', None)
    types = getattr(handler_callable, '__annotations__', None)
    if not types:
        return
    with cherrypy.HTTPError.handle(exception, error):
        for key in set(types).intersection(request.params):
            request.params[key] = types[key](request.params[key])
641