1"""Tools
2
3This module implements tools used throughout circuits.web.
4These tools can also be used within Controlelrs and request handlers.
5"""
6
7import os
8import stat
9import hashlib
10import mimetypes
11import collections
12from time import mktime
13from email.utils import formatdate
14from datetime import datetime, timedelta
15from email.generator import _make_boundary
16
17from circuits import BaseComponent, handler
18from circuits.web.wrappers import Host
19
20mimetypes.init()
21mimetypes.add_type("image/x-dwg", ".dwg")
22mimetypes.add_type("image/x-icon", ".ico")
23mimetypes.add_type("text/javascript", ".js")
24mimetypes.add_type("application/xhtml+xml", ".xhtml")
25
26from . import _httpauth
27from .utils import get_ranges, compress
28from .errors import httperror, notfound, redirect, unauthorized
29
30
31def expires(request, response, secs=0, force=False):
32    """Tool for influencing cache mechanisms using the 'Expires' header.
33
34    'secs' must be either an int or a datetime.timedelta, and indicates the
35    number of seconds between response.time and when the response should
36    expire. The 'Expires' header will be set to (response.time + secs).
37
38    If 'secs' is zero, the 'Expires' header is set one year in the past, and
39    the following "cache prevention" headers are also set:
40    - 'Pragma': 'no-cache'
41    - 'Cache-Control': 'no-cache, must-revalidate'
42
43    If 'force' is False (the default), the following headers are checked:
44    'Etag', 'Last-Modified', 'Age', 'Expires'. If any are already present,
45    none of the above response headers are set.
46    """
47
48    headers = response.headers
49
50    cacheable = False
51    if not force:
52        # some header names that indicate that the response can be cached
53        for indicator in ('Etag', 'Last-Modified', 'Age', 'Expires'):
54            if indicator in headers:
55                cacheable = True
56                break
57
58    if not cacheable:
59        if isinstance(secs, timedelta):
60            secs = (86400 * secs.days) + secs.seconds
61
62        if secs == 0:
63            if force or "Pragma" not in headers:
64                headers["Pragma"] = "no-cache"
65            if request.protocol >= (1, 1):
66                if force or "Cache-Control" not in headers:
67                    headers["Cache-Control"] = "no-cache, must-revalidate"
68            # Set an explicit Expires date in the past.
69            now = datetime.now()
70            lastyear = now.replace(year=now.year - 1)
71            expiry = formatdate(
72                mktime(lastyear.timetuple()), usegmt=True
73            )
74        else:
75            expiry = formatdate(response.time + secs, usegmt=True)
76        if force or "Expires" not in headers:
77            headers["Expires"] = expiry
78
79
80def serve_file(request, response, path, type=None, disposition=None,
81               name=None):
82    """Set status, headers, and body in order to serve the given file.
83
84    The Content-Type header will be set to the type arg, if provided.
85    If not provided, the Content-Type will be guessed by the file extension
86    of the 'path' argument.
87
88    If disposition is not None, the Content-Disposition header will be set
89    to "<disposition>; filename=<name>". If name is None, it will be set
90    to the basename of path. If disposition is None, no Content-Disposition
91    header will be written.
92    """
93
94    if not os.path.isabs(path):
95        raise ValueError("'%s' is not an absolute path." % path)
96
97    try:
98        st = os.stat(path)
99    except OSError:
100        return notfound(request, response)
101
102    # Check if path is a directory.
103    if stat.S_ISDIR(st.st_mode):
104        # Let the caller deal with it as they like.
105        return notfound(request, response)
106
107    # Set the Last-Modified response header, so that
108    # modified-since validation code can work.
109    response.headers['Last-Modified'] = formatdate(
110        st.st_mtime, usegmt=True
111    )
112
113    result = validate_since(request, response)
114    if result is not None:
115        return result
116
117    if type is None:
118        # Set content-type based on filename extension
119        ext = ""
120        i = path.rfind('.')
121        if i != -1:
122            ext = path[i:].lower()
123        type = mimetypes.types_map.get(ext, "text/plain")
124    response.headers['Content-Type'] = type
125
126    if disposition is not None:
127        if name is None:
128            name = os.path.basename(path)
129        cd = '%s; filename="%s"' % (disposition, name)
130        response.headers["Content-Disposition"] = cd
131
132    # Set Content-Length and use an iterable (file object)
133    #   this way CP won't load the whole file in memory
134    c_len = st.st_size
135    bodyfile = open(path, 'rb')
136
137    # HTTP/1.0 didn't have Range/Accept-Ranges headers, or the 206 code
138    if request.protocol >= (1, 1):
139        response.headers["Accept-Ranges"] = "bytes"
140        r = get_ranges(request.headers.get('Range'), c_len)
141        if r == []:
142            response.headers['Content-Range'] = "bytes */%s" % c_len
143            return httperror(request, response, 416)
144        if r:
145            if len(r) == 1:
146                # Return a single-part response.
147                start, stop = r[0]
148                r_len = stop - start
149                response.status = 206
150                response.headers['Content-Range'] = (
151                    "bytes %s-%s/%s" % (start, stop - 1, c_len)
152                )
153                response.headers['Content-Length'] = r_len
154                bodyfile.seek(start)
155                response.body = bodyfile.read(r_len)
156            else:
157                # Return a multipart/byteranges response.
158                response.status = 206
159                boundary = _make_boundary()
160                ct = "multipart/byteranges; boundary=%s" % boundary
161                response.headers['Content-Type'] = ct
162                if "Content-Length" in response.headers:
163                    # Delete Content-Length header so finalize() recalcs it.
164                    del response.headers["Content-Length"]
165
166                def file_ranges():
167                    # Apache compatibility:
168                    yield "\r\n"
169
170                    for start, stop in r:
171                        yield "--" + boundary
172                        yield "\r\nContent-type: %s" % type
173                        yield ("\r\nContent-range: bytes %s-%s/%s\r\n\r\n"
174                               % (start, stop - 1, c_len))
175                        bodyfile.seek(start)
176                        yield bodyfile.read(stop - start)
177                        yield "\r\n"
178                    # Final boundary
179                    yield "--" + boundary + "--"
180
181                    # Apache compatibility:
182                    yield "\r\n"
183                response.body = file_ranges()
184        else:
185            response.headers['Content-Length'] = c_len
186            response.body = bodyfile
187    else:
188        response.headers['Content-Length'] = c_len
189        response.body = bodyfile
190
191    return response
192
193
194def serve_download(request, response, path, name=None):
195    """Serve 'path' as an application/x-download attachment."""
196
197    type = "application/x-download"
198    disposition = "attachment"
199
200    return serve_file(request, response, path, type, disposition, name)
201
202
203def validate_etags(request, response, autotags=False):
204    """Validate the current ETag against If-Match, If-None-Match headers.
205
206    If autotags is True, an ETag response-header value will be provided
207    from an MD5 hash of the response body (unless some other code has
208    already provided an ETag header). If False (the default), the ETag
209    will not be automatic.
210
211    WARNING: the autotags feature is not designed for URL's which allow
212    methods other than GET. For example, if a POST to the same URL returns
213    no content, the automatic ETag will be incorrect, breaking a fundamental
214    use for entity tags in a possibly destructive fashion. Likewise, if you
215    raise 304 Not Modified, the response body will be empty, the ETag hash
216    will be incorrect, and your application will break.
217    See http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.24
218    """
219
220    # Guard against being run twice.
221    if hasattr(response, "ETag"):
222        return
223
224    status = response.status
225
226    etag = response.headers.get('ETag')
227
228    # Automatic ETag generation. See warning in docstring.
229    if (not etag) and autotags:
230        if status == 200:
231            etag = response.collapse_body()
232            etag = '"%s"' % hashlib.md5.new(etag).hexdigest()
233            response.headers['ETag'] = etag
234
235    response.ETag = etag
236
237    # "If the request would, without the If-Match header field, result in
238    # anything other than a 2xx or 412 status, then the If-Match header
239    # MUST be ignored."
240    if status >= 200 and status <= 299:
241        conditions = request.headers.elements('If-Match') or []
242        conditions = [str(x) for x in conditions]
243        if conditions and not (conditions == ["*"] or etag in conditions):
244            return httperror(
245                request, response, 412,
246                description="If-Match failed: ETag %r did not match %r" % (
247                    etag, conditions
248                )
249            )
250
251        conditions = request.headers.elements('If-None-Match') or []
252        conditions = [str(x) for x in conditions]
253        if conditions == ["*"] or etag in conditions:
254            if request.method in ("GET", "HEAD"):
255                return redirect(request, response, [], code=304)
256            else:
257                return httperror(
258                    request, response, 412,
259                    description=(
260                        "If-None-Match failed: ETag %r matched %r" % (
261                            etag, conditions
262                        )
263                    )
264                )
265
266
267def validate_since(request, response):
268    """Validate the current Last-Modified against If-Modified-Since headers.
269
270    If no code has set the Last-Modified response header, then no validation
271    will be performed.
272    """
273
274    lastmod = response.headers.get('Last-Modified')
275    if lastmod:
276        status = response.status
277
278        since = request.headers.get('If-Unmodified-Since')
279        if since and since != lastmod:
280            if (status >= 200 and status <= 299) or status == 412:
281                return httperror(request, response, 412)
282
283        since = request.headers.get('If-Modified-Since')
284        if since and since == lastmod:
285            if (status >= 200 and status <= 299) or status == 304:
286                if request.method in ("GET", "HEAD"):
287                    return redirect(request, response, [], code=304)
288                else:
289                    return httperror(request, response, 412)
290
291
292def check_auth(request, response, realm, users, encrypt=None):
293    """Check Authentication
294
295    If an Authorization header contains credentials, return True, else False.
296
297    :param realm: The authentication realm.
298    :type  realm: str
299
300    :param users: A dict of the form: {username: password} or a callable
301                  returning a dict.
302    :type  users: dict or callable
303
304    :param encrypt: Callable used to encrypt the password returned from
305                    the user-agent. if None it defaults to a md5 encryption.
306    :type  encrypt: callable
307    """
308
309    if "Authorization" in request.headers:
310        # make sure the provided credentials are correctly set
311        ah = _httpauth.parseAuthorization(request.headers.get("Authorization"))
312        if ah is None:
313            return httperror(request, response, 400)
314
315        if not encrypt:
316            encrypt = _httpauth.DIGEST_AUTH_ENCODERS[_httpauth.MD5]
317
318        if isinstance(users, collections.Callable):
319            try:
320                # backward compatibility
321                users = users()  # expect it to return a dictionary
322
323                if not isinstance(users, dict):
324                    raise ValueError("Authentication users must be a dict")
325
326                # fetch the user password
327                password = users.get(ah["username"], None)
328            except TypeError:
329                # returns a password (encrypted or clear text)
330                password = users(ah["username"])
331        else:
332            if not isinstance(users, dict):
333                raise ValueError("Authentication users must be a dict")
334
335            # fetch the user password
336            password = users.get(ah["username"], None)
337
338        # validate the Authorization by re-computing it here
339        # and compare it with what the user-agent provided
340        if _httpauth.checkResponse(ah, password, method=request.method,
341                                   encrypt=encrypt, realm=realm):
342            request.login = ah["username"]
343            return True
344
345        request.login = False
346    return False
347
348
349def basic_auth(request, response, realm, users, encrypt=None):
350    """Perform Basic Authentication
351
352    If auth fails, returns an Unauthorized error  with a
353    basic authentication header.
354
355    :param realm: The authentication realm.
356    :type  realm: str
357
358    :param users: A dict of the form: {username: password} or a callable
359                  returning a dict.
360    :type  users: dict or callable
361
362    :param encrypt: Callable used to encrypt the password returned from
363                    the user-agent. if None it defaults to a md5 encryption.
364    :type  encrypt: callable
365    """
366
367    if check_auth(request, response, realm, users, encrypt):
368        return
369
370    # inform the user-agent this path is protected
371    response.headers["WWW-Authenticate"] = _httpauth.basicAuth(realm)
372
373    return unauthorized(request, response)
374
375
376def digest_auth(request, response, realm, users):
377    """Perform Digest Authentication
378
379    If auth fails, raise 401 with a digest authentication header.
380
381    :param realm: The authentication realm.
382    :type  realm: str
383
384    :param users: A dict of the form: {username: password} or a callable
385                  returning a dict.
386    :type  users: dict or callable
387    """
388
389    if check_auth(request, response, realm, users):
390        return
391
392    # inform the user-agent this path is protected
393    response.headers["WWW-Authenticate"] = _httpauth.digestAuth(realm)
394
395    return unauthorized(request, response)
396
397
398def gzip(response, level=4, mime_types=("text/html", "text/plain",)):
399    """Try to gzip the response body if Content-Type in mime_types.
400
401    response.headers['Content-Type'] must be set to one of the
402    values in the mime_types arg before calling this function.
403
404    No compression is performed if any of the following hold:
405        * The client sends no Accept-Encoding request header
406        * No 'gzip' or 'x-gzip' is present in the Accept-Encoding header
407        * No 'gzip' or 'x-gzip' with a qvalue > 0 is present
408        * The 'identity' value is given with a qvalue > 0.
409    """
410
411    if not response.body:
412        # Response body is empty (might be a 304 for instance)
413        return response
414
415    # If returning cached content (which should already have been gzipped),
416    # don't re-zip.
417    if getattr(response.request, "cached", False):
418        return response
419
420    acceptable = response.request.headers.elements('Accept-Encoding')
421    if not acceptable:
422        # If no Accept-Encoding field is present in a request,
423        # the server MAY assume that the client will accept any
424        # content coding. In this case, if "identity" is one of
425        # the available content-codings, then the server SHOULD use
426        # the "identity" content-coding, unless it has additional
427        # information that a different content-coding is meaningful
428        # to the client.
429        return response
430
431    ct = response.headers.get('Content-Type', 'text/html').split(';')[0]
432    for coding in acceptable:
433        if coding.value == 'identity' and coding.qvalue != 0:
434            return response
435        if coding.value in ('gzip', 'x-gzip'):
436            if coding.qvalue == 0:
437                return response
438            if ct in mime_types:
439                # Return a generator that compresses the page
440                varies = response.headers.get("Vary", "")
441                varies = [x.strip() for x in varies.split(",") if x.strip()]
442                if "Accept-Encoding" not in varies:
443                    varies.append("Accept-Encoding")
444                response.headers['Vary'] = ", ".join(varies)
445
446                response.headers['Content-Encoding'] = 'gzip'
447                response.body = compress(response.body, level)
448                if "Content-Length" in response.headers:
449                    # Delete Content-Length header so finalize() recalcs it.
450                    del response.headers["Content-Length"]
451            return response
452    return httperror(
453        response.request, response, 406, description="identity, gzip"
454    )
455
456
457class ReverseProxy(BaseComponent):
458
459    headers = ('X-Real-IP', 'X-Forwarded-For')
460
461    def init(self, headers=None):
462        """Web Component for identifying the original client IP when a reverse proxy is used
463
464        :param headers: List of HTTP headers to read the original client IP
465        """
466
467        if headers:
468            self.headers = headers
469
470    @handler('request', priority=1)
471    def _on_request(self, req, *_):
472        ip = [v for v in map(req.headers.get, self.headers) if v]
473        req.remote = ip and Host(ip[0], "", ip[0]) or req.remote
474