"""Tools

This module implements tools used throughout circuits.web.
These tools can also be used within Controllers and request handlers.
"""

import os
import stat
import hashlib
import mimetypes
import collections
from time import mktime
from email.utils import formatdate
from datetime import datetime, timedelta
from email.generator import _make_boundary

from circuits import BaseComponent, handler
from circuits.web.wrappers import Host

from . import _httpauth
from .utils import get_ranges, compress
from .errors import httperror, notfound, redirect, unauthorized

mimetypes.init()
mimetypes.add_type("image/x-dwg", ".dwg")
mimetypes.add_type("image/x-icon", ".ico")
mimetypes.add_type("text/javascript", ".js")
mimetypes.add_type("application/xhtml+xml", ".xhtml")


def expires(request, response, secs=0, force=False):
    """Tool for influencing cache mechanisms using the 'Expires' header.

    'secs' must be either an int or a datetime.timedelta, and indicates the
    number of seconds between response.time and when the response should
    expire. The 'Expires' header will be set to (response.time + secs).

    If 'secs' is zero, the 'Expires' header is set one year (365 days) in
    the past, and the following "cache prevention" headers are also set:
       - 'Pragma': 'no-cache'
       - 'Cache-Control': 'no-cache, must-revalidate' (HTTP/1.1+ only)

    If 'force' is False (the default), the following headers are checked:
    'Etag', 'Last-Modified', 'Age', 'Expires'. If any are already present,
    none of the above response headers are set.
    """

    headers = response.headers

    if not force:
        # Some header names that indicate that the response can be cached;
        # if one is already present, leave the response untouched.
        for indicator in ('Etag', 'Last-Modified', 'Age', 'Expires'):
            if indicator in headers:
                return

    if isinstance(secs, timedelta):
        secs = (86400 * secs.days) + secs.seconds

    if secs == 0:
        if force or "Pragma" not in headers:
            headers["Pragma"] = "no-cache"
        if request.protocol >= (1, 1):
            if force or "Cache-Control" not in headers:
                headers["Cache-Control"] = "no-cache, must-revalidate"
        # Set an explicit Expires date in the past. A timedelta is used
        # rather than replace(year=year - 1), which raises ValueError
        # when "now" is February 29th.
        lastyear = datetime.now() - timedelta(days=365)
        expiry = formatdate(mktime(lastyear.timetuple()), usegmt=True)
    else:
        expiry = formatdate(response.time + secs, usegmt=True)

    if force or "Expires" not in headers:
        headers["Expires"] = expiry


def serve_file(request, response, path, type=None, disposition=None,
               name=None):
    """Set status, headers, and body in order to serve the given file.

    The Content-Type header will be set to the type arg, if provided.
    If not provided, the Content-Type will be guessed by the file extension
    of the 'path' argument.

    If disposition is not None, the Content-Disposition header will be set
    to "<disposition>; filename=<name>". If name is None, it will be set
    to the basename of path. If disposition is None, no Content-Disposition
    header will be written.

    Returns the response on success, or the result of notfound() /
    httperror() / validate_since() when the file is missing, is a
    directory, fails conditional validation, or the requested byte range
    cannot be satisfied.

    Raises ValueError if 'path' is not absolute.
    """

    if not os.path.isabs(path):
        raise ValueError("'%s' is not an absolute path." % path)

    try:
        st = os.stat(path)
    except OSError:
        return notfound(request, response)

    # Check if path is a directory.
    if stat.S_ISDIR(st.st_mode):
        # Let the caller deal with it as they like.
        return notfound(request, response)

    # Set the Last-Modified response header, so that
    # modified-since validation code can work.
    response.headers['Last-Modified'] = formatdate(
        st.st_mtime, usegmt=True
    )

    result = validate_since(request, response)
    if result is not None:
        return result

    if type is None:
        # Set content-type based on filename extension. splitext() only
        # looks at the last path component, so a dot in a directory name
        # is never mistaken for an extension.
        ext = os.path.splitext(path)[1].lower()
        type = mimetypes.types_map.get(ext, "text/plain")
    response.headers['Content-Type'] = type

    if disposition is not None:
        if name is None:
            name = os.path.basename(path)
        cd = '%s; filename="%s"' % (disposition, name)
        response.headers["Content-Disposition"] = cd

    c_len = st.st_size

    # HTTP/1.0 didn't have Range/Accept-Ranges headers, or the 206 code.
    # Ranges are resolved *before* the file is opened so that the 416
    # path does not leak an open file handle.
    ranges = None
    if request.protocol >= (1, 1):
        response.headers["Accept-Ranges"] = "bytes"
        ranges = get_ranges(request.headers.get('Range'), c_len)
        if ranges == []:
            response.headers['Content-Range'] = "bytes */%s" % c_len
            return httperror(request, response, 416)

    if not ranges:
        # Whole file: set Content-Length and use an iterable (file object)
        # so the whole file is not loaded into memory here.
        response.headers['Content-Length'] = c_len
        response.body = open(path, 'rb')
        return response

    response.status = 206

    if len(ranges) == 1:
        # Return a single-part response.
        start, stop = ranges[0]
        r_len = stop - start
        response.headers['Content-Range'] = (
            "bytes %s-%s/%s" % (start, stop - 1, c_len)
        )
        response.headers['Content-Length'] = r_len
        # The requested slice is read eagerly, so the file can be closed
        # right away.
        with open(path, 'rb') as bodyfile:
            bodyfile.seek(start)
            response.body = bodyfile.read(r_len)
        return response

    # Return a multipart/byteranges response.
    boundary = _make_boundary()
    part_type = type
    ct = "multipart/byteranges; boundary=%s" % boundary
    response.headers['Content-Type'] = ct
    if "Content-Length" in response.headers:
        # Delete Content-Length header so finalize() recalcs it.
        del response.headers["Content-Length"]

    bodyfile = open(path, 'rb')

    def file_ranges():
        # NOTE(review): text chunks and the bytes read from the file are
        # yielded side by side, exactly as before; confirm the consumer
        # of response.body copes with mixed str/bytes.
        try:
            # Apache compatibility:
            yield "\r\n"

            for start, stop in ranges:
                yield "--" + boundary
                yield "\r\nContent-type: %s" % part_type
                yield ("\r\nContent-range: bytes %s-%s/%s\r\n\r\n"
                       % (start, stop - 1, c_len))
                bodyfile.seek(start)
                yield bodyfile.read(stop - start)
                yield "\r\n"
            # Final boundary
            yield "--" + boundary + "--"

            # Apache compatibility:
            yield "\r\n"
        finally:
            # Release the handle once the body has been fully streamed
            # (or the generator is closed early).
            bodyfile.close()

    response.body = file_ranges()
    return response


def serve_download(request, response, path, name=None):
    """Serve 'path' as an application/x-download attachment."""

    return serve_file(
        request, response, path,
        "application/x-download", "attachment", name
    )


def validate_etags(request, response, autotags=False):
    """Validate the current ETag against If-Match, If-None-Match headers.

    If autotags is True, an ETag response-header value will be provided
    from an MD5 hash of the response body (unless some other code has
    already provided an ETag header). If False (the default), the ETag
    will not be automatic.

    WARNING: the autotags feature is not designed for URL's which allow
    methods other than GET. For example, if a POST to the same URL returns
    no content, the automatic ETag will be incorrect, breaking a fundamental
    use for entity tags in a possibly destructive fashion. Likewise, if you
    raise 304 Not Modified, the response body will be empty, the ETag hash
    will be incorrect, and your application will break.
    See http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.24

    Returns None when no precondition applies, otherwise the result of
    redirect() (304) or httperror() (412).
    """

    # Guard against being run twice.
    if hasattr(response, "ETag"):
        return

    status = response.status

    etag = response.headers.get('ETag')

    # Automatic ETag generation. See warning in docstring.
    if (not etag) and autotags:
        if status == 200:
            body = response.collapse_body()
            # hashlib digests operate on bytes; encode text bodies first.
            if not isinstance(body, bytes):
                body = body.encode("utf-8")
            etag = '"%s"' % hashlib.md5(body).hexdigest()
            response.headers['ETag'] = etag

    response.ETag = etag

    # "If the request would, without the If-Match header field, result in
    # anything other than a 2xx or 412 status, then the If-Match header
    # MUST be ignored."
    if 200 <= status <= 299:
        conditions = request.headers.elements('If-Match') or []
        conditions = [str(x) for x in conditions]
        if conditions and not (conditions == ["*"] or etag in conditions):
            return httperror(
                request, response, 412,
                description="If-Match failed: ETag %r did not match %r" % (
                    etag, conditions
                )
            )

        conditions = request.headers.elements('If-None-Match') or []
        conditions = [str(x) for x in conditions]
        if conditions == ["*"] or etag in conditions:
            if request.method in ("GET", "HEAD"):
                return redirect(request, response, [], code=304)
            else:
                return httperror(
                    request, response, 412,
                    description=(
                        "If-None-Match failed: ETag %r matched %r" % (
                            etag, conditions
                        )
                    )
                )


def validate_since(request, response):
    """Validate the current Last-Modified against If-Modified-Since headers.

    If no code has set the Last-Modified response header, then no validation
    will be performed.

    The header values are compared as plain strings. Returns None when
    nothing needs to be done, otherwise the result of redirect() (304)
    or httperror() (412).
    """

    lastmod = response.headers.get('Last-Modified')
    if not lastmod:
        return

    status = response.status
    is_success = 200 <= status <= 299

    since = request.headers.get('If-Unmodified-Since')
    if since and since != lastmod:
        if is_success or status == 412:
            return httperror(request, response, 412)

    since = request.headers.get('If-Modified-Since')
    if since and since == lastmod:
        if is_success or status == 304:
            if request.method in ("GET", "HEAD"):
                return redirect(request, response, [], code=304)
            else:
                return httperror(request, response, 412)


def check_auth(request, response, realm, users, encrypt=None):
    """Check Authentication

    If an Authorization header contains valid credentials, return True,
    else False. If the Authorization header is present but cannot be
    parsed, the result of httperror(request, response, 400) is returned
    instead. request.login is set to the username on success and to
    False when the supplied credentials are rejected.

    :param realm: The authentication realm.
    :type  realm: str

    :param users: A dict of the form: {username: password} or a callable
                  returning a dict.
    :type  users: dict or callable

    :param encrypt: Callable used to encrypt the password returned from
                    the user-agent. if None it defaults to a md5 encryption.
    :type  encrypt: callable
    """

    if "Authorization" in request.headers:
        # make sure the provided credentials are correctly set
        ah = _httpauth.parseAuthorization(request.headers.get("Authorization"))
        if ah is None:
            return httperror(request, response, 400)

        if not encrypt:
            encrypt = _httpauth.DIGEST_AUTH_ENCODERS[_httpauth.MD5]

        # The builtin callable() is used because collections.Callable was
        # removed in Python 3.10.
        if callable(users):
            try:
                # backward compatibility
                users = users()  # expect it to return a dictionary

                if not isinstance(users, dict):
                    raise ValueError("Authentication users must be a dict")

                # fetch the user password
                password = users.get(ah["username"], None)
            except TypeError:
                # returns a password (encrypted or clear text)
                password = users(ah["username"])
        else:
            if not isinstance(users, dict):
                raise ValueError("Authentication users must be a dict")

            # fetch the user password
            password = users.get(ah["username"], None)

        # validate the Authorization by re-computing it here
        # and compare it with what the user-agent provided
        if _httpauth.checkResponse(ah, password, method=request.method,
                                   encrypt=encrypt, realm=realm):
            request.login = ah["username"]
            return True

        request.login = False
    return False


def _challenge_unless_authenticated(request, response, result, challenge):
    """Turn a check_auth() result into the value an auth tool returns.

    Only an explicit True counts as success (None is returned). Any other
    non-False result, such as the error produced for a malformed
    Authorization header, is handed back to the caller unchanged rather
    than being mistaken for a successful login. Otherwise the
    WWW-Authenticate header is set to 'challenge' and unauthorized() is
    returned.
    """

    if result is True:
        return None
    if result is not False and result is not None:
        return result

    # inform the user-agent this path is protected
    response.headers["WWW-Authenticate"] = challenge

    return unauthorized(request, response)


def basic_auth(request, response, realm, users, encrypt=None):
    """Perform Basic Authentication

    If auth fails, returns an Unauthorized error with a
    basic authentication header. If check_auth() reports an error for
    the Authorization header, that error is returned instead.

    :param realm: The authentication realm.
    :type  realm: str

    :param users: A dict of the form: {username: password} or a callable
                  returning a dict.
    :type  users: dict or callable

    :param encrypt: Callable used to encrypt the password returned from
                    the user-agent. if None it defaults to a md5 encryption.
    :type  encrypt: callable
    """

    result = check_auth(request, response, realm, users, encrypt)
    return _challenge_unless_authenticated(
        request, response, result, _httpauth.basicAuth(realm)
    )


def digest_auth(request, response, realm, users):
    """Perform Digest Authentication

    If auth fails, returns an Unauthorized error with a digest
    authentication header. If check_auth() reports an error for the
    Authorization header, that error is returned instead.

    :param realm: The authentication realm.
    :type  realm: str

    :param users: A dict of the form: {username: password} or a callable
                  returning a dict.
    :type  users: dict or callable
    """

    result = check_auth(request, response, realm, users)
    return _challenge_unless_authenticated(
        request, response, result, _httpauth.digestAuth(realm)
    )


def gzip(response, level=4, mime_types=("text/html", "text/plain",)):
    """Try to gzip the response body if Content-Type in mime_types.

    response.headers['Content-Type'] must be set to one of the
    values in the mime_types arg before calling this function.

    No compression is performed if any of the following hold:
        * The client sends no Accept-Encoding request header
        * No 'gzip' or 'x-gzip' is present in the Accept-Encoding header
        * No 'gzip' or 'x-gzip' with a qvalue > 0 is present
        * The 'identity' value is given with a qvalue > 0.

    Returns the response, or the result of httperror() with status 406
    when none of the codings listed by the client is 'identity', 'gzip'
    or 'x-gzip'.
    """

    if not response.body:
        # Response body is empty (might be a 304 for instance)
        return response

    # If returning cached content (which should already have been gzipped),
    # don't re-zip.
    if getattr(response.request, "cached", False):
        return response

    acceptable = response.request.headers.elements('Accept-Encoding')
    if not acceptable:
        # If no Accept-Encoding field is present in a request,
        # the server MAY assume that the client will accept any
        # content coding. In this case, if "identity" is one of
        # the available content-codings, then the server SHOULD use
        # the "identity" content-coding, unless it has additional
        # information that a different content-coding is meaningful
        # to the client.
        return response

    ct = response.headers.get('Content-Type', 'text/html').split(';')[0]
    for coding in acceptable:
        if coding.value == 'identity' and coding.qvalue != 0:
            return response
        if coding.value in ('gzip', 'x-gzip'):
            if coding.qvalue == 0:
                return response
            if ct in mime_types:
                # Return a generator that compresses the page
                varies = response.headers.get("Vary", "")
                varies = [x.strip() for x in varies.split(",") if x.strip()]
                if "Accept-Encoding" not in varies:
                    varies.append("Accept-Encoding")
                response.headers['Vary'] = ", ".join(varies)

                response.headers['Content-Encoding'] = 'gzip'
                response.body = compress(response.body, level)
                if "Content-Length" in response.headers:
                    # Delete Content-Length header so finalize() recalcs it.
                    del response.headers["Content-Length"]
            return response
    return httperror(
        response.request, response, 406, description="identity, gzip"
    )


class ReverseProxy(BaseComponent):
    """Web Component that rewrites req.remote from proxy-supplied headers."""

    # Header names consulted, in order, for the original client address.
    headers = ('X-Real-IP', 'X-Forwarded-For')

    def init(self, headers=None):
        """Web Component for identifying the original client IP when a
        reverse proxy is used

        :param headers: List of HTTP headers to read the original client IP
        """

        if headers:
            self.headers = headers

    @handler('request', priority=1)
    def _on_request(self, req, *_):
        # Collect the non-empty values of the configured headers; the first
        # one found replaces req.remote, otherwise req.remote is left as is.
        # NOTE(review): the raw header value is used verbatim; a
        # comma-separated X-Forwarded-For list is not split here.
        found = [v for v in map(req.headers.get, self.headers) if v]
        if found:
            req.remote = Host(found[0], "", found[0])