# -*- coding: utf-8 -*-
"""
    werkzeug.http
    ~~~~~~~~~~~~~

    Werkzeug comes with a bunch of utilities that help Werkzeug to deal with
    HTTP data.  Most of the classes and functions provided by this module are
    used by the wrappers, but they are useful on their own, too, especially if
    the response and request objects are not used.

    This covers some of the more HTTP centric features of WSGI, some other
    utilities such as cookie handling are documented in the `werkzeug.utils`
    module.


    :copyright: 2007 Pallets
    :license: BSD-3-Clause
"""
import base64
import re
import warnings
from datetime import datetime
from datetime import timedelta
from hashlib import md5
from time import gmtime
from time import time

from ._compat import integer_types
from ._compat import iteritems
from ._compat import PY2
from ._compat import string_types
from ._compat import text_type
from ._compat import to_bytes
from ._compat import to_unicode
from ._compat import try_coerce_native
from ._internal import _cookie_parse_impl
from ._internal import _cookie_quote
from ._internal import _make_cookie_domain

# ``email.Utils`` is the Python 2 location of ``email.utils``.
try:
    from email.utils import parsedate_tz
except ImportError:
    from email.Utils import parsedate_tz

# The list-header parser and percent-unquoting live in different modules
# on Python 2 and 3; normalize them to private aliases.
try:
    from urllib.request import parse_http_list as _parse_list_header
    from urllib.parse import unquote_to_bytes as _unquote
except ImportError:
    from urllib2 import parse_http_list as _parse_list_header
    from urllib2 import unquote as _unquote

# Charsets used for cookie values and for Basic-Auth credentials.
_cookie_charset = "latin1"
_basic_auth_charset = "utf-8"
# for explanation of "media-range", etc.
see Sections 5.3.{1,2} of RFC 7231 55_accept_re = re.compile( 56 r""" 57 ( # media-range capturing-parenthesis 58 [^\s;,]+ # type/subtype 59 (?:[ \t]*;[ \t]* # ";" 60 (?: # parameter non-capturing-parenthesis 61 [^\s;,q][^\s;,]* # token that doesn't start with "q" 62 | # or 63 q[^\s;,=][^\s;,]* # token that is more than just "q" 64 ) 65 )* # zero or more parameters 66 ) # end of media-range 67 (?:[ \t]*;[ \t]*q= # weight is a "q" parameter 68 (\d*(?:\.\d+)?) # qvalue capturing-parentheses 69 [^,]* # "extension" accept params: who cares? 70 )? # accept params are optional 71 """, 72 re.VERBOSE, 73) 74_token_chars = frozenset( 75 "!#$%&'*+-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~" 76) 77_etag_re = re.compile(r'([Ww]/)?(?:"(.*?)"|(.*?))(?:\s*,\s*|$)') 78_unsafe_header_chars = set('()<>@,;:"/[]?={} \t') 79_option_header_piece_re = re.compile( 80 r""" 81 ;\s*,?\s* # newlines were replaced with commas 82 (?P<key> 83 "[^"\\]*(?:\\.[^"\\]*)*" # quoted string 84 | 85 [^\s;,=*]+ # token 86 ) 87 (?:\*(?P<count>\d+))? # *1, optional continuation index 88 \s* 89 (?: # optionally followed by =value 90 (?: # equals sign, possibly with encoding 91 \*\s*=\s* # * indicates extended notation 92 (?: # optional encoding 93 (?P<encoding>[^\s]+?) 94 '(?P<language>[^\s]*?)' 95 )? 96 | 97 =\s* # basic notation 98 ) 99 (?P<value> 100 "[^"\\]*(?:\\.[^"\\]*)*" # quoted string 101 | 102 [^;,]+ # token 103 )? 104 )? 
105 \s* 106 """, 107 flags=re.VERBOSE, 108) 109_option_header_start_mime_type = re.compile(r",\s*([^;,\s]+)([;,]\s*.+)?") 110 111_entity_headers = frozenset( 112 [ 113 "allow", 114 "content-encoding", 115 "content-language", 116 "content-length", 117 "content-location", 118 "content-md5", 119 "content-range", 120 "content-type", 121 "expires", 122 "last-modified", 123 ] 124) 125_hop_by_hop_headers = frozenset( 126 [ 127 "connection", 128 "keep-alive", 129 "proxy-authenticate", 130 "proxy-authorization", 131 "te", 132 "trailer", 133 "transfer-encoding", 134 "upgrade", 135 ] 136) 137 138 139HTTP_STATUS_CODES = { 140 100: "Continue", 141 101: "Switching Protocols", 142 102: "Processing", 143 103: "Early Hints", # see RFC 8297 144 200: "OK", 145 201: "Created", 146 202: "Accepted", 147 203: "Non Authoritative Information", 148 204: "No Content", 149 205: "Reset Content", 150 206: "Partial Content", 151 207: "Multi Status", 152 208: "Already Reported", # see RFC 5842 153 226: "IM Used", # see RFC 3229 154 300: "Multiple Choices", 155 301: "Moved Permanently", 156 302: "Found", 157 303: "See Other", 158 304: "Not Modified", 159 305: "Use Proxy", 160 306: "Switch Proxy", # unused 161 307: "Temporary Redirect", 162 308: "Permanent Redirect", 163 400: "Bad Request", 164 401: "Unauthorized", 165 402: "Payment Required", # unused 166 403: "Forbidden", 167 404: "Not Found", 168 405: "Method Not Allowed", 169 406: "Not Acceptable", 170 407: "Proxy Authentication Required", 171 408: "Request Timeout", 172 409: "Conflict", 173 410: "Gone", 174 411: "Length Required", 175 412: "Precondition Failed", 176 413: "Request Entity Too Large", 177 414: "Request URI Too Long", 178 415: "Unsupported Media Type", 179 416: "Requested Range Not Satisfiable", 180 417: "Expectation Failed", 181 418: "I'm a teapot", # see RFC 2324 182 421: "Misdirected Request", # see RFC 7540 183 422: "Unprocessable Entity", 184 423: "Locked", 185 424: "Failed Dependency", 186 425: "Too Early", # see RFC 8470 
187 426: "Upgrade Required", 188 428: "Precondition Required", # see RFC 6585 189 429: "Too Many Requests", 190 431: "Request Header Fields Too Large", 191 449: "Retry With", # proprietary MS extension 192 451: "Unavailable For Legal Reasons", 193 500: "Internal Server Error", 194 501: "Not Implemented", 195 502: "Bad Gateway", 196 503: "Service Unavailable", 197 504: "Gateway Timeout", 198 505: "HTTP Version Not Supported", 199 506: "Variant Also Negotiates", # see RFC 2295 200 507: "Insufficient Storage", 201 508: "Loop Detected", # see RFC 5842 202 510: "Not Extended", 203 511: "Network Authentication Failed", # see RFC 6585 204} 205 206 207def wsgi_to_bytes(data): 208 """coerce wsgi unicode represented bytes to real ones""" 209 if isinstance(data, bytes): 210 return data 211 return data.encode("latin1") # XXX: utf8 fallback? 212 213 214def bytes_to_wsgi(data): 215 assert isinstance(data, bytes), "data must be bytes" 216 if isinstance(data, str): 217 return data 218 else: 219 return data.decode("latin1") 220 221 222def quote_header_value(value, extra_chars="", allow_token=True): 223 """Quote a header value if necessary. 224 225 .. versionadded:: 0.5 226 227 :param value: the value to quote. 228 :param extra_chars: a list of extra characters to skip quoting. 229 :param allow_token: if this is enabled token values are returned 230 unchanged. 231 """ 232 if isinstance(value, bytes): 233 value = bytes_to_wsgi(value) 234 value = str(value) 235 if allow_token: 236 token_chars = _token_chars | set(extra_chars) 237 if set(value).issubset(token_chars): 238 return value 239 return '"%s"' % value.replace("\\", "\\\\").replace('"', '\\"') 240 241 242def unquote_header_value(value, is_filename=False): 243 r"""Unquotes a header value. (Reversal of :func:`quote_header_value`). 244 This does not use the real unquoting but what browsers are actually 245 using for quoting. 246 247 .. versionadded:: 0.5 248 249 :param value: the header value to unquote. 
250 """ 251 if value and value[0] == value[-1] == '"': 252 # this is not the real unquoting, but fixing this so that the 253 # RFC is met will result in bugs with internet explorer and 254 # probably some other browsers as well. IE for example is 255 # uploading files with "C:\foo\bar.txt" as filename 256 value = value[1:-1] 257 258 # if this is a filename and the starting characters look like 259 # a UNC path, then just return the value without quotes. Using the 260 # replace sequence below on a UNC path has the effect of turning 261 # the leading double slash into a single slash and then 262 # _fix_ie_filename() doesn't work correctly. See #458. 263 if not is_filename or value[:2] != "\\\\": 264 return value.replace("\\\\", "\\").replace('\\"', '"') 265 return value 266 267 268def dump_options_header(header, options): 269 """The reverse function to :func:`parse_options_header`. 270 271 :param header: the header to dump 272 :param options: a dict of options to append. 273 """ 274 segments = [] 275 if header is not None: 276 segments.append(header) 277 for key, value in iteritems(options): 278 if value is None: 279 segments.append(key) 280 else: 281 segments.append("%s=%s" % (key, quote_header_value(value))) 282 return "; ".join(segments) 283 284 285def dump_header(iterable, allow_token=True): 286 """Dump an HTTP header again. This is the reversal of 287 :func:`parse_list_header`, :func:`parse_set_header` and 288 :func:`parse_dict_header`. This also quotes strings that include an 289 equals sign unless you pass it as dict of key, value pairs. 290 291 >>> dump_header({'foo': 'bar baz'}) 292 'foo="bar baz"' 293 >>> dump_header(('foo', 'bar baz')) 294 'foo, "bar baz"' 295 296 :param iterable: the iterable or dict of values to quote. 297 :param allow_token: if set to `False` tokens as values are disallowed. 298 See :func:`quote_header_value` for more details. 
299 """ 300 if isinstance(iterable, dict): 301 items = [] 302 for key, value in iteritems(iterable): 303 if value is None: 304 items.append(key) 305 else: 306 items.append( 307 "%s=%s" % (key, quote_header_value(value, allow_token=allow_token)) 308 ) 309 else: 310 items = [quote_header_value(x, allow_token=allow_token) for x in iterable] 311 return ", ".join(items) 312 313 314def dump_csp_header(header): 315 """Dump a Content Security Policy header. 316 317 These are structured into policies such as "default-src 'self'; 318 script-src 'self'". 319 320 .. versionadded:: 1.0.0 321 Support for Content Security Policy headers was added. 322 323 """ 324 return "; ".join("%s %s" % (key, value) for key, value in iteritems(header)) 325 326 327def parse_list_header(value): 328 """Parse lists as described by RFC 2068 Section 2. 329 330 In particular, parse comma-separated lists where the elements of 331 the list may include quoted-strings. A quoted-string could 332 contain a comma. A non-quoted string could have quotes in the 333 middle. Quotes are removed automatically after parsing. 334 335 It basically works like :func:`parse_set_header` just that items 336 may appear multiple times and case sensitivity is preserved. 337 338 The return value is a standard :class:`list`: 339 340 >>> parse_list_header('token, "quoted value"') 341 ['token', 'quoted value'] 342 343 To create a header from the :class:`list` again, use the 344 :func:`dump_header` function. 345 346 :param value: a string with a list header. 
def parse_dict_header(value, cls=dict):
    """Parse lists of key, value pairs as described by RFC 2068 Section 2 and
    convert them into a python dict (or any other mapping object created from
    the type with a dict like interface provided by the `cls` argument):

    >>> d = parse_dict_header('foo="is a fish", bar="as well"')
    >>> type(d) is dict
    True
    >>> sorted(d.items())
    [('bar', 'as well'), ('foo', 'is a fish')]

    If there is no value for a key it will be `None`:

    >>> parse_dict_header('key_without_value')
    {'key_without_value': None}

    To create a header from the :class:`dict` again, use the
    :func:`dump_header` function.

    .. versionchanged:: 0.9
       Added support for `cls` argument.

    :param value: a string with a dict header.
    :param cls: callable to use for storage of parsed results.
    :return: an instance of `cls`
    """
    result = cls()
    if not isinstance(value, text_type):
        # XXX: validate
        value = bytes_to_wsgi(value)
    for item in _parse_list_header(value):
        if "=" not in item:
            result[item] = None
            continue
        name, value = item.split("=", 1)
        if value[:1] == value[-1:] == '"':
            value = unquote_header_value(value[1:-1])
        result[name] = value
    return result


def parse_options_header(value, multiple=False):
    """Parse a ``Content-Type`` like header into a tuple with the content
    type and the options:

    >>> parse_options_header('text/html; charset=utf8')
    ('text/html', {'charset': 'utf8'})

    This should not be used to parse ``Cache-Control`` like headers that use
    a slightly different format.  For these headers use the
    :func:`parse_dict_header` function.

    .. versionchanged:: 0.15
        :rfc:`2231` parameter continuations are handled.

    .. versionadded:: 0.5

    :param value: the header to parse.
    :param multiple: Whether try to parse and return multiple MIME types
    :return: (mimetype, options) or (mimetype, options, mimetype, options, …)
             if multiple=True
    """
    if not value:
        return "", {}

    result = []

    # Prefix with a comma so the mime-type regex below matches the first
    # entry the same way it matches subsequent ones.
    value = "," + value.replace("\n", ",")
    while value:
        match = _option_header_start_mime_type.match(value)
        if not match:
            break
        result.append(match.group(1))  # mimetype
        options = {}
        # Parse options
        rest = match.group(2)
        continued_encoding = None
        while rest:
            optmatch = _option_header_piece_re.match(rest)
            if not optmatch:
                break
            option, count, encoding, language, option_value = optmatch.groups()
            # Continuations don't have to supply the encoding after the
            # first line. If we're in a continuation, track the current
            # encoding to use for subsequent lines. Reset it when the
            # continuation ends.
            if not count:
                continued_encoding = None
            else:
                if not encoding:
                    encoding = continued_encoding
                continued_encoding = encoding
            option = unquote_header_value(option)
            if option_value is not None:
                option_value = unquote_header_value(option_value, option == "filename")
                if encoding is not None:
                    option_value = _unquote(option_value).decode(encoding)
            if count:
                # Continuations append to the existing value. For
                # simplicity, this ignores the possibility of
                # out-of-order indices, which shouldn't happen anyway.
                options[option] = options.get(option, "") + option_value
            else:
                options[option] = option_value
            rest = rest[optmatch.end() :]
        result.append(options)
        if multiple is False:
            return tuple(result)
        value = rest

    return tuple(result) if result else ("", {})
def parse_accept_header(value, cls=None):
    """Parses an HTTP Accept-* header.  This does not implement a complete
    valid algorithm but one that supports at least value and quality
    extraction.

    Returns a new :class:`Accept` object (basically a list of ``(value, quality)``
    tuples sorted by the quality with some additional accessor methods).

    The second parameter can be a subclass of :class:`Accept` that is created
    with the parsed values and returned.

    :param value: the accept header string to be parsed.
    :param cls: the wrapper class for the return value (can be
                         :class:`Accept` or a subclass thereof)
    :return: an instance of `cls`.
    """
    if cls is None:
        cls = Accept

    if not value:
        return cls(None)

    result = []
    for match in _accept_re.finditer(value):
        quality = match.group(2)
        if not quality:
            # No (or empty) q parameter: RFC 7231 default quality of 1.
            quality = 1
        else:
            # Clamp out-of-range qualities into [0, 1].
            quality = max(min(float(quality), 1), 0)
        result.append((match.group(1), quality))
    return cls(result)
def parse_cache_control_header(value, on_update=None, cls=None):
    """Parse a cache control header.  The RFC differs between response and
    request cache control, this method does not.  It's your responsibility
    to not use the wrong control statements.

    .. versionadded:: 0.5
       The `cls` was added.  If not specified an immutable
       :class:`~werkzeug.datastructures.RequestCacheControl` is returned.

    :param value: a cache control header to be parsed.
    :param on_update: an optional callable that is called every time a value
                      on the :class:`~werkzeug.datastructures.CacheControl`
                      object is changed.
    :param cls: the class for the returned object.  By default
                :class:`~werkzeug.datastructures.RequestCacheControl` is used.
    :return: a `cls` object.
    """
    if cls is None:
        cls = RequestCacheControl
    if not value:
        return cls(None, on_update)
    # Cache-Control is a comma-separated list of ``key[=value]`` pairs.
    return cls(parse_dict_header(value), on_update)
519 """ 520 if cls is None: 521 cls = RequestCacheControl 522 if not value: 523 return cls(None, on_update) 524 return cls(parse_dict_header(value), on_update) 525 526 527def parse_csp_header(value, on_update=None, cls=None): 528 """Parse a Content Security Policy header. 529 530 .. versionadded:: 1.0.0 531 Support for Content Security Policy headers was added. 532 533 :param value: a csp header to be parsed. 534 :param on_update: an optional callable that is called every time a value 535 on the object is changed. 536 :param cls: the class for the returned object. By default 537 :class:`~werkzeug.datastructures.ContentSecurityPolicy` is used. 538 :return: a `cls` object. 539 """ 540 541 if cls is None: 542 cls = ContentSecurityPolicy 543 if value is None: 544 return cls(None, on_update) 545 items = [] 546 for policy in value.split(";"): 547 policy = policy.strip() 548 # Ignore badly formatted policies (no space) 549 if " " in policy: 550 directive, value = policy.strip().split(" ", 1) 551 items.append((directive.strip(), value.strip())) 552 return cls(items, on_update) 553 554 555def parse_set_header(value, on_update=None): 556 """Parse a set-like header and return a 557 :class:`~werkzeug.datastructures.HeaderSet` object: 558 559 >>> hs = parse_set_header('token, "quoted value"') 560 561 The return value is an object that treats the items case-insensitively 562 and keeps the order of the items: 563 564 >>> 'TOKEN' in hs 565 True 566 >>> hs.index('quoted value') 567 1 568 >>> hs 569 HeaderSet(['token', 'quoted value']) 570 571 To create a header from the :class:`HeaderSet` again, use the 572 :func:`dump_header` function. 573 574 :param value: a set header to be parsed. 575 :param on_update: an optional callable that is called every time a 576 value on the :class:`~werkzeug.datastructures.HeaderSet` 577 object is changed. 
def parse_authorization_header(value):
    """Parse an HTTP basic/digest authorization header transmitted by the web
    browser.  The return value is either `None` if the header was invalid or
    not given, otherwise an :class:`~werkzeug.datastructures.Authorization`
    object.

    :param value: the authorization header to parse.
    :return: a :class:`~werkzeug.datastructures.Authorization` object or `None`.
    """
    if not value:
        return
    value = wsgi_to_bytes(value)
    try:
        auth_type, auth_info = value.split(None, 1)
        auth_type = auth_type.lower()
    except ValueError:
        return
    if auth_type == b"basic":
        try:
            # Credentials are ``base64(username:password)``; the password
            # may itself contain colons, so split only once.
            username, password = base64.b64decode(auth_info).split(b":", 1)
        except Exception:
            return
        return Authorization(
            "basic",
            {
                "username": to_unicode(username, _basic_auth_charset),
                "password": to_unicode(password, _basic_auth_charset),
            },
        )
    elif auth_type == b"digest":
        auth_map = parse_dict_header(auth_info)
        # These fields are mandatory for a digest challenge response.
        for key in "username", "realm", "nonce", "uri", "response":
            if key not in auth_map:
                return
        if "qop" in auth_map:
            # qop requires the nonce-count and client nonce as well.
            if not auth_map.get("nc") or not auth_map.get("cnonce"):
                return
        return Authorization("digest", auth_map)


def parse_www_authenticate_header(value, on_update=None):
    """Parse an HTTP WWW-Authenticate header into a
    :class:`~werkzeug.datastructures.WWWAuthenticate` object.

    :param value: a WWW-Authenticate header to parse.
    :param on_update: an optional callable that is called every time a value
                      on the :class:`~werkzeug.datastructures.WWWAuthenticate`
                      object is changed.
    :return: a :class:`~werkzeug.datastructures.WWWAuthenticate` object.
    """
    if not value:
        return WWWAuthenticate(on_update=on_update)
    try:
        auth_type, auth_info = value.split(None, 1)
        auth_type = auth_type.lower()
    except (ValueError, AttributeError):
        # Challenge without parameters, e.g. just ``Negotiate``.
        return WWWAuthenticate(value.strip().lower(), on_update=on_update)
    return WWWAuthenticate(auth_type, parse_dict_header(auth_info), on_update)
634 """ 635 if not value: 636 return WWWAuthenticate(on_update=on_update) 637 try: 638 auth_type, auth_info = value.split(None, 1) 639 auth_type = auth_type.lower() 640 except (ValueError, AttributeError): 641 return WWWAuthenticate(value.strip().lower(), on_update=on_update) 642 return WWWAuthenticate(auth_type, parse_dict_header(auth_info), on_update) 643 644 645def parse_if_range_header(value): 646 """Parses an if-range header which can be an etag or a date. Returns 647 a :class:`~werkzeug.datastructures.IfRange` object. 648 649 .. versionadded:: 0.7 650 """ 651 if not value: 652 return IfRange() 653 date = parse_date(value) 654 if date is not None: 655 return IfRange(date=date) 656 # drop weakness information 657 return IfRange(unquote_etag(value)[0]) 658 659 660def parse_range_header(value, make_inclusive=True): 661 """Parses a range header into a :class:`~werkzeug.datastructures.Range` 662 object. If the header is missing or malformed `None` is returned. 663 `ranges` is a list of ``(start, stop)`` tuples where the ranges are 664 non-inclusive. 665 666 .. 
def parse_content_range_header(value, on_update=None):
    """Parses a range header into a
    :class:`~werkzeug.datastructures.ContentRange` object or `None` if
    parsing is not possible.

    .. versionadded:: 0.7

    :param value: a content range header to be parsed.
    :param on_update: an optional callable that is called every time a value
                      on the :class:`~werkzeug.datastructures.ContentRange`
                      object is changed.
    """
    if value is None:
        return None
    try:
        units, rangedef = (value or "").strip().split(None, 1)
    except ValueError:
        return None

    if "/" not in rangedef:
        return None
    rng, length = rangedef.split("/", 1)
    if length == "*":
        # Total length unknown.
        length = None
    elif length.isdigit():
        length = int(length)
    else:
        return None

    if rng == "*":
        # Unsatisfied range, e.g. ``bytes */1234``.
        return ContentRange(units, None, None, length, on_update=on_update)
    elif "-" not in rng:
        return None

    start, stop = rng.split("-", 1)
    try:
        start = int(start)
        # The header's stop is inclusive; store it exclusive.
        stop = int(stop) + 1
    except ValueError:
        return None

    if is_byte_range_valid(start, stop, length):
        return ContentRange(units, start, stop, length, on_update=on_update)
723 """ 724 if value is None: 725 return None 726 try: 727 units, rangedef = (value or "").strip().split(None, 1) 728 except ValueError: 729 return None 730 731 if "/" not in rangedef: 732 return None 733 rng, length = rangedef.split("/", 1) 734 if length == "*": 735 length = None 736 elif length.isdigit(): 737 length = int(length) 738 else: 739 return None 740 741 if rng == "*": 742 return ContentRange(units, None, None, length, on_update=on_update) 743 elif "-" not in rng: 744 return None 745 746 start, stop = rng.split("-", 1) 747 try: 748 start = int(start) 749 stop = int(stop) + 1 750 except ValueError: 751 return None 752 753 if is_byte_range_valid(start, stop, length): 754 return ContentRange(units, start, stop, length, on_update=on_update) 755 756 757def quote_etag(etag, weak=False): 758 """Quote an etag. 759 760 :param etag: the etag to quote. 761 :param weak: set to `True` to tag it "weak". 762 """ 763 if '"' in etag: 764 raise ValueError("invalid etag") 765 etag = '"%s"' % etag 766 if weak: 767 etag = "W/" + etag 768 return etag 769 770 771def unquote_etag(etag): 772 """Unquote a single etag: 773 774 >>> unquote_etag('W/"bar"') 775 ('bar', True) 776 >>> unquote_etag('"bar"') 777 ('bar', False) 778 779 :param etag: the etag identifier to unquote. 780 :return: a ``(etag, weak)`` tuple. 781 """ 782 if not etag: 783 return None, None 784 etag = etag.strip() 785 weak = False 786 if etag.startswith(("W/", "w/")): 787 weak = True 788 etag = etag[2:] 789 if etag[:1] == etag[-1:] == '"': 790 etag = etag[1:-1] 791 return etag, weak 792 793 794def parse_etags(value): 795 """Parse an etag header. 796 797 :param value: the tag header to parse 798 :return: an :class:`~werkzeug.datastructures.ETags` object. 
799 """ 800 if not value: 801 return ETags() 802 strong = [] 803 weak = [] 804 end = len(value) 805 pos = 0 806 while pos < end: 807 match = _etag_re.match(value, pos) 808 if match is None: 809 break 810 is_weak, quoted, raw = match.groups() 811 if raw == "*": 812 return ETags(star_tag=True) 813 elif quoted: 814 raw = quoted 815 if is_weak: 816 weak.append(raw) 817 else: 818 strong.append(raw) 819 pos = match.end() 820 return ETags(strong, weak) 821 822 823def generate_etag(data): 824 """Generate an etag for some data.""" 825 return md5(data).hexdigest() 826 827 828def parse_date(value): 829 """Parse one of the following date formats into a datetime object: 830 831 .. sourcecode:: text 832 833 Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123 834 Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036 835 Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format 836 837 If parsing fails the return value is `None`. 838 839 :param value: a string with a supported date format. 840 :return: a :class:`datetime.datetime` object. 841 """ 842 if value: 843 t = parsedate_tz(value.strip()) 844 if t is not None: 845 try: 846 year = t[0] 847 # unfortunately that function does not tell us if two digit 848 # years were part of the string, or if they were prefixed 849 # with two zeroes. 
def _dump_date(d, delim):
    """Used for `http_date` and `cookie_date`.

    Accepts `None` (now), a `datetime`, a unix timestamp, or a
    timetuple, and formats it as a GMT date with *delim* between the
    day, month and year fields.
    """
    if d is None:
        d = gmtime()
    elif isinstance(d, datetime):
        d = d.utctimetuple()
    elif isinstance(d, (integer_types, float)):
        d = gmtime(d)
    # English day/month names are required by the HTTP spec regardless
    # of locale, hence the hard-coded tables instead of strftime.
    return "%s, %02d%s%s%s%04d %02d:%02d:%02d GMT" % (
        ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")[d.tm_wday],
        d.tm_mday,
        delim,
        (
            "Jan",
            "Feb",
            "Mar",
            "Apr",
            "May",
            "Jun",
            "Jul",
            "Aug",
            "Sep",
            "Oct",
            "Nov",
            "Dec",
        )[d.tm_mon - 1],
        delim,
        d.tm_year,
        d.tm_hour,
        d.tm_min,
        d.tm_sec,
    )


def cookie_date(expires=None):
    """Formats the time to ensure compatibility with Netscape's cookie
    standard.

    Accepts a floating point number expressed in seconds since the epoch in, a
    datetime object or a timetuple.  All times in UTC.  The :func:`parse_date`
    function can be used to parse such a date.

    Outputs a string in the format ``Wdy, DD-Mon-YYYY HH:MM:SS GMT``.

    :param expires: If provided that date is used, otherwise the current.
    """
    return _dump_date(expires, "-")


def http_date(timestamp=None):
    """Formats the time to match the RFC1123 date format.

    Accepts a floating point number expressed in seconds since the epoch in, a
    datetime object or a timetuple.  All times in UTC.  The :func:`parse_date`
    function can be used to parse such a date.

    Outputs a string in the format ``Wdy, DD Mon YYYY HH:MM:SS GMT``.

    :param timestamp: If provided that date is used, otherwise the current.
    """
    return _dump_date(timestamp, " ")
919 """ 920 return _dump_date(timestamp, " ") 921 922 923def parse_age(value=None): 924 """Parses a base-10 integer count of seconds into a timedelta. 925 926 If parsing fails, the return value is `None`. 927 928 :param value: a string consisting of an integer represented in base-10 929 :return: a :class:`datetime.timedelta` object or `None`. 930 """ 931 if not value: 932 return None 933 try: 934 seconds = int(value) 935 except ValueError: 936 return None 937 if seconds < 0: 938 return None 939 try: 940 return timedelta(seconds=seconds) 941 except OverflowError: 942 return None 943 944 945def dump_age(age=None): 946 """Formats the duration as a base-10 integer. 947 948 :param age: should be an integer number of seconds, 949 a :class:`datetime.timedelta` object, or, 950 if the age is unknown, `None` (default). 951 """ 952 if age is None: 953 return 954 if isinstance(age, timedelta): 955 # do the equivalent of Python 2.7's timedelta.total_seconds(), 956 # but disregarding fractional seconds 957 age = age.seconds + (age.days * 24 * 3600) 958 959 age = int(age) 960 if age < 0: 961 raise ValueError("age cannot be negative") 962 963 return str(age) 964 965 966def is_resource_modified( 967 environ, etag=None, data=None, last_modified=None, ignore_if_range=True 968): 969 """Convenience method for conditional requests. 970 971 :param environ: the WSGI environment of the request to be checked. 972 :param etag: the etag for the response for comparison. 973 :param data: or alternatively the data of the response to automatically 974 generate an etag using :func:`generate_etag`. 975 :param last_modified: an optional date of the last modification. 976 :param ignore_if_range: If `False`, `If-Range` header will be taken into 977 account. 978 :return: `True` if the resource was modified, otherwise `False`. 979 980 .. versionchanged:: 1.0.0 981 The check is run for methods other than ``GET`` and ``HEAD``. 
982 """ 983 if etag is None and data is not None: 984 etag = generate_etag(data) 985 elif data is not None: 986 raise TypeError("both data and etag given") 987 988 unmodified = False 989 if isinstance(last_modified, string_types): 990 last_modified = parse_date(last_modified) 991 992 # ensure that microsecond is zero because the HTTP spec does not transmit 993 # that either and we might have some false positives. See issue #39 994 if last_modified is not None: 995 last_modified = last_modified.replace(microsecond=0) 996 997 if_range = None 998 if not ignore_if_range and "HTTP_RANGE" in environ: 999 # https://tools.ietf.org/html/rfc7233#section-3.2 1000 # A server MUST ignore an If-Range header field received in a request 1001 # that does not contain a Range header field. 1002 if_range = parse_if_range_header(environ.get("HTTP_IF_RANGE")) 1003 1004 if if_range is not None and if_range.date is not None: 1005 modified_since = if_range.date 1006 else: 1007 modified_since = parse_date(environ.get("HTTP_IF_MODIFIED_SINCE")) 1008 1009 if modified_since and last_modified and last_modified <= modified_since: 1010 unmodified = True 1011 1012 if etag: 1013 etag, _ = unquote_etag(etag) 1014 if if_range is not None and if_range.etag is not None: 1015 unmodified = parse_etags(if_range.etag).contains(etag) 1016 else: 1017 if_none_match = parse_etags(environ.get("HTTP_IF_NONE_MATCH")) 1018 if if_none_match: 1019 # https://tools.ietf.org/html/rfc7232#section-3.2 1020 # "A recipient MUST use the weak comparison function when comparing 1021 # entity-tags for If-None-Match" 1022 unmodified = if_none_match.contains_weak(etag) 1023 1024 # https://tools.ietf.org/html/rfc7232#section-3.1 1025 # "Origin server MUST use the strong comparison function when 1026 # comparing entity-tags for If-Match" 1027 if_match = parse_etags(environ.get("HTTP_IF_MATCH")) 1028 if if_match: 1029 unmodified = not if_match.is_strong(etag) 1030 1031 return not unmodified 1032 1033 1034def 
def remove_entity_headers(headers, allowed=("expires", "content-location")):
    """Remove all entity headers from a list or :class:`Headers` object.  This
    operation works in-place.  `Expires` and `Content-Location` headers are
    by default not removed.  The reason for this is :rfc:`2616` section
    10.3.5 which specifies some entity headers that should be sent.

    .. versionchanged:: 0.5
       added `allowed` parameter.

    :param headers: a list or :class:`Headers` object.
    :param allowed: a list of headers that should still be allowed even though
                    they are entity headers.
    """
    allowed = set(x.lower() for x in allowed)
    # Slice assignment so the mutation is visible to the caller.
    headers[:] = [
        (key, value)
        for key, value in headers
        if not is_entity_header(key) or key.lower() in allowed
    ]


def remove_hop_by_hop_headers(headers):
    """Remove all HTTP/1.1 "Hop-by-Hop" headers from a list or
    :class:`Headers` object.  This operation works in-place.

    .. versionadded:: 0.5

    :param headers: a list or :class:`Headers` object.
    """
    headers[:] = [
        (key, value) for key, value in headers if not is_hop_by_hop_header(key)
    ]


def is_entity_header(header):
    """Check if a header is an entity header.

    .. versionadded:: 0.5

    :param header: the header to test.
    :return: `True` if it's an entity header, `False` otherwise.
    """
    return header.lower() in _entity_headers


def is_hop_by_hop_header(header):
    """Check if a header is an HTTP/1.1 "Hop-by-Hop" header.

    .. versionadded:: 0.5

    :param header: the header to test.
    :return: `True` if it's an HTTP/1.1 "Hop-by-Hop" header, `False` otherwise.
    """
    return header.lower() in _hop_by_hop_headers


def parse_cookie(header, charset="utf-8", errors="replace", cls=None):
    """Parse a cookie from a string or WSGI environ.

    The same key can be provided multiple times, the values are stored
    in-order. The default :class:`MultiDict` will have the first value
    first, and all values can be retrieved with
    :meth:`MultiDict.getlist`.

    :param header: The cookie header as a string, or a WSGI environ dict
        with a ``HTTP_COOKIE`` key.
    :param charset: The charset for the cookie values.
    :param errors: The error behavior for the charset decoding.
    :param cls: A dict-like class to store the parsed cookies in.
        Defaults to :class:`MultiDict`.

    .. versionchanged:: 1.0.0
        Returns a :class:`MultiDict` instead of a
        ``TypeConversionDict``.

    .. versionchanged:: 0.5
        Returns a :class:`TypeConversionDict` instead of a regular dict.
        The ``cls`` parameter was added.
    """
    if isinstance(header, dict):
        header = header.get("HTTP_COOKIE", "")
    elif header is None:
        header = ""

    # On Python 3, PEP 3333 sends headers through the environ as latin1
    # decoded strings. Encode strings back to bytes for parsing.
1120 if isinstance(header, text_type): 1121 header = header.encode("latin1", "replace") 1122 1123 if cls is None: 1124 cls = MultiDict 1125 1126 def _parse_pairs(): 1127 for key, val in _cookie_parse_impl(header): 1128 key = to_unicode(key, charset, errors, allow_none_charset=True) 1129 if not key: 1130 continue 1131 val = to_unicode(val, charset, errors, allow_none_charset=True) 1132 yield try_coerce_native(key), val 1133 1134 return cls(_parse_pairs()) 1135 1136 1137def dump_cookie( 1138 key, 1139 value="", 1140 max_age=None, 1141 expires=None, 1142 path="/", 1143 domain=None, 1144 secure=False, 1145 httponly=False, 1146 charset="utf-8", 1147 sync_expires=True, 1148 max_size=4093, 1149 samesite=None, 1150): 1151 """Creates a new Set-Cookie header without the ``Set-Cookie`` prefix 1152 The parameters are the same as in the cookie Morsel object in the 1153 Python standard library but it accepts unicode data, too. 1154 1155 On Python 3 the return value of this function will be a unicode 1156 string, on Python 2 it will be a native string. In both cases the 1157 return value is usually restricted to ascii as the vast majority of 1158 values are properly escaped, but that is no guarantee. If a unicode 1159 string is returned it's tunneled through latin1 as required by 1160 PEP 3333. 1161 1162 The return value is not ASCII safe if the key contains unicode 1163 characters. This is technically against the specification but 1164 happens in the wild. It's strongly recommended to not use 1165 non-ASCII values for the keys. 1166 1167 :param max_age: should be a number of seconds, or `None` (default) if 1168 the cookie should last only as long as the client's 1169 browser session. Additionally `timedelta` objects 1170 are accepted, too. 1171 :param expires: should be a `datetime` object or unix timestamp. 1172 :param path: limits the cookie to a given path, per default it will 1173 span the whole domain. 1174 :param domain: Use this if you want to set a cross-domain cookie. 
For 1175 example, ``domain=".example.com"`` will set a cookie 1176 that is readable by the domain ``www.example.com``, 1177 ``foo.example.com`` etc. Otherwise, a cookie will only 1178 be readable by the domain that set it. 1179 :param secure: The cookie will only be available via HTTPS 1180 :param httponly: disallow JavaScript to access the cookie. This is an 1181 extension to the cookie standard and probably not 1182 supported by all browsers. 1183 :param charset: the encoding for unicode values. 1184 :param sync_expires: automatically set expires if max_age is defined 1185 but expires not. 1186 :param max_size: Warn if the final header value exceeds this size. The 1187 default, 4093, should be safely `supported by most browsers 1188 <cookie_>`_. Set to 0 to disable this check. 1189 :param samesite: Limits the scope of the cookie such that it will 1190 only be attached to requests if those requests are same-site. 1191 1192 .. _`cookie`: http://browsercookielimits.squawky.net/ 1193 1194 .. versionchanged:: 1.0.0 1195 The string ``'None'`` is accepted for ``samesite``. 1196 """ 1197 key = to_bytes(key, charset) 1198 value = to_bytes(value, charset) 1199 1200 if path is not None: 1201 from .urls import iri_to_uri 1202 1203 path = iri_to_uri(path, charset) 1204 domain = _make_cookie_domain(domain) 1205 if isinstance(max_age, timedelta): 1206 max_age = (max_age.days * 60 * 60 * 24) + max_age.seconds 1207 if expires is not None: 1208 if not isinstance(expires, string_types): 1209 expires = cookie_date(expires) 1210 elif max_age is not None and sync_expires: 1211 expires = to_bytes(cookie_date(time() + max_age)) 1212 1213 if samesite is not None: 1214 samesite = samesite.title() 1215 1216 if samesite not in {"Strict", "Lax", "None"}: 1217 raise ValueError("SameSite must be 'Strict', 'Lax', or 'None'.") 1218 1219 buf = [key + b"=" + _cookie_quote(value)] 1220 1221 # XXX: In theory all of these parameters that are not marked with `None` 1222 # should be quoted. 
Because stdlib did not quote it before I did not 1223 # want to introduce quoting there now. 1224 for k, v, q in ( 1225 (b"Domain", domain, True), 1226 (b"Expires", expires, False), 1227 (b"Max-Age", max_age, False), 1228 (b"Secure", secure, None), 1229 (b"HttpOnly", httponly, None), 1230 (b"Path", path, False), 1231 (b"SameSite", samesite, False), 1232 ): 1233 if q is None: 1234 if v: 1235 buf.append(k) 1236 continue 1237 1238 if v is None: 1239 continue 1240 1241 tmp = bytearray(k) 1242 if not isinstance(v, (bytes, bytearray)): 1243 v = to_bytes(text_type(v), charset) 1244 if q: 1245 v = _cookie_quote(v) 1246 tmp += b"=" + v 1247 buf.append(bytes(tmp)) 1248 1249 # The return value will be an incorrectly encoded latin1 header on 1250 # Python 3 for consistency with the headers object and a bytestring 1251 # on Python 2 because that's how the API makes more sense. 1252 rv = b"; ".join(buf) 1253 if not PY2: 1254 rv = rv.decode("latin1") 1255 1256 # Warn if the final value of the cookie is larger than the limit. If the 1257 # cookie is too large, then it may be silently ignored by the browser, 1258 # which can be quite hard to debug. 1259 cookie_size = len(rv) 1260 1261 if max_size and cookie_size > max_size: 1262 value_size = len(value) 1263 warnings.warn( 1264 'The "{key}" cookie is too large: the value was {value_size} bytes' 1265 " but the header required {extra_size} extra bytes. The final size" 1266 " was {cookie_size} bytes but the limit is {max_size} bytes." 1267 " Browsers may silently ignore cookies larger than this.".format( 1268 key=key, 1269 value_size=value_size, 1270 extra_size=cookie_size - value_size, 1271 cookie_size=cookie_size, 1272 max_size=max_size, 1273 ), 1274 stacklevel=2, 1275 ) 1276 1277 return rv 1278 1279 1280def is_byte_range_valid(start, stop, length): 1281 """Checks if a given byte content range is valid for the given length. 1282 1283 .. 
versionadded:: 0.7 1284 """ 1285 if (start is None) != (stop is None): 1286 return False 1287 elif start is None: 1288 return length is None or length >= 0 1289 elif length is None: 1290 return 0 <= start < stop 1291 elif start >= stop: 1292 return False 1293 return 0 <= start < length 1294 1295 1296# circular dependencies 1297from .datastructures import Accept 1298from .datastructures import Authorization 1299from .datastructures import ContentRange 1300from .datastructures import ContentSecurityPolicy 1301from .datastructures import ETags 1302from .datastructures import HeaderSet 1303from .datastructures import IfRange 1304from .datastructures import MultiDict 1305from .datastructures import Range 1306from .datastructures import RequestCacheControl 1307from .datastructures import WWWAuthenticate 1308