1# -*- coding: utf-8 -*-
2"""
3    werkzeug.http
4    ~~~~~~~~~~~~~
5
6    Werkzeug comes with a bunch of utilities that help Werkzeug to deal with
7    HTTP data.  Most of the classes and functions provided by this module are
8    used by the wrappers, but they are useful on their own, too, especially if
9    the response and request objects are not used.
10
11    This covers some of the more HTTP centric features of WSGI, some other
12    utilities such as cookie handling are documented in the `werkzeug.utils`
13    module.
14
15
16    :copyright: 2007 Pallets
17    :license: BSD-3-Clause
18"""
19import base64
20import re
21import warnings
22from datetime import datetime
23from datetime import timedelta
24from hashlib import md5
25from time import gmtime
26from time import time
27
28from ._compat import integer_types
29from ._compat import iteritems
30from ._compat import PY2
31from ._compat import string_types
32from ._compat import text_type
33from ._compat import to_bytes
34from ._compat import to_unicode
35from ._compat import try_coerce_native
36from ._internal import _cookie_parse_impl
37from ._internal import _cookie_quote
38from ._internal import _make_cookie_domain
39
40try:
41    from email.utils import parsedate_tz
42except ImportError:
43    from email.Utils import parsedate_tz
44
45try:
46    from urllib.request import parse_http_list as _parse_list_header
47    from urllib.parse import unquote_to_bytes as _unquote
48except ImportError:
49    from urllib2 import parse_http_list as _parse_list_header
50    from urllib2 import unquote as _unquote
51
52_cookie_charset = "latin1"
53_basic_auth_charset = "utf-8"
54# for explanation of "media-range", etc. see Sections 5.3.{1,2} of RFC 7231
55_accept_re = re.compile(
56    r"""
57    (                       # media-range capturing-parenthesis
58      [^\s;,]+              # type/subtype
59      (?:[ \t]*;[ \t]*      # ";"
60        (?:                 # parameter non-capturing-parenthesis
61          [^\s;,q][^\s;,]*  # token that doesn't start with "q"
62        |                   # or
63          q[^\s;,=][^\s;,]* # token that is more than just "q"
64        )
65      )*                    # zero or more parameters
66    )                       # end of media-range
67    (?:[ \t]*;[ \t]*q=      # weight is a "q" parameter
68      (\d*(?:\.\d+)?)       # qvalue capturing-parentheses
69      [^,]*                 # "extension" accept params: who cares?
70    )?                      # accept params are optional
71    """,
72    re.VERBOSE,
73)
74_token_chars = frozenset(
75    "!#$%&'*+-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~"
76)
77_etag_re = re.compile(r'([Ww]/)?(?:"(.*?)"|(.*?))(?:\s*,\s*|$)')
78_unsafe_header_chars = set('()<>@,;:"/[]?={} \t')
79_option_header_piece_re = re.compile(
80    r"""
81    ;\s*,?\s*  # newlines were replaced with commas
82    (?P<key>
83        "[^"\\]*(?:\\.[^"\\]*)*"  # quoted string
84    |
85        [^\s;,=*]+  # token
86    )
87    (?:\*(?P<count>\d+))?  # *1, optional continuation index
88    \s*
89    (?:  # optionally followed by =value
90        (?:  # equals sign, possibly with encoding
91            \*\s*=\s*  # * indicates extended notation
92            (?:  # optional encoding
93                (?P<encoding>[^\s]+?)
94                '(?P<language>[^\s]*?)'
95            )?
96        |
97            =\s*  # basic notation
98        )
99        (?P<value>
100            "[^"\\]*(?:\\.[^"\\]*)*"  # quoted string
101        |
102            [^;,]+  # token
103        )?
104    )?
105    \s*
106    """,
107    flags=re.VERBOSE,
108)
109_option_header_start_mime_type = re.compile(r",\s*([^;,\s]+)([;,]\s*.+)?")
110
111_entity_headers = frozenset(
112    [
113        "allow",
114        "content-encoding",
115        "content-language",
116        "content-length",
117        "content-location",
118        "content-md5",
119        "content-range",
120        "content-type",
121        "expires",
122        "last-modified",
123    ]
124)
125_hop_by_hop_headers = frozenset(
126    [
127        "connection",
128        "keep-alive",
129        "proxy-authenticate",
130        "proxy-authorization",
131        "te",
132        "trailer",
133        "transfer-encoding",
134        "upgrade",
135    ]
136)
137
138
139HTTP_STATUS_CODES = {
140    100: "Continue",
141    101: "Switching Protocols",
142    102: "Processing",
143    103: "Early Hints",  # see RFC 8297
144    200: "OK",
145    201: "Created",
146    202: "Accepted",
147    203: "Non Authoritative Information",
148    204: "No Content",
149    205: "Reset Content",
150    206: "Partial Content",
151    207: "Multi Status",
152    208: "Already Reported",  # see RFC 5842
153    226: "IM Used",  # see RFC 3229
154    300: "Multiple Choices",
155    301: "Moved Permanently",
156    302: "Found",
157    303: "See Other",
158    304: "Not Modified",
159    305: "Use Proxy",
160    306: "Switch Proxy",  # unused
161    307: "Temporary Redirect",
162    308: "Permanent Redirect",
163    400: "Bad Request",
164    401: "Unauthorized",
165    402: "Payment Required",  # unused
166    403: "Forbidden",
167    404: "Not Found",
168    405: "Method Not Allowed",
169    406: "Not Acceptable",
170    407: "Proxy Authentication Required",
171    408: "Request Timeout",
172    409: "Conflict",
173    410: "Gone",
174    411: "Length Required",
175    412: "Precondition Failed",
176    413: "Request Entity Too Large",
177    414: "Request URI Too Long",
178    415: "Unsupported Media Type",
179    416: "Requested Range Not Satisfiable",
180    417: "Expectation Failed",
181    418: "I'm a teapot",  # see RFC 2324
182    421: "Misdirected Request",  # see RFC 7540
183    422: "Unprocessable Entity",
184    423: "Locked",
185    424: "Failed Dependency",
186    425: "Too Early",  # see RFC 8470
187    426: "Upgrade Required",
188    428: "Precondition Required",  # see RFC 6585
189    429: "Too Many Requests",
190    431: "Request Header Fields Too Large",
191    449: "Retry With",  # proprietary MS extension
192    451: "Unavailable For Legal Reasons",
193    500: "Internal Server Error",
194    501: "Not Implemented",
195    502: "Bad Gateway",
196    503: "Service Unavailable",
197    504: "Gateway Timeout",
198    505: "HTTP Version Not Supported",
199    506: "Variant Also Negotiates",  # see RFC 2295
200    507: "Insufficient Storage",
201    508: "Loop Detected",  # see RFC 5842
202    510: "Not Extended",
203    511: "Network Authentication Failed",  # see RFC 6585
204}
205
206
207def wsgi_to_bytes(data):
208    """coerce wsgi unicode represented bytes to real ones"""
209    if isinstance(data, bytes):
210        return data
211    return data.encode("latin1")  # XXX: utf8 fallback?
212
213
214def bytes_to_wsgi(data):
215    assert isinstance(data, bytes), "data must be bytes"
216    if isinstance(data, str):
217        return data
218    else:
219        return data.decode("latin1")
220
221
222def quote_header_value(value, extra_chars="", allow_token=True):
223    """Quote a header value if necessary.
224
225    .. versionadded:: 0.5
226
227    :param value: the value to quote.
228    :param extra_chars: a list of extra characters to skip quoting.
229    :param allow_token: if this is enabled token values are returned
230                        unchanged.
231    """
232    if isinstance(value, bytes):
233        value = bytes_to_wsgi(value)
234    value = str(value)
235    if allow_token:
236        token_chars = _token_chars | set(extra_chars)
237        if set(value).issubset(token_chars):
238            return value
239    return '"%s"' % value.replace("\\", "\\\\").replace('"', '\\"')
240
241
242def unquote_header_value(value, is_filename=False):
243    r"""Unquotes a header value.  (Reversal of :func:`quote_header_value`).
244    This does not use the real unquoting but what browsers are actually
245    using for quoting.
246
247    .. versionadded:: 0.5
248
249    :param value: the header value to unquote.
250    """
251    if value and value[0] == value[-1] == '"':
252        # this is not the real unquoting, but fixing this so that the
253        # RFC is met will result in bugs with internet explorer and
254        # probably some other browsers as well.  IE for example is
255        # uploading files with "C:\foo\bar.txt" as filename
256        value = value[1:-1]
257
258        # if this is a filename and the starting characters look like
259        # a UNC path, then just return the value without quotes.  Using the
260        # replace sequence below on a UNC path has the effect of turning
261        # the leading double slash into a single slash and then
262        # _fix_ie_filename() doesn't work correctly.  See #458.
263        if not is_filename or value[:2] != "\\\\":
264            return value.replace("\\\\", "\\").replace('\\"', '"')
265    return value
266
267
268def dump_options_header(header, options):
269    """The reverse function to :func:`parse_options_header`.
270
271    :param header: the header to dump
272    :param options: a dict of options to append.
273    """
274    segments = []
275    if header is not None:
276        segments.append(header)
277    for key, value in iteritems(options):
278        if value is None:
279            segments.append(key)
280        else:
281            segments.append("%s=%s" % (key, quote_header_value(value)))
282    return "; ".join(segments)
283
284
285def dump_header(iterable, allow_token=True):
286    """Dump an HTTP header again.  This is the reversal of
287    :func:`parse_list_header`, :func:`parse_set_header` and
288    :func:`parse_dict_header`.  This also quotes strings that include an
289    equals sign unless you pass it as dict of key, value pairs.
290
291    >>> dump_header({'foo': 'bar baz'})
292    'foo="bar baz"'
293    >>> dump_header(('foo', 'bar baz'))
294    'foo, "bar baz"'
295
296    :param iterable: the iterable or dict of values to quote.
297    :param allow_token: if set to `False` tokens as values are disallowed.
298                        See :func:`quote_header_value` for more details.
299    """
300    if isinstance(iterable, dict):
301        items = []
302        for key, value in iteritems(iterable):
303            if value is None:
304                items.append(key)
305            else:
306                items.append(
307                    "%s=%s" % (key, quote_header_value(value, allow_token=allow_token))
308                )
309    else:
310        items = [quote_header_value(x, allow_token=allow_token) for x in iterable]
311    return ", ".join(items)
312
313
314def dump_csp_header(header):
315    """Dump a Content Security Policy header.
316
317    These are structured into policies such as "default-src 'self';
318    script-src 'self'".
319
320    .. versionadded:: 1.0.0
321       Support for Content Security Policy headers was added.
322
323    """
324    return "; ".join("%s %s" % (key, value) for key, value in iteritems(header))
325
326
327def parse_list_header(value):
328    """Parse lists as described by RFC 2068 Section 2.
329
330    In particular, parse comma-separated lists where the elements of
331    the list may include quoted-strings.  A quoted-string could
332    contain a comma.  A non-quoted string could have quotes in the
333    middle.  Quotes are removed automatically after parsing.
334
335    It basically works like :func:`parse_set_header` just that items
336    may appear multiple times and case sensitivity is preserved.
337
338    The return value is a standard :class:`list`:
339
340    >>> parse_list_header('token, "quoted value"')
341    ['token', 'quoted value']
342
343    To create a header from the :class:`list` again, use the
344    :func:`dump_header` function.
345
346    :param value: a string with a list header.
347    :return: :class:`list`
348    """
349    result = []
350    for item in _parse_list_header(value):
351        if item[:1] == item[-1:] == '"':
352            item = unquote_header_value(item[1:-1])
353        result.append(item)
354    return result
355
356
357def parse_dict_header(value, cls=dict):
358    """Parse lists of key, value pairs as described by RFC 2068 Section 2 and
359    convert them into a python dict (or any other mapping object created from
360    the type with a dict like interface provided by the `cls` argument):
361
362    >>> d = parse_dict_header('foo="is a fish", bar="as well"')
363    >>> type(d) is dict
364    True
365    >>> sorted(d.items())
366    [('bar', 'as well'), ('foo', 'is a fish')]
367
368    If there is no value for a key it will be `None`:
369
370    >>> parse_dict_header('key_without_value')
371    {'key_without_value': None}
372
373    To create a header from the :class:`dict` again, use the
374    :func:`dump_header` function.
375
376    .. versionchanged:: 0.9
377       Added support for `cls` argument.
378
379    :param value: a string with a dict header.
380    :param cls: callable to use for storage of parsed results.
381    :return: an instance of `cls`
382    """
383    result = cls()
384    if not isinstance(value, text_type):
385        # XXX: validate
386        value = bytes_to_wsgi(value)
387    for item in _parse_list_header(value):
388        if "=" not in item:
389            result[item] = None
390            continue
391        name, value = item.split("=", 1)
392        if value[:1] == value[-1:] == '"':
393            value = unquote_header_value(value[1:-1])
394        result[name] = value
395    return result
396
397
398def parse_options_header(value, multiple=False):
399    """Parse a ``Content-Type`` like header into a tuple with the content
400    type and the options:
401
402    >>> parse_options_header('text/html; charset=utf8')
403    ('text/html', {'charset': 'utf8'})
404
405    This should not be used to parse ``Cache-Control`` like headers that use
406    a slightly different format.  For these headers use the
407    :func:`parse_dict_header` function.
408
409    .. versionchanged:: 0.15
410        :rfc:`2231` parameter continuations are handled.
411
412    .. versionadded:: 0.5
413
414    :param value: the header to parse.
415    :param multiple: Whether try to parse and return multiple MIME types
416    :return: (mimetype, options) or (mimetype, options, mimetype, options, …)
417             if multiple=True
418    """
419    if not value:
420        return "", {}
421
422    result = []
423
424    value = "," + value.replace("\n", ",")
425    while value:
426        match = _option_header_start_mime_type.match(value)
427        if not match:
428            break
429        result.append(match.group(1))  # mimetype
430        options = {}
431        # Parse options
432        rest = match.group(2)
433        continued_encoding = None
434        while rest:
435            optmatch = _option_header_piece_re.match(rest)
436            if not optmatch:
437                break
438            option, count, encoding, language, option_value = optmatch.groups()
439            # Continuations don't have to supply the encoding after the
440            # first line. If we're in a continuation, track the current
441            # encoding to use for subsequent lines. Reset it when the
442            # continuation ends.
443            if not count:
444                continued_encoding = None
445            else:
446                if not encoding:
447                    encoding = continued_encoding
448                continued_encoding = encoding
449            option = unquote_header_value(option)
450            if option_value is not None:
451                option_value = unquote_header_value(option_value, option == "filename")
452                if encoding is not None:
453                    option_value = _unquote(option_value).decode(encoding)
454            if count:
455                # Continuations append to the existing value. For
456                # simplicity, this ignores the possibility of
457                # out-of-order indices, which shouldn't happen anyway.
458                options[option] = options.get(option, "") + option_value
459            else:
460                options[option] = option_value
461            rest = rest[optmatch.end() :]
462        result.append(options)
463        if multiple is False:
464            return tuple(result)
465        value = rest
466
467    return tuple(result) if result else ("", {})
468
469
470def parse_accept_header(value, cls=None):
471    """Parses an HTTP Accept-* header.  This does not implement a complete
472    valid algorithm but one that supports at least value and quality
473    extraction.
474
475    Returns a new :class:`Accept` object (basically a list of ``(value, quality)``
476    tuples sorted by the quality with some additional accessor methods).
477
478    The second parameter can be a subclass of :class:`Accept` that is created
479    with the parsed values and returned.
480
481    :param value: the accept header string to be parsed.
482    :param cls: the wrapper class for the return value (can be
483                         :class:`Accept` or a subclass thereof)
484    :return: an instance of `cls`.
485    """
486    if cls is None:
487        cls = Accept
488
489    if not value:
490        return cls(None)
491
492    result = []
493    for match in _accept_re.finditer(value):
494        quality = match.group(2)
495        if not quality:
496            quality = 1
497        else:
498            quality = max(min(float(quality), 1), 0)
499        result.append((match.group(1), quality))
500    return cls(result)
501
502
503def parse_cache_control_header(value, on_update=None, cls=None):
504    """Parse a cache control header.  The RFC differs between response and
505    request cache control, this method does not.  It's your responsibility
506    to not use the wrong control statements.
507
508    .. versionadded:: 0.5
509       The `cls` was added.  If not specified an immutable
510       :class:`~werkzeug.datastructures.RequestCacheControl` is returned.
511
512    :param value: a cache control header to be parsed.
513    :param on_update: an optional callable that is called every time a value
514                      on the :class:`~werkzeug.datastructures.CacheControl`
515                      object is changed.
516    :param cls: the class for the returned object.  By default
517                :class:`~werkzeug.datastructures.RequestCacheControl` is used.
518    :return: a `cls` object.
519    """
520    if cls is None:
521        cls = RequestCacheControl
522    if not value:
523        return cls(None, on_update)
524    return cls(parse_dict_header(value), on_update)
525
526
527def parse_csp_header(value, on_update=None, cls=None):
528    """Parse a Content Security Policy header.
529
530    .. versionadded:: 1.0.0
531       Support for Content Security Policy headers was added.
532
533    :param value: a csp header to be parsed.
534    :param on_update: an optional callable that is called every time a value
535                      on the object is changed.
536    :param cls: the class for the returned object.  By default
537                :class:`~werkzeug.datastructures.ContentSecurityPolicy` is used.
538    :return: a `cls` object.
539    """
540
541    if cls is None:
542        cls = ContentSecurityPolicy
543    if value is None:
544        return cls(None, on_update)
545    items = []
546    for policy in value.split(";"):
547        policy = policy.strip()
548        # Ignore badly formatted policies (no space)
549        if " " in policy:
550            directive, value = policy.strip().split(" ", 1)
551            items.append((directive.strip(), value.strip()))
552    return cls(items, on_update)
553
554
555def parse_set_header(value, on_update=None):
556    """Parse a set-like header and return a
557    :class:`~werkzeug.datastructures.HeaderSet` object:
558
559    >>> hs = parse_set_header('token, "quoted value"')
560
561    The return value is an object that treats the items case-insensitively
562    and keeps the order of the items:
563
564    >>> 'TOKEN' in hs
565    True
566    >>> hs.index('quoted value')
567    1
568    >>> hs
569    HeaderSet(['token', 'quoted value'])
570
571    To create a header from the :class:`HeaderSet` again, use the
572    :func:`dump_header` function.
573
574    :param value: a set header to be parsed.
575    :param on_update: an optional callable that is called every time a
576                      value on the :class:`~werkzeug.datastructures.HeaderSet`
577                      object is changed.
578    :return: a :class:`~werkzeug.datastructures.HeaderSet`
579    """
580    if not value:
581        return HeaderSet(None, on_update)
582    return HeaderSet(parse_list_header(value), on_update)
583
584
585def parse_authorization_header(value):
586    """Parse an HTTP basic/digest authorization header transmitted by the web
587    browser.  The return value is either `None` if the header was invalid or
588    not given, otherwise an :class:`~werkzeug.datastructures.Authorization`
589    object.
590
591    :param value: the authorization header to parse.
592    :return: a :class:`~werkzeug.datastructures.Authorization` object or `None`.
593    """
594    if not value:
595        return
596    value = wsgi_to_bytes(value)
597    try:
598        auth_type, auth_info = value.split(None, 1)
599        auth_type = auth_type.lower()
600    except ValueError:
601        return
602    if auth_type == b"basic":
603        try:
604            username, password = base64.b64decode(auth_info).split(b":", 1)
605        except Exception:
606            return
607        return Authorization(
608            "basic",
609            {
610                "username": to_unicode(username, _basic_auth_charset),
611                "password": to_unicode(password, _basic_auth_charset),
612            },
613        )
614    elif auth_type == b"digest":
615        auth_map = parse_dict_header(auth_info)
616        for key in "username", "realm", "nonce", "uri", "response":
617            if key not in auth_map:
618                return
619        if "qop" in auth_map:
620            if not auth_map.get("nc") or not auth_map.get("cnonce"):
621                return
622        return Authorization("digest", auth_map)
623
624
625def parse_www_authenticate_header(value, on_update=None):
626    """Parse an HTTP WWW-Authenticate header into a
627    :class:`~werkzeug.datastructures.WWWAuthenticate` object.
628
629    :param value: a WWW-Authenticate header to parse.
630    :param on_update: an optional callable that is called every time a value
631                      on the :class:`~werkzeug.datastructures.WWWAuthenticate`
632                      object is changed.
633    :return: a :class:`~werkzeug.datastructures.WWWAuthenticate` object.
634    """
635    if not value:
636        return WWWAuthenticate(on_update=on_update)
637    try:
638        auth_type, auth_info = value.split(None, 1)
639        auth_type = auth_type.lower()
640    except (ValueError, AttributeError):
641        return WWWAuthenticate(value.strip().lower(), on_update=on_update)
642    return WWWAuthenticate(auth_type, parse_dict_header(auth_info), on_update)
643
644
645def parse_if_range_header(value):
646    """Parses an if-range header which can be an etag or a date.  Returns
647    a :class:`~werkzeug.datastructures.IfRange` object.
648
649    .. versionadded:: 0.7
650    """
651    if not value:
652        return IfRange()
653    date = parse_date(value)
654    if date is not None:
655        return IfRange(date=date)
656    # drop weakness information
657    return IfRange(unquote_etag(value)[0])
658
659
660def parse_range_header(value, make_inclusive=True):
661    """Parses a range header into a :class:`~werkzeug.datastructures.Range`
662    object.  If the header is missing or malformed `None` is returned.
663    `ranges` is a list of ``(start, stop)`` tuples where the ranges are
664    non-inclusive.
665
666    .. versionadded:: 0.7
667    """
668    if not value or "=" not in value:
669        return None
670
671    ranges = []
672    last_end = 0
673    units, rng = value.split("=", 1)
674    units = units.strip().lower()
675
676    for item in rng.split(","):
677        item = item.strip()
678        if "-" not in item:
679            return None
680        if item.startswith("-"):
681            if last_end < 0:
682                return None
683            try:
684                begin = int(item)
685            except ValueError:
686                return None
687            end = None
688            last_end = -1
689        elif "-" in item:
690            begin, end = item.split("-", 1)
691            begin = begin.strip()
692            end = end.strip()
693            if not begin.isdigit():
694                return None
695            begin = int(begin)
696            if begin < last_end or last_end < 0:
697                return None
698            if end:
699                if not end.isdigit():
700                    return None
701                end = int(end) + 1
702                if begin >= end:
703                    return None
704            else:
705                end = None
706            last_end = end
707        ranges.append((begin, end))
708
709    return Range(units, ranges)
710
711
712def parse_content_range_header(value, on_update=None):
713    """Parses a range header into a
714    :class:`~werkzeug.datastructures.ContentRange` object or `None` if
715    parsing is not possible.
716
717    .. versionadded:: 0.7
718
719    :param value: a content range header to be parsed.
720    :param on_update: an optional callable that is called every time a value
721                      on the :class:`~werkzeug.datastructures.ContentRange`
722                      object is changed.
723    """
724    if value is None:
725        return None
726    try:
727        units, rangedef = (value or "").strip().split(None, 1)
728    except ValueError:
729        return None
730
731    if "/" not in rangedef:
732        return None
733    rng, length = rangedef.split("/", 1)
734    if length == "*":
735        length = None
736    elif length.isdigit():
737        length = int(length)
738    else:
739        return None
740
741    if rng == "*":
742        return ContentRange(units, None, None, length, on_update=on_update)
743    elif "-" not in rng:
744        return None
745
746    start, stop = rng.split("-", 1)
747    try:
748        start = int(start)
749        stop = int(stop) + 1
750    except ValueError:
751        return None
752
753    if is_byte_range_valid(start, stop, length):
754        return ContentRange(units, start, stop, length, on_update=on_update)
755
756
757def quote_etag(etag, weak=False):
758    """Quote an etag.
759
760    :param etag: the etag to quote.
761    :param weak: set to `True` to tag it "weak".
762    """
763    if '"' in etag:
764        raise ValueError("invalid etag")
765    etag = '"%s"' % etag
766    if weak:
767        etag = "W/" + etag
768    return etag
769
770
771def unquote_etag(etag):
772    """Unquote a single etag:
773
774    >>> unquote_etag('W/"bar"')
775    ('bar', True)
776    >>> unquote_etag('"bar"')
777    ('bar', False)
778
779    :param etag: the etag identifier to unquote.
780    :return: a ``(etag, weak)`` tuple.
781    """
782    if not etag:
783        return None, None
784    etag = etag.strip()
785    weak = False
786    if etag.startswith(("W/", "w/")):
787        weak = True
788        etag = etag[2:]
789    if etag[:1] == etag[-1:] == '"':
790        etag = etag[1:-1]
791    return etag, weak
792
793
794def parse_etags(value):
795    """Parse an etag header.
796
797    :param value: the tag header to parse
798    :return: an :class:`~werkzeug.datastructures.ETags` object.
799    """
800    if not value:
801        return ETags()
802    strong = []
803    weak = []
804    end = len(value)
805    pos = 0
806    while pos < end:
807        match = _etag_re.match(value, pos)
808        if match is None:
809            break
810        is_weak, quoted, raw = match.groups()
811        if raw == "*":
812            return ETags(star_tag=True)
813        elif quoted:
814            raw = quoted
815        if is_weak:
816            weak.append(raw)
817        else:
818            strong.append(raw)
819        pos = match.end()
820    return ETags(strong, weak)
821
822
823def generate_etag(data):
824    """Generate an etag for some data."""
825    return md5(data).hexdigest()
826
827
828def parse_date(value):
829    """Parse one of the following date formats into a datetime object:
830
831    .. sourcecode:: text
832
833        Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123
834        Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
835        Sun Nov  6 08:49:37 1994       ; ANSI C's asctime() format
836
837    If parsing fails the return value is `None`.
838
839    :param value: a string with a supported date format.
840    :return: a :class:`datetime.datetime` object.
841    """
842    if value:
843        t = parsedate_tz(value.strip())
844        if t is not None:
845            try:
846                year = t[0]
847                # unfortunately that function does not tell us if two digit
848                # years were part of the string, or if they were prefixed
849                # with two zeroes.  So what we do is to assume that 69-99
850                # refer to 1900, and everything below to 2000
851                if year >= 0 and year <= 68:
852                    year += 2000
853                elif year >= 69 and year <= 99:
854                    year += 1900
855                return datetime(*((year,) + t[1:7])) - timedelta(seconds=t[-1] or 0)
856            except (ValueError, OverflowError):
857                return None
858
859
860def _dump_date(d, delim):
861    """Used for `http_date` and `cookie_date`."""
862    if d is None:
863        d = gmtime()
864    elif isinstance(d, datetime):
865        d = d.utctimetuple()
866    elif isinstance(d, (integer_types, float)):
867        d = gmtime(d)
868    return "%s, %02d%s%s%s%04d %02d:%02d:%02d GMT" % (
869        ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")[d.tm_wday],
870        d.tm_mday,
871        delim,
872        (
873            "Jan",
874            "Feb",
875            "Mar",
876            "Apr",
877            "May",
878            "Jun",
879            "Jul",
880            "Aug",
881            "Sep",
882            "Oct",
883            "Nov",
884            "Dec",
885        )[d.tm_mon - 1],
886        delim,
887        d.tm_year,
888        d.tm_hour,
889        d.tm_min,
890        d.tm_sec,
891    )
892
893
894def cookie_date(expires=None):
895    """Formats the time to ensure compatibility with Netscape's cookie
896    standard.
897
898    Accepts a floating point number expressed in seconds since the epoch in, a
899    datetime object or a timetuple.  All times in UTC.  The :func:`parse_date`
900    function can be used to parse such a date.
901
902    Outputs a string in the format ``Wdy, DD-Mon-YYYY HH:MM:SS GMT``.
903
904    :param expires: If provided that date is used, otherwise the current.
905    """
906    return _dump_date(expires, "-")
907
908
909def http_date(timestamp=None):
910    """Formats the time to match the RFC1123 date format.
911
912    Accepts a floating point number expressed in seconds since the epoch in, a
913    datetime object or a timetuple.  All times in UTC.  The :func:`parse_date`
914    function can be used to parse such a date.
915
916    Outputs a string in the format ``Wdy, DD Mon YYYY HH:MM:SS GMT``.
917
918    :param timestamp: If provided that date is used, otherwise the current.
919    """
920    return _dump_date(timestamp, " ")
921
922
923def parse_age(value=None):
924    """Parses a base-10 integer count of seconds into a timedelta.
925
926    If parsing fails, the return value is `None`.
927
928    :param value: a string consisting of an integer represented in base-10
929    :return: a :class:`datetime.timedelta` object or `None`.
930    """
931    if not value:
932        return None
933    try:
934        seconds = int(value)
935    except ValueError:
936        return None
937    if seconds < 0:
938        return None
939    try:
940        return timedelta(seconds=seconds)
941    except OverflowError:
942        return None
943
944
945def dump_age(age=None):
946    """Formats the duration as a base-10 integer.
947
948    :param age: should be an integer number of seconds,
949                a :class:`datetime.timedelta` object, or,
950                if the age is unknown, `None` (default).
951    """
952    if age is None:
953        return
954    if isinstance(age, timedelta):
955        # do the equivalent of Python 2.7's timedelta.total_seconds(),
956        # but disregarding fractional seconds
957        age = age.seconds + (age.days * 24 * 3600)
958
959    age = int(age)
960    if age < 0:
961        raise ValueError("age cannot be negative")
962
963    return str(age)
964
965
966def is_resource_modified(
967    environ, etag=None, data=None, last_modified=None, ignore_if_range=True
968):
969    """Convenience method for conditional requests.
970
971    :param environ: the WSGI environment of the request to be checked.
972    :param etag: the etag for the response for comparison.
973    :param data: or alternatively the data of the response to automatically
974                 generate an etag using :func:`generate_etag`.
975    :param last_modified: an optional date of the last modification.
976    :param ignore_if_range: If `False`, `If-Range` header will be taken into
977                            account.
978    :return: `True` if the resource was modified, otherwise `False`.
979
980    .. versionchanged:: 1.0.0
981        The check is run for methods other than ``GET`` and ``HEAD``.
982    """
983    if etag is None and data is not None:
984        etag = generate_etag(data)
985    elif data is not None:
986        raise TypeError("both data and etag given")
987
988    unmodified = False
989    if isinstance(last_modified, string_types):
990        last_modified = parse_date(last_modified)
991
992    # ensure that microsecond is zero because the HTTP spec does not transmit
993    # that either and we might have some false positives.  See issue #39
994    if last_modified is not None:
995        last_modified = last_modified.replace(microsecond=0)
996
997    if_range = None
998    if not ignore_if_range and "HTTP_RANGE" in environ:
999        # https://tools.ietf.org/html/rfc7233#section-3.2
1000        # A server MUST ignore an If-Range header field received in a request
1001        # that does not contain a Range header field.
1002        if_range = parse_if_range_header(environ.get("HTTP_IF_RANGE"))
1003
1004    if if_range is not None and if_range.date is not None:
1005        modified_since = if_range.date
1006    else:
1007        modified_since = parse_date(environ.get("HTTP_IF_MODIFIED_SINCE"))
1008
1009    if modified_since and last_modified and last_modified <= modified_since:
1010        unmodified = True
1011
1012    if etag:
1013        etag, _ = unquote_etag(etag)
1014        if if_range is not None and if_range.etag is not None:
1015            unmodified = parse_etags(if_range.etag).contains(etag)
1016        else:
1017            if_none_match = parse_etags(environ.get("HTTP_IF_NONE_MATCH"))
1018            if if_none_match:
1019                # https://tools.ietf.org/html/rfc7232#section-3.2
1020                # "A recipient MUST use the weak comparison function when comparing
1021                # entity-tags for If-None-Match"
1022                unmodified = if_none_match.contains_weak(etag)
1023
1024            # https://tools.ietf.org/html/rfc7232#section-3.1
1025            # "Origin server MUST use the strong comparison function when
1026            # comparing entity-tags for If-Match"
1027            if_match = parse_etags(environ.get("HTTP_IF_MATCH"))
1028            if if_match:
1029                unmodified = not if_match.is_strong(etag)
1030
1031    return not unmodified
1032
1033
1034def remove_entity_headers(headers, allowed=("expires", "content-location")):
1035    """Remove all entity headers from a list or :class:`Headers` object.  This
1036    operation works in-place.  `Expires` and `Content-Location` headers are
1037    by default not removed.  The reason for this is :rfc:`2616` section
1038    10.3.5 which specifies some entity headers that should be sent.
1039
1040    .. versionchanged:: 0.5
1041       added `allowed` parameter.
1042
1043    :param headers: a list or :class:`Headers` object.
1044    :param allowed: a list of headers that should still be allowed even though
1045                    they are entity headers.
1046    """
1047    allowed = set(x.lower() for x in allowed)
1048    headers[:] = [
1049        (key, value)
1050        for key, value in headers
1051        if not is_entity_header(key) or key.lower() in allowed
1052    ]
1053
1054
1055def remove_hop_by_hop_headers(headers):
1056    """Remove all HTTP/1.1 "Hop-by-Hop" headers from a list or
1057    :class:`Headers` object.  This operation works in-place.
1058
1059    .. versionadded:: 0.5
1060
1061    :param headers: a list or :class:`Headers` object.
1062    """
1063    headers[:] = [
1064        (key, value) for key, value in headers if not is_hop_by_hop_header(key)
1065    ]
1066
1067
1068def is_entity_header(header):
1069    """Check if a header is an entity header.
1070
1071    .. versionadded:: 0.5
1072
1073    :param header: the header to test.
1074    :return: `True` if it's an entity header, `False` otherwise.
1075    """
1076    return header.lower() in _entity_headers
1077
1078
1079def is_hop_by_hop_header(header):
1080    """Check if a header is an HTTP/1.1 "Hop-by-Hop" header.
1081
1082    .. versionadded:: 0.5
1083
1084    :param header: the header to test.
1085    :return: `True` if it's an HTTP/1.1 "Hop-by-Hop" header, `False` otherwise.
1086    """
1087    return header.lower() in _hop_by_hop_headers
1088
1089
1090def parse_cookie(header, charset="utf-8", errors="replace", cls=None):
1091    """Parse a cookie from a string or WSGI environ.
1092
1093    The same key can be provided multiple times, the values are stored
1094    in-order. The default :class:`MultiDict` will have the first value
1095    first, and all values can be retrieved with
1096    :meth:`MultiDict.getlist`.
1097
1098    :param header: The cookie header as a string, or a WSGI environ dict
1099        with a ``HTTP_COOKIE`` key.
1100    :param charset: The charset for the cookie values.
1101    :param errors: The error behavior for the charset decoding.
1102    :param cls: A dict-like class to store the parsed cookies in.
1103        Defaults to :class:`MultiDict`.
1104
1105    .. versionchanged:: 1.0.0
1106        Returns a :class:`MultiDict` instead of a
1107        ``TypeConversionDict``.
1108
1109    .. versionchanged:: 0.5
1110       Returns a :class:`TypeConversionDict` instead of a regular dict.
1111       The ``cls`` parameter was added.
1112    """
1113    if isinstance(header, dict):
1114        header = header.get("HTTP_COOKIE", "")
1115    elif header is None:
1116        header = ""
1117
1118    # On Python 3, PEP 3333 sends headers through the environ as latin1
1119    # decoded strings. Encode strings back to bytes for parsing.
1120    if isinstance(header, text_type):
1121        header = header.encode("latin1", "replace")
1122
1123    if cls is None:
1124        cls = MultiDict
1125
1126    def _parse_pairs():
1127        for key, val in _cookie_parse_impl(header):
1128            key = to_unicode(key, charset, errors, allow_none_charset=True)
1129            if not key:
1130                continue
1131            val = to_unicode(val, charset, errors, allow_none_charset=True)
1132            yield try_coerce_native(key), val
1133
1134    return cls(_parse_pairs())
1135
1136
1137def dump_cookie(
1138    key,
1139    value="",
1140    max_age=None,
1141    expires=None,
1142    path="/",
1143    domain=None,
1144    secure=False,
1145    httponly=False,
1146    charset="utf-8",
1147    sync_expires=True,
1148    max_size=4093,
1149    samesite=None,
1150):
1151    """Creates a new Set-Cookie header without the ``Set-Cookie`` prefix
1152    The parameters are the same as in the cookie Morsel object in the
1153    Python standard library but it accepts unicode data, too.
1154
1155    On Python 3 the return value of this function will be a unicode
1156    string, on Python 2 it will be a native string.  In both cases the
1157    return value is usually restricted to ascii as the vast majority of
1158    values are properly escaped, but that is no guarantee.  If a unicode
1159    string is returned it's tunneled through latin1 as required by
1160    PEP 3333.
1161
1162    The return value is not ASCII safe if the key contains unicode
1163    characters.  This is technically against the specification but
1164    happens in the wild.  It's strongly recommended to not use
1165    non-ASCII values for the keys.
1166
1167    :param max_age: should be a number of seconds, or `None` (default) if
1168                    the cookie should last only as long as the client's
1169                    browser session.  Additionally `timedelta` objects
1170                    are accepted, too.
1171    :param expires: should be a `datetime` object or unix timestamp.
1172    :param path: limits the cookie to a given path, per default it will
1173                 span the whole domain.
1174    :param domain: Use this if you want to set a cross-domain cookie. For
1175                   example, ``domain=".example.com"`` will set a cookie
1176                   that is readable by the domain ``www.example.com``,
1177                   ``foo.example.com`` etc. Otherwise, a cookie will only
1178                   be readable by the domain that set it.
1179    :param secure: The cookie will only be available via HTTPS
1180    :param httponly: disallow JavaScript to access the cookie.  This is an
1181                     extension to the cookie standard and probably not
1182                     supported by all browsers.
1183    :param charset: the encoding for unicode values.
1184    :param sync_expires: automatically set expires if max_age is defined
1185                         but expires not.
1186    :param max_size: Warn if the final header value exceeds this size. The
1187        default, 4093, should be safely `supported by most browsers
1188        <cookie_>`_. Set to 0 to disable this check.
1189    :param samesite: Limits the scope of the cookie such that it will
1190        only be attached to requests if those requests are same-site.
1191
1192    .. _`cookie`: http://browsercookielimits.squawky.net/
1193
1194    .. versionchanged:: 1.0.0
1195        The string ``'None'`` is accepted for ``samesite``.
1196    """
1197    key = to_bytes(key, charset)
1198    value = to_bytes(value, charset)
1199
1200    if path is not None:
1201        from .urls import iri_to_uri
1202
1203        path = iri_to_uri(path, charset)
1204    domain = _make_cookie_domain(domain)
1205    if isinstance(max_age, timedelta):
1206        max_age = (max_age.days * 60 * 60 * 24) + max_age.seconds
1207    if expires is not None:
1208        if not isinstance(expires, string_types):
1209            expires = cookie_date(expires)
1210    elif max_age is not None and sync_expires:
1211        expires = to_bytes(cookie_date(time() + max_age))
1212
1213    if samesite is not None:
1214        samesite = samesite.title()
1215
1216        if samesite not in {"Strict", "Lax", "None"}:
1217            raise ValueError("SameSite must be 'Strict', 'Lax', or 'None'.")
1218
1219    buf = [key + b"=" + _cookie_quote(value)]
1220
1221    # XXX: In theory all of these parameters that are not marked with `None`
1222    # should be quoted.  Because stdlib did not quote it before I did not
1223    # want to introduce quoting there now.
1224    for k, v, q in (
1225        (b"Domain", domain, True),
1226        (b"Expires", expires, False),
1227        (b"Max-Age", max_age, False),
1228        (b"Secure", secure, None),
1229        (b"HttpOnly", httponly, None),
1230        (b"Path", path, False),
1231        (b"SameSite", samesite, False),
1232    ):
1233        if q is None:
1234            if v:
1235                buf.append(k)
1236            continue
1237
1238        if v is None:
1239            continue
1240
1241        tmp = bytearray(k)
1242        if not isinstance(v, (bytes, bytearray)):
1243            v = to_bytes(text_type(v), charset)
1244        if q:
1245            v = _cookie_quote(v)
1246        tmp += b"=" + v
1247        buf.append(bytes(tmp))
1248
1249    # The return value will be an incorrectly encoded latin1 header on
1250    # Python 3 for consistency with the headers object and a bytestring
1251    # on Python 2 because that's how the API makes more sense.
1252    rv = b"; ".join(buf)
1253    if not PY2:
1254        rv = rv.decode("latin1")
1255
1256    # Warn if the final value of the cookie is larger than the limit. If the
1257    # cookie is too large, then it may be silently ignored by the browser,
1258    # which can be quite hard to debug.
1259    cookie_size = len(rv)
1260
1261    if max_size and cookie_size > max_size:
1262        value_size = len(value)
1263        warnings.warn(
1264            'The "{key}" cookie is too large: the value was {value_size} bytes'
1265            " but the header required {extra_size} extra bytes. The final size"
1266            " was {cookie_size} bytes but the limit is {max_size} bytes."
1267            " Browsers may silently ignore cookies larger than this.".format(
1268                key=key,
1269                value_size=value_size,
1270                extra_size=cookie_size - value_size,
1271                cookie_size=cookie_size,
1272                max_size=max_size,
1273            ),
1274            stacklevel=2,
1275        )
1276
1277    return rv
1278
1279
1280def is_byte_range_valid(start, stop, length):
1281    """Checks if a given byte content range is valid for the given length.
1282
1283    .. versionadded:: 0.7
1284    """
1285    if (start is None) != (stop is None):
1286        return False
1287    elif start is None:
1288        return length is None or length >= 0
1289    elif length is None:
1290        return 0 <= start < stop
1291    elif start >= stop:
1292        return False
1293    return 0 <= start < length
1294
1295
1296# circular dependencies
1297from .datastructures import Accept
1298from .datastructures import Authorization
1299from .datastructures import ContentRange
1300from .datastructures import ContentSecurityPolicy
1301from .datastructures import ETags
1302from .datastructures import HeaderSet
1303from .datastructures import IfRange
1304from .datastructures import MultiDict
1305from .datastructures import Range
1306from .datastructures import RequestCacheControl
1307from .datastructures import WWWAuthenticate
1308