1"""Parse, manipulate and render cookies in a convenient way.
2
3Copyright (c) 2011-2014, Sasha Hart.
4
5Permission is hereby granted, free of charge, to any person obtaining a copy of
6this software and associated documentation files (the "Software"), to deal in
7the Software without restriction, including without limitation the rights to
8use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
9of the Software, and to permit persons to whom the Software is furnished to do
10so, subject to the following conditions:
11
12The above copyright notice and this permission notice shall be included in all
13copies or substantial portions of the Software.
14
15THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21SOFTWARE.
22"""
23__version__ = "2.2.1"
24import re
25import datetime
26import logging
27import sys
28from unicodedata import normalize
29if sys.version_info >= (3, 0, 0):  # pragma: no cover
30    from urllib.parse import (
31        quote as _default_quote, unquote as _default_unquote)
32    basestring = str
33    long = int
34else:  # pragma: no cover
35    from urllib import (
36        quote as _default_quote, unquote as _default_unquote)
37
38
39def _total_seconds(td):
40    """Wrapper to work around lack of .total_seconds() method in Python 3.1.
41    """
42    if hasattr(td, "total_seconds"):
43        return td.total_seconds()
44    return td.days * 3600 * 24 + td.seconds + td.microseconds / 100000.0
45
# The safe= character sets below were worked out empirically (see
# test_encoding_assumptions). They differ because cookie-octet is stricter
# than the more liberal extension-av production.
def default_cookie_quote(item):
    """URL-quote ``item`` for use as a cookie value (cookie-octet)."""
    return _default_quote(item, safe='!#$%&\'()*+/:<=>?@[]^`{|}~')


def default_extension_quote(item):
    """URL-quote ``item`` for use as an extension attribute value."""
    return _default_quote(item, safe=' !"#$%&\'()*+,/:<=>?@[\\]^`{|}~')


default_unquote = _default_unquote
56
57
58def _report_invalid_cookie(data):
59    "How this module logs a bad cookie when exception suppressed"
60    logging.error("invalid Cookie: %r", data)
61
62
63def _report_unknown_attribute(name):
64    "How this module logs an unknown attribute when exception suppressed"
65    logging.error("unknown Cookie attribute: %r", name)
66
67
68def _report_invalid_attribute(name, value, reason):
69    "How this module logs a bad attribute when exception suppressed"
70    logging.error("invalid Cookie attribute (%s): %r=%r", reason, name, value)
71
72
class CookieError(Exception):
    """Common ancestor of every exception defined in this module.

    Catch this to handle all of the module's errors in one place. The
    constructor takes no arguments.
    """
    def __init__(self):
        super(CookieError, self).__init__()
79
80
class InvalidCookieError(CookieError):
    """Signals a cookie which is syntactically invalid (in any way that has
    possibly serious implications), whether met while parsing or while
    constructing one.

    ``data`` holds the offending input, if any, and ``message`` a short
    explanation; str() shows the repr of each, message first.
    """
    def __init__(self, data=None, message=""):
        CookieError.__init__(self)
        self.message, self.data = message, data

    def __str__(self):
        shown = (self.message, self.data)
        return '%r %r' % shown
92
93
class InvalidCookieAttributeError(CookieError):
    """Signals an attempt to set an invalid attribute on a Cookie.

    ``name`` and ``value`` identify the attribute (``name`` may be None when
    only the raw text is known) and ``reason``, if given, prefixes the
    string form of the error.
    """
    def __init__(self, name, value, reason=None):
        CookieError.__init__(self)
        self.name, self.value, self.reason = name, value, reason

    def __str__(self):
        if self.reason:
            prefix = "%s: " % self.reason
        else:
            prefix = ""
        if self.name is not None:
            return '%s%r = %r' % (prefix, self.name, self.value)
        return '%s%r' % (prefix, self.value)
108
109
class Definitions(object):
    """Namespace to hold definitions used in cookie parsing (mostly pieces of
    regex).

    These are separated out for individual testing against examples and RFC
    grammar, and kept here to avoid cluttering other namespaces.

    The upper-case string attributes are regex source fragments which are
    combined with str.format / % interpolation; the attributes at the bottom
    of the class (the ``*_RE`` names and ``EOL``) are compiled patterns.
    """
    # Most of the following are set down or cited in RFC 6265 4.1.1

    # COOKIE_NAME, COOKIE_OCTET and EXTENSION_AV are the *contents* of a
    # character class: they are always interpolated between [ and ] below.

    # This is the grammar's 'cookie-name' defined as 'token' per RFC 2616 2.2.
    COOKIE_NAME = r"!#$%&'*+\-.0-9A-Z^_`a-z|~"

    # 'cookie-octet' - as used twice in definition of 'cookie-value'
    COOKIE_OCTET = r"\x21\x23-\x2B\--\x3A\x3C-\x5B\]-\x7E"

    # extension-av - also happens to be a superset of cookie-av and path-value
    EXTENSION_AV = """ !"#$%&\\\\'()*+,\-./0-9:<=>?@A-Z[\\]^_`a-z{|}~"""

    # This is for the first pass parse on a Set-Cookie: response header. It
    # includes cookie-value, cookie-pair, set-cookie-string, cookie-av.
    # extension-av is used to extract the chunk containing variable-length,
    # unordered attributes. The second pass then uses ATTR to break out each
    # attribute and extract it appropriately.
    # As compared with the RFC production grammar, it is much more liberal with
    # space characters, in order not to break on data made by barbarians.
    # NOTE(review): the comment inside the pattern mentions \Z, but the
    # pattern text contains no end anchor - confirm whether trailing garbage
    # is meant to be rejected here.
    SET_COOKIE_HEADER = """(?x) # Verbose mode
        ^(?:Set-Cookie:[ ]*)?
        (?P<name>[{name}:]+)
        [ ]*=[ ]*

        # Accept anything in quotes - this is not RFC 6265, but might ease
        # working with older code that half-heartedly works with 2965. Accept
        # spaces inside tokens up front, so we can deal with that error one
        # cookie at a time, after this first pass.
        (?P<value>(?:"{value}*")|(?:[{cookie_octet} ]*))
        [ ]*

        # Extract everything up to the end in one chunk, which will be broken
        # down in the second pass. Don't match if there's any unexpected
        # garbage at the end (hence the \Z; $ matches before newline).
        (?P<attrs>(?:;[ ]*[{cookie_av}]+)*)
        """.format(name=COOKIE_NAME, cookie_av=EXTENSION_AV + ";",
                   cookie_octet=COOKIE_OCTET, value="[^;]")

    # Now we specify the individual patterns for the attribute extraction pass
    # of Set-Cookie parsing (mapping to *-av in the RFC grammar). Things which
    # don't match any of these but are in extension-av are simply ignored;
    # anything else should be rejected in the first pass (SET_COOKIE_HEADER).

    # Max-Age attribute. These are digits, they are expressed this way
    # because that is how they are expressed in the RFC.
    MAX_AGE_AV = "Max-Age=(?P<max_age>[\x30-\x39]+)"

    # Domain attribute; a label is one part of the domain
    # A label starts and ends with a letter or digit; hyphens may only appear
    # in between.
    LABEL = '{let_dig}(?:(?:{let_dig_hyp}+)?{let_dig})?'.format(
            let_dig="[A-Za-z0-9]", let_dig_hyp="[0-9A-Za-z\-]")
    DOMAIN = "\.?(?:{label}\.)*(?:{label})".format(label=LABEL)
    # Parse initial period though it's wrong, as RFC 6265 4.1.2.3
    DOMAIN_AV = "Domain=(?P<domain>{domain})".format(domain=DOMAIN)

    # Path attribute. We don't take special care with quotes because
    # they are hardly used, they don't allow invalid characters per RFC 6265,
    # and " is a valid character to occur in a path value anyway.
    PATH_AV = 'Path=(?P<path>[%s]+)' % EXTENSION_AV

    # Expires attribute. This gets big because of date parsing, which needs to
    # support a large range of formats, so it's broken down into pieces.

    # Generate a mapping of months to use in render/parse, to avoid
    # localizations which might be produced by strftime (e.g. %a -> Mayo)
    month_list = ["January", "February", "March", "April", "May", "June",
                  "July", "August", "September", "October", "November",
                  "December"]
    month_abbr_list = [item[:3] for item in month_list]
    # Maps lower-cased month names, both full and 3-letter, to 1-based month
    # numbers. Note the loop variables stay behind as class attributes.
    month_numbers = {}
    for index, name in enumerate(month_list):
        name = name.lower()
        month_numbers[name[:3]] = index + 1
        month_numbers[name] = index + 1
    # Use the same list to create regexps for months.
    MONTH_SHORT = "(?:" + "|".join(item[:3] for item in month_list) + ")"
    MONTH_LONG = "(?:" + "|".join(item for item in month_list) + ")"

    # Same drill with weekdays, for the same reason.
    # The list starts at Monday, so weekday_abbr_list can be indexed with
    # datetime's weekday() (Monday == 0).
    weekday_list = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday",
                    "Saturday", "Sunday"]
    weekday_abbr_list = [item[:3] for item in weekday_list]
    WEEKDAY_SHORT = "(?:" + "|".join(item[:3] for item in weekday_list) + ")"
    WEEKDAY_LONG = "(?:" + "|".join(item for item in weekday_list) + ")"

    # This regexp tries to exclude obvious nonsense in the first pass.
    DAY_OF_MONTH = "(?:[0 ]?[1-9]|[12][0-9]|[3][01])(?!\d)"

    # Here is the overall date format; ~99% of cases fold into one generalized
    # syntax like RFC 1123, and many of the rest use asctime-like formats.
    # (see test_date_formats for a full exegesis)
    # The two alternatives use distinct group names: plain names (year,
    # month, ...) for the RFC 1123 family, and names ending in "2" for the
    # asctime form. The weekday and seconds groups are optional.
    DATE = """(?ix) # Case-insensitive mode, verbose mode
        (?:
            (?P<weekday>(?:{wdy}|{weekday}),[ ])?
            (?P<day>{day})
            [ \-]
            (?P<month>{mon}|{month})
            [ \-]
            # This does not support 3-digit years, which are rare and don't
            # seem to have one canonical interpretation.
            (?P<year>(?:\d{{2}}|\d{{4}}))
            [ ]
            # HH:MM[:SS] GMT
            (?P<hour>(?:[ 0][0-9]|[01][0-9]|2[0-3]))
            :(?P<minute>(?:0[0-9]|[1-5][0-9]))
            (?::(?P<second>\d{{2}}))?
            [ ]GMT
        |
            # Support asctime format, e.g. 'Sun Nov  6 08:49:37 1994'
            (?P<weekday2>{wdy})[ ]
            (?P<month2>{mon})[ ]
            (?P<day2>[ ]\d|\d\d)[ ]
            (?P<hour2>\d\d):
            (?P<minute2>\d\d)
            (?::(?P<second2>\d\d)?)[ ]
            (?P<year2>\d\d\d\d)
            (?:[ ]GMT)?  # GMT (Amazon)
        )
    """
    DATE = DATE.format(wdy=WEEKDAY_SHORT, weekday=WEEKDAY_LONG,
                       day=DAY_OF_MONTH, mon=MONTH_SHORT, month=MONTH_LONG)

    EXPIRES_AV = "Expires=(?P<expires>%s)" % DATE

    # Now we're ready to define a regexp which can match any number of attrs
    # in the variable portion of the Set-Cookie header (like the unnamed latter
    # part of set-cookie-string in the grammar). Each regexp of any complexity
    # is split out for testing by itself.
    # Because EXPIRES_AV embeds DATE, a match on Expires also fills in the
    # date sub-groups (weekday, day, ...) alongside 'expires'.
    ATTR = """(?ix)  # Case-insensitive mode, verbose mode
        # Always start with start or semicolon and any number of spaces
        (?:^|;)[ ]*(?:
            # Big disjunction of attribute patterns (*_AV), with named capture
            # groups to extract everything in one pass. Anything unrecognized
            # goes in the 'unrecognized' capture group for reporting.
            {expires}
            |{max_age}
            |{domain}
            |{path}
            |(?P<secure>Secure=?)
            |(?P<httponly>HttpOnly=?)
            |Version=(?P<version>[{stuff}]+)
            |Comment=(?P<comment>[{stuff}]+)
            |(?P<unrecognized>[{stuff}]+)
        )
        # End with any number of spaces not matched by the preceding (up to the
        # next semicolon) - but do not capture these.
        [ ]*
    """.format(expires=EXPIRES_AV, max_age=MAX_AGE_AV, domain=DOMAIN_AV,
               path=PATH_AV, stuff=EXTENSION_AV)

    # For request data ("Cookie: ") parsing, with finditer cf. RFC 6265 4.2.1
    # Each match carries either 'name' and 'value', or 'invalid' for a chunk
    # that is too far off-spec to interpret.
    COOKIE = """(?x) # Verbose mode
        (?: # Either something close to valid...

            # Match starts at start of string, or at separator.
            # Split on comma for the sake of legacy code (RFC 2109/2965),
            # and since it only breaks when invalid commas are put in values.
            # see http://bugs.python.org/issue1210326
            (?:^Cookie:|^|;|,)

            # 1 or more valid token characters making up the name (captured)
            # with colon added to accommodate users of some old Java apps, etc.
            [ ]*
            (?P<name>[{name}:]+)
            [ ]*
            =
            [ ]*

            # While 6265 provides only for cookie-octet, this allows just about
            # anything in quotes (like in RFC 2616); people stuck on RFC
            # 2109/2965 will expect it to work this way. The non-quoted token
            # allows interior spaces ('\x20'), which is not valid. In both
            # cases, the decision of whether to allow these is downstream.
            (?P<value>
                ["][^\00-\31"]*["]
                |
                [{value}]
                |
                [{value}][{value} ]*[{value}]+
                |
                )

        # ... Or something way off-spec - extract to report and move on
        |
            (?P<invalid>[^;]+)
        )
        # Trailing spaces after value
        [ ]*
        # Must end with ; or be at end of string (don't consume this though,
        # so use the lookahead assertion ?=
        (?=;|\Z)
    """.format(name=COOKIE_NAME, value=COOKIE_OCTET)

    # Precompile externally useful definitions into re objects.
    # COOKIE_NAME_RE and PATH_RE are anchored at both ends. DOMAIN_RE is built
    # from DOMAIN as-is, which has no end anchor, so .match() on it only
    # checks a prefix of the input.
    COOKIE_NAME_RE = re.compile("^([%s:]+)\Z" % COOKIE_NAME)
    COOKIE_RE = re.compile(COOKIE)
    SET_COOKIE_HEADER_RE = re.compile(SET_COOKIE_HEADER)
    ATTR_RE = re.compile(ATTR)
    DATE_RE = re.compile(DATE)
    DOMAIN_RE = re.compile(DOMAIN)
    PATH_RE = re.compile('^([%s]+)\Z' % EXTENSION_AV)
    # Line separator for splitting multi-line header data: CRLF or bare LF.
    EOL = re.compile("(?:\r\n|\n)")
317
318
def strip_spaces_and_quotes(value):
    """Trim surrounding whitespace, then at most one enclosing pair of
    double quotes, and return what is left.

    Falsy input (None, empty string) and input that is empty after trimming
    both yield the empty string ''.

    Used to prepare cookie values, path, and domain attributes in a way which
    tolerates simple formatting mistakes and standards variations.
    """
    if not value:
        return ""
    trimmed = value.strip()
    # A lone '"' is not a pair, hence the length check.
    if len(trimmed) > 1 and trimmed[0] == trimmed[-1] == '"':
        trimmed = trimmed[1:-1]
    return trimmed or ""
332
333
def parse_string(data, unquote=default_unquote):
    """Undo URL-encoding on ``data`` and return the result as text.

    None is passed through unchanged. ``unquote`` is the function used to
    reverse the URL-encoding.
    """
    if data is None:
        return None

    # unquote has opposite needs on the two major Python versions: on
    # Python 3 it crashes on bytes objects, so bytes are decoded to str
    # first; on Python 2 the same decode makes unquote butcher the data, so
    # bytes are left alone there. (Python 2's unquote also crashes on chars
    # beyond ASCII, so encoders had better not emit any.)
    on_python3 = sys.version_info > (3, 0, 0)  # pragma: no cover
    if isinstance(data, bytes) and on_python3:
        data = data.decode('ascii')

    recovered = unquote(data)
    # Python 2 may hand back correctly URL-decoded *bytes*; these would not
    # normalize as unicode or compare equal to the original, so decode them.
    if isinstance(recovered, bytes):
        return recovered.decode('utf-8')
    return recovered
358
359
def parse_date(value):
    """Turn a date string in an RFC 1123-like or asctime-like format into a
    naive (timezone-less) Python datetime object.

    Returns None when ``value`` is empty or does not match the date pattern.
    """
    # The regex does the heavy lifting, including enforcing 2 or 4 digit years
    found = Definitions.DATE_RE.match(value) if value else None
    if not found:
        return None
    groups = found.groupdict()
    # The RFC 1123 family fills the plain group names; the asctime
    # alternative fills the same names with a "2" suffix.
    suffix = "" if groups['year'] else "2"
    data = {}
    for field in ('year', 'month', 'day', 'hour', 'minute', 'second'):
        data[field] = groups[field + suffix]
    # Interpret lame 2-digit years - base the cutoff on UNIX epoch, in case
    # someone sets a '70' cookie meaning 'distant past'. This won't break for
    # 58 years and people who use 2-digit years are asking for it anyway.
    year_text = data['year']
    if len(year_text) == 2:
        century = "20" if int(year_text) < 70 else "19"
        year_text = century + year_text
    # Clamp to [1900, 9999]: strftime has min 1900, datetime has max 9999
    data['year'] = max(1900, min(int(year_text), 9999))
    # The remaining numeric fields become integers; a missing one counts as 0
    for field in ('day', 'hour', 'minute', 'second'):
        captured = data[field]
        data[field] = int(0 if captured is None else captured)
    # datetime wants the month as a number, so look the name up
    data['month'] = Definitions.month_numbers[data['month'].lower()]
    return datetime.datetime(**data)
400
401
def parse_domain(value):
    """Parse and validate an incoming Domain attribute value.

    Surrounding whitespace and one enclosing pair of double quotes are
    removed first. An empty result is returned as-is, without validation.

    Raises AssertionError when a non-empty value is rejected by
    valid_domain; exceptions raised by valid_domain itself propagate.
    """
    value = strip_spaces_and_quotes(value)
    # Raise explicitly instead of using an `assert` statement: asserts are
    # compiled away under `python -O`, which would let invalid domains pass.
    # AssertionError is kept so callers see the same exception type as before.
    if value and not valid_domain(value):
        raise AssertionError("invalid cookie domain: %r" % (value,))
    return value
409
410
def parse_path(value):
    """Parse and validate an incoming Path attribute value.

    Surrounding whitespace and one enclosing pair of double quotes are
    removed first; the result (even if empty) must then pass valid_path.

    Raises AssertionError when the value is rejected by valid_path;
    exceptions raised by valid_path itself propagate.
    """
    value = strip_spaces_and_quotes(value)
    # Raise explicitly instead of using an `assert` statement: asserts are
    # compiled away under `python -O`, which would let invalid paths pass.
    # AssertionError is kept so callers see the same exception type as before.
    if not valid_path(value):
        raise AssertionError("invalid cookie path: %r" % (value,))
    return value
417
418
def parse_value(value, allow_spaces=True, unquote=default_unquote):
    """Process a cookie value.

    None is passed through unchanged. Otherwise surrounding whitespace and
    one enclosing pair of double quotes are removed, and the remainder is
    URL-decoded with ``unquote``.

    Raises AssertionError if ``allow_spaces`` is false and the decoded value
    contains a space.
    """
    if value is None:
        return None
    value = strip_spaces_and_quotes(value)
    value = parse_string(value, unquote=unquote)
    # Raise explicitly instead of using an `assert` statement: asserts are
    # compiled away under `python -O`, which would silently disable the check.
    # AssertionError is kept so callers see the same exception type as before.
    if not allow_spaces and ' ' in value:
        raise AssertionError("space in cookie value: %r" % (value,))
    return value
428
429
def valid_name(name):
    """Validate a cookie name string.

    Bytes are decoded as ASCII first. The name must consist solely of the
    characters accepted by Definitions.COOKIE_NAME_RE and must not begin
    with "$".
    """
    text = name.decode('ascii') if isinstance(name, bytes) else name
    well_formed = Definitions.COOKIE_NAME_RE.match(text) is not None
    # $identifiers are rejected: they belong to an obsolete and highly
    # complex standard which is never used, and this module does not
    # support them.
    return well_formed and text[0] != "$"
441
442
def valid_value(value, quote=default_cookie_quote, unquote=default_unquote):
    """Validate a cookie value string.

    A value is valid when it survives an encode/decode round trip through
    the given quote and unquote functions, which makes this check generic
    across quoting schemes. None is never valid. If you use different
    quote/unquote functions, use something like this as a replacement for
    valid_value::

        my_valid_value = lambda value: valid_value(value, quote=my_quote,
                                                          unquote=my_unquote)
    """
    if value is None:
        return False

    def canonical(item):
        # The round trip can produce a different but equivalent unicode
        # form, so text is normalized before comparing; bytes are left as is.
        if isinstance(item, bytes):
            return item
        return normalize("NFKD", item)

    # Round trip: if nothing is lost here, nothing will be lost silently
    # later when the cookie is rendered and parsed back.
    round_tripped = parse_string(encode_cookie_value(value, quote=quote),
                                 unquote=unquote)
    after = canonical(round_tripped)
    before = canonical(value)
    return after == before
474
475
def valid_date(date):
    """Validate an expires datetime object.

    Returns True for a naive datetime-like object, or an aware one whose UTC
    offset is within about a second of zero in either direction. Returns
    False for anything without a ``tzinfo`` attribute and for any other
    offset, whether ahead of or behind UTC.
    """
    # We want something that acts like a datetime. In particular,
    # strings indicate a failure to parse down to an object and ints are
    # nonstandard and ambiguous at best.
    if not hasattr(date, 'tzinfo'):
        return False
    if date.tzinfo is None:
        return True
    offset = date.utcoffset()
    # A tzinfo whose utcoffset() is None leaves the object naive.
    if offset is None:
        return True
    # Relevant RFCs define UTC as 'close enough' to GMT, and the maximum
    # difference between UTC and GMT is often stated to be less than a second.
    # Compare the magnitude: offsets behind UTC are negative and must not
    # pass just because they are numerically small.
    return abs(offset) < datetime.timedelta(seconds=1.1)
488
489
def valid_domain(domain):
    """Validate a cookie domain ASCII string.

    Returns True only when the *entire* string is a domain as described by
    Definitions.DOMAIN (dot-separated labels, with an optional leading
    period), False otherwise.

    Raises a UnicodeError subclass if the domain cannot be stored as ASCII.
    """
    # Using encoding on domain would confuse browsers into not sending cookies.
    # Generate a Unicode error up front if it can't store as ASCII.
    domain.encode('ascii')
    # Domains starting with periods are not RFC-valid, but this is very common
    # in existing cookies, so they should still parse with DOMAIN_AV.
    # Anchor at the end as well as the start: matching only a prefix would
    # accept trailing garbage (spaces, slashes, even CR/LF) after a valid
    # domain.
    anchored = r"(?:%s)\Z" % Definitions.DOMAIN
    return re.match(anchored, domain) is not None
500
501
def valid_path(value):
    """Validate a cookie path ASCII string.

    The path must start with "/" and consist solely of characters accepted
    by Definitions.PATH_RE. Encoding errors propagate if the value cannot
    be stored as ASCII.
    """
    # Fail up front with a Unicode error if the path isn't ASCII.
    value.encode("ascii")
    # Cookies without leading slash will likely be ignored, so reject ASAP.
    if not value or value[0] != "/":
        return False
    return Definitions.PATH_RE.match(value) is not None
512
513
def valid_max_age(number):
    """Validate a cookie Max-Age.

    Accepts a non-negative whole number, or a string which converts to one.
    """
    if isinstance(number, basestring):
        try:
            number = long(number)
        except (ValueError, TypeError):
            # Not an integer literal at all
            return False
    non_negative_whole = number >= 0 and number % 1 == 0
    return True if non_negative_whole else False
524
525
def encode_cookie_value(data, quote=default_cookie_quote):
    """URL-encode strings to make them safe for a cookie value.

    None is passed through unchanged. Text is encoded to UTF-8 bytes before
    being handed to ``quote``; bytes are handed over as they are.

    By default this uses urllib quoting, as used in many other cookie
    implementations and in other Python code, instead of an ad hoc escaping
    mechanism which includes backslashes (these also being illegal chars in RFC
    6265).
    """
    if data is None:
        return None

    # Text goes to UTF-8 bytes so quote won't crash on non-ASCII. Bytes must
    # be skipped: encoding them is nonsense, crashing on Python 3 always and
    # on Python 2 whenever they contain non-ASCII.
    raw = data if isinstance(data, bytes) else data.encode("utf-8")

    # The quoted result is returned as-is, not forced to bytes, so that
    # downstream gets the proper string API and encoding happens just once.
    return quote(raw)
550
551
def encode_extension_av(data, quote=default_extension_quote):
    """URL-encode strings to make them safe for an extension-av
    (extension attribute value): <any CHAR except CTLs or ";">

    Falsy input (None, empty string) yields the empty string.
    """
    return quote(data) if data else ''
559
560
def render_date(date):
    """Render a date (e.g. an Expires value) per RFCs 6265/2616/1123.

    Returns None for a falsy ``date``; otherwise a string such as
    'Wed, 01 Jan 2020 00:00:00 GMT'.

    Raises AssertionError if ``date`` is rejected by valid_date.

    Don't give this localized (timezone-aware) datetimes. If you use them,
    convert them to GMT before passing them to this. There are too many
    conversion corner cases to handle this universally.
    """
    if not date:
        return None
    # Raise explicitly instead of using an `assert` statement: asserts are
    # compiled away under `python -O`, which would let invalid dates through.
    # AssertionError is kept so callers see the same exception type as before.
    if not valid_date(date):
        raise AssertionError("invalid cookie date: %r" % (date,))
    # Avoid %a and %b, which can change with locale, breaking compliance
    weekday = Definitions.weekday_abbr_list[date.weekday()]
    month = Definitions.month_abbr_list[date.month - 1]
    # strftime leaves the {day}/{month} placeholders alone; they are filled
    # in afterwards with the locale-independent names chosen above.
    return date.strftime("{day}, %d {month} %Y %H:%M:%S GMT"
                         ).format(day=weekday, month=month)
576
577
def render_domain(domain):
    """Render a Domain attribute value, dropping one leading period.

    Returns None for a falsy ``domain``.
    """
    if not domain:
        return None
    return domain[1:] if domain[0] == '.' else domain
584
585
def _parse_request(header_data, ignore_bad_cookies=False):
    """Turn one or more lines of 'Cookie:' header data into a dict mapping
    each cookie name to the list of raw string values seen for that name,
    in order of appearance.

    Anything which cannot be interpreted raises InvalidCookieError, unless
    ``ignore_bad_cookies`` is true, in which case it is logged and skipped.
    """
    cookies_dict = {}
    for line in Definitions.EOL.split(header_data.strip()):
        found = list(Definitions.COOKIE_RE.finditer(line))
        for item in found:
            junk = item.group('invalid')
            if junk:
                if not ignore_bad_cookies:
                    raise InvalidCookieError(data=junk)
                _report_invalid_cookie(junk)
                continue
            raw_value = item.group('value').strip('"')
            cookies_dict.setdefault(item.group('name'), []).append(raw_value)
        # A line which yields no matches at all is bad as a whole
        if not found:
            if not ignore_bad_cookies:
                raise InvalidCookieError(data=line)
            _report_invalid_cookie(line)
    return cookies_dict
613
614
def parse_one_response(line, ignore_bad_cookies=False,
                       ignore_bad_attributes=True):
    """Turn one 'Set-Cookie:' line into a dict mapping attribute names to
    attribute values (raw strings).

    The result always has 'name' and 'value' keys, plus one key per
    recognized attribute present in the line. The unary 'secure' and
    'httponly' attributes are mapped to True.

    Returns None, after logging, when the line is not a valid cookie and
    ``ignore_bad_cookies`` is true; raises InvalidCookieError otherwise.
    Unrecognized attributes are logged and skipped when
    ``ignore_bad_attributes`` is true; otherwise they raise
    InvalidCookieAttributeError.
    """
    # Basic validation, extract name/value/attrs-chunk
    header = Definitions.SET_COOKIE_HEADER_RE.match(line)
    if not header:
        if not ignore_bad_cookies:
            raise InvalidCookieError(data=line)
        _report_invalid_cookie(line)
        return None
    cookie_dict = {
        'name': header.group('name'),
        'value': header.group('value')}
    # Subcomponent groups of an Expires date. They are captured alongside
    # 'expires' to avoid doing two passes, but are not wanted in the result.
    timekeys = ('weekday', 'month', 'day', 'hour', 'minute', 'second',
                'year')
    # Extract individual attrs from the attrs chunk
    for attr in Definitions.ATTR_RE.finditer(header.group('attrs')):
        captured = dict((k, v) for (k, v) in attr.groupdict().items() if v)
        unrecognized = captured.get('unrecognized', None)
        if unrecognized:
            if not ignore_bad_attributes:
                raise InvalidCookieAttributeError(None, unrecognized,
                                                  "unrecognized")
            _report_unknown_attribute(unrecognized)
            continue
        # for unary flags
        for key in ('secure', 'httponly'):
            if captured.get(key):
                captured[key] = True
        # The asctime alternative uses the same group names with a "2" suffix
        if 'year' in captured:
            suffix = ""
        elif 'year2' in captured:
            suffix = "2"
        else:
            suffix = None
        if suffix is not None:
            for key in timekeys:
                # Optional groups (weekday, second) which did not take part
                # in the match were already filtered out above, so pop with
                # a default: a plain `del` would raise KeyError for them.
                captured.pop(key + suffix, None)
        cookie_dict.update(captured)
    return cookie_dict
657
658
def _parse_response(header_data, ignore_bad_cookies=False,
                    ignore_bad_attributes=True):
    """Turn one or more lines of 'Set-Cookie:' header data into a list of dicts
    mapping attribute names to attribute values (as plain strings).

    Processing stops at the first empty line. If no cookie at all could be
    parsed, InvalidCookieError is raised, or the data is logged instead when
    ``ignore_bad_cookies`` is true.
    """
    parsed = []
    for line in Definitions.EOL.split(header_data.strip()):
        if not line:
            break
        one_cookie = parse_one_response(
            line, ignore_bad_cookies=ignore_bad_cookies,
            ignore_bad_attributes=ignore_bad_attributes)
        # Lines which did not parse are simply left out
        if one_cookie:
            parsed.append(one_cookie)
    if parsed:
        return parsed
    if not ignore_bad_cookies:
        raise InvalidCookieError(data=header_data)
    _report_invalid_cookie(header_data)
    return parsed
679
680
681class Cookie(object):
682    """Provide a simple interface for creating, modifying, and rendering
683    individual HTTP cookies.
684
685    Cookie attributes are represented as normal Python object attributes.
686    Parsing, rendering and validation are reconfigurable per-attribute. The
687    default behavior is intended to comply with RFC 6265, URL-encoding illegal
688    characters where necessary. For example: the default behavior for the
689    Expires attribute is to parse strings as datetimes using parse_date,
690    validate that any set value is a datetime, and render the attribute per the
691    preferred date format in RFC 1123.
692    """
693    def __init__(self, name, value, **kwargs):
694        # If we don't have or can't set a name value, we don't want to return
695        # junk, so we must break control flow. And we don't want to use
696        # InvalidCookieAttributeError, because users may want to catch that to
697        # suppress all complaining about funky attributes.
698        try:
699            self.name = name
700        except InvalidCookieAttributeError:
701            raise InvalidCookieError(message="invalid name for new Cookie",
702                                     data=name)
703        value = value or ''
704        try:
705            self.value = value
706        except InvalidCookieAttributeError:
707            raise InvalidCookieError(message="invalid value for new Cookie",
708                                     data=value)
709        if kwargs:
710            self._set_attributes(kwargs, ignore_bad_attributes=False)
711
712    def _set_attributes(self, attrs, ignore_bad_attributes=False):
713        for attr_name, attr_value in attrs.items():
714            if not attr_name in self.attribute_names:
715                if not ignore_bad_attributes:
716                    raise InvalidCookieAttributeError(
717                        attr_name, attr_value,
718                        "unknown cookie attribute '%s'" % attr_name)
719                _report_unknown_attribute(attr_name)
720
721            try:
722                setattr(self, attr_name, attr_value)
723            except InvalidCookieAttributeError as error:
724                if not ignore_bad_attributes:
725                    raise
726                _report_invalid_attribute(attr_name, attr_value, error.reason)
727                continue
728
729    @classmethod
730    def from_dict(cls, cookie_dict, ignore_bad_attributes=True):
731        """Construct an instance from a dict of strings to parse.
732
733        The main difference between this and Cookie(name, value, **kwargs) is
734        that the values in the argument to this method are parsed.
735
736        If ignore_bad_attributes=True (default), values which did not parse
737        are set to '' in order to avoid passing bad data.
738        """
739        name = cookie_dict.get('name', None)
740        if not name:
741            raise InvalidCookieError("Cookie must have name")
742        raw_value = cookie_dict.get('value', '')
743        # Absence or failure of parser here is fatal; errors in present name
744        # and value should be found by Cookie.__init__.
745        value = cls.attribute_parsers['value'](raw_value)
746        cookie = cls(name, value)
747
748        # Parse values from serialized formats into objects
749        parsed = {}
750        for key, value in cookie_dict.items():
751            # Don't want to pass name/value to _set_attributes
752            if key in ('name', 'value'):
753                continue
754            parser = cls.attribute_parsers.get(key)
755            if not parser:
756                # Don't let totally unknown attributes pass silently
757                if not ignore_bad_attributes:
758                    raise InvalidCookieAttributeError(
759                        key, value, "unknown cookie attribute '%s'" % key)
760                _report_unknown_attribute(key)
761                continue
762            try:
763                parsed_value = parser(value)
764            except Exception as e:
765                reason = "did not parse with %r: %r" % (parser, e)
766                if not ignore_bad_attributes:
767                    raise InvalidCookieAttributeError(
768                        key, value, reason)
769                _report_invalid_attribute(key, value, reason)
770                parsed_value = ''
771            parsed[key] = parsed_value
772
773        # Set the parsed objects (does object validation automatically)
774        cookie._set_attributes(parsed, ignore_bad_attributes)
775        return cookie
776
777    @classmethod
778    def from_string(cls, line, ignore_bad_cookies=False,
779                    ignore_bad_attributes=True):
780        "Construct a Cookie object from a line of Set-Cookie header data."
781        cookie_dict = parse_one_response(
782            line, ignore_bad_cookies=ignore_bad_cookies,
783            ignore_bad_attributes=ignore_bad_attributes)
784        if not cookie_dict:
785            return None
786        return cls.from_dict(
787            cookie_dict, ignore_bad_attributes=ignore_bad_attributes)
788
789    def to_dict(self):
790        this_dict = {'name': self.name, 'value': self.value}
791        this_dict.update(self.attributes())
792        return this_dict
793
794    def validate(self, name, value):
795        """Validate a cookie attribute with an appropriate validator.
796
797        The value comes in already parsed (for example, an expires value
798        should be a datetime). Called automatically when an attribute
799        value is set.
800        """
801        validator = self.attribute_validators.get(name, None)
802        if validator:
803            return True if validator(value) else False
804        return True
805
806    def __setattr__(self, name, value):
807        """Attributes mentioned in attribute_names get validated using
808        functions in attribute_validators, raising an exception on failure.
809        Others get left alone.
810        """
811        if name in self.attribute_names or name in ("name", "value"):
812            if name == 'name' and not value:
813                raise InvalidCookieError(message="Cookies must have names")
814            # Ignore None values indicating unset attr. Other invalids should
815            # raise error so users of __setattr__ can learn.
816            if value is not None:
817                if not self.validate(name, value):
818                    raise InvalidCookieAttributeError(
819                        name, value, "did not validate with " +
820                        repr(self.attribute_validators.get(name)))
821        object.__setattr__(self, name, value)
822
823    def __getattr__(self, name):
824        """Provide for acting like everything in attribute_names is
825        automatically set to None, rather than having to do so explicitly and
826        only at import time.
827        """
828        if name in self.attribute_names:
829            return None
830        raise AttributeError(name)
831
832    def attributes(self):
833        """Export this cookie's attributes as a dict of encoded values.
834
835        This is an important part of the code for rendering attributes, e.g.
836        render_response().
837        """
838        dictionary = {}
839        # Only look for attributes registered in attribute_names.
840        for python_attr_name, cookie_attr_name in self.attribute_names.items():
841            value = getattr(self, python_attr_name)
842            renderer = self.attribute_renderers.get(python_attr_name, None)
843            if renderer:
844                value = renderer(value)
845            # If renderer returns None, or it's just natively none, then the
846            # value is suppressed entirely - does not appear in any rendering.
847            if not value:
848                continue
849            dictionary[cookie_attr_name] = value
850        return dictionary
851
852    def render_request(self):
853        """Render as a string formatted for HTTP request headers
854        (simple 'Cookie: ' style).
855        """
856        # Use whatever renderers are defined for name and value.
857        name, value = self.name, self.value
858        renderer = self.attribute_renderers.get('name', None)
859        if renderer:
860            name = renderer(name)
861        renderer = self.attribute_renderers.get('value', None)
862        if renderer:
863            value = renderer(value)
864        return ''.join((name, "=", value))
865
866    def render_response(self):
867        """Render as a string formatted for HTTP response headers
868        (detailed 'Set-Cookie: ' style).
869        """
870        # Use whatever renderers are defined for name and value.
871        # (.attributes() is responsible for all other rendering.)
872        name, value = self.name, self.value
873        renderer = self.attribute_renderers.get('name', None)
874        if renderer:
875            name = renderer(name)
876        renderer = self.attribute_renderers.get('value', None)
877        if renderer:
878            value = renderer(value)
879        return '; '.join(
880            ['{0}={1}'.format(name, value)] +
881            [key if isinstance(val, bool) else '='.join((key, val))
882             for key, val in self.attributes().items()]
883        )
884
885    def __eq__(self, other):
886        attrs = ['name', 'value'] + list(self.attribute_names.keys())
887        for attr in attrs:
888            mine = getattr(self, attr, None)
889            his = getattr(other, attr, None)
890            if isinstance(mine, bytes):
891                mine = mine.decode('utf-8')
892            if isinstance(his, bytes):
893                his = his.decode('utf-8')
894            if attr == 'domain':
895                if mine and mine[0] == '.':
896                    mine = mine[1:]
897                if his and his[0] == '.':
898                    his = his[1:]
899            if mine != his:
900                return False
901        return True
902
903    def __ne__(self, other):
904        return not self.__eq__(other)
905
    # Add a name and its proper rendering to this dict to register an attribute
    # as exportable. The key is the name of the Cookie object attribute in
    # Python, and it is mapped to the name you want in the output.
    # 'name' and 'value' should not be here.
    # Membership in this dict is also what makes __setattr__ validate an
    # attribute, what makes __getattr__ report an unset attribute as None, and
    # what attributes() iterates over when exporting.
    attribute_names = {
        'expires':  'Expires',
        'max_age':  'Max-Age',
        'domain':   'Domain',
        'path':     'Path',
        'comment':  'Comment',
        'version':  'Version',
        'secure':   'Secure',
        'httponly': 'HttpOnly',
    }
920
    # Register single-parameter functions in this dictionary to have them
    # used for encoding outgoing values (e.g. as RFC compliant strings,
    # as base64, encrypted stuff, etc.)
    # attributes() applies the renderer registered for each attribute, and
    # render_request()/render_response() apply the 'name' and 'value'
    # renderers. A renderer result that is false (None, '', False) makes
    # attributes() leave the attribute out; a True result is rendered by
    # render_response() as a bare flag name.
    # NOTE(review): this comment used to say these are called by the property
    # generated by cookie_attribute(); nothing by that name is visible in
    # this part of the file - confirm whether it still exists.
    # Usually it would be wise not to define a renderer for name, but it is
    # supported in case there is ever a real need.
    attribute_renderers = {
        'value':    encode_cookie_value,
        'domain':   render_domain,
        'expires':  render_date,
        'max_age':  lambda item: str(item) if item is not None else None,
        'secure':   lambda item: True if item else False,
        'httponly': lambda item: True if item else False,
        'comment':  encode_extension_av,
        'version':  lambda item: (str(item) if isinstance(item, int)
                                  else encode_extension_av(item)),
    }
938
    # Register single-parameter functions in this dictionary to have them used
    # for decoding incoming values for use in the Python API (e.g. into nice
    # objects, numbers, unicode strings, etc.)
    # from_dict() looks parsers up here: the 'value' parser is required, and
    # any other key of the incoming dict that has no parser is treated as an
    # unknown attribute.
    # NOTE(review): this comment used to say these are called by the property
    # generated by cookie_attribute(); nothing by that name is visible in
    # this part of the file - confirm whether it still exists.
    attribute_parsers = {
        'value':    parse_value,
        'expires':  parse_date,
        'domain':   parse_domain,
        'path':     parse_path,
        'max_age':  lambda item: long(strip_spaces_and_quotes(item)),
        'comment':  parse_string,
        'version':  lambda item: int(strip_spaces_and_quotes(item)),
        'secure':   lambda item: True if item else False,
        'httponly': lambda item: True if item else False,
    }
954
955    # Register single-parameter functions which return a true value for
956    # acceptable values, and a false value for unacceptable ones. An
957    # attribute's validator is run after it is parsed or when it is directly
958    # set, and InvalidCookieAttribute is raised if validation fails (and the
959    # validator doesn't raise a different exception prior)
960    attribute_validators = {
961        'name':     valid_name,
962        'value':    valid_value,
963        'expires':  valid_date,
964        'domain':   valid_domain,
965        'path':     valid_path,
966        'max_age':  valid_max_age,
967        'comment':  valid_value,
968        'version':  lambda number: re.match("^\d+\Z", str(number)),
969        'secure':   lambda item: item is True or item is False,
970        'httponly': lambda item: item is True or item is False,
971    }
972
973
class Cookies(dict):
    """Represent a set of cookies indexed by name.

    This class bundles together a set of Cookie objects and provides
    a convenient interface to them for parsing and producing cookie headers.
    In basic operation it acts just like a dict of Cookie objects, but it adds
    additional convenience methods for the usual cookie tasks: add cookie
    objects by their names, create new cookie objects under specified names,
    parse HTTP request or response data into new cookie objects automatically
    stored in the dict, and render the set in formats suitable for HTTP request
    or response headers.

    Only the first cookie added under a given name is reachable through the
    dict interface; every added cookie, including later duplicates, is kept in
    .all_cookies and can be fetched with get_all().
    """
    DEFAULT_COOKIE_CLASS = Cookie

    def __init__(self, *args, **kwargs):
        """Create the collection and add any initial cookies.

        Positional and keyword arguments are passed on to add(). The special
        keyword _cookie_class selects the class used to build new cookies
        (DEFAULT_COOKIE_CLASS if omitted).
        """
        dict.__init__(self)
        self.all_cookies = []
        # pop() rather than get(): the option must not be forwarded to add(),
        # which would otherwise try to create a cookie named '_cookie_class'
        # whose value is the class object.
        self.cookie_class = kwargs.pop(
            "_cookie_class", self.DEFAULT_COOKIE_CLASS)
        self.add(*args, **kwargs)

    def add(self, *args, **kwargs):
        """Add Cookie objects by their names, or create new ones under
        specified names.

        Any unnamed arguments are interpreted as existing cookies, and
        are added under the value in their .name attribute. With keyword
        arguments, the key is interpreted as the cookie name and the
        value as the UNENCODED value stored in the cookie.
        """
        # Only the first one is accessible through the main interface,
        # others accessible through get_all (all_cookies).
        for cookie in args:
            self.all_cookies.append(cookie)
            if cookie.name not in self:
                self[cookie.name] = cookie
        for key, value in kwargs.items():
            cookie = self.cookie_class(key, value)
            self.all_cookies.append(cookie)
            if key not in self:
                self[key] = cookie

    def get_all(self, key):
        """Return a list of every added cookie whose name equals key,
        including duplicates that are not reachable through the dict
        interface.
        """
        return [cookie for cookie in self.all_cookies
                if cookie.name == key]

    def parse_request(self, header_data, ignore_bad_cookies=False):
        """Parse 'Cookie' header data into Cookie objects, and add them to
        this Cookies object.

        :arg header_data: string containing only 'Cookie:' request headers or
        header values (as in CGI/WSGI HTTP_COOKIE); if more than one, they must
        be separated by CRLF (\\r\\n).

        :arg ignore_bad_cookies: if set, will log each syntactically invalid
        cookie (at the granularity of semicolon-delimited blocks) rather than
        raising an exception at the first bad cookie.

        :returns: this Cookies instance, now also containing the Cookie
        objects parsed from header_data.

        .. note::
        If you want to parse 'Set-Cookie:' response headers, please use
        parse_response instead. parse_request will happily turn 'expires=frob'
        into a separate cookie without complaining, according to the grammar.
        """
        cookies_dict = _parse_request(
            header_data, ignore_bad_cookies=ignore_bad_cookies)
        cookie_objects = []
        for name, values in cookies_dict.items():
            for value in values:
                # Use from_dict to check name and parse value
                cookie_dict = {'name': name, 'value': value}
                try:
                    cookie = self.cookie_class.from_dict(cookie_dict)
                except InvalidCookieError:
                    if not ignore_bad_cookies:
                        raise
                else:
                    cookie_objects.append(cookie)
        try:
            self.add(*cookie_objects)
        except InvalidCookieError:
            if not ignore_bad_cookies:
                raise
            _report_invalid_cookie(header_data)
        return self

    def parse_response(self, header_data, ignore_bad_cookies=False,
                       ignore_bad_attributes=True):
        """Parse 'Set-Cookie' header data into Cookie objects, and add them to
        this Cookies object.

        :arg header_data: string containing only 'Set-Cookie:' response
        headers or their corresponding header values; if more than one, they
        must be separated by CRLF (\\r\\n).

        :arg ignore_bad_cookies: if set, will log each invalid cookie rather
        than raising an exception at the first bad cookie. (This includes
        cookies which have noncompliant characters in the attribute section,
        and cookies rejected while being turned into Cookie objects.)

        :arg ignore_bad_attributes: defaults to True, which means to log but
        not raise an error when a particular attribute is unrecognized. (This
        does not necessarily mean that the attribute is invalid, although that
        would often be the case.) if unset, then an error will be raised at the
        first semicolon-delimited block which has an unknown attribute. The
        setting is also passed on to the cookie class's from_dict, as
        Cookie.from_string does.

        :returns: this Cookies instance, now also containing the Cookie
        objects parsed from header_data, each with recognized attributes
        populated.

        .. note::
        If you want to parse 'Cookie:' headers (i.e., data like what's sent
        with an HTTP request, which has only name=value pairs and no
        attributes), then please use parse_request instead. Such lines often
        contain multiple name=value pairs, and parse_response will throw away
        the pairs after the first one, which will probably generate errors or
        confusing behavior. (Since there's no perfect way to automatically
        determine which kind of parsing to do, you have to tell it manually by
        choosing correctly between parse_request and parse_response.)
        """
        cookie_dicts = _parse_response(
            header_data,
            ignore_bad_cookies=ignore_bad_cookies,
            ignore_bad_attributes=ignore_bad_attributes)
        cookie_objects = []
        for cookie_dict in cookie_dicts:
            try:
                # Forward the caller's strictness; from_dict's own default
                # would otherwise silently re-enable leniency.
                cookie = self.cookie_class.from_dict(
                    cookie_dict,
                    ignore_bad_attributes=ignore_bad_attributes)
            except InvalidCookieError:
                # Same policy as parse_request: a cookie that cannot be built
                # is only fatal when bad cookies are not being ignored.
                if not ignore_bad_cookies:
                    raise
                _report_invalid_cookie(header_data)
            else:
                cookie_objects.append(cookie)
        self.add(*cookie_objects)
        return self

    @classmethod
    def from_request(cls, header_data, ignore_bad_cookies=False):
        "Construct a Cookies object from request header data."
        cookies = cls()
        cookies.parse_request(
            header_data, ignore_bad_cookies=ignore_bad_cookies)
        return cookies

    @classmethod
    def from_response(cls, header_data, ignore_bad_cookies=False,
                      ignore_bad_attributes=True):
        "Construct a Cookies object from response header data."
        cookies = cls()
        cookies.parse_response(
            header_data,
            ignore_bad_cookies=ignore_bad_cookies,
            ignore_bad_attributes=ignore_bad_attributes)
        return cookies

    def render_request(self, sort=True):
        """Render the dict's Cookie objects into a string formatted for HTTP
        request headers (simple 'Cookie: ' style).

        The rendered cookies are joined with '; ', in sorted order unless
        sort is false.
        """
        rendered = [cookie.render_request() for cookie in self.values()]
        if sort:
            rendered.sort()
        return "; ".join(rendered)

    def render_response(self, sort=True):
        """Render the dict's Cookie objects into list of strings formatted for
        HTTP response headers (detailed 'Set-Cookie: ' style).

        The list is sorted unless sort is false.
        """
        rendered = [cookie.render_response() for cookie in self.values()]
        return rendered if not sort else sorted(rendered)

    def __repr__(self):
        return "Cookies(%s)" % ', '.join("%s=%r" % (name, cookie.value) for
                                         (name, cookie) in self.items())

    def __eq__(self, other):
        """Test if a Cookies object is globally 'equal' to another one by
        seeing if it looks like a dict such that d[k] == self[k]. This depends
        on each Cookie object reporting its equality correctly.

        Anything without a .keys attribute compares unequal. Errors raised
        while comparing (e.g. TypeError from unorderable keys) propagate.
        """
        if not hasattr(other, "keys"):
            return False
        for key in sorted(set(self.keys()) | set(other.keys())):
            if key not in self or key not in other:
                return False
            if self[key] != other[key]:
                return False
        return True

    def __ne__(self, other):
        return not self.__eq__(other)
1170