# -*- coding: utf-8 -*-
3import json as json_
4import re
5
# JSON-level ``\uXXXX`` escapes for the characters that are unsafe when
# serialized JSON is embedded in an HTML <script> element. Keys are the
# literal characters; values are the six-character escape text that
# replaces them in the serialized output.
JSON_SCRIPTSAFE_MAPPER = {
    '&': r'\u0026',
    '<': r'\u003c',
    '>': r'\u003e',
    '\u2028': r'\u2028',
    '\u2029': r'\u2029',
}
class JSON:
    """Minimal ``json``-like facade whose ``dumps`` output is script-safe.

    ``loads`` is a plain pass-through to the standard library. ``dumps``
    serializes with the standard library and then replaces ``<``, ``>``,
    ``&``, U+2028 and U+2029 with the escape text found in
    ``JSON_SCRIPTSAFE_MAPPER``.
    """

    def loads(self, *args, **kwargs):
        """Deserialize JSON; all arguments are forwarded to ``json.loads``."""
        return json_.loads(*args, **kwargs)

    def dumps(self, *args, **kwargs):
        r"""Serialize to JSON that is safe to embed in an HTML <script> tag.

        JSON used as JS in HTML (script tags) is problematic: <script>
        tags are a special context which only waits for </script> but doesn't
        interpret anything else, this means standard htmlescaping does not
        work (it breaks double quotes, and e.g. `<` will become `&lt;` *in
        the resulting JSON/JS* not just inside the page).

        However, failing to escape embedded json means the json strings could
        contain `</script>` and thus become an XSS vector.

        The solution turns out to be very simple: use JSON-level unicode
        escapes for HTML-unsafe characters (e.g. "<" -> "\u003c"). This
        removes the XSS issue without breaking the json, and there is no
        difference to the end result once it's been parsed back from JSON.
        So it will work properly even for HTML attributes or raw text.

        Also handle U+2028 and U+2029 the same way just in case as these are
        interpreted as newlines in javascript but not in JSON, which could
        lead to oddities and issues.

        .. warning::

            except inside <script> elements, this should be escaped following
            the normal rules of the containing format

        Cf https://code.djangoproject.com/ticket/17419#comment:27

        All positional and keyword arguments are forwarded unchanged to
        ``json.dumps``.

        :return: the serialized JSON text with the unsafe characters
                 replaced by their ``\uXXXX`` escapes
        """
        serialized = json_.dumps(*args, **kwargs)
        # Replacement can be done straight in the serialised JSON as the
        # problematic characters are not JSON metacharacters (and can thus
        # only occur in strings).
        return re.sub(
            r'[<>&\u2028\u2029]',
            lambda match: JSON_SCRIPTSAFE_MAPPER[match.group(0)],
            serialized,
        )
# Ready-to-use module-level instance: ``scriptsafe.loads`` and
# ``scriptsafe.dumps`` accept the same arguments as the functions of the
# standard ``json`` module.
scriptsafe = JSON()
52