# -*- coding: utf-8 -*-
import json as json_
import re

# Maps each character that is unsafe inside an HTML <script> element (or that
# JavaScript treats differently from JSON) to its JSON-level unicode escape.
# The keys are the real characters; the values are the literal six-character
# escape sequences written into the serialised output.
JSON_SCRIPTSAFE_MAPPER = {
    '&': r'\u0026',
    '<': r'\u003c',
    '>': r'\u003e',
    '\u2028': r'\u2028',
    '\u2029': r'\u2029',
}

# Compiled once at import time; matches exactly the keys of
# JSON_SCRIPTSAFE_MAPPER (the ``re`` module resolves \uXXXX itself).
_SCRIPTSAFE_RE = re.compile(r'[<>&\u2028\u2029]')


class JSON:
    """Thin wrapper around the standard ``json`` module whose ``dumps``
    output can be embedded in an HTML ``<script>`` element."""

    def loads(self, *args, **kwargs):
        """Deserialise JSON; every argument is forwarded to ``json.loads``."""
        return json_.loads(*args, **kwargs)

    def dumps(self, *args, **kwargs):
        r""" Serialise to JSON with HTML-unsafe characters unicode-escaped.

        JSON used as JS in HTML (script tags) is problematic: <script>
        tags are a special context which only waits for </script> but doesn't
        interpret anything else, this means standard htmlescaping does not
        work (it breaks double quotes, and e.g. `<` will become `&lt;` *in
        the resulting JSON/JS* not just inside the page).

        However, failing to escape embedded json means the json strings could
        contain `</script>` and thus become an XSS vector.

        The solution turns out to be very simple: use JSON-level unicode
        escapes for HTML-unsafe characters (e.g. "<" -> "\u003c"). This
        removes the XSS issue without breaking the json, and there is no
        difference to the end result once it's been parsed back from JSON. So
        it will work properly even for HTML attributes or raw text.

        Also handle U+2028 and U+2029 the same way just in case as these are
        interpreted as newlines in javascript but not in JSON, which could
        lead to oddities and issues.

        All positional and keyword arguments are forwarded unchanged to
        ``json.dumps``; the return value is its result with every ``&``,
        ``<``, ``>``, U+2028 and U+2029 replaced by the matching entry of
        ``JSON_SCRIPTSAFE_MAPPER``.

        .. warning::

            except inside <script> elements, this should be escaped following
            the normal rules of the containing format

        Cf https://code.djangoproject.com/ticket/17419#comment:27
        """
        # replacement can be done straight in the serialised JSON as the
        # problematic characters are not JSON metacharacters (and can thus
        # only occur in strings)
        serialised = json_.dumps(*args, **kwargs)
        return _SCRIPTSAFE_RE.sub(
            lambda match: JSON_SCRIPTSAFE_MAPPER[match.group(0)],
            serialised,
        )


# Shared module-level instance, used as ``scriptsafe.dumps(...)`` /
# ``scriptsafe.loads(...)``.
scriptsafe = JSON()