import sys
from unittest import TestCase

import simplejson as json
import simplejson.decoder
from simplejson.compat import b, PY3


class TestScanString(TestCase):
    """Exercise the pure-Python and C ``scanstring`` implementations.

    As used below, ``scanstring`` is called with the text to scan, the
    index just past the opening quote, a third positional argument (always
    ``None`` here except in ``test_issue3623``; presumably the encoding --
    confirm against the decoder) and a strict flag.  It returns a
    ``(decoded_string, end_index)`` tuple, where ``end_index`` is the index
    just past the closing quote.
    """

    # The bytes type is intentionally not used in most of these tests
    # under Python 3 because the decoder immediately coerces to str before
    # calling scanstring. In Python 2 we are testing the code paths
    # for both unicode and str.
    #
    # The reason this is done is because Python 3 would require
    # entirely different code paths for parsing bytes and str.
    #
    def test_py_scanstring(self):
        # Run the shared assertions against the pure-Python implementation.
        self._test_scanstring(simplejson.decoder.py_scanstring)

    def test_c_scanstring(self):
        # Run the shared assertions against the C implementation, if any.
        # A falsy c_scanstring means there is nothing to test here.
        if not simplejson.decoder.c_scanstring:
            return
        self._test_scanstring(simplejson.decoder.c_scanstring)

        # An empty JSON string must decode to the native str type.
        self.assertTrue(
            isinstance(simplejson.decoder.c_scanstring('""', 0)[0], str))

    def _test_scanstring(self, scanstring):
        # Shared assertions, parameterized on the implementation under test.

        # When sys.maxunicode is 65535 the non-BMP character occupies two
        # code units in the input, so the closing quote sits one index
        # further along.
        astral_end = 6 if sys.maxunicode == 65535 else 5
        self.assertEqual(
            scanstring(u'"z\U0001d120x"', 1, None, True),
            (u'z\U0001d120x', astral_end))

        # A \uXXXX escape is decoded to the character it names.
        self.assertEqual(
            scanstring('"\\u007b"', 1, None, True),
            (u'{', 8))

        # (document, index after the opening quote, expected result).
        # Most documents are malformed JSON as a whole, but scanstring only
        # looks at the first string literal, which is well formed.
        cases = (
            ('"A JSON payload should be an object or array, not a string."',
             1,
             (u'A JSON payload should be an object or array, not a string.',
              60)),
            ('["Unclosed array"', 2, (u'Unclosed array', 17)),
            ('["extra comma",]', 2, (u'extra comma', 14)),
            ('["double extra comma",,]', 2, (u'double extra comma', 21)),
            ('["Comma after the close"],', 2,
             (u'Comma after the close', 24)),
            ('["Extra close"]]', 2, (u'Extra close', 14)),
            ('{"Extra comma": true,}', 2, (u'Extra comma', 14)),
            ('{"Extra value after close": true} "misplaced quoted value"', 2,
             (u'Extra value after close', 26)),
            ('{"Illegal expression": 1 + 2}', 2,
             (u'Illegal expression', 21)),
            ('{"Illegal invocation": alert()}', 2,
             (u'Illegal invocation', 21)),
            ('{"Numbers cannot have leading zeroes": 013}', 2,
             (u'Numbers cannot have leading zeroes', 37)),
            ('{"Numbers cannot be hex": 0x14}', 2,
             (u'Numbers cannot be hex', 24)),
            # Twenty nested arrays around a single string.
            ('[' * 20 + '"Too deep"' + ']' * 20, 21, (u'Too deep', 30)),
            ('{"Missing colon" null}', 2, (u'Missing colon', 16)),
            ('{"Double colon":: null}', 2, (u'Double colon', 15)),
            ('{"Comma instead of colon", null}', 2,
             (u'Comma instead of colon', 25)),
            ('["Colon instead of comma": false]', 2,
             (u'Colon instead of comma', 25)),
            ('["Bad value", truth]', 2, (u'Bad value', 12)),
        )
        for document, start, expected in cases:
            self.assertEqual(
                scanstring(document, start, None, True),
                expected)

        # Raw control characters U+0000..U+001F are accepted when strict is
        # False and rejected when it is True.  The upper bound is 0x20 so
        # that U+001F itself is covered (range() excludes its stop value).
        for c in map(chr, range(0x00, 0x20)):
            self.assertEqual(
                scanstring(c + '"', 0, None, False),
                (c, 2))
            self.assertRaises(
                ValueError,
                scanstring, c + '"', 0, None, True)

        # Unterminated strings and truncated escapes must raise ValueError.
        truncated_inputs = (
            '', 'a', '\\', '\\u', '\\u0', '\\u01', '\\u012', '\\u0123')
        for truncated in truncated_inputs:
            self.assertRaises(
                ValueError, scanstring, truncated, 0, None, True)

        # Only checked when sys.maxunicode > 65535: a high surrogate escape
        # followed by an incomplete or invalid escape.
        if sys.maxunicode > 65535:
            self.assertRaises(
                ValueError,
                scanstring, '\\ud834\\u"', 0, None, True)
            self.assertRaises(
                ValueError,
                scanstring, '\\ud834\\x0123"', 0, None, True)

    def test_issue3623(self):
        # Input with no closing quote raises ValueError.
        self.assertRaises(
            ValueError, json.decoder.scanstring, "xxx", 1, "xxx")
        # Bytes containing 0xff raise UnicodeDecodeError in the encoder.
        self.assertRaises(
            UnicodeDecodeError,
            json.encoder.encode_basestring_ascii, b("xx\xff"))

    def test_overflow(self):
        # Python 2.5 does not have maxsize, Python 3 does not have maxint
        maxsize = getattr(sys, 'maxsize', getattr(sys, 'maxint', None))
        # Use a unittest assertion rather than a bare ``assert`` so the
        # check still runs when Python is started with -O.
        self.assertTrue(maxsize is not None)
        self.assertRaises(
            OverflowError, json.decoder.scanstring, "xxx", maxsize + 1)

    def test_surrogates(self):
        # Check decoding of paired, unpaired and mixed raw/escaped
        # surrogates.
        scanstring = json.decoder.scanstring

        def assertScan(given, expect, test_utf8=True):
            # Scan ``given`` and check that the whole input was consumed and
            # that it decoded to ``expect``.  Outside Python 3, the UTF-8
            # encoded form is scanned as well unless ``test_utf8`` is False.
            givens = [given]
            if not PY3 and test_utf8:
                givens.append(given.encode('utf8'))
            for candidate in givens:
                (res, count) = scanstring(candidate, 1, None, True)
                self.assertEqual(len(candidate), count)
                self.assertEqual(res, expect)

        assertScan(
            u'"z\\ud834\\u0079x"',
            u'z\ud834yx')
        assertScan(
            u'"z\\ud834\\udd20x"',
            u'z\U0001d120x')
        assertScan(
            u'"z\\ud834\\ud834\\udd20x"',
            u'z\ud834\U0001d120x')
        assertScan(
            u'"z\\ud834x"',
            u'z\ud834x')
        assertScan(
            u'"z\\udd20x"',
            u'z\udd20x')
        assertScan(
            u'"z\ud834x"',
            u'z\ud834x')
        # It may look strange to join strings together, but Python is drunk.
        # https://gist.github.com/etrepum/5538443
        assertScan(
            u'"z\\ud834\udd20x12345"',
            u''.join([u'z\ud834', u'\udd20x12345']))
        assertScan(
            u'"z\ud834\\udd20x"',
            u''.join([u'z\ud834', u'\udd20x']))
        # these have different behavior given UTF8 input, because the
        # surrogate pair may be joined (in maxunicode > 65535 builds)
        assertScan(
            u''.join([u'"z\ud834', u'\udd20x"']),
            u''.join([u'z\ud834', u'\udd20x']),
            test_utf8=False)

        # Escapes containing a non-hex digit must raise ValueError.
        self.assertRaises(
            ValueError,
            scanstring, u'"z\\ud83x"', 1, None, True)
        self.assertRaises(
            ValueError,
            scanstring, u'"z\\ud834\\udd2x"', 1, None, True)