"""Implementation of JSONDecoder
"""
import re

from json import scanner
try:
    from _json import scanstring as c_scanstring
except ImportError:
    c_scanstring = None

__all__ = ['JSONDecoder', 'JSONDecodeError']

FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL

NaN = float('nan')
PosInf = float('inf')
NegInf = float('-inf')


class JSONDecodeError(ValueError):
    """Subclass of ValueError with the following additional properties:

    msg: The unformatted error message
    doc: The JSON document being parsed
    pos: The start index of doc where parsing failed
    lineno: The line corresponding to pos
    colno: The column corresponding to pos

    """
    # Note that this exception is used from _json
    def __init__(self, msg, doc, pos):
        lineno = doc.count('\n', 0, pos) + 1
        # rfind() returns -1 when there is no preceding newline, which makes
        # the column 1-based on the first line as well.
        colno = pos - doc.rfind('\n', 0, pos)
        errmsg = '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
        ValueError.__init__(self, errmsg)
        self.msg = msg
        self.doc = doc
        self.pos = pos
        self.lineno = lineno
        self.colno = colno

    def __reduce__(self):
        # Rebuild from the original constructor arguments (not from the
        # formatted message stored in ``args``) when unpickling.
        return self.__class__, (self.msg, self.doc, self.pos)


_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}


# Exactly four hexadecimal digits, as required after ``\u`` in a JSON string.
HEXDIGITS = re.compile(r'[0-9A-Fa-f]{4}', FLAGS)
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
BACKSLASH = {
    '"': '"', '\\': '\\', '/': '/',
    'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',
}


def _decode_uXXXX(s, pos, _m=HEXDIGITS.match):
    """Return the code unit encoded by the ``\\uXXXX`` escape in ``s``.

    ``pos`` is the index of the ``u``; the four characters that follow it
    must all be hexadecimal digits.

    Raises JSONDecodeError (reported at ``pos``) if they are not.
    """
    # int(x, 16) on its own is too permissive: it also accepts surrounding
    # whitespace, a leading sign, a "0x" prefix and "_" digit separators.
    # Match the four hex digits explicitly before converting.
    esc = _m(s, pos + 1)
    if esc is not None:
        try:
            return int(esc.group(), 16)
        except ValueError:
            pass
    msg = "Invalid \\uXXXX escape"
    raise JSONDecodeError(msg, s, pos)


def py_scanstring(s, end, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises
    JSONDecodeError (a ValueError subclass) on attempt to decode an invalid
    string. If strict is False then literal control characters are allowed
    in the string.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    chunks = []
    _append = chunks.append
    begin = end - 1
    while True:
        chunk = _m(s, end)
        if chunk is None:
            raise JSONDecodeError("Unterminated string starting at", s, begin)
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character {0!r} at".format(terminator)
                raise JSONDecodeError(msg, s, end)
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError("Unterminated string starting at",
                                  s, begin) from None
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: {0!r}".format(esc)
                raise JSONDecodeError(msg, s, end)
            end += 1
        else:
            uni = _decode_uXXXX(s, end)
            end += 5
            # A high surrogate immediately followed by another \u escape may
            # form a surrogate pair; combine them only when the second unit
            # is a low surrogate, otherwise leave it for the next iteration.
            if 0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u':
                uni2 = _decode_uXXXX(s, end + 1)
                if 0xdc00 <= uni2 <= 0xdfff:
                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                    end += 6
            char = chr(uni)
        _append(char)
    return ''.join(chunks), end


# Use speedup if available
scanstring = c_scanstring or py_scanstring

WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
WHITESPACE_STR = ' \t\n\r'


def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
               memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the members of a JSON object.

    ``s_and_end`` is a ``(s, end)`` pair: the document and the index at which
    scanning of the object's contents starts (presumably just past the
    opening ``{``; the caller is not visible in this module).

    ``scan_once(s, idx)`` must return ``(value, end)`` or raise StopIteration
    whose ``value`` is the index at which no JSON value could be found.

    Returns ``(result, end)`` where ``end`` is the index after the closing
    ``}``. ``result`` is ``object_pairs_hook(pairs)`` if that hook is given,
    otherwise a dict, passed through ``object_hook`` if that hook is given.

    Raises JSONDecodeError on malformed input.
    """
    s, end = s_and_end
    pairs = []
    pairs_append = pairs.append
    # Backwards compatibility
    if memo is None:
        memo = {}
    # setdefault makes equal keys share a single string object.
    memo_get = memo.setdefault
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            if object_pairs_hook is not None:
                result = object_pairs_hook(pairs)
                return result, end + 1
            pairs = {}
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end)
    end += 1
    while True:
        key, end = scanstring(s, end, strict)
        key = memo_get(key, key)
        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", s, end)
        end += 1

        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            # Ran off the end of the document; scan_once reports the error.
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        pairs_append((key, value))
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end - 1)
    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end


def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the elements of a JSON array.

    ``s_and_end`` is a ``(s, end)`` pair: the document and the index at which
    scanning of the array's contents starts (presumably just past the
    opening ``[``; the caller is not visible in this module).

    ``scan_once(s, idx)`` must return ``(value, end)`` or raise StopIteration
    whose ``value`` is the index at which no JSON value could be found.

    Returns ``(values, end)`` where ``values`` is a list and ``end`` is the
    index after the closing ``]``.

    Raises JSONDecodeError on malformed input.
    """
    s, end = s_and_end
    values = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            # Ran off the end of the document; scan_once reports the error.
            pass

    return values, end


class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    Performs the following translations in decoding by default:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | str               |
    +---------------+-------------------+
    | number (int)  | int               |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
    their corresponding ``float`` values, which is outside the JSON spec.

    """

    def __init__(self, *, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """``object_hook``, if specified, will be called with the result
        of every JSON object decoded and its return value will be used in
        place of the given ``dict``.  This can be used to provide custom
        deserializations (e.g. to support JSON-RPC class hinting).

        ``object_pairs_hook``, if specified will be called with the result of
        every JSON object decoded with an ordered list of pairs.  The return
        value of ``object_pairs_hook`` will be used instead of the ``dict``.
        This feature can be used to implement custom decoders.
        If ``object_hook`` is also defined, the ``object_pairs_hook`` takes
        priority.

        ``parse_float``, if specified, will be called with the string
        of every JSON float to be decoded. By default this is equivalent to
        float(num_str). This can be used to use another datatype or parser
        for JSON floats (e.g. decimal.Decimal).

        ``parse_int``, if specified, will be called with the string
        of every JSON int to be decoded. By default this is equivalent to
        int(num_str). This can be used to use another datatype or parser
        for JSON integers (e.g. float).

        ``parse_constant``, if specified, will be called with one of the
        following strings: -Infinity, Infinity, NaN.
        This can be used to raise an exception if invalid JSON numbers
        are encountered.

        If ``strict`` is false (true is the default), then control
        characters will be allowed inside strings.  Control characters in
        this context are those with character codes in the 0-31 range,
        including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``.
        """
        self.object_hook = object_hook
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
        self.strict = strict
        self.object_pairs_hook = object_pairs_hook
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        self.memo = {}
        # Built last: make_scanner receives this decoder, so the attributes
        # above are already set when it is called.
        self.scan_once = scanner.make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` instance
        containing a JSON document).

        Leading and trailing whitespace is skipped; anything else after the
        document raises JSONDecodeError("Extra data").

        """
        obj, end = self.raw_decode(s, idx=_w(s, 0).end())
        end = _w(s, end).end()
        if end != len(s):
            raise JSONDecodeError("Extra data", s, end)
        return obj

    def raw_decode(self, s, idx=0):
        """Decode a JSON document from ``s`` (a ``str`` beginning with
        a JSON document) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.

        """
        try:
            obj, end = self.scan_once(s, idx)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        return obj, end