1"""Implementation of JSONEncoder 2""" 3import re 4from decimal import Decimal 5 6def _import_speedups(): 7 try: 8 from . import _speedups 9 return _speedups.encode_basestring_ascii, _speedups.make_encoder 10 except ImportError: 11 return None, None 12c_encode_basestring_ascii, c_make_encoder = _import_speedups() 13 14from .decoder import PosInf 15 16ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]') 17ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') 18HAS_UTF8 = re.compile(r'[\x80-\xff]') 19ESCAPE_DCT = { 20 '\\': '\\\\', 21 '"': '\\"', 22 '\b': '\\b', 23 '\f': '\\f', 24 '\n': '\\n', 25 '\r': '\\r', 26 '\t': '\\t', 27 u'\u2028': '\\u2028', 28 u'\u2029': '\\u2029', 29} 30for i in range(0x20): 31 #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) 32 ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) 33 34FLOAT_REPR = repr 35 36def encode_basestring(s): 37 """Return a JSON representation of a Python string 38 39 """ 40 if isinstance(s, str) and HAS_UTF8.search(s) is not None: 41 s = s.decode('utf-8') 42 def replace(match): 43 return ESCAPE_DCT[match.group(0)] 44 return u'"' + ESCAPE.sub(replace, s) + u'"' 45 46 47def py_encode_basestring_ascii(s): 48 """Return an ASCII-only JSON representation of a Python string 49 50 """ 51 if isinstance(s, str) and HAS_UTF8.search(s) is not None: 52 s = s.decode('utf-8') 53 def replace(match): 54 s = match.group(0) 55 try: 56 return ESCAPE_DCT[s] 57 except KeyError: 58 n = ord(s) 59 if n < 0x10000: 60 #return '\\u{0:04x}'.format(n) 61 return '\\u%04x' % (n,) 62 else: 63 # surrogate pair 64 n -= 0x10000 65 s1 = 0xd800 | ((n >> 10) & 0x3ff) 66 s2 = 0xdc00 | (n & 0x3ff) 67 #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2) 68 return '\\u%04x\\u%04x' % (s1, s2) 69 return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' 70 71 72encode_basestring_ascii = ( 73 c_encode_basestring_ascii or py_encode_basestring_ascii) 74 75class JSONEncoder(object): 76 """Extensible JSON <http://json.org> encoder for Python data structures. 77 78 Supports the following objects and types by default: 79 80 +-------------------+---------------+ 81 | Python | JSON | 82 +===================+===============+ 83 | dict, namedtuple | object | 84 +-------------------+---------------+ 85 | list, tuple | array | 86 +-------------------+---------------+ 87 | str, unicode | string | 88 +-------------------+---------------+ 89 | int, long, float | number | 90 +-------------------+---------------+ 91 | True | true | 92 +-------------------+---------------+ 93 | False | false | 94 +-------------------+---------------+ 95 | None | null | 96 +-------------------+---------------+ 97 98 To extend this to recognize other objects, subclass and implement a 99 ``.default()`` method with another method that returns a serializable 100 object for ``o`` if possible, otherwise it should call the superclass 101 implementation (to raise ``TypeError``). 102 103 """ 104 item_separator = ', ' 105 key_separator = ': ' 106 def __init__(self, skipkeys=False, ensure_ascii=True, 107 check_circular=True, allow_nan=True, sort_keys=False, 108 indent=None, separators=None, encoding='utf-8', default=None, 109 use_decimal=True, namedtuple_as_object=True, 110 tuple_as_array=True): 111 """Constructor for JSONEncoder, with sensible defaults. 112 113 If skipkeys is false, then it is a TypeError to attempt 114 encoding of keys that are not str, int, long, float or None. If 115 skipkeys is True, such items are simply skipped. 116 117 If ensure_ascii is true, the output is guaranteed to be str 118 objects with all incoming unicode characters escaped. If 119 ensure_ascii is false, the output will be unicode object. 120 121 If check_circular is true, then lists, dicts, and custom encoded 122 objects will be checked for circular references during encoding to 123 prevent an infinite recursion (which would cause an OverflowError). 124 Otherwise, no such check takes place. 125 126 If allow_nan is true, then NaN, Infinity, and -Infinity will be 127 encoded as such. This behavior is not JSON specification compliant, 128 but is consistent with most JavaScript based encoders and decoders. 129 Otherwise, it will be a ValueError to encode such floats. 130 131 If sort_keys is true, then the output of dictionaries will be 132 sorted by key; this is useful for regression tests to ensure 133 that JSON serializations can be compared on a day-to-day basis. 134 135 If indent is a string, then JSON array elements and object members 136 will be pretty-printed with a newline followed by that string repeated 137 for each level of nesting. ``None`` (the default) selects the most compact 138 representation without any newlines. For backwards compatibility with 139 versions of simplejson earlier than 2.1.0, an integer is also accepted 140 and is converted to a string with that many spaces. 141 142 If specified, separators should be a (item_separator, key_separator) 143 tuple. The default is (', ', ': '). To get the most compact JSON 144 representation you should specify (',', ':') to eliminate whitespace. 145 146 If specified, default is a function that gets called for objects 147 that can't otherwise be serialized. It should return a JSON encodable 148 version of the object or raise a ``TypeError``. 149 150 If encoding is not None, then all input strings will be 151 transformed into unicode using that encoding prior to JSON-encoding. 152 The default is UTF-8. 153 154 If use_decimal is true (not the default), ``decimal.Decimal`` will 155 be supported directly by the encoder. For the inverse, decode JSON 156 with ``parse_float=decimal.Decimal``. 157 158 If namedtuple_as_object is true (the default), objects with 159 ``_asdict()`` methods will be encoded as JSON objects. 160 161 If tuple_as_array is true (the default), tuple (and subclasses) will 162 be encoded as JSON arrays. 163 """ 164 165 self.skipkeys = skipkeys 166 self.ensure_ascii = ensure_ascii 167 self.check_circular = check_circular 168 self.allow_nan = allow_nan 169 self.sort_keys = sort_keys 170 self.use_decimal = use_decimal 171 self.namedtuple_as_object = namedtuple_as_object 172 self.tuple_as_array = tuple_as_array 173 if isinstance(indent, (int, long)): 174 indent = ' ' * indent 175 self.indent = indent 176 if separators is not None: 177 self.item_separator, self.key_separator = separators 178 elif indent is not None: 179 self.item_separator = ',' 180 if default is not None: 181 self.default = default 182 self.encoding = encoding 183 184 def default(self, o): 185 """Implement this method in a subclass such that it returns 186 a serializable object for ``o``, or calls the base implementation 187 (to raise a ``TypeError``). 188 189 For example, to support arbitrary iterators, you could 190 implement default like this:: 191 192 def default(self, o): 193 try: 194 iterable = iter(o) 195 except TypeError: 196 pass 197 else: 198 return list(iterable) 199 return JSONEncoder.default(self, o) 200 201 """ 202 raise TypeError(repr(o) + " is not JSON serializable") 203 204 def encode(self, o): 205 """Return a JSON string representation of a Python data structure. 206 207 >>> from simplejson import JSONEncoder 208 >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) 209 '{"foo": ["bar", "baz"]}' 210 211 """ 212 # This is for extremely simple cases and benchmarks. 213 if isinstance(o, basestring): 214 if isinstance(o, str): 215 _encoding = self.encoding 216 if (_encoding is not None 217 and not (_encoding == 'utf-8')): 218 o = o.decode(_encoding) 219 if self.ensure_ascii: 220 return encode_basestring_ascii(o) 221 else: 222 return encode_basestring(o) 223 # This doesn't pass the iterator directly to ''.join() because the 224 # exceptions aren't as detailed. The list call should be roughly 225 # equivalent to the PySequence_Fast that ''.join() would do. 226 chunks = self.iterencode(o, _one_shot=True) 227 if not isinstance(chunks, (list, tuple)): 228 chunks = list(chunks) 229 if self.ensure_ascii: 230 return ''.join(chunks) 231 else: 232 return u''.join(chunks) 233 234 def iterencode(self, o, _one_shot=False): 235 """Encode the given object and yield each string 236 representation as available. 237 238 For example:: 239 240 for chunk in JSONEncoder().iterencode(bigobject): 241 mysocket.write(chunk) 242 243 """ 244 if self.check_circular: 245 markers = {} 246 else: 247 markers = None 248 if self.ensure_ascii: 249 _encoder = encode_basestring_ascii 250 else: 251 _encoder = encode_basestring 252 if self.encoding != 'utf-8': 253 def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): 254 if isinstance(o, str): 255 o = o.decode(_encoding) 256 return _orig_encoder(o) 257 258 def floatstr(o, allow_nan=self.allow_nan, 259 _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf): 260 # Check for specials. Note that this type of test is processor 261 # and/or platform-specific, so do tests which don't depend on 262 # the internals. 263 264 if o != o: 265 text = 'NaN' 266 elif o == _inf: 267 text = 'Infinity' 268 elif o == _neginf: 269 text = '-Infinity' 270 else: 271 return _repr(o) 272 273 if not allow_nan: 274 raise ValueError( 275 "Out of range float values are not JSON compliant: " + 276 repr(o)) 277 278 return text 279 280 281 key_memo = {} 282 if (_one_shot and c_make_encoder is not None 283 and self.indent is None): 284 _iterencode = c_make_encoder( 285 markers, self.default, _encoder, self.indent, 286 self.key_separator, self.item_separator, self.sort_keys, 287 self.skipkeys, self.allow_nan, key_memo, self.use_decimal, 288 self.namedtuple_as_object, self.tuple_as_array) 289 else: 290 _iterencode = _make_iterencode( 291 markers, self.default, _encoder, self.indent, floatstr, 292 self.key_separator, self.item_separator, self.sort_keys, 293 self.skipkeys, _one_shot, self.use_decimal, 294 self.namedtuple_as_object, self.tuple_as_array) 295 try: 296 return _iterencode(o, 0) 297 finally: 298 key_memo.clear() 299 300 301class JSONEncoderForHTML(JSONEncoder): 302 """An encoder that produces JSON safe to embed in HTML. 303 304 To embed JSON content in, say, a script tag on a web page, the 305 characters &, < and > should be escaped. They cannot be escaped 306 with the usual entities (e.g. &) because they are not expanded 307 within <script> tags. 308 """ 309 310 def encode(self, o): 311 # Override JSONEncoder.encode because it has hacks for 312 # performance that make things more complicated. 313 chunks = self.iterencode(o, True) 314 if self.ensure_ascii: 315 return ''.join(chunks) 316 else: 317 return u''.join(chunks) 318 319 def iterencode(self, o, _one_shot=False): 320 chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot) 321 for chunk in chunks: 322 chunk = chunk.replace('&', '\\u0026') 323 chunk = chunk.replace('<', '\\u003c') 324 chunk = chunk.replace('>', '\\u003e') 325 yield chunk 326 327 328def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, 329 _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, 330 _use_decimal, _namedtuple_as_object, _tuple_as_array, 331 ## HACK: hand-optimized bytecode; turn globals into locals 332 False=False, 333 True=True, 334 ValueError=ValueError, 335 basestring=basestring, 336 Decimal=Decimal, 337 dict=dict, 338 float=float, 339 id=id, 340 int=int, 341 isinstance=isinstance, 342 list=list, 343 long=long, 344 str=str, 345 tuple=tuple, 346 ): 347 348 def _iterencode_list(lst, _current_indent_level): 349 if not lst: 350 yield '[]' 351 return 352 if markers is not None: 353 markerid = id(lst) 354 if markerid in markers: 355 raise ValueError("Circular reference detected") 356 markers[markerid] = lst 357 buf = '[' 358 if _indent is not None: 359 _current_indent_level += 1 360 newline_indent = '\n' + (_indent * _current_indent_level) 361 separator = _item_separator + newline_indent 362 buf += newline_indent 363 else: 364 newline_indent = None 365 separator = _item_separator 366 first = True 367 for value in lst: 368 if first: 369 first = False 370 else: 371 buf = separator 372 if isinstance(value, basestring): 373 yield buf + _encoder(value) 374 elif value is None: 375 yield buf + 'null' 376 elif value is True: 377 yield buf + 'true' 378 elif value is False: 379 yield buf + 'false' 380 elif isinstance(value, (int, long)): 381 yield buf + str(value) 382 elif isinstance(value, float): 383 yield buf + _floatstr(value) 384 elif _use_decimal and isinstance(value, Decimal): 385 yield buf + str(value) 386 else: 387 yield buf 388 if isinstance(value, list): 389 chunks = _iterencode_list(value, _current_indent_level) 390 else: 391 _asdict = _namedtuple_as_object and getattr(value, '_asdict', None) 392 if _asdict and callable(_asdict): 393 chunks = _iterencode_dict(_asdict(), 394 _current_indent_level) 395 elif _tuple_as_array and isinstance(value, tuple): 396 chunks = _iterencode_list(value, _current_indent_level) 397 elif isinstance(value, dict): 398 chunks = _iterencode_dict(value, _current_indent_level) 399 else: 400 chunks = _iterencode(value, _current_indent_level) 401 for chunk in chunks: 402 yield chunk 403 if newline_indent is not None: 404 _current_indent_level -= 1 405 yield '\n' + (_indent * _current_indent_level) 406 yield ']' 407 if markers is not None: 408 del markers[markerid] 409 410 def _iterencode_dict(dct, _current_indent_level): 411 if not dct: 412 yield '{}' 413 return 414 if markers is not None: 415 markerid = id(dct) 416 if markerid in markers: 417 raise ValueError("Circular reference detected") 418 markers[markerid] = dct 419 yield '{' 420 if _indent is not None: 421 _current_indent_level += 1 422 newline_indent = '\n' + (_indent * _current_indent_level) 423 item_separator = _item_separator + newline_indent 424 yield newline_indent 425 else: 426 newline_indent = None 427 item_separator = _item_separator 428 first = True 429 if _sort_keys: 430 items = dct.items() 431 items.sort(key=lambda kv: kv[0]) 432 else: 433 items = dct.iteritems() 434 for key, value in items: 435 if isinstance(key, basestring): 436 pass 437 # JavaScript is weakly typed for these, so it makes sense to 438 # also allow them. Many encoders seem to do something like this. 439 elif isinstance(key, float): 440 key = _floatstr(key) 441 elif key is True: 442 key = 'true' 443 elif key is False: 444 key = 'false' 445 elif key is None: 446 key = 'null' 447 elif isinstance(key, (int, long)): 448 key = str(key) 449 elif _skipkeys: 450 continue 451 else: 452 raise TypeError("key " + repr(key) + " is not a string") 453 if first: 454 first = False 455 else: 456 yield item_separator 457 yield _encoder(key) 458 yield _key_separator 459 if isinstance(value, basestring): 460 yield _encoder(value) 461 elif value is None: 462 yield 'null' 463 elif value is True: 464 yield 'true' 465 elif value is False: 466 yield 'false' 467 elif isinstance(value, (int, long)): 468 yield str(value) 469 elif isinstance(value, float): 470 yield _floatstr(value) 471 elif _use_decimal and isinstance(value, Decimal): 472 yield str(value) 473 else: 474 if isinstance(value, list): 475 chunks = _iterencode_list(value, _current_indent_level) 476 else: 477 _asdict = _namedtuple_as_object and getattr(value, '_asdict', None) 478 if _asdict and callable(_asdict): 479 chunks = _iterencode_dict(_asdict(), 480 _current_indent_level) 481 elif _tuple_as_array and isinstance(value, tuple): 482 chunks = _iterencode_list(value, _current_indent_level) 483 elif isinstance(value, dict): 484 chunks = _iterencode_dict(value, _current_indent_level) 485 else: 486 chunks = _iterencode(value, _current_indent_level) 487 for chunk in chunks: 488 yield chunk 489 if newline_indent is not None: 490 _current_indent_level -= 1 491 yield '\n' + (_indent * _current_indent_level) 492 yield '}' 493 if markers is not None: 494 del markers[markerid] 495 496 def _iterencode(o, _current_indent_level): 497 if isinstance(o, basestring): 498 yield _encoder(o) 499 elif o is None: 500 yield 'null' 501 elif o is True: 502 yield 'true' 503 elif o is False: 504 yield 'false' 505 elif isinstance(o, (int, long)): 506 yield str(o) 507 elif isinstance(o, float): 508 yield _floatstr(o) 509 elif isinstance(o, list): 510 for chunk in _iterencode_list(o, _current_indent_level): 511 yield chunk 512 else: 513 _asdict = _namedtuple_as_object and getattr(o, '_asdict', None) 514 if _asdict and callable(_asdict): 515 for chunk in _iterencode_dict(_asdict(), _current_indent_level): 516 yield chunk 517 elif (_tuple_as_array and isinstance(o, tuple)): 518 for chunk in _iterencode_list(o, _current_indent_level): 519 yield chunk 520 elif isinstance(o, dict): 521 for chunk in _iterencode_dict(o, _current_indent_level): 522 yield chunk 523 elif _use_decimal and isinstance(o, Decimal): 524 yield str(o) 525 else: 526 if markers is not None: 527 markerid = id(o) 528 if markerid in markers: 529 raise ValueError("Circular reference detected") 530 markers[markerid] = o 531 o = _default(o) 532 for chunk in _iterencode(o, _current_indent_level): 533 yield chunk 534 if markers is not None: 535 del markers[markerid] 536 537 return _iterencode 538