1"""Implementation of JSONEncoder
2"""
3import re
4from decimal import Decimal
5
6def _import_speedups():
7    try:
8        from . import _speedups
9        return _speedups.encode_basestring_ascii, _speedups.make_encoder
10    except ImportError:
11        return None, None
12c_encode_basestring_ascii, c_make_encoder = _import_speedups()
13
14from .decoder import PosInf
15
16ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]')
17ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
18HAS_UTF8 = re.compile(r'[\x80-\xff]')
19ESCAPE_DCT = {
20    '\\': '\\\\',
21    '"': '\\"',
22    '\b': '\\b',
23    '\f': '\\f',
24    '\n': '\\n',
25    '\r': '\\r',
26    '\t': '\\t',
27    u'\u2028': '\\u2028',
28    u'\u2029': '\\u2029',
29}
30for i in range(0x20):
31    #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
32    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
33
34FLOAT_REPR = repr
35
def encode_basestring(s):
    """Return a JSON representation of a Python string

    Byte strings containing high-bit bytes are decoded as UTF-8 first.
    The result is a unicode object wrapped in double quotes, with every
    character matched by ``ESCAPE`` replaced through ``ESCAPE_DCT``.
    """
    has_high_bytes = isinstance(s, str) and HAS_UTF8.search(s) is not None
    if has_high_bytes:
        s = s.decode('utf-8')
    escaped = ESCAPE.sub(lambda found: ESCAPE_DCT[found.group(0)], s)
    return u'"' + escaped + u'"'
45
46
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string

    Byte strings containing high-bit bytes are decoded as UTF-8 first.
    Every character matched by ``ESCAPE_ASCII`` is replaced either by its
    entry in ``ESCAPE_DCT`` or by a ``\\uXXXX`` escape; code points at or
    above 0x10000 are written as a surrogate pair.
    """
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        s = s.decode('utf-8')

    def _escape(found):
        char = found.group(0)
        known = ESCAPE_DCT.get(char)
        if known is not None:
            return known
        codepoint = ord(char)
        if codepoint < 0x10000:
            return '\\u%04x' % (codepoint,)
        # Outside the BMP: split into a high/low surrogate pair.
        offset = codepoint - 0x10000
        high = 0xd800 | ((offset >> 10) & 0x3ff)
        low = 0xdc00 | (offset & 0x3ff)
        return '\\u%04x\\u%04x' % (high, low)

    return '"' + str(ESCAPE_ASCII.sub(_escape, s)) + '"'
70
71
# Use the C implementation when the optional speedups import succeeded
# (c_encode_basestring_ascii is not None); otherwise fall back to the
# pure-Python version.
encode_basestring_ascii = (
    c_encode_basestring_ascii or py_encode_basestring_ascii)
74
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict, namedtuple  | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible; otherwise it should call the superclass implementation (to
    raise ``TypeError``).

    """
    # Class-level defaults; __init__ may override them per instance.
    item_separator = ', '
    key_separator = ': '
    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None,
            use_decimal=True, namedtuple_as_object=True,
            tuple_as_array=True):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a string, then JSON array elements and object members
        will be pretty-printed with a newline followed by that string repeated
        for each level of nesting. ``None`` (the default) selects the most compact
        representation without any newlines. For backwards compatibility with
        versions of simplejson earlier than 2.1.0, an integer is also accepted
        and is converted to a string with that many spaces.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.
        When separators is not given but indent is, the item separator
        becomes ',' so that lines do not end in trailing whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.

        If use_decimal is true (the default), ``decimal.Decimal`` will
        be supported directly by the encoder. For the inverse, decode JSON
        with ``parse_float=decimal.Decimal``.

        If namedtuple_as_object is true (the default), objects with
        ``_asdict()`` methods will be encoded as JSON objects.

        If tuple_as_array is true (the default), tuple (and subclasses) will
        be encoded as JSON arrays.
        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.use_decimal = use_decimal
        self.namedtuple_as_object = namedtuple_as_object
        self.tuple_as_array = tuple_as_array
        # Integer indents are accepted for backwards compatibility.
        if isinstance(indent, (int, long)):
            indent = ' ' * indent
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        elif indent is not None:
            # Each item is followed by a newline, so drop the trailing space.
            self.item_separator = ','
        if default is not None:
            # Shadow the default() method with the supplied callable.
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> from simplejson import JSONEncoder
        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                # UTF-8 byte strings are decoded by the string encoders
                # themselves; only other encodings need decoding here.
                if (_encoding is not None
                        and not (_encoding == 'utf-8')):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        if self.ensure_ascii:
            return ''.join(chunks)
        else:
            return u''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        ``_one_shot`` is an internal flag: when true, and the C encoder is
        available and no indent is configured, the C encoder is used.
        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        # Wrap the string encoder only when there is a real, non-UTF-8
        # encoding to decode from.  With encoding=None byte strings are
        # passed through untouched, matching encode() and the constructor
        # documentation; previously this path called o.decode(None).
        _encoding = self.encoding
        if _encoding is not None and _encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=_encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf):
            # Check for specials. Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on
            # the internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text


        key_memo = {}
        if (_one_shot and c_make_encoder is not None
                and self.indent is None):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan, key_memo, self.use_decimal,
                self.namedtuple_as_object, self.tuple_as_array)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot, self.use_decimal,
                self.namedtuple_as_object, self.tuple_as_array)
        try:
            return _iterencode(o, 0)
        finally:
            # The memo is only handed to the C encoder; always empty it.
            key_memo.clear()
299
300
class JSONEncoderForHTML(JSONEncoder):
    """An encoder that produces JSON safe to embed in HTML.

    To embed JSON content in, say, a script tag on a web page, the
    characters &, < and > should be escaped. They cannot be escaped
    with the usual entities (e.g. &amp;) because they are not expanded
    within <script> tags.
    """

    def encode(self, o):
        """Return the HTML-safe JSON text for ``o`` as a single string."""
        # The base-class encode() has fast paths that bypass iterencode(),
        # so join the output of our own iterencode() instead.
        pieces = self.iterencode(o, True)
        joiner = '' if self.ensure_ascii else u''
        return joiner.join(pieces)

    def iterencode(self, o, _one_shot=False):
        """Yield the base encoder's chunks with &, < and > escaped."""
        parent = super(JSONEncoderForHTML, self)
        for piece in parent.iterencode(o, _one_shot):
            piece = piece.replace('&', '\\u0026')
            yield piece.replace('<', '\\u003c').replace('>', '\\u003e')
326
327
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        _use_decimal, _namedtuple_as_object, _tuple_as_array,
        ## HACK: hand-optimized bytecode; turn globals into locals
        False=False,
        True=True,
        ValueError=ValueError,
        basestring=basestring,
        Decimal=Decimal,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        long=long,
        str=str,
        tuple=tuple,
    ):
    """Build the pure-Python encoder and return its entry point.

    The returned ``_iterencode(o, _current_indent_level)`` is a generator
    function yielding the JSON text for ``o`` in chunks.

    markers -- dict used for circular-reference detection (keyed by
        ``id()`` of the object being encoded), or None to disable it.
    _default -- callable applied to objects of unsupported types.
    _encoder -- callable turning a string into its quoted JSON form.
    _indent -- string repeated per nesting level, or None for compact
        output.
    _floatstr -- callable rendering a float as text.
    _key_separator, _item_separator -- separator strings.
    _sort_keys -- when true, dict items are sorted by key.
    _skipkeys -- when true, unsupported key types are skipped instead of
        raising TypeError.
    _one_shot -- accepted but not referenced in this function's body.
    _use_decimal -- when true, Decimal values are emitted via ``str()``.
    _namedtuple_as_object -- when true, objects with a callable
        ``_asdict`` attribute are encoded as JSON objects.
    _tuple_as_array -- when true, tuples are encoded as JSON arrays.

    The remaining keyword parameters only rebind builtins as locals (see
    the HACK note in the signature).
    """

    def _iterencode_list(lst, _current_indent_level):
        """Yield the JSON array text for the sequence ``lst``."""
        # Empty sequences are emitted before any marker bookkeeping.
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        # buf holds the text to emit immediately before the next element:
        # the opening bracket (plus indent) first, a separator afterwards.
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (_indent * _current_indent_level)
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            # Scalars are yielded together with the pending prefix.
            # True/False are tested before the int check.
            if isinstance(value, basestring):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, (int, long)):
                yield buf + str(value)
            elif isinstance(value, float):
                yield buf + _floatstr(value)
            elif _use_decimal and isinstance(value, Decimal):
                yield buf + str(value)
            else:
                # Containers and unknown types: emit the prefix, then
                # delegate to the matching sub-generator.
                yield buf
                if isinstance(value, list):
                    chunks = _iterencode_list(value, _current_indent_level)
                else:
                    # _asdict is checked before tuple, so namedtuple-like
                    # values become objects when that option is enabled.
                    _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
                    if _asdict and callable(_asdict):
                        chunks = _iterencode_dict(_asdict(),
                                                  _current_indent_level)
                    elif _tuple_as_array and isinstance(value, tuple):
                        chunks = _iterencode_list(value, _current_indent_level)
                    elif isinstance(value, dict):
                        chunks = _iterencode_dict(value, _current_indent_level)
                    else:
                        chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            # Closing bracket goes on its own line at the outer indent.
            _current_indent_level -= 1
            yield '\n' + (_indent * _current_indent_level)
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        """Yield the JSON object text for the mapping ``dct``."""
        # Empty mappings are emitted before any marker bookkeeping.
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (_indent * _current_indent_level)
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            # Sorting uses the original key objects, before they are
            # converted to strings below.
            items = dct.items()
            items.sort(key=lambda kv: kv[0])
        else:
            items = dct.iteritems()
        for key, value in items:
            # Convert supported non-string keys to text; the result is
            # passed through _encoder below like any other string key.
            if isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, (int, long)):
                key = str(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError("key " + repr(key) + " is not a string")
            # The separator is emitted only after a key has been accepted,
            # so skipped keys do not leave stray separators.
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, basestring):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, (int, long)):
                yield str(value)
            elif isinstance(value, float):
                yield _floatstr(value)
            elif _use_decimal and isinstance(value, Decimal):
                yield str(value)
            else:
                # Containers and unknown types are delegated to the
                # matching sub-generator.
                if isinstance(value, list):
                    chunks = _iterencode_list(value, _current_indent_level)
                else:
                    _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
                    if _asdict and callable(_asdict):
                        chunks = _iterencode_dict(_asdict(),
                                                  _current_indent_level)
                    elif _tuple_as_array and isinstance(value, tuple):
                        chunks = _iterencode_list(value, _current_indent_level)
                    elif isinstance(value, dict):
                        chunks = _iterencode_dict(value, _current_indent_level)
                    else:
                        chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            # Closing brace goes on its own line at the outer indent.
            _current_indent_level -= 1
            yield '\n' + (_indent * _current_indent_level)
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        """Yield the JSON text for an arbitrary object ``o``."""
        if isinstance(o, basestring):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield _floatstr(o)
        elif isinstance(o, list):
            for chunk in _iterencode_list(o, _current_indent_level):
                yield chunk
        else:
            _asdict = _namedtuple_as_object and getattr(o, '_asdict', None)
            if _asdict and callable(_asdict):
                for chunk in _iterencode_dict(_asdict(), _current_indent_level):
                    yield chunk
            elif (_tuple_as_array and isinstance(o, tuple)):
                for chunk in _iterencode_list(o, _current_indent_level):
                    yield chunk
            elif isinstance(o, dict):
                for chunk in _iterencode_dict(o, _current_indent_level):
                    yield chunk
            elif _use_decimal and isinstance(o, Decimal):
                yield str(o)
            else:
                # Unsupported type: ask _default for a replacement and
                # encode that.  o stays in markers while its replacement is
                # encoded, so reaching the same object again through this
                # branch raises ValueError.
                if markers is not None:
                    markerid = id(o)
                    if markerid in markers:
                        raise ValueError("Circular reference detected")
                    markers[markerid] = o
                o = _default(o)
                for chunk in _iterencode(o, _current_indent_level):
                    yield chunk
                if markers is not None:
                    del markers[markerid]

    return _iterencode
538