1"""Utilities to manipulate JSON objects."""
2
3# NOTE: this is a copy of ipykernel/jsonutils.py (+blackified)
4
5# Copyright (c) IPython Development Team.
6# Distributed under the terms of the Modified BSD License.
7
8import math
9import numbers
10import re
11import types
12from binascii import b2a_base64
13from datetime import datetime
14from typing import Dict
15
16from ipython_genutils import py3compat
17from ipython_genutils.py3compat import iteritems, unicode_type
18
# Name of the method that advances an iterator.  The annotated function
# signatures in this module are Python-3-only syntax, so wherever this file
# can be imported the answer is always ``__next__``.
next_attr_name = '__next__'

# -----------------------------------------------------------------------------
# Globals and constants
# -----------------------------------------------------------------------------

# timestamp formats
# strftime/strptime format for ISO 8601 timestamps with microseconds
ISO8601 = "%Y-%m-%dT%H:%M:%S.%f"
# Groups: (1) date and time to the second, (2) optional fractional seconds
# (1-6 digits, with leading dot), (3) optional UTC offset such as +01:00/+0100.
# A literal "Z" is accepted (and not captured) before the offset.
ISO8601_PAT = re.compile(
    r"^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(\.\d{1,6})?Z?([\+\-]\d{2}:?\d{2})?$"
)

# strptime is not thread-safe.
# Calling it once at import time seems to help.
datetime.strptime("1", "%d")

# -----------------------------------------------------------------------------
# Classes and functions
# -----------------------------------------------------------------------------


# constants for identifying png/jpeg data
# raw PNG file signature
PNG = b'\x89PNG\r\n\x1a\n'
# front of PNG base64-encoded
PNG64 = b'iVBORw0KG'
# raw JPEG start-of-image marker
JPEG = b'\xff\xd8'
# front of JPEG base64-encoded
JPEG64 = b'/9'
# front of GIF base64-encoded ("GIF87a" and "GIF89a" headers respectively)
GIF_64 = b'R0lGODdh'
GIF89_64 = b'R0lGODlh'
# front of PDF base64-encoded
PDF64 = b'JVBER'
52
53
def encode_images(format_dict: Dict) -> Dict[str, str]:
    """Return a displaypub format dict unchanged.

    Historically this base64-encoded binary image payloads ('image/png',
    'image/jpeg', 'image/gif', 'application/pdf') to work around Python 2's
    ambiguous byte strings.  This module only parses on Python 3 (it uses
    function annotations), where that code path was never reached, so the
    dead branch has been removed and the function is a pass-through kept for
    API compatibility.  On Python 3 ``json_clean`` base64-encodes any
    ``bytes`` value it encounters.

    Parameters
    ----------

    format_dict : dict
        A dictionary of display data keyed by mime-type

    Returns
    -------

    format_dict : dict
        The very same dictionary object that was passed in (not a copy);
        its values are not modified.

    """

    # no need for handling of ambiguous bytestrings on Python 3,
    # where bytes objects always represent binary data and thus
    # get base64-encoded when cleaned for JSON.
    return format_dict
112
113
def json_clean(obj):
    """Clean an object to ensure it's safe to encode in JSON.

    Atomic, immutable objects are returned unmodified.  Sets, frozensets,
    tuples, generators and other iterators are converted to lists, lists are
    copied and dicts are also copied (with their keys converted to ``str``).
    Containers are cleaned recursively.

    Note: dicts whose keys could cause collisions upon encoding (such as a dict
    with both the number 1 and the string '1' as keys) will cause a ValueError
    to be raised.

    Parameters
    ----------
    obj : any python object

    Returns
    -------
    out : object

      A version of the input which will not cause an encoding error when
      encoded as JSON.  Note that this function does not *encode* its inputs,
      it simply sanitizes it so that there will be no encoding errors later.

      * ``bool``, ``str`` and ``None`` are returned as-is.
      * Integral numbers are cast to ``int`` and other real numbers to
        ``float``; NaN and infinities are returned as their ``repr`` string.
      * ``bytes`` are base64-encoded and returned as an ASCII ``str``.
      * ``datetime`` objects are formatted with the module's ``ISO8601``
        format string.

    Raises
    ------
    ValueError
        If a dict has keys that collide once converted to ``str``, or if the
        object is of a type this function does not know how to clean.

    """
    # types that are 'atomic' and ok in json as-is.
    atomic_ok = (str, type(None))

    # containers that we need to convert into lists
    container_to_list = (tuple, set, frozenset, types.GeneratorType)

    # Since bools are a subtype of Integrals, which are a subtype of Reals,
    # we have to check them in that order.

    if isinstance(obj, bool):
        return obj

    if isinstance(obj, numbers.Integral):
        # cast int to int, in case subclasses override __str__ (e.g. boost enum, #4598)
        return int(obj)

    if isinstance(obj, numbers.Real):
        # cast out-of-range floats to their reprs
        if math.isnan(obj) or math.isinf(obj):
            return repr(obj)
        return float(obj)

    if isinstance(obj, atomic_ok):
        return obj

    if isinstance(obj, bytes):
        # unambiguous binary data is base64-encoded
        # (this probably should have happened upstream)
        return b2a_base64(obj).decode('ascii')

    # Besides the known container types, anything that looks like an iterator
    # (has both __iter__ and __next__) is drained into a list.
    if isinstance(obj, container_to_list) or (
        hasattr(obj, '__iter__') and hasattr(obj, '__next__')
    ):
        obj = list(obj)

    if isinstance(obj, list):
        return [json_clean(x) for x in obj]

    if isinstance(obj, dict):
        # First, validate that the dict won't lose data in conversion due to
        # key collisions after stringification.  This can happen with keys like
        # 1 and '1', which both become the string '1'.
        if len(obj) != len({str(k) for k in obj}):
            raise ValueError(
                'dict cannot be safely converted to JSON: '
                'key collision would lead to dropped values'
            )
        # If all OK, proceed by making the new dict that will be json-safe
        return {str(k): json_clean(v) for k, v in obj.items()}

    if isinstance(obj, datetime):
        return obj.strftime(ISO8601)

    # we don't understand it, it's probably an unserializable object
    raise ValueError("Can't clean for JSON: %r" % obj)
206