"""Utilities to manipulate JSON objects."""

# NOTE: this is a copy of ipykernel/jsonutils.py (+blackified)

# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.

import math
import numbers
import re
import types
from binascii import b2a_base64
from datetime import datetime
from typing import Dict

try:
    from ipython_genutils import py3compat
    from ipython_genutils.py3compat import iteritems, unicode_type
except ImportError:
    # ipython_genutils is only a Python 2/3 compatibility shim.  This module
    # uses function annotations, so it can only ever be imported on Python 3;
    # fall back to the native equivalents when the shim is not installed.
    py3compat = types.SimpleNamespace(PY3=True)
    unicode_type = str

    def iteritems(d):
        """Return an iterator over the ``(key, value)`` pairs of *d*."""
        return iter(d.items())


next_attr_name = '__next__' if py3compat.PY3 else 'next'

# -----------------------------------------------------------------------------
# Globals and constants
# -----------------------------------------------------------------------------

# timestamp formats
ISO8601 = "%Y-%m-%dT%H:%M:%S.%f"
ISO8601_PAT = re.compile(
    r"^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(\.\d{1,6})?Z?([\+\-]\d{2}:?\d{2})?$"
)

# holy crap, strptime is not threadsafe.
# Calling it once at import seems to help.
datetime.strptime("1", "%d")

# -----------------------------------------------------------------------------
# Classes and functions
# -----------------------------------------------------------------------------


# constants for identifying png/jpeg data
PNG = b'\x89PNG\r\n\x1a\n'
# front of PNG base64-encoded
PNG64 = b'iVBORw0KG'
JPEG = b'\xff\xd8'
# front of JPEG base64-encoded
JPEG64 = b'/9'
# constants for identifying gif data
GIF_64 = b'R0lGODdh'
GIF89_64 = b'R0lGODlh'
# front of PDF base64-encoded
PDF64 = b'JVBER'


def encode_images(format_dict: Dict) -> Dict[str, str]:
    """b64-encodes images in a displaypub format dict

    Perhaps this should be handled in json_clean itself?

    Parameters
    ----------

    format_dict : dict
        A dictionary of display data keyed by mime-type

    Returns
    -------

    format_dict : dict
        On Python 3 the input dictionary itself is returned, unmodified
        (no copy is made).  On Python 2 a shallow copy is returned in which
        binary image data ('image/png', 'image/jpeg', 'image/gif' or
        'application/pdf') is base64-encoded and decoded to ASCII text.

    """

    # no need for handling of ambiguous bytestrings on Python 3,
    # where bytes objects always represent binary data and are thus
    # base64-encoded by json_clean.
    if py3compat.PY3:
        return format_dict

    encoded = format_dict.copy()

    pngdata = format_dict.get('image/png')
    if isinstance(pngdata, bytes):
        # make sure we don't double-encode
        if not pngdata.startswith(PNG64):
            pngdata = b2a_base64(pngdata)
        encoded['image/png'] = pngdata.decode('ascii')

    jpegdata = format_dict.get('image/jpeg')
    if isinstance(jpegdata, bytes):
        # make sure we don't double-encode
        if not jpegdata.startswith(JPEG64):
            jpegdata = b2a_base64(jpegdata)
        encoded['image/jpeg'] = jpegdata.decode('ascii')

    gifdata = format_dict.get('image/gif')
    if isinstance(gifdata, bytes):
        # make sure we don't double-encode
        if not gifdata.startswith((GIF_64, GIF89_64)):
            gifdata = b2a_base64(gifdata)
        encoded['image/gif'] = gifdata.decode('ascii')

    pdfdata = format_dict.get('application/pdf')
    if isinstance(pdfdata, bytes):
        # make sure we don't double-encode
        if not pdfdata.startswith(PDF64):
            pdfdata = b2a_base64(pdfdata)
        encoded['application/pdf'] = pdfdata.decode('ascii')

    return encoded


def json_clean(obj):
    """Clean an object to ensure it's safe to encode in JSON.

    Atomic, immutable objects are returned unmodified.  Sets, frozensets,
    tuples, generators and iterators are converted to lists, lists are copied
    and dicts are also copied (with their keys converted to text).

    Note: dicts whose keys could cause collisions upon encoding (such as a dict
    with both the number 1 and the string '1' as keys) will cause a ValueError
    to be raised.

    Parameters
    ----------
    obj : any python object

    Returns
    -------
    out : object

        A version of the input which will not cause an encoding error when
        encoded as JSON.  Note that this function does not *encode* its inputs,
        it simply sanitizes it so that there will be no encoding errors later.

        Integers are cast to ``int`` and other real numbers to ``float``,
        except NaN and infinities, which are returned as their ``repr``
        strings.  ``bytes`` are base64-encoded on Python 3.  ``datetime``
        objects are formatted with the ``ISO8601`` format string.

    Raises
    ------
    ValueError
        If a dict has keys that collide once converted to text, or if the
        object is of a type this function does not know how to clean.

    """
    # types that are 'atomic' and ok in json as-is.
    atomic_ok = (unicode_type, type(None))

    # containers that we need to convert into lists
    container_to_list = (tuple, set, frozenset, types.GeneratorType)

    # Since bools are a subtype of Integrals, which are a subtype of Reals,
    # we have to check them in that order.

    if isinstance(obj, bool):
        return obj

    if isinstance(obj, numbers.Integral):
        # cast int to int, in case subclasses override __str__ (e.g. boost enum, #4598)
        return int(obj)

    if isinstance(obj, numbers.Real):
        # cast out-of-range floats to their reprs
        if math.isnan(obj) or math.isinf(obj):
            return repr(obj)
        return float(obj)

    if isinstance(obj, atomic_ok):
        return obj

    if isinstance(obj, bytes):
        if py3compat.PY3:
            # unambiguous binary data is base64-encoded
            # (this probably should have happened upstream)
            return b2a_base64(obj).decode('ascii')
        else:
            # Python 2 bytestr is ambiguous,
            # needs special handling for possible binary bytestrings.
            # imperfect workaround: if ascii, assume text.
            # otherwise assume binary, base64-encode (py3 behavior).
            try:
                return obj.decode('ascii')
            except UnicodeDecodeError:
                return b2a_base64(obj).decode('ascii')

    # Known containers, plus anything that follows the iterator protocol
    # (has both __iter__ and __next__), are materialized into a list first.
    if isinstance(obj, container_to_list) or (
        hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)
    ):
        obj = list(obj)

    if isinstance(obj, list):
        return [json_clean(x) for x in obj]

    if isinstance(obj, dict):
        # First, validate that the dict won't lose data in conversion due to
        # key collisions after stringification. This can happen with keys like
        # True and 'true' or 1 and '1', which collide in JSON.
        nkeys = len(obj)
        nkeys_collapsed = len(set(map(unicode_type, obj)))
        if nkeys != nkeys_collapsed:
            raise ValueError(
                'dict cannot be safely converted to JSON: '
                'key collision would lead to dropped values'
            )
        # If all OK, proceed by making the new dict that will be json-safe
        return {unicode_type(k): json_clean(v) for k, v in iteritems(obj)}

    if isinstance(obj, datetime):
        return obj.strftime(ISO8601)

    # we don't understand it, it's probably an unserializable object
    raise ValueError("Can't clean for JSON: %r" % obj)