import warnings
from json import loads as json_loads
from os import fsync
from sys import exc_info

from json_tricks.utils import is_py3, dict_default, gzip_compress, gzip_decompress, JsonTricksDeprecation
from .utils import str_type, NoNumpyException  # keep 'unused' imports
from .comment import strip_comments  # keep 'unused' imports
#TODO @mark: imports removed?
from .encoders import TricksEncoder, json_date_time_encode, \
    class_instance_encode, json_complex_encode, json_set_encode, numeric_types_encode, numpy_encode, \
    nonumpy_encode, nopandas_encode, pandas_encode, noenum_instance_encode, \
    enum_instance_encode, pathlib_encode  # keep 'unused' imports
from .decoders import TricksPairHook, \
    json_date_time_hook, ClassInstanceHook, \
    json_complex_hook, json_set_hook, numeric_types_hook, json_numpy_obj_hook, \
    json_nonumpy_obj_hook, \
    nopandas_hook, pandas_hook, EnumInstanceHook, \
    noenum_hook, pathlib_hook, nopathlib_hook  # keep 'unused' imports


# Text encoding used whenever this module converts between str and bytes.
ENCODING = 'UTF-8'


_cih_instance = ClassInstanceHook()
_eih_instance = EnumInstanceHook()
DEFAULT_ENCODERS = [json_date_time_encode, json_complex_encode, json_set_encode,
                    numeric_types_encode, class_instance_encode, ]
DEFAULT_HOOKS = [json_date_time_hook, json_complex_hook, json_set_hook,
                 numeric_types_hook, _cih_instance, ]


# Each optional dependency below prepends either its real encoder/hook or a
# "no<package>" placeholder, so later probes end up earlier in the lists.
#TODO @mark: add properties to all built-in encoders (for speed - but it should keep working without)
try:
    import enum
except ImportError:
    DEFAULT_ENCODERS = [noenum_instance_encode,] + DEFAULT_ENCODERS
    DEFAULT_HOOKS = [noenum_hook,] + DEFAULT_HOOKS
else:
    DEFAULT_ENCODERS = [enum_instance_encode,] + DEFAULT_ENCODERS
    DEFAULT_HOOKS = [_eih_instance,] + DEFAULT_HOOKS

try:
    import numpy
except ImportError:
    DEFAULT_ENCODERS = [nonumpy_encode,] + DEFAULT_ENCODERS
    DEFAULT_HOOKS = [json_nonumpy_obj_hook,] + DEFAULT_HOOKS
else:
    # numpy encode needs to be before complex
    DEFAULT_ENCODERS = [numpy_encode,] + DEFAULT_ENCODERS
    DEFAULT_HOOKS = [json_numpy_obj_hook,] + DEFAULT_HOOKS

try:
    import pandas
except ImportError:
    DEFAULT_ENCODERS = [nopandas_encode,] + DEFAULT_ENCODERS
    DEFAULT_HOOKS = [nopandas_hook,] + DEFAULT_HOOKS
else:
    DEFAULT_ENCODERS = [pandas_encode,] + DEFAULT_ENCODERS
    DEFAULT_HOOKS = [pandas_hook,] + DEFAULT_HOOKS

try:
    import pathlib
except ImportError:
    # No need to include a "nopathlib_encode" hook since we would not encounter
    # the Path object if pathlib isn't available. However, we *could* encounter
    # a serialized Path object (produced by a version of Python with pathlib).
    DEFAULT_HOOKS = [nopathlib_hook,] + DEFAULT_HOOKS
else:
    DEFAULT_ENCODERS = [pathlib_encode,] + DEFAULT_ENCODERS
    DEFAULT_HOOKS = [pathlib_hook,] + DEFAULT_HOOKS


DEFAULT_NONP_ENCODERS = [nonumpy_encode,] + DEFAULT_ENCODERS  # DEPRECATED
DEFAULT_NONP_HOOKS = [json_nonumpy_obj_hook,] + DEFAULT_HOOKS  # DEPRECATED


def dumps(obj, sort_keys=None, cls=None, obj_encoders=DEFAULT_ENCODERS, extra_obj_encoders=(),
        primitives=False, compression=None, allow_nan=False, conv_str_byte=False, fallback_encoders=(),
        properties=None, **jsonkwargs):
    """
    Convert a nested data structure to a json string.

    :param obj: The Python object to convert.
    :param sort_keys: Keep this False if you want order to be preserved.
    :param cls: The json encoder class to use, defaults to TricksEncoder.
    :param obj_encoders: Iterable of encoders to use to convert arbitrary objects into json-able primitives.
    :param extra_obj_encoders: Like `obj_encoders` but on top of them: use this to add encoders without replacing defaults. Since v3.5 these happen before default encoders.
    :param primitives: Forwarded to the encoder class and, through `properties`, to the encoders (default False).
    :param compression: Falsy for no compression, True for gzip level 5, otherwise the gzip compression level to use.
    :param allow_nan: Allow NaN and Infinity values, which is a (useful) violation of the JSON standard (default False).
    :param conv_str_byte: Try to automatically convert between strings and bytes (assuming utf-8) (default False). Not used by `dumps` itself.
    :param fallback_encoders: These are extra `obj_encoders` that 1) are ran after all others and 2) only run if the object hasn't yet been changed.
    :param properties: A dictionary of properties that is passed to each encoder that will accept it.
    :return: The string containing the json-encoded version of obj, or the output of `gzip_compress` when compression is enabled.
    :raises TypeError: If `extra_obj_encoders` is not iterable.

    Other arguments are passed on to `cls`. Note that `sort_keys` should be false if you want to preserve order.
    """
    if not hasattr(extra_obj_encoders, '__iter__'):
        raise TypeError('`extra_obj_encoders` should be a tuple in `json_tricks.dump(s)`')
    # Extra encoders go first so they take precedence over the defaults.
    encoders = tuple(extra_obj_encoders) + tuple(obj_encoders)
    # NOTE(review): a non-empty `properties` dict is used as passed, so the
    # dict_default calls below presumably add missing keys to the caller's
    # dict in place - confirm against utils.dict_default.
    properties = properties or {}
    dict_default(properties, 'primitives', primitives)
    dict_default(properties, 'compression', compression)
    dict_default(properties, 'allow_nan', allow_nan)
    if cls is None:
        cls = TricksEncoder
    txt = cls(sort_keys=sort_keys, obj_encoders=encoders, allow_nan=allow_nan,
        primitives=primitives, fallback_encoders=fallback_encoders,
        properties=properties, **jsonkwargs).encode(obj)
    if not is_py3 and isinstance(txt, str):
        # Python 2 only: `str` is a byte string there, convert it to text.
        txt = unicode(txt, ENCODING)
    if not compression:
        return txt
    if compression is True:
        compression = 5
    txt = txt.encode(ENCODING)
    gzstring = gzip_compress(txt, compresslevel=compression)
    return gzstring


def dump(obj, fp, sort_keys=None, cls=None, obj_encoders=DEFAULT_ENCODERS, extra_obj_encoders=(),
        primitives=False, compression=None, force_flush=False, allow_nan=False, conv_str_byte=False,
        fallback_encoders=(), properties=None, **jsonkwargs):
    """
    Convert a nested data structure to json and write it to a file.

    :param fp: File handle or path to write to.
    :param compression: The gzip compression level, or None for no compression.
    :param force_flush: If True, flush the file handle used, when possibly also in the operating system (default False).
    :return: The data that was written (text, or bytes if it was compressed or encoded for a binary handle).
    :raises ValueError: If the data and file arguments appear to be swapped.
    :raises IOError: If compression is enabled but the handle is not in binary mode.

    The other arguments are identical to `dumps`.
    """
    if (isinstance(obj, str_type) or hasattr(obj, 'write')) and isinstance(fp, (list, dict)):
        raise ValueError('json-tricks dump arguments are in the wrong order: provide the data to be serialized before file handle')
    # Serialize before opening anything, so a failing encode never truncates the target file.
    txt = dumps(obj, sort_keys=sort_keys, cls=cls, obj_encoders=obj_encoders, extra_obj_encoders=extra_obj_encoders,
        primitives=primitives, compression=compression, allow_nan=allow_nan, conv_str_byte=conv_str_byte,
        fallback_encoders=fallback_encoders, properties=properties, **jsonkwargs)
    opened_here = isinstance(fp, str_type)
    if opened_here:
        # Compressed output is bytes, so a path is opened in binary mode in that case.
        fh = open(fp, 'wb+' if compression else 'w+')
    else:
        fh = fp
    try:
        if conv_str_byte:
            # Probe write; a TypeError (handle does not accept bytes) is
            # deliberately ignored and `txt` is written unchanged below.
            try:
                fh.write(b'')
            except TypeError:
                pass
        else:
            # Probe write; if the handle rejects text, encode the text to bytes.
            try:
                fh.write(u'')
            except TypeError:
                if isinstance(txt, str_type):
                    txt = txt.encode(ENCODING)
        # A handle without a `mode` attribute gets the benefit of the doubt ('b?').
        if compression and 'b' not in getattr(fh, 'mode', 'b?') and not isinstance(txt, str_type):
            raise IOError('If compression is enabled, the file must be opened in binary mode.')
        try:
            fh.write(txt)
        except TypeError as err:
            # `format` also copes with an empty or non-string `err.args`.
            detail = err.args[0] if err.args else ''
            err.args = ('{0}. A possible reason is that the file is not opened in binary mode; '
                'be sure to set file mode to something like "wb".'.format(detail),)
            raise
    finally:
        try:
            if force_flush:
                fh.flush()
                try:
                    if fh.fileno() is not None:
                        fsync(fh.fileno())
                except (ValueError,):
                    pass
        finally:
            # Close a handle we opened ourselves even if flushing failed.
            if opened_here:
                fh.close()
    return txt


def loads(string, preserve_order=True, ignore_comments=None, decompression=None, obj_pairs_hooks=DEFAULT_HOOKS,
        extra_obj_pairs_hooks=(), cls_lookup_map=None, allow_duplicates=True, conv_str_byte=False,
        properties=None, **jsonkwargs):
    """
    Convert a json string (optionally gzip-compressed) to a nested data structure.

    :param string: The string containing a json encoded data structure.
    :param preserve_order: Whether to preserve order by using OrderedDicts or not.
    :param ignore_comments: Remove comments (starting with # or //). If None (default), comments are still stripped but a one-time deprecation warning is given when any were found.
    :param decompression: True to use gzip decompression, False to use raw data, None to automatically determine (default). Assumes utf-8 encoding!
    :param obj_pairs_hooks: A list of dictionary hooks to apply.
    :param extra_obj_pairs_hooks: Like `obj_pairs_hooks` but on top of them: use this to add hooks without replacing defaults. Since v3.5 these happen before default hooks.
    :param cls_lookup_map: If set to a dict, for example ``globals()``, then classes encoded from __main__ are looked up this dict.
    :param allow_duplicates: If set to False, an error will be raised when loading a json-map that contains duplicate keys.
    :param parse_float: A function to parse strings to floats (e.g. Decimal). There is also `parse_int`.
    :param conv_str_byte: Try to automatically convert between strings and bytes (assuming utf-8) (default False).
    :param properties: A dictionary of properties that is passed on to the pair hook.
    :return: The decoded data structure, as returned by `json.loads` with the hooks applied.
    :raises TypeError: If `extra_obj_pairs_hooks` is not iterable, or if the input is not a string and `conv_str_byte` is False.

    Other arguments are passed on to `json.loads`.
    """
    if not hasattr(extra_obj_pairs_hooks, '__iter__'):
        raise TypeError('`extra_obj_pairs_hooks` should be a tuple in `json_tricks.load(s)`')
    if decompression is None:
        # Auto-detect gzip by its two-byte magic header.
        decompression = isinstance(string, bytes) and string[:2] == b'\x1f\x8b'
    if decompression:
        string = gzip_decompress(string).decode(ENCODING)
    if not isinstance(string, str_type):
        if conv_str_byte:
            string = string.decode(ENCODING)
        else:
            raise TypeError(('The input was of non-string type "{0:}" in `json_tricks.load(s)`. '
                'Bytes cannot be automatically decoded since the encoding is not known. Recommended '
                'way is to instead decode the bytes to a string and pass that string to `load(s)`, '
                'for example bytevar.decode("utf-8") if utf-8 is the encoding. Alternatively you can '
                'force an attempt by passing conv_str_byte=True, but this may cause decoding issues.')
                .format(type(string)))
    if ignore_comments or ignore_comments is None:
        new_string = strip_comments(string)
        # Warn at most once per process (flag stored on the function object),
        # and only if something was actually stripped without an explicit opt-in.
        if ignore_comments is None and not getattr(loads, '_ignore_comments_warned', False) and string != new_string:
            warnings.warn('`json_tricks.load(s)` stripped some comments, but `ignore_comments` was '
                'not passed; in the next major release, the behaviour when `ignore_comments` is not '
                'passed will change; it is recommended to explicitly pass `ignore_comments=True` if '
                'you want to strip comments; see https://github.com/mverleg/pyjson_tricks/issues/74',
                JsonTricksDeprecation)
            loads._ignore_comments_warned = True
        string = new_string
    # NOTE(review): a non-empty `properties` dict is used as passed, so the
    # dict_default calls below presumably add missing keys to the caller's
    # dict in place - confirm against utils.dict_default.
    properties = properties or {}
    dict_default(properties, 'preserve_order', preserve_order)
    dict_default(properties, 'ignore_comments', ignore_comments)
    dict_default(properties, 'decompression', decompression)
    dict_default(properties, 'cls_lookup_map', cls_lookup_map)
    dict_default(properties, 'allow_duplicates', allow_duplicates)
    # Extra hooks go first so they take precedence over the defaults.
    hooks = tuple(extra_obj_pairs_hooks) + tuple(obj_pairs_hooks)
    hook = TricksPairHook(ordered=preserve_order, obj_pairs_hooks=hooks, allow_duplicates=allow_duplicates, properties=properties)
    return json_loads(string, object_pairs_hook=hook, **jsonkwargs)


def load(fp, preserve_order=True, ignore_comments=None, decompression=None, obj_pairs_hooks=DEFAULT_HOOKS,
        extra_obj_pairs_hooks=(), cls_lookup_map=None, allow_duplicates=True, conv_str_byte=False,
        properties=None, **jsonkwargs):
    """
    Read json from a file handle or path and convert it to a nested data structure.

    :param fp: File handle or path to load from.
    :return: The decoded data structure, as returned by `loads`.
    :raises Exception: If reading the content fails with a UnicodeDecodeError.

    The other arguments are identical to loads.
    """
    try:
        if isinstance(fp, str_type):
            if decompression is not None:
                open_binary = bool(decompression)
            else:
                with open(fp, 'rb') as fh:
                    # This attempts to detect gzip mode; gzip should always
                    # have this header, and text json can't have it.
                    open_binary = (fh.read(2) == b'\x1f\x8b')
            with open(fp, 'rb' if open_binary else 'r') as fh:
                string = fh.read()
        else:
            string = fp.read()
    except UnicodeDecodeError as err:
        # todo: not covered in tests, is it relevant?
        raise Exception('There was a problem decoding the file content. A possible reason is that the file is not ' +
            'opened in binary mode; be sure to set file mode to something like "rb".').with_traceback(exc_info()[2])
    return loads(string, preserve_order=preserve_order, ignore_comments=ignore_comments, decompression=decompression,
        obj_pairs_hooks=obj_pairs_hooks, extra_obj_pairs_hooks=extra_obj_pairs_hooks, cls_lookup_map=cls_lookup_map,
        allow_duplicates=allow_duplicates, conv_str_byte=conv_str_byte, properties=properties, **jsonkwargs)