# Copyright (c) 2019 Iotic Labs Ltd. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://github.com/Iotic-Labs/py-ubjson/blob/master/LICENSE
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""UBJSON draft v12 decoder"""

from io import BytesIO
from struct import Struct, pack, error as StructError
from decimal import Decimal, DecimalException

from .compat import raise_from, intern_unicode
from .markers import (TYPE_NONE, TYPE_NULL, TYPE_NOOP, TYPE_BOOL_TRUE, TYPE_BOOL_FALSE, TYPE_INT8, TYPE_UINT8,
                      TYPE_INT16, TYPE_INT32, TYPE_INT64, TYPE_FLOAT32, TYPE_FLOAT64, TYPE_HIGH_PREC, TYPE_CHAR,
                      TYPE_STRING, OBJECT_START, OBJECT_END, ARRAY_START, ARRAY_END, CONTAINER_TYPE, CONTAINER_COUNT)

# Markers which are accepted as the fixed type of a typed container
__TYPES = frozenset((TYPE_NULL, TYPE_BOOL_TRUE, TYPE_BOOL_FALSE, TYPE_INT8, TYPE_UINT8, TYPE_INT16, TYPE_INT32,
                     TYPE_INT64, TYPE_FLOAT32, TYPE_FLOAT64, TYPE_HIGH_PREC, TYPE_CHAR, TYPE_STRING, ARRAY_START,
                     OBJECT_START))
# Markers whose value is implied by the marker itself (nothing further is read from the stream)
__TYPES_NO_DATA = frozenset((TYPE_NULL, TYPE_BOOL_FALSE, TYPE_BOOL_TRUE))
# Markers accepted wherever a count or length is expected
__TYPES_INT = frozenset((TYPE_INT8, TYPE_UINT8, TYPE_INT16, TYPE_INT32, TYPE_INT64))

# Lookup tables mapping every possible single byte to its signed / unsigned integer value
__SMALL_INTS_DECODED = {pack('>b', i): i for i in range(-128, 128)}
__SMALL_UINTS_DECODED = {pack('>B', i): i for i in range(256)}
# Pre-compiled big-endian unpackers
__UNPACK_INT16 = Struct('>h').unpack
__UNPACK_INT32 = Struct('>i').unpack
__UNPACK_INT64 = Struct('>q').unpack
__UNPACK_FLOAT32 = Struct('>f').unpack
__UNPACK_FLOAT64 = Struct('>d').unpack


class DecoderException(ValueError):
    """Raised when decoding of a UBJSON stream fails."""

    def __init__(self, message, position=None):
        # args is always (message, position) so that the position property can rely on args[1]
        if position is not None:
            super(DecoderException, self).__init__('%s (at byte %d)' % (message, position), position)
        else:
            super(DecoderException, self).__init__(str(message), None)

    @property
    def position(self):
        """Position in stream where decoding failed. Can be None in case where decoding from string or when file-like
        object does not support tell() (or its tell() call fails).
        """
        return self.args[1]  # pylint: disable=unsubscriptable-object


# All decode functions share the (fp_read, marker) signature so that they can be dispatched via __METHOD_MAP, even
# though most of them do not need the marker. Each one funnels failures through raise_from (imported from .compat),
# which is relied upon to raise the given DecoderException.
# pylint: disable=unused-argument
def __decode_high_prec(fp_read, marker):
    """Reads a length-prefixed UTF-8 string and returns it as a Decimal.

    Raises DecoderException if the data is truncated, not valid UTF-8 or rejected by Decimal.
    """
    length = __decode_int_non_negative(fp_read, fp_read(1))
    raw = fp_read(length)
    if len(raw) < length:
        raise DecoderException('High prec. too short')
    try:
        return Decimal(raw.decode('utf-8'))
    except UnicodeError as ex:
        raise_from(DecoderException('Failed to decode decimal string'), ex)
    except DecimalException as ex:
        raise_from(DecoderException('Failed to decode decimal'), ex)


def __decode_int_non_negative(fp_read, marker):
    """Decodes the integer identified by marker, as used for counts & lengths.

    Raises DecoderException if marker is not one of the integer markers or the decoded value is negative.
    """
    if marker not in __TYPES_INT:
        raise DecoderException('Integer marker expected')
    value = __METHOD_MAP[marker](fp_read, marker)
    if value < 0:
        raise DecoderException('Negative count/length unexpected')
    return value


def __decode_int8(fp_read, marker):
    """Reads one byte and returns it as a signed integer."""
    try:
        return __SMALL_INTS_DECODED[fp_read(1)]
    except KeyError as ex:
        # only reachable if the read did not produce exactly one byte (the table covers all byte values)
        raise_from(DecoderException('Failed to unpack int8'), ex)


def __decode_uint8(fp_read, marker):
    """Reads one byte and returns it as an unsigned integer."""
    try:
        return __SMALL_UINTS_DECODED[fp_read(1)]
    except KeyError as ex:
        # only reachable if the read did not produce exactly one byte (the table covers all byte values)
        raise_from(DecoderException('Failed to unpack uint8'), ex)


def __decode_int16(fp_read, marker):
    """Reads two bytes and returns them as a big-endian signed integer."""
    try:
        return __UNPACK_INT16(fp_read(2))[0]
    except StructError as ex:
        raise_from(DecoderException('Failed to unpack int16'), ex)


def __decode_int32(fp_read, marker):
    """Reads four bytes and returns them as a big-endian signed integer."""
    try:
        return __UNPACK_INT32(fp_read(4))[0]
    except StructError as ex:
        raise_from(DecoderException('Failed to unpack int32'), ex)


def __decode_int64(fp_read, marker):
    """Reads eight bytes and returns them as a big-endian signed integer."""
    try:
        return __UNPACK_INT64(fp_read(8))[0]
    except StructError as ex:
        raise_from(DecoderException('Failed to unpack int64'), ex)


def __decode_float32(fp_read, marker):
    """Reads four bytes and returns them as a big-endian float."""
    try:
        return __UNPACK_FLOAT32(fp_read(4))[0]
    except StructError as ex:
        raise_from(DecoderException('Failed to unpack float32'), ex)


def __decode_float64(fp_read, marker):
    """Reads eight bytes and returns them as a big-endian float."""
    try:
        return __UNPACK_FLOAT64(fp_read(8))[0]
    except StructError as ex:
        raise_from(DecoderException('Failed to unpack float64'), ex)


def __decode_char(fp_read, marker):
    """Reads a single byte and returns it decoded as UTF-8.

    Raises DecoderException if no byte is available or the byte is not valid UTF-8 on its own.
    """
    raw = fp_read(1)
    if not raw:
        raise DecoderException('Char missing')
    try:
        return raw.decode('utf-8')
    except UnicodeError as ex:
        raise_from(DecoderException('Failed to decode char'), ex)


def __decode_string(fp_read, marker):
    """Reads a length-prefixed UTF-8 string.

    Raises DecoderException if the data is truncated or not valid UTF-8.
    """
    # current marker is string identifier, so read next byte which identifies integer type
    length = __decode_int_non_negative(fp_read, fp_read(1))
    raw = fp_read(length)
    if len(raw) < length:
        raise DecoderException('String too short')
    try:
        return raw.decode('utf-8')
    except UnicodeError as ex:
        raise_from(DecoderException('Failed to decode string'), ex)


# same as string, except there is no 'S' marker
def __decode_object_key(fp_read, marker, intern_object_keys):
    """Reads an object key. Here marker is the (already read) integer type marker of the key's length.

    If intern_object_keys is set, the decoded key is passed through intern_unicode (from .compat).
    Raises DecoderException if the data is truncated or not valid UTF-8.
    """
    length = __decode_int_non_negative(fp_read, marker)
    raw = fp_read(length)
    if len(raw) < length:
        raise DecoderException('String too short')
    try:
        return intern_unicode(raw.decode('utf-8')) if intern_object_keys else raw.decode('utf-8')
    except UnicodeError as ex:
        raise_from(DecoderException('Failed to decode object key'), ex)


# Dispatch table for all non-container types. Array & object markers are deliberately absent: callers detect them
# via the resulting KeyError and handle containers explicitly (since these require additional arguments).
__METHOD_MAP = {TYPE_NULL: (lambda _, __: None),
                TYPE_BOOL_TRUE: (lambda _, __: True),
                TYPE_BOOL_FALSE: (lambda _, __: False),
                TYPE_INT8: __decode_int8,
                TYPE_UINT8: __decode_uint8,
                TYPE_INT16: __decode_int16,
                TYPE_INT32: __decode_int32,
                TYPE_INT64: __decode_int64,
                TYPE_FLOAT32: __decode_float32,
                TYPE_FLOAT64: __decode_float64,
                TYPE_HIGH_PREC: __decode_high_prec,
                TYPE_CHAR: __decode_char,
                TYPE_STRING: __decode_string}


def __get_container_params(fp_read, in_mapping, no_bytes):
    """Reads the optional type & count header of a container (array or object).

    Args:
        fp_read: read method of the input stream
        in_mapping (bool): Whether the container is an object (rather than an array)
        no_bytes (bool): Whether typed uint8 arrays should be decoded as lists rather than bytes

    Returns:
        Tuple of (marker, counting, count, type_):
            marker - next marker to process (for the special cases handled by the callers this is whatever was
                     read last and is not used by them)
            counting - whether the container has an explicit element count
            count - number of elements if counting, otherwise 1 (i.e. "not finished yet")
            type_ - fixed element type or TYPE_NONE if the container is not typed

    Raises:
        DecoderException: If the container type is not valid, a type has been specified without a count or the
                          count is invalid.
    """
    marker = fp_read(1)
    if marker == CONTAINER_TYPE:
        marker = fp_read(1)
        if marker not in __TYPES:
            raise DecoderException('Invalid container type')
        type_ = marker
        marker = fp_read(1)
    else:
        type_ = TYPE_NONE
    if marker == CONTAINER_COUNT:
        count = __decode_int_non_negative(fp_read, fp_read(1))
        counting = True

        # special cases (no data (None or bool) / bytes array) will be handled in calling functions
        if not (type_ in __TYPES_NO_DATA or
                (type_ == TYPE_UINT8 and not in_mapping and not no_bytes)):
            # Reading ahead is just to capture type, which will not exist if type is fixed
            marker = fp_read(1) if (in_mapping or type_ == TYPE_NONE) else type_

    elif type_ == TYPE_NONE:
        # set to one to indicate that not finished yet
        count = 1
        counting = False
    else:
        raise DecoderException('Container type without count')
    return marker, counting, count, type_


def __decode_object(fp_read, no_bytes, object_hook, object_pairs_hook,  # pylint: disable=too-many-branches
                    intern_object_keys):
    """Decodes an object whose start marker has already been consumed.

    The key/value pairs are collected in a list (in stream order) and passed to object_pairs_hook if that is set,
    otherwise they are collected in a dict which is passed to object_hook. The hook's result is returned.

    Raises:
        DecoderException: If the object (or anything nested within it) is malformed.
    """
    marker, counting, count, type_ = __get_container_params(fp_read, True, no_bytes)
    has_pairs_hook = object_pairs_hook is not None
    obj = [] if has_pairs_hook else {}

    # special case - no data (None or bool)
    if type_ in __TYPES_NO_DATA:
        # all keys share the same value, so only the keys have to be read from the stream
        value = __METHOD_MAP[type_](fp_read, type_)
        if has_pairs_hook:
            for _ in range(count):
                obj.append((__decode_object_key(fp_read, fp_read(1), intern_object_keys), value))
            return object_pairs_hook(obj)

        for _ in range(count):
            obj[__decode_object_key(fp_read, fp_read(1), intern_object_keys)] = value
        return object_hook(obj)

    # when not counting, count remains at 1 and the loop only ends once the object end marker is encountered
    while count > 0 and (counting or marker != OBJECT_END):
        if marker == TYPE_NOOP:
            marker = fp_read(1)
            continue

        # decode key for object
        key = __decode_object_key(fp_read, marker, intern_object_keys)
        marker = fp_read(1) if type_ == TYPE_NONE else type_

        # decode value
        try:
            value = __METHOD_MAP[marker](fp_read, marker)
        except KeyError:
            handled = False
        else:
            handled = True

        # handle outside above except (on KeyError) so do not have unfriendly "exception within except" backtrace
        if not handled:
            if marker == ARRAY_START:
                value = __decode_array(fp_read, no_bytes, object_hook, object_pairs_hook, intern_object_keys)
            elif marker == OBJECT_START:
                value = __decode_object(fp_read, no_bytes, object_hook, object_pairs_hook, intern_object_keys)
            else:
                raise DecoderException('Invalid marker within object')

        if has_pairs_hook:
            obj.append((key, value))
        else:
            obj[key] = value
        if counting:
            count -= 1
        if count > 0:
            # next key's length marker (or no-op / object end marker)
            marker = fp_read(1)

    return object_pairs_hook(obj) if has_pairs_hook else object_hook(obj)


def __decode_array(fp_read, no_bytes, object_hook, object_pairs_hook, intern_object_keys):
    """Decodes an array whose start marker has already been consumed.

    Returns a list, except for a typed uint8 array when no_bytes is not set: in that case the raw result of a
    single read of count bytes is returned.

    Raises:
        DecoderException: If the array (or anything nested within it) is malformed.
    """
    marker, counting, count, type_ = __get_container_params(fp_read, False, no_bytes)

    # special case - no data (None or bool)
    if type_ in __TYPES_NO_DATA:
        return [__METHOD_MAP[type_](fp_read, type_)] * count

    # special case - bytes array
    if type_ == TYPE_UINT8 and not no_bytes:
        container = fp_read(count)
        if len(container) < count:
            raise DecoderException('Container bytes array too short')
        return container

    container = []
    # when not counting, count remains at 1 and the loop only ends once the array end marker is encountered
    while count > 0 and (counting or marker != ARRAY_END):
        if marker == TYPE_NOOP:
            marker = fp_read(1)
            continue

        # decode value
        try:
            value = __METHOD_MAP[marker](fp_read, marker)
        except KeyError:
            handled = False
        else:
            handled = True

        # handle outside above except (on KeyError) so do not have unfriendly "exception within except" backtrace
        if not handled:
            if marker == ARRAY_START:
                value = __decode_array(fp_read, no_bytes, object_hook, object_pairs_hook, intern_object_keys)
            elif marker == OBJECT_START:
                value = __decode_object(fp_read, no_bytes, object_hook, object_pairs_hook, intern_object_keys)
            else:
                raise DecoderException('Invalid marker within array')

        container.append(value)
        if counting:
            count -= 1
        # with a fixed type there are no per-element markers in the stream, so marker stays as is
        if count and type_ == TYPE_NONE:
            marker = fp_read(1)

    return container


def __object_hook_noop(obj):
    """Default object hook - returns the decoded dict unchanged."""
    return obj


def __stream_position(fp):
    """Returns the current position of fp or None if it cannot be determined.

    The position is only used to annotate a decoding error, so a stream which lacks tell() or whose tell() fails
    (e.g. because it is not seekable) must not replace the decoding error with an unrelated one.
    """
    if not hasattr(fp, 'tell'):
        return None
    try:
        return fp.tell()
    # IOError is distinct from OSError in Python 2. ValueError covers closed streams.
    except (IOError, OSError, ValueError):
        return None


def load(fp, no_bytes=False, object_hook=None, object_pairs_hook=None, intern_object_keys=False):
    """Decodes and returns UBJSON from the given file-like object

    Args:
        fp: read([size])-able object
        no_bytes (bool): If set, typed UBJSON arrays (uint8) will not be
                         converted to a bytes instance and instead treated like
                         any other array (i.e. result in a list).
        object_hook (callable): Called with the result of any object literal
                                decoded (instead of dict).
        object_pairs_hook (callable): Called with the result of any object
                                      literal decoded with an ordered list of
                                      pairs (instead of dict). Takes precedence
                                      over object_hook.
        intern_object_keys (bool): If set, object keys are interned which can
                                   provide a memory saving when many repeated
                                   keys are used. NOTE: This is not supported
                                   in Python2 (since interning does not apply
                                   to unicode) and will be ignored.

    Returns:
        Decoded object

    Raises:
        DecoderException: If a decoding failure occurred.

    UBJSON types are mapped to Python types as follows.  Numbers in brackets
    denote Python version.

        +----------------------------------+---------------+
        | UBJSON                           | Python        |
        +==================================+===============+
        | object                           | dict          |
        +----------------------------------+---------------+
        | array                            | list          |
        +----------------------------------+---------------+
        | string                           | (3) str       |
        |                                  | (2) unicode   |
        +----------------------------------+---------------+
        | uint8, int8, int16, int32, int64 | (3) int       |
        |                                  | (2) int, long |
        +----------------------------------+---------------+
        | float32, float64                 | float         |
        +----------------------------------+---------------+
        | high_precision                   | Decimal       |
        +----------------------------------+---------------+
        | array (typed, uint8)             | (3) bytes     |
        |                                  | (2) str       |
        +----------------------------------+---------------+
        | true                             | True          |
        +----------------------------------+---------------+
        | false                            | False         |
        +----------------------------------+---------------+
        | null                             | None          |
        +----------------------------------+---------------+
    """
    if object_pairs_hook is None and object_hook is None:
        object_hook = __object_hook_noop

    if not callable(fp.read):
        raise TypeError('fp.read not callable')
    fp_read = fp.read

    marker = fp_read(1)
    try:
        try:
            return __METHOD_MAP[marker](fp_read, marker)
        except KeyError:
            pass
        if marker == ARRAY_START:
            return __decode_array(fp_read, bool(no_bytes), object_hook, object_pairs_hook, intern_object_keys)
        if marker == OBJECT_START:
            return __decode_object(fp_read, bool(no_bytes), object_hook, object_pairs_hook, intern_object_keys)
        raise DecoderException('Invalid marker')
    except DecoderException as ex:
        # re-raise with (best-effort) stream position, since the decode functions themselves do not know it
        raise_from(DecoderException(ex.args[0], position=__stream_position(fp)), ex)


def loadb(chars, no_bytes=False, object_hook=None, object_pairs_hook=None, intern_object_keys=False):
    """Decodes and returns UBJSON from the given bytes or bytearray object. See
       load() for available arguments."""
    with BytesIO(chars) as fp:
        return load(fp, no_bytes=no_bytes, object_hook=object_hook, object_pairs_hook=object_pairs_hook,
                    intern_object_keys=intern_object_keys)