1# Copyright (c) 2019 Iotic Labs Ltd. All rights reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     https://github.com/Iotic-Labs/py-ubjson/blob/master/LICENSE
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15
16"""UBJSON draft v12 decoder"""
17
18from io import BytesIO
19from struct import Struct, pack, error as StructError
20from decimal import Decimal, DecimalException
21
22from .compat import raise_from, intern_unicode
23from .markers import (TYPE_NONE, TYPE_NULL, TYPE_NOOP, TYPE_BOOL_TRUE, TYPE_BOOL_FALSE, TYPE_INT8, TYPE_UINT8,
24                      TYPE_INT16, TYPE_INT32, TYPE_INT64, TYPE_FLOAT32, TYPE_FLOAT64, TYPE_HIGH_PREC, TYPE_CHAR,
25                      TYPE_STRING, OBJECT_START, OBJECT_END, ARRAY_START, ARRAY_END, CONTAINER_TYPE, CONTAINER_COUNT)
26
# Markers whose value is implied by the marker itself (no payload bytes follow)
__TYPES_NO_DATA = frozenset((TYPE_NULL, TYPE_BOOL_FALSE, TYPE_BOOL_TRUE))
# Markers of the integer types, the only ones accepted for counts and lengths
__TYPES_INT = frozenset((TYPE_INT8, TYPE_UINT8, TYPE_INT16, TYPE_INT32, TYPE_INT64))
# Every marker accepted as the declared element type of a container
__TYPES = __TYPES_NO_DATA | __TYPES_INT | frozenset((TYPE_FLOAT32, TYPE_FLOAT64, TYPE_HIGH_PREC, TYPE_CHAR,
                                                     TYPE_STRING, ARRAY_START, OBJECT_START))

# Lookup tables mapping each possible single raw byte to its signed / unsigned integer value
__SMALL_INTS_DECODED = dict((pack('>b', number), number) for number in range(-128, 128))
__SMALL_UINTS_DECODED = dict((pack('>B', number), number) for number in range(256))

# Pre-compiled big-endian unpackers for the multi-byte numeric types
__UNPACK_INT16 = Struct('>h').unpack
__UNPACK_INT32 = Struct('>i').unpack
__UNPACK_INT64 = Struct('>q').unpack
__UNPACK_FLOAT32 = Struct('>f').unpack
__UNPACK_FLOAT64 = Struct('>d').unpack
40
41
class DecoderException(ValueError):
    """Error raised whenever a UBJSON stream cannot be decoded."""

    def __init__(self, message, position=None):
        # args always has the shape (text, position) - the position property relies on this
        if position is None:
            text = str(message)
        else:
            text = '%s (at byte %d)' % (message, position)
        super(DecoderException, self).__init__(text, position)

    @property
    def position(self):
        """Position in the stream at which decoding failed. None if no position was supplied when the exception was
        created (e.g. because the file-like object does not provide tell()).
        """
        return self.args[1]  # pylint: disable=unsubscriptable-object
57
58
59# pylint: disable=unused-argument
def __decode_high_prec(fp_read, marker):
    """Read a length-prefixed UTF-8 string and return it as a Decimal.

    Raises DecoderException if fewer bytes than announced are available, if the bytes are not valid UTF-8 or if
    the text cannot be converted to a Decimal.
    """
    # the byte following the high precision marker identifies the integer type of the length
    size = __decode_int_non_negative(fp_read, fp_read(1))
    payload = fp_read(size)
    if len(payload) < size:
        raise DecoderException('High prec. too short')
    try:
        text = payload.decode('utf-8')
        return Decimal(text)
    except DecimalException as ex:
        raise_from(DecoderException('Failed to decode decimal'), ex)
    except UnicodeError as ex:
        raise_from(DecoderException('Failed to decode decimal string'), ex)
71
72
def __decode_int_non_negative(fp_read, marker):
    """Decode an integer (as used for counts and lengths) whose type is given by marker.

    Raises DecoderException if marker is not one of the integer type markers or if the decoded value is negative.
    """
    if marker in __TYPES_INT:
        number = __METHOD_MAP[marker](fp_read, marker)
        if number < 0:
            raise DecoderException('Negative count/length unexpected')
        return number
    raise DecoderException('Integer marker expected')
80
81
def __decode_int8(fp_read, marker):
    """Read one byte and return it as a signed 8-bit integer (via lookup table)."""
    try:
        number = __SMALL_INTS_DECODED[fp_read(1)]
    except KeyError as ex:
        # anything which is not exactly one byte (e.g. end of stream) is not in the table
        raise_from(DecoderException('Failed to unpack int8'), ex)
    else:
        return number
87
88
def __decode_uint8(fp_read, marker):
    """Read one byte and return it as an unsigned 8-bit integer (via lookup table)."""
    try:
        number = __SMALL_UINTS_DECODED[fp_read(1)]
    except KeyError as ex:
        # anything which is not exactly one byte (e.g. end of stream) is not in the table
        raise_from(DecoderException('Failed to unpack uint8'), ex)
    else:
        return number
94
95
def __decode_int16(fp_read, marker):
    """Read two bytes and return them as a big-endian signed 16-bit integer."""
    try:
        (number,) = __UNPACK_INT16(fp_read(2))
    except StructError as ex:
        raise_from(DecoderException('Failed to unpack int16'), ex)
    else:
        return number
101
102
def __decode_int32(fp_read, marker):
    """Read four bytes and return them as a big-endian signed 32-bit integer."""
    try:
        (number,) = __UNPACK_INT32(fp_read(4))
    except StructError as ex:
        raise_from(DecoderException('Failed to unpack int32'), ex)
    else:
        return number
108
109
def __decode_int64(fp_read, marker):
    """Read eight bytes and return them as a big-endian signed 64-bit integer."""
    try:
        (number,) = __UNPACK_INT64(fp_read(8))
    except StructError as ex:
        raise_from(DecoderException('Failed to unpack int64'), ex)
    else:
        return number
115
116
def __decode_float32(fp_read, marker):
    """Read four bytes and return them as a big-endian 32-bit float."""
    try:
        (number,) = __UNPACK_FLOAT32(fp_read(4))
    except StructError as ex:
        raise_from(DecoderException('Failed to unpack float32'), ex)
    else:
        return number
122
123
def __decode_float64(fp_read, marker):
    """Read eight bytes and return them as a big-endian 64-bit float."""
    try:
        (number,) = __UNPACK_FLOAT64(fp_read(8))
    except StructError as ex:
        raise_from(DecoderException('Failed to unpack float64'), ex)
    else:
        return number
129
130
def __decode_char(fp_read, marker):
    """Read a single byte and return it decoded (UTF-8) as a one-character string.

    Raises DecoderException if no byte is available or if the byte is not valid UTF-8 on its own.
    """
    byte = fp_read(1)
    if byte:
        try:
            return byte.decode('utf-8')
        except UnicodeError as ex:
            raise_from(DecoderException('Failed to decode char'), ex)
    else:
        raise DecoderException('Char missing')
139
140
def __decode_string(fp_read, marker):
    """Read a length-prefixed UTF-8 string and return it decoded.

    Raises DecoderException if fewer bytes than announced are available or if the bytes are not valid UTF-8.
    """
    # marker is the string identifier itself - the byte after it identifies the integer type of the length
    size = __decode_int_non_negative(fp_read, fp_read(1))
    payload = fp_read(size)
    if len(payload) < size:
        raise DecoderException('String too short')
    try:
        text = payload.decode('utf-8')
    except UnicodeError as ex:
        raise_from(DecoderException('Failed to decode string'), ex)
    else:
        return text
151
152
153# same as string, except there is no 'S' marker
def __decode_object_key(fp_read, marker, intern_object_keys):
    """Read an object key: a length (whose integer type is given by marker) followed by that many UTF-8 bytes.

    The decoded key is passed through intern_unicode() if intern_object_keys is set. Raises DecoderException if
    fewer bytes than announced are available or if the bytes are not valid UTF-8.
    """
    size = __decode_int_non_negative(fp_read, marker)
    payload = fp_read(size)
    if len(payload) < size:
        raise DecoderException('String too short')
    try:
        key = payload.decode('utf-8')
        if intern_object_keys:
            key = intern_unicode(key)
        return key
    except UnicodeError as ex:
        raise_from(DecoderException('Failed to decode object key'), ex)
163
164
# Dispatch table from value marker to decoder. Every decoder is called as decoder(fp_read, marker). Containers
# (arrays & objects) are deliberately absent - callers handle those markers themselves.
__METHOD_MAP = {
    # types without payload
    TYPE_NULL: (lambda _fp_read, _marker: None),
    TYPE_BOOL_TRUE: (lambda _fp_read, _marker: True),
    TYPE_BOOL_FALSE: (lambda _fp_read, _marker: False),
    # integers
    TYPE_INT8: __decode_int8,
    TYPE_UINT8: __decode_uint8,
    TYPE_INT16: __decode_int16,
    TYPE_INT32: __decode_int32,
    TYPE_INT64: __decode_int64,
    # floating point & high precision
    TYPE_FLOAT32: __decode_float32,
    TYPE_FLOAT64: __decode_float64,
    TYPE_HIGH_PREC: __decode_high_prec,
    # text
    TYPE_CHAR: __decode_char,
    TYPE_STRING: __decode_string,
}
178
179
def __get_container_params(fp_read, in_mapping, no_bytes):
    """Parse the optional type (CONTAINER_TYPE) and count (CONTAINER_COUNT) header of an array or object.

    Args:
        fp_read: read(size) callable of the stream, positioned just after the container start marker.
        in_mapping (bool): Whether the container is an object (True) or an array (False).
        no_bytes (bool): Whether typed uint8 arrays are decoded as ordinary lists rather than bytes.

    Returns:
        Tuple of (marker, counting, count, type_) where
        - marker is the most recently read marker (only meaningful to the caller if there are elements to decode),
        - counting indicates whether the container has an explicit element count,
        - count is the number of elements (or 1 for uncounted containers, meaning "not finished yet"),
        - type_ is the fixed element type marker or TYPE_NONE if elements carry their own type.

    Raises:
        DecoderException: If the declared type is not valid, a type is given without a count or the count is
                          not a non-negative integer.
    """
    marker = fp_read(1)
    if marker == CONTAINER_TYPE:
        marker = fp_read(1)
        if marker not in __TYPES:
            raise DecoderException('Invalid container type')
        type_ = marker
        marker = fp_read(1)
    else:
        type_ = TYPE_NONE

    if marker == CONTAINER_COUNT:
        count = __decode_int_non_negative(fp_read, fp_read(1))
        counting = True

        # special cases (no data (None or bool) / bytes array) will be handled in calling functions
        if not (type_ in __TYPES_NO_DATA or
                (type_ == TYPE_UINT8 and not in_mapping and not no_bytes)):
            if in_mapping or type_ == TYPE_NONE:
                # Reading ahead is just to capture the type of the first key/value. An empty container has no
                # elements, so the next byte in the stream belongs to whatever follows it (e.g. the next element of
                # an enclosing container) and must not be consumed here.
                if count > 0:
                    marker = fp_read(1)
            else:
                # fixed type: elements do not have a marker of their own
                marker = type_

    elif type_ == TYPE_NONE:
        # set to one to indicate that not finished yet
        count = 1
        counting = False
    else:
        raise DecoderException('Container type without count')
    return marker, counting, count, type_
207
208
def __decode_object(fp_read, no_bytes, object_hook, object_pairs_hook,  # pylint: disable=too-many-branches
                    intern_object_keys):
    """Decode an object whose start marker has already been consumed from the stream.

    Key/value pairs are collected into a list of (key, value) tuples if object_pairs_hook is set and into a dict
    otherwise. The result is passed through object_pairs_hook (if set) or object_hook before being returned.
    Nested arrays and objects are decoded recursively.

    Raises:
        DecoderException: If a value marker is neither a known value type nor a container start marker (or if
                          decoding of the container header, a key or a value fails).
    """
    marker, counting, count, type_ = __get_container_params(fp_read, True, no_bytes)
    has_pairs_hook = object_pairs_hook is not None
    obj = [] if has_pairs_hook else {}

    # special case - no data (None or bool)
    if type_ in __TYPES_NO_DATA:
        # the value is the same for every key and occupies no bytes in the stream, so only keys are read
        value = __METHOD_MAP[type_](fp_read, type_)
        if has_pairs_hook:
            for _ in range(count):
                obj.append((__decode_object_key(fp_read, fp_read(1), intern_object_keys), value))
            return object_pairs_hook(obj)

        for _ in range(count):
            obj[__decode_object_key(fp_read, fp_read(1), intern_object_keys)] = value
        return object_hook(obj)

    # counted objects end once count reaches zero, uncounted ones (count stays 1) on the object end marker
    while count > 0 and (counting or marker != OBJECT_END):
        if marker == TYPE_NOOP:
            marker = fp_read(1)
            continue

        # decode key for object
        key = __decode_object_key(fp_read, marker, intern_object_keys)
        # with a fixed container type, values are not preceded by a marker of their own
        marker = fp_read(1) if type_ == TYPE_NONE else type_

        # decode value
        try:
            value = __METHOD_MAP[marker](fp_read, marker)
        except KeyError:
            handled = False
        else:
            handled = True

        # handle outside above except (on KeyError) so do not have unfriendly "exception within except" backtrace
        if not handled:
            if marker == ARRAY_START:
                value = __decode_array(fp_read, no_bytes, object_hook, object_pairs_hook, intern_object_keys)
            elif marker == OBJECT_START:
                value = __decode_object(fp_read, no_bytes, object_hook, object_pairs_hook, intern_object_keys)
            else:
                raise DecoderException('Invalid marker within object')

        if has_pairs_hook:
            obj.append((key, value))
        else:
            obj[key] = value
        if counting:
            count -= 1
        # read ahead: integer type marker of the next key (or noop / object end marker for uncounted objects)
        if count > 0:
            marker = fp_read(1)

    return object_pairs_hook(obj) if has_pairs_hook else object_hook(obj)
263
264
def __decode_array(fp_read, no_bytes, object_hook, object_pairs_hook, intern_object_keys):
    """Decode an array whose start marker has already been consumed from the stream.

    Returns a list, except for typed uint8 arrays when no_bytes is not set: these are returned exactly as read
    from the stream (i.e. whatever fp_read returns). Nested arrays and objects are decoded recursively.

    Raises:
        DecoderException: If a typed uint8 array is shorter than announced or if a marker is neither a known value
                          type nor a container start marker (or if decoding of the container header or a value
                          fails).
    """
    marker, counting, count, type_ = __get_container_params(fp_read, False, no_bytes)

    # special case - no data (None or bool)
    if type_ in __TYPES_NO_DATA:
        # elements occupy no bytes in the stream, so the value is simply repeated
        return [__METHOD_MAP[type_](fp_read, type_)] * count

    # special case - bytes array
    if type_ == TYPE_UINT8 and not no_bytes:
        container = fp_read(count)
        if len(container) < count:
            raise DecoderException('Container bytes array too short')
        return container

    container = []
    # counted arrays end once count reaches zero, uncounted ones (count stays 1) on the array end marker
    while count > 0 and (counting or marker != ARRAY_END):
        if marker == TYPE_NOOP:
            marker = fp_read(1)
            continue

        # decode value
        try:
            value = __METHOD_MAP[marker](fp_read, marker)
        except KeyError:
            handled = False
        else:
            handled = True

        # handle outside above except (on KeyError) so do not have unfriendly "exception within except" backtrace
        if not handled:
            if marker == ARRAY_START:
                value = __decode_array(fp_read, no_bytes, object_hook, object_pairs_hook, intern_object_keys)
            elif marker == OBJECT_START:
                value = __decode_object(fp_read, no_bytes, object_hook, object_pairs_hook, intern_object_keys)
            else:
                raise DecoderException('Invalid marker within array')

        container.append(value)
        if counting:
            count -= 1
        # with a fixed container type, marker stays as is since elements have no marker of their own
        if count and type_ == TYPE_NONE:
            marker = fp_read(1)

    return container
309
310
def __object_hook_noop(obj):
    """Default object hook: hands back the decoded object as is."""
    unchanged = obj
    return unchanged
313
314
def load(fp, no_bytes=False, object_hook=None, object_pairs_hook=None, intern_object_keys=False):
    """Decodes and returns UBJSON from the given file-like object

    Args:
        fp: read([size])-able object
        no_bytes (bool): If set, typed UBJSON arrays (uint8) will not be
                         converted to a bytes instance and instead treated like
                         any other array (i.e. result in a list).
        object_hook (callable): Called with the result of any object literal
                                decoded (instead of dict).
        object_pairs_hook (callable): Called with the result of any object
                                      literal decoded with an ordered list of
                                      pairs (instead of dict). Takes precedence
                                      over object_hook.
        intern_object_keys (bool): If set, object keys are interned which can
                                   provide a memory saving when many repeated
                                   keys are used. NOTE: This is not supported
                                   in Python2 (since interning does not apply
                                   to unicode) and will be ignored.

    Returns:
        Decoded object

    Raises:
        TypeError: If fp.read is not callable.
        DecoderException: If a decoding failure occurred. Its position
                          attribute holds fp.tell() at the time of failure or
                          None if fp does not provide a working tell().

    UBJSON types are mapped to Python types as follows.  Numbers in brackets
    denote Python version.

        +----------------------------------+---------------+
        | UBJSON                           | Python        |
        +==================================+===============+
        | object                           | dict          |
        +----------------------------------+---------------+
        | array                            | list          |
        +----------------------------------+---------------+
        | string                           | (3) str       |
        |                                  | (2) unicode   |
        +----------------------------------+---------------+
        | uint8, int8, int16, int32, int64 | (3) int       |
        |                                  | (2) int, long |
        +----------------------------------+---------------+
        | float32, float64                 | float         |
        +----------------------------------+---------------+
        | high_precision                   | Decimal       |
        +----------------------------------+---------------+
        | array (typed, uint8)             | (3) bytes     |
        |                                  | (2) str       |
        +----------------------------------+---------------+
        | true                             | True          |
        +----------------------------------+---------------+
        | false                            | False         |
        +----------------------------------+---------------+
        | null                             | None          |
        +----------------------------------+---------------+
    """
    if object_pairs_hook is None and object_hook is None:
        object_hook = __object_hook_noop

    if not callable(fp.read):
        raise TypeError('fp.read not callable')
    fp_read = fp.read

    marker = fp_read(1)
    try:
        try:
            return __METHOD_MAP[marker](fp_read, marker)
        except KeyError:
            # not a plain value - might still be a container, which is checked for below
            pass
        if marker == ARRAY_START:
            return __decode_array(fp_read, bool(no_bytes), object_hook, object_pairs_hook, intern_object_keys)
        if marker == OBJECT_START:
            return __decode_object(fp_read, bool(no_bytes), object_hook, object_pairs_hook, intern_object_keys)
        raise DecoderException('Invalid marker')
    except DecoderException as ex:
        # Determining the position is best effort only: non-seekable streams (e.g. pipes or sockets) can have a
        # tell() method which raises. Such a failure must not hide the actual decoding error.
        position = None
        if hasattr(fp, 'tell'):
            try:
                position = fp.tell()
            except (IOError, OSError, ValueError):
                position = None
        raise_from(DecoderException(ex.args[0], position=position), ex)
391
392
def loadb(chars, no_bytes=False, object_hook=None, object_pairs_hook=None, intern_object_keys=False):
    """Decodes and returns UBJSON from the given bytes or bytearray object. See
       load() for available arguments."""
    options = dict(no_bytes=no_bytes, object_hook=object_hook, object_pairs_hook=object_pairs_hook,
                   intern_object_keys=intern_object_keys)
    # wrap the raw data in an in-memory stream so that the file-based decoder can be re-used
    with BytesIO(chars) as stream:
        return load(stream, **options)
399