1import sys
2import re
3import os
4import datetime
5import logging
6import warnings
7import time
8from ast import literal_eval
9from decimal import Decimal, localcontext
10from collections import namedtuple, OrderedDict
11from itertools import repeat
12from ordered_set import OrderedSet
13from threading import Timer
14
15
class np_type:
    """Placeholder bound to every ``np_*`` alias below when NumPy cannot be imported."""


# Bind module-level aliases for the NumPy types used by this module.
# Without NumPy every alias points at the ``np_type`` placeholder so that
# isinstance() checks against these names still work (and never match).
try:
    import numpy as np
except ImportError:  # pragma: no cover. The case without Numpy is tested locally only.
    np = None  # pragma: no cover.
    np_array_factory = 'numpy not available'  # pragma: no cover.
    np_ndarray = np_type  # pragma: no cover.
    np_bool_ = np_type  # pragma: no cover.
    np_int8 = np_type  # pragma: no cover.
    np_int16 = np_type  # pragma: no cover.
    np_int32 = np_type  # pragma: no cover.
    np_int64 = np_type  # pragma: no cover.
    np_uint8 = np_type  # pragma: no cover.
    np_uint16 = np_type  # pragma: no cover.
    np_uint32 = np_type  # pragma: no cover.
    np_uint64 = np_type  # pragma: no cover.
    np_intp = np_type  # pragma: no cover.
    np_uintp = np_type  # pragma: no cover.
    np_float32 = np_type  # pragma: no cover.
    np_float64 = np_type  # pragma: no cover.
    np_float_ = np_type  # pragma: no cover.
    np_complex64 = np_type  # pragma: no cover.
    np_complex128 = np_type  # pragma: no cover.
    np_complex_ = np_type  # pragma: no cover.
else:
    np_array_factory = np.array
    np_ndarray = np.ndarray
    np_bool_ = np.bool_
    np_int8 = np.int8
    np_int16 = np.int16
    np_int32 = np.int32
    np_int64 = np.int64
    np_uint8 = np.uint8
    np_uint16 = np.uint16
    np_uint32 = np.uint32
    np_uint64 = np.uint64
    np_intp = np.intp
    np_uintp = np.uintp
    np_float32 = np.float32
    np_float64 = np.float64
    # ``np.float_`` was an alias of ``np.float64`` and was removed in NumPy 2.0;
    # fall back to the canonical type so importing this module keeps working.
    np_float_ = getattr(np, 'float_', np.float64)
    np_complex64 = np.complex64
    np_complex128 = np.complex128
    # Same story for ``np.complex_`` (alias of ``np.complex128``).
    np_complex_ = getattr(np, 'complex_', np.complex128)
63
# Numeric NumPy scalar types (or the placeholder type when NumPy is missing).
numpy_numbers = (
    np_int8,
    np_int16,
    np_int32,
    np_int64,
    np_uint8,
    np_uint16,
    np_uint32,
    np_uint64,
    np_intp,
    np_uintp,
    np_float32,
    np_float64,
    np_float_,
    np_complex64,
    np_complex128,
    np_complex_,
)

# The numeric types above plus the NumPy boolean type, de-duplicated.
numpy_dtypes = {*numpy_numbers, np_bool_}

# Maps each type's ``__name__`` to the type object itself.
numpy_dtype_str_to_type = {dtype.__name__: dtype for dtype in numpy_dtypes}
76
logger = logging.getLogger(__name__)

py_major_version = sys.version_info.major
py_minor_version = sys.version_info.minor

# NOTE: kept for backward compatibility, but do not use it for ordering checks:
# "3.10" parses to a Decimal that compares equal to 3.1, i.e. lower than 3.6.
py_current_version = Decimal("{}.{}".format(py_major_version, py_minor_version))

py2 = py_major_version == 2
py3 = py_major_version == 3
py4 = py_major_version == 4

MINIMUM_PY_DICT_TYPE_SORTED = Decimal('3.6')
# Compare (major, minor) as integers so that two-digit minor versions
# (3.10, 3.11, ...) are correctly recognised as newer than 3.6.
DICT_IS_SORTED = (py_major_version, py_minor_version) >= (3, 6)
90
91
class OrderedDictPlus(OrderedDict):
    """
    An OrderedDict that prints like a plain dict.

    Only used on Python versions whose built-in dictionary is not ordered.
    """

    def __repr__(self):  # pragma: no cover. Only used in pypy3 and py3.5
        # Render through a plain dict so the text has no "OrderedDictPlus(...)" wrapper.
        as_plain_dict = dict(self)
        return str(as_plain_dict)

    __str__ = __repr__

    def copy(self):  # pragma: no cover. Only used in pypy3 and py3.5
        """Return a shallow copy that is also an OrderedDictPlus."""
        duplicate = OrderedDictPlus()
        for key in self:
            duplicate[key] = self[key]
        return duplicate
108
109
# Use the built-in dict when it preserves insertion order, otherwise the ordered fallback.
dict_ = dict if DICT_IS_SORTED else OrderedDictPlus  # pragma: no cover. Fallback only used in pypy3 and py3.5

# A hypothetical Python 4 is handled with the Python 3 code paths.
if py4:  # pragma: no cover
    logger.warning('Python 4 is not supported yet. Switching logic to Python 3.')
    py3 = True

# Python 2 aborts the interpreter with an explanatory message.
if py2:  # pragma: no cover
    sys.exit('Python 2 is not supported anymore. The last version of DeepDiff that supported Py2 was 3.3.0')

# The ``pypy_translation_info`` attribute on ``sys`` is used as the PyPy marker.
pypy3 = py3 and hasattr(sys, "pypy_translation_info")
123
# Type groups used with isinstance() throughout the module.
strings = (str, bytes)  # text and binary string types
unicode_type = str
bytes_type = bytes
only_numbers = (int, float, complex, Decimal) + numpy_numbers
datetimes = (datetime.datetime, datetime.date, datetime.timedelta, datetime.time)
times = (datetime.datetime, datetime.time)
numbers = only_numbers + datetimes  # plain numbers plus the date/time types
booleans = (bool, np_bool_)

# Pairs an item with the indexes recorded for it.
IndexedHash = namedtuple('IndexedHash', ['indexes', 'item'])

# Directory that contains this module.
current_dir = os.path.dirname(os.path.abspath(__file__))

# Prefix put in front of id() values by get_id().
ID_PREFIX = '!>*id'

# A formatted number made only of these characters is some spelling of zero.
ZERO_DECIMAL_CHARACTERS = {'-', '0', '.'}

KEY_TO_VAL_STR = "{}:{}"

# Names of the available result views.
TREE_VIEW = 'tree'
TEXT_VIEW = 'text'
DELTA_VIEW = '_delta'
146
147
def short_repr(item, max_length=15):
    """
    Return repr(item), abbreviated when it is longer than max_length.

    The abbreviated form keeps the first ``max_length - 3`` characters,
    then ``...``, then the final character of the full repr.
    """
    text = repr(item)
    if len(text) <= max_length:
        return text
    head = text[:max_length - 3]
    closing_char = text[-1]
    return '{}...{}'.format(head, closing_char)
154
155
class ListItemRemovedOrAdded:  # pragma: no cover
    """Marker class with no behaviour of its own (a condition to be checked)."""
159
160
class OtherTypes:
    """Base class for marker objects; they print as ``Error: <ClassName>``."""

    def __repr__(self):
        class_name = self.__class__.__name__
        return "Error: {}".format(class_name)  # pragma: no cover

    __str__ = __repr__


class Skipped(OtherTypes):
    """Marker type; prints as ``Error: Skipped``."""


class Unprocessed(OtherTypes):
    """Marker type; prints as ``Error: Unprocessed``."""


class NotHashed(OtherTypes):
    """Marker type; prints as ``Error: NotHashed``."""
178
179
class NotPresent:  # pragma: no cover
    """
    Marks, in a change tree, an object that existed before and has been removed,
    or one that will only be added later.

    None was used for this in the past, which was ambiguous whenever the
    compared data itself added or removed None values.
    """

    def __repr__(self):
        text = 'not present'
        return text  # pragma: no cover

    __str__ = __repr__
191
192
class CannotCompare(Exception):
    """Raised when the compare function is unable to compare two items."""
198
199
# Shared module-level instances of the marker classes defined above.
unprocessed = Unprocessed()
skipped = Skipped()
not_hashed = NotHashed()
notpresent = NotPresent()

# Remapping from old to new keys is disabled because that mapping is deprecated,
# so RemapDict is now just an alias of the module's dictionary type.
RemapDict = dict_
207
208
209# class RemapDict(dict_):
210#     """
211#     DISABLED
212#     Remap Dictionary.
213
214#     For keys that have a new, longer name, remap the old key to the new key.
215#     Other keys that don't have a new name are handled as before.
216#     """
217
218#     def __getitem__(self, old_key):
219#         new_key = EXPANDED_KEY_MAP.get(old_key, old_key)
220#         if new_key != old_key:
221#             logger.warning(
222#                 "DeepDiff Deprecation: %s is renamed to %s. Please start using "
223#                 "the new unified naming convention.", old_key, new_key)
224#         if new_key in self:
225#             return self.get(new_key)
226#         else:  # pragma: no cover
227#             raise KeyError(new_key)
228
229
class indexed_set(set):
    """
    A set class that lets you get an item by index

    The index refers to the position of the item in the set's current
    iteration order, so it is only stable while the set is not modified.

    >>> a = indexed_set()
    >>> a.add(10)
    >>> a.add(20)
    >>> a[0]
    10
    """

    def __getitem__(self, index):
        """
        Return the item at position ``index`` of the iteration order.

        Negative indexes count from the end. Raises TypeError for a
        non-integer index and IndexError when the index is out of range.
        """
        if not isinstance(index, int):
            raise TypeError(
                'indexed_set indices must be integers, not {}'.format(type(index).__name__))
        size = len(self)
        position = index + size if index < 0 else index
        if not 0 <= position < size:
            raise IndexError('indexed_set index out of range')
        for current, item in enumerate(self):
            if current == position:
                return item
240
241
def _type_to_name(type_obj):
    """Serialize a class object as its name."""
    return type_obj.__name__


def _bytes_to_text(raw):
    """Serialize bytes by decoding them as UTF-8."""
    return raw.decode('utf-8')


# Default converters: maps a type to the callable that turns an instance
# of it into something JSON-serializable.
JSON_CONVERTOR = {
    Decimal: float,
    OrderedSet: list,
    type: _type_to_name,
    bytes: _bytes_to_text,
}
248
249
def json_convertor_default(default_mapping=None):
    """
    Build a converter function for objects that are not JSON-serializable.

    **parameters**

    default_mapping: optional mapping of type -> converter callable. Its
        entries are merged over a copy of JSON_CONVERTOR.

    **returns**

    A one-argument function that applies the converter of the first mapped
    type the object is an instance of, and raises TypeError when none matches.
    """
    type_converters = JSON_CONVERTOR.copy()
    if default_mapping:
        type_converters.update(default_mapping)

    def _convertor(obj):
        for known_type, convert in type_converters.items():
            if not isinstance(obj, known_type):
                continue
            return convert(obj)
        raise TypeError('We do not know how to convert {} of type {} for json serialization. Please pass the default_mapping parameter with proper mapping of the object to a basic python type.'.format(obj, type(obj)))

    return _convertor
262
263
def add_to_frozen_set(parents_ids, item_id):
    """Return a new set made of parents_ids plus item_id; parents_ids is not modified."""
    addition = {item_id}
    return parents_ids | addition
266
267
def convert_item_or_items_into_set_else_none(items):
    """
    Normalize ``items`` into a set.

    A falsy value gives None, a single string (str or bytes) gives a
    one-element set, and any other iterable is converted with set().
    """
    if not items:
        return None
    if isinstance(items, strings):
        # A lone string is one item, not an iterable of characters.
        return {items}
    return set(items)
277
278
# The type of a compiled regular expression object.
RE_COMPILED_TYPE = type(re.compile(''))


def convert_item_or_items_into_compiled_regexes_else_none(items):
    """
    Normalize ``items`` into a list of compiled regular expressions.

    A falsy value gives None. A single string or compiled pattern is treated
    as a one-element list. Entries that are already compiled are kept as
    they are; everything else is passed to re.compile().
    """
    if not items:
        return None
    if isinstance(items, (strings, RE_COMPILED_TYPE)):
        items = [items]
    compiled = []
    for pattern in items:
        if not isinstance(pattern, RE_COMPILED_TYPE):
            pattern = re.compile(pattern)
        compiled.append(pattern)
    return compiled
290
291
def get_id(obj):
    """
    Return id(obj) as a string prefixed with ID_PREFIX.

    The extra characters make the value more than a bare integer, to reduce the risk of collision.
    """
    return f"{ID_PREFIX}{id(obj)}"
297
298
def get_type(obj):
    """
    Return the type of obj.

    For a NumPy ndarray this is the scalar type of its dtype. An object
    whose own type is exactly ``type`` (i.e. a class) is returned as is.
    """
    if isinstance(obj, np_ndarray):
        return obj.dtype.type
    if type(obj) is type:
        return obj
    return type(obj)
306
307
def numpy_dtype_string_to_type(dtype_str):
    """Look up the type registered under ``dtype_str``; raises KeyError for an unknown name."""
    resolved_type = numpy_dtype_str_to_type[dtype_str]
    return resolved_type
310
311
def type_in_type_group(item, type_group):
    """Tell whether the type of item, as reported by get_type(), is a member of type_group."""
    item_type = get_type(item)
    return item_type in type_group
314
315
def type_is_subclass_of_type_group(item, type_group):
    """
    Tell whether item belongs to type_group in any of three ways: it is an
    instance of the group, it is a class that subclasses the group, or its
    type is listed in the group.
    """
    if isinstance(item, type_group):
        return True
    if isinstance(item, type) and issubclass(item, type_group):
        return True
    return type_in_type_group(item, type_group)
320
321
def get_doc(doc_filename):
    """
    Read a documentation file and return its text.

    **parameters**

    doc_filename: name of a file inside the ``../docs/`` folder, relative to
        the directory of this module.

    **returns**

    The file content, or a short message pointing to the online docs when
    the file cannot be read for any reason.
    """
    try:
        doc_path = os.path.join(current_dir, '../docs/', doc_filename)
        # Read as UTF-8 explicitly so the result does not depend on the locale's default encoding.
        with open(doc_path, 'r', encoding='utf-8') as doc_file:
            doc = doc_file.read()
    except Exception:  # pragma: no cover
        # Deliberately best-effort: missing docs must never break the import.
        doc = 'Failed to load the docstrings. Please visit: https://zepworks.com/deepdiff/current/'  # pragma: no cover
    return doc
329
330
# Format-string templates per notation; ``%s`` is filled with the number of digits.
number_formatting = {
    "f": r'{:.%sf}',
    "e": r'{:.%se}',
}


def number_to_string(number, significant_digits, number_format_notation="f"):
    """
    Convert numbers to string considering significant digits.

    **parameters**

    number: the value to convert. Anything that is not a Decimal and not an
        instance of the ``numbers`` type group is returned unchanged.
    significant_digits: number of digits to keep after the decimal point.
    number_format_notation: "f" for fixed point or "e" for scientific notation.

    **returns**

    The formatted string. A result consisting only of the characters in
    ZERO_DECIMAL_CHARACTERS is returned as "0.00".

    **raises**

    ValueError when number_format_notation is neither "f" nor "e".
    """
    try:
        using = number_formatting[number_format_notation]
    except KeyError:
        raise ValueError("number_format_notation got invalid value of {}. The valid values are 'f' and 'e'".format(number_format_notation)) from None
    if isinstance(number, Decimal):
        # NaN and Infinity have no numeric exponent and cannot be quantized;
        # they are passed straight to the formatter.
        if number.is_finite():
            tup = number.as_tuple()
            with localcontext() as ctx:
                # Digits before the decimal point plus the digits kept after it.
                # The value is floored at 1 because numbers smaller than the kept
                # precision would otherwise produce an invalid precision, and one
                # extra digit is reserved for a carry when rounding up
                # (e.g. 9.99 -> 10.0); quantize() fails if the result does not fit.
                ctx.prec = max(1, len(tup.digits) + tup.exponent + significant_digits) + 1
                number = number.quantize(Decimal('0.' + '0' * significant_digits))
    elif not isinstance(number, numbers):
        return number
    result = (using % significant_digits).format(number)
    # Special case for 0: "-0.00" should compare equal to "0.00"
    if set(result) <= ZERO_DECIMAL_CHARACTERS:
        result = "0.00"
    # https://bugs.python.org/issue36622
    if number_format_notation == 'e' and isinstance(number, float):
        result = result.replace('+0', '+')
    return result
360
361
class DeepDiffDeprecationWarning(DeprecationWarning):
    """Deprecation warning category to use in place of the built-in DeprecationWarning."""
367
368
def cartesian_product(a, b):
    """
    Lazily extend every tuple of ``a`` with every element of ``b``.

    **parameters**

    a: iterable of tuples
    b: iterable whose elements are appended; it is iterated once per tuple of ``a``
    """
    for prefix in a:
        yield from (prefix + (element,) for element in b)


def cartesian_product_of_shape(dimentions, result=None):
    """
    Produce every index tuple for the given dimension sizes.
    This is mainly used to traverse Numpy ndarrays, whose dimension
    sizes are given by ndarray.shape.

    **parameters**

    dimentions: iterable of dimension sizes
    result: optional iterable of tuples to extend instead of starting
        from a single empty tuple
    """
    product = ((),) if result is None else result  # a tuple with an empty tuple
    for size in dimentions:
        product = cartesian_product(product, range(size))
    return product


def get_numpy_ndarray_rows(obj, shape=None):
    """
    Walk a multi dimensional numpy array row by row.

    Yields ``(path_tuple, row)`` pairs where path_tuple holds the indexes of
    every dimension except the last one and row is what remains after
    indexing obj with them one at a time. ``shape`` defaults to ``obj.shape``.
    """
    if shape is None:
        shape = obj.shape

    leading_dimensions = shape[:-1]
    for path_tuple in cartesian_product_of_shape(leading_dimensions):
        row = obj
        for index in path_tuple:
            row = row[index]
        yield path_tuple, row
411
412
class _NotFound:
    """Type of the ``not_found`` marker; it never compares equal to anything, itself included."""

    def __repr__(self):
        return 'not found'

    __str__ = __repr__

    def __eq__(self, other):
        return False

    # NOTE(review): ``__req__`` is not a special method name Python looks up;
    # the attribute is kept only so the class keeps exposing it.
    __req__ = __eq__


# Module-level instance used as the marker value.
not_found = _NotFound()
427
# Register the 'once' filter action for the module's own deprecation warning category.
warnings.simplefilter(action='once', category=DeepDiffDeprecationWarning)
429
430
class OrderedSetPlus(OrderedSet):
    """An OrderedSet that can also pop from the left and prints like a list."""

    def lpop(self):
        """
        Remove and return the first element from the set.
        Raises KeyError if the set is empty.
        Example:
            >>> oset = OrderedSetPlus([1, 2, 3])
            >>> oset.lpop()
            1
        """
        if not self.items:
            raise KeyError('lpop from an empty set')

        elem = self.items[0]
        # Remove through discard() instead of deleting from ``items`` and ``map``
        # by hand, so that the parent class updates its own bookkeeping.
        # NOTE(review): ordered_set stores each item's position in ``map``;
        # deleting only the first entry left the other positions off by one.
        self.discard(elem)
        return elem

    def __repr__(self):
        return str(list(self))

    __str__ = __repr__
454
455
class RepeatedTimer:
    """
    Threaded Repeated Timer by MestreLion
    https://stackoverflow.com/a/38317060/1497443

    Calls ``function(*args, **kwargs)`` each time a threading.Timer of
    ``interval`` seconds fires, re-arming a new timer before every call.
    The keyword argument ``duration`` is set to the whole seconds elapsed
    since construction whenever start() runs. The timer is started by the
    constructor.
    """

    def __init__(self, interval, function, *args, **kwargs):
        self._timer = None
        self.interval = interval
        self.function = function
        self.args = args
        self.start_time = time.time()
        self.kwargs = kwargs
        self.is_running = False
        self.start()

    def _get_duration_sec(self):
        """Whole seconds elapsed since the instance was created."""
        elapsed = time.time() - self.start_time
        return int(elapsed)

    def _run(self):
        """Timer callback: schedule the next tick first, then call the function."""
        self.is_running = False
        self.start()
        self.function(*self.args, **self.kwargs)

    def start(self):
        """Refresh the ``duration`` keyword argument and arm a timer unless one is already pending."""
        self.kwargs.update(duration=self._get_duration_sec())
        if self.is_running:
            return
        self._timer = Timer(self.interval, self._run)
        self._timer.start()
        self.is_running = True

    def stop(self):
        """Cancel the pending timer and return the elapsed whole seconds."""
        duration = self._get_duration_sec()
        self._timer.cancel()
        self.is_running = False
        return duration
492
493
def _eval_decimal(params):
    """Build a Decimal from the text found between ``Decimal(`` and ``)``."""
    return Decimal(params)


def _eval_datetime(params):
    """Build a datetime.datetime from a comma separated argument string."""
    arguments = literal_eval('({})'.format(params))
    return datetime.datetime(*arguments)


def _eval_date(params):
    """Build a datetime.date from a comma separated argument string."""
    arguments = literal_eval('({})'.format(params))
    return datetime.date(*arguments)


# (prefix, suffix, builder) triples tried in this order by literal_eval_extended
# when plain literal_eval cannot handle the text.
LITERAL_EVAL_PRE_PROCESS = [
    ('Decimal(', ')', _eval_decimal),
    ('datetime.datetime(', ')', _eval_datetime),
    ('datetime.date(', ')', _eval_date),
]


def literal_eval_extended(item):
    """
    An extended version of literal_eval.

    Falls back to the builders in LITERAL_EVAL_PRE_PROCESS when literal_eval
    raises SyntaxError or ValueError; if no entry matches the text, the
    original exception is re-raised.
    """
    try:
        return literal_eval(item)
    except (SyntaxError, ValueError):
        for prefix, suffix, builder in LITERAL_EVAL_PRE_PROCESS:
            if not (item.startswith(prefix) and item.endswith(suffix)):
                continue
            # Keep only what is between the prefix and suffix and drop the surrounding
            # quotes, so for example "Decimal('10.1')" becomes "'10.1'" and then '10.1'
            inner = item[len(prefix): -len(suffix)]
            return builder(inner.strip('\'\"'))
        raise
530
531
def time_to_seconds(t):
    """Return the hour, minute and second fields of ``t`` as a total number of seconds."""
    total_minutes = t.hour * 60 + t.minute
    return total_minutes * 60 + t.second
534
535
def datetime_normalize(truncate_datetime, obj):
    """
    Normalize a datetime or time object.

    **parameters**

    truncate_datetime: None, or one of 'second', 'minute', 'hour', 'day'.
        Every field smaller than the named unit is reset to zero.
    obj: the object to normalize.

    **returns**

    For a datetime.datetime: the same moment expressed in UTC. Timezone-aware
    values are converted; naive values are taken to already be in UTC.
    For a datetime.time: the number of seconds given by time_to_seconds().
    Anything else is returned after the optional truncation only.
    """
    if truncate_datetime:
        if truncate_datetime == 'second':
            obj = obj.replace(microsecond=0)
        elif truncate_datetime == 'minute':
            obj = obj.replace(second=0, microsecond=0)
        elif truncate_datetime == 'hour':
            obj = obj.replace(minute=0, second=0, microsecond=0)
        elif truncate_datetime == 'day':
            obj = obj.replace(hour=0, minute=0, second=0, microsecond=0)
    if isinstance(obj, datetime.datetime):
        if obj.utcoffset() is not None:
            # Aware datetime: convert, so the instant it represents is preserved.
            # Only swapping tzinfo would silently shift it by its UTC offset.
            obj = obj.astimezone(datetime.timezone.utc)
        else:
            obj = obj.replace(tzinfo=datetime.timezone.utc)
    elif isinstance(obj, datetime.time):
        obj = time_to_seconds(obj)
    return obj
551
552
def get_truncate_datetime(truncate_datetime):
    """
    Validate the truncate_datetime value and hand it back unchanged.

    Raises ValueError unless the value is None, 'second', 'minute', 'hour' or 'day'.
    """
    valid_values = {None, 'second', 'minute', 'hour', 'day'}
    if truncate_datetime in valid_values:
        return truncate_datetime
    raise ValueError("truncate_datetime must be second, minute, hour or day")
560
561
def cartesian_product_numpy(*arrays):
    """
    Cartesian product of Numpy arrays by Paul Panzer
    https://stackoverflow.com/a/49445693/1497443

    Returns a 2-D array with one column per input array and one row per
    combination; the dtype is ``np.result_type`` of the inputs.
    """
    count = len(arrays)
    common_dtype = np.result_type(*arrays)
    grid = np.empty([count] + [len(array) for array in arrays], dtype=common_dtype)
    # One full slice followed by ``count`` new-axis markers; a prefix of it is
    # used per input so each array broadcasts along its own axis of the grid.
    index_template = (slice(None),) + (None,) * count
    for position, array in enumerate(arrays):
        grid[position, ...] = array[index_template[:count - position]]
    return grid.reshape(count, -1).T
574
575
def diff_numpy_array(A, B):
    """
    Numpy Array A - B
    return items in A that are not in B
    By Divakar
    https://stackoverflow.com/a/52417967/1497443
    """
    # np.isin replaces np.in1d, which is deprecated since NumPy 2.0.
    present_in_b = np.isin(A, B)
    return A[~present_in_b]
584
585
# Numpy type used for a sequence made entirely of the given Python type.
# Decimal shares the float64 entry with float.
PYTHON_TYPE_TO_NUMPY_TYPE = {
    int: np_int64,
    float: np_float64,
    Decimal: np_float64
}
591
592
def get_homogeneous_numpy_compatible_type_of_seq(seq):
    """
    Return with the numpy dtype if the array can be converted to a non-object numpy array.
    Originally written by mgilson https://stackoverflow.com/a/13252348/1497443
    This is the modified version.

    Returns False when the sequence is empty, when its first item is not
    exactly an int, float or Decimal, or when the items are not all of
    exactly the same type as the first one.
    """
    iseq = iter(seq)
    try:
        first_type = type(next(iseq))
    except StopIteration:
        # An empty sequence has no element type to report; do not leak StopIteration.
        return False
    if first_type not in {int, float, Decimal}:
        return False
    # Exact type comparison on purpose: e.g. bool must not be accepted as int.
    if all(type(x) is first_type for x in iseq):
        return PYTHON_TYPE_TO_NUMPY_TYPE.get(first_type, False)
    return False
606