import sys
import re
import os
import datetime
import logging
import warnings
import time
from ast import literal_eval
from decimal import Decimal, localcontext
from collections import namedtuple, OrderedDict
from itertools import repeat
from ordered_set import OrderedSet
from threading import Timer


class np_type:
    """Placeholder bound to every ``np_*`` type name when Numpy cannot be imported."""
    pass


try:
    import numpy as np
except ImportError:  # pragma: no cover. The case without Numpy is tested locally only.
    np = None  # pragma: no cover.
    np_array_factory = 'numpy not available'  # pragma: no cover.
    np_ndarray = np_type  # pragma: no cover.
    np_bool_ = np_type  # pragma: no cover.
    np_int8 = np_type  # pragma: no cover.
    np_int16 = np_type  # pragma: no cover.
    np_int32 = np_type  # pragma: no cover.
    np_int64 = np_type  # pragma: no cover.
    np_uint8 = np_type  # pragma: no cover.
    np_uint16 = np_type  # pragma: no cover.
    np_uint32 = np_type  # pragma: no cover.
    np_uint64 = np_type  # pragma: no cover.
    np_intp = np_type  # pragma: no cover.
    np_uintp = np_type  # pragma: no cover.
    np_float32 = np_type  # pragma: no cover.
    np_float64 = np_type  # pragma: no cover.
    np_float_ = np_type  # pragma: no cover.
    np_complex64 = np_type  # pragma: no cover.
    np_complex128 = np_type  # pragma: no cover.
    np_complex_ = np_type  # pragma: no cover.
else:
    np_array_factory = np.array
    np_ndarray = np.ndarray
    np_bool_ = np.bool_
    np_int8 = np.int8
    np_int16 = np.int16
    np_int32 = np.int32
    np_int64 = np.int64
    np_uint8 = np.uint8
    np_uint16 = np.uint16
    np_uint32 = np.uint32
    np_uint64 = np.uint64
    np_intp = np.intp
    np_uintp = np.uintp
    np_float32 = np.float32
    np_float64 = np.float64
    # ``np.float_`` and ``np.complex_`` were plain aliases of float64 and
    # complex128 and were removed in NumPy 2.0. Binding the canonical types
    # keeps the same objects on NumPy 1.x and avoids an AttributeError on 2.x.
    np_float_ = np.float64
    np_complex64 = np.complex64
    np_complex128 = np.complex128
    np_complex_ = np.complex128

numpy_numbers = (
    np_int8, np_int16, np_int32, np_int64, np_uint8,
    np_uint16, np_uint32, np_uint64, np_intp, np_uintp,
    np_float32, np_float64, np_float_, np_complex64,
    np_complex128, np_complex_,)

numpy_dtypes = set(numpy_numbers)
numpy_dtypes.add(np_bool_)

# Lookup from a type's ``__name__`` to the type itself.
numpy_dtype_str_to_type = {
    item.__name__: item for item in numpy_dtypes
}

logger = logging.getLogger(__name__)

py_major_version = sys.version_info.major
py_minor_version = sys.version_info.minor

# NOTE: kept for backward compatibility. As a Decimal, "3.10" equals 3.1, so
# this value must not be used for ordering comparisons between versions.
py_current_version = Decimal("{}.{}".format(py_major_version, py_minor_version))

py2 = py_major_version == 2
py3 = py_major_version == 3
py4 = py_major_version == 4

MINIMUM_PY_DICT_TYPE_SORTED = Decimal('3.6')
# Compare (major, minor) as integers: the Decimal comparison used previously
# evaluated Python 3.10+ as "3.1" and therefore as older than 3.6.
DICT_IS_SORTED = (py_major_version, py_minor_version) >= (3, 6)


class OrderedDictPlus(OrderedDict):
    """
    This class is only used when a python version is used where
    the built-in dictionary is not ordered.
    """

    def __repr__(self):  # pragma: no cover. Only used in pypy3 and py3.5
        return str(dict(self))  # pragma: no cover. Only used in pypy3 and py3.5

    __str__ = __repr__

    def copy(self):  # pragma: no cover. Only used in pypy3 and py3.5
        result = OrderedDictPlus()  # pragma: no cover. Only used in pypy3 and py3.5
        for k, v in self.items():  # pragma: no cover. Only used in pypy3 and py3.5
            result[k] = v  # pragma: no cover. Only used in pypy3 and py3.5
        return result  # pragma: no cover. Only used in pypy3 and py3.5


if DICT_IS_SORTED:
    dict_ = dict
else:
    dict_ = OrderedDictPlus  # pragma: no cover. Only used in pypy3 and py3.5

if py4:
    logger.warning('Python 4 is not supported yet. Switching logic to Python 3.')  # pragma: no cover
    py3 = True  # pragma: no cover

if py2:  # pragma: no cover
    sys.exit('Python 2 is not supported anymore. The last version of DeepDiff that supported Py2 was 3.3.0')

pypy3 = py3 and hasattr(sys, "pypy_translation_info")

strings = (str, bytes)  # which are both basestring
unicode_type = str
bytes_type = bytes
only_numbers = (int, float, complex, Decimal) + numpy_numbers
datetimes = (datetime.datetime, datetime.date, datetime.timedelta, datetime.time)
times = (datetime.datetime, datetime.time)
numbers = only_numbers + datetimes
booleans = (bool, np_bool_)

IndexedHash = namedtuple('IndexedHash', 'indexes item')

current_dir = os.path.dirname(os.path.abspath(__file__))

ID_PREFIX = '!>*id'

ZERO_DECIMAL_CHARACTERS = set("-0.")

KEY_TO_VAL_STR = "{}:{}"

TREE_VIEW = 'tree'
TEXT_VIEW = 'text'
DELTA_VIEW = '_delta'


def short_repr(item, max_length=15):
    """
    Short representation of item if it is too long.

    The repr is cut to ``max_length - 3`` characters, followed by ``...``
    and the last character of the full repr (typically the closing quote
    or bracket).
    """
    item = repr(item)
    if len(item) > max_length:
        item = '{}...{}'.format(item[:max_length - 3], item[-1])
    return item


class ListItemRemovedOrAdded:  # pragma: no cover
    """Class of conditions to be checked"""
    pass


class OtherTypes:
    """Base class for marker objects; the repr is "Error: <class name>"."""

    def __repr__(self):
        return "Error: {}".format(self.__class__.__name__)  # pragma: no cover

    __str__ = __repr__


class Skipped(OtherTypes):
    pass


class Unprocessed(OtherTypes):
    pass


class NotHashed(OtherTypes):
    pass


class NotPresent:  # pragma: no cover
    """
    In a change tree, this indicates that a previously existing object has been removed -- or will only be added
    in the future.
    We previously used None for this but this caused problems when users actually added and removed None. Srsly guys? :D
    """

    def __repr__(self):
        return 'not present'  # pragma: no cover

    __str__ = __repr__


class CannotCompare(Exception):
    """
    Exception when two items cannot be compared in the compare function.
    """
    pass


# Module-level singletons of the marker classes above.
unprocessed = Unprocessed()
skipped = Skipped()
not_hashed = NotHashed()
notpresent = NotPresent()

# Disabling remapping from old to new keys since the mapping is deprecated.
RemapDict = dict_


# class RemapDict(dict_):
#     """
#     DISABLED
#     Remap Dictionary.

#     For keys that have a new, longer name, remap the old key to the new key.
#     Other keys that don't have a new name are handled as before.
#     """

#     def __getitem__(self, old_key):
#         new_key = EXPANDED_KEY_MAP.get(old_key, old_key)
#         if new_key != old_key:
#             logger.warning(
#                 "DeepDiff Deprecation: %s is renamed to %s. Please start using "
#                 "the new unified naming convention.", old_key, new_key)
#         if new_key in self:
#             return self.get(new_key)
#         else:  # pragma: no cover
#             raise KeyError(new_key)


class indexed_set(set):
    """
    A set subclass reserved for lookups by index.

    NOTE: this class currently defines no methods of its own, so it behaves
    exactly like the built-in ``set`` and ``a[0]`` is NOT supported.

    >>> a = indexed_set()
    >>> a.add(10)
    >>> a.add(20)
    >>> sorted(a)
    [10, 20]
    """


# Types that the standard json encoder cannot serialize, mapped to a
# callable that converts an instance into something it can.
JSON_CONVERTOR = {
    Decimal: float,
    OrderedSet: list,
    type: lambda x: x.__name__,
    bytes: lambda x: x.decode('utf-8')
}


def json_convertor_default(default_mapping=None):
    """
    Build a function suitable for the ``default`` parameter of ``json.dumps``.

    **parameters**

    default_mapping: optional dictionary of type -> converter callable.
        Its entries are added on top of (and override) JSON_CONVERTOR.

    **returns**

    A function that converts an object with the first mapping entry whose
    type matches via isinstance, and raises TypeError if none matches.
    """
    _convertor_mapping = JSON_CONVERTOR.copy()
    if default_mapping:
        _convertor_mapping.update(default_mapping)

    def _convertor(obj):
        for original_type, convert_to in _convertor_mapping.items():
            if isinstance(obj, original_type):
                return convert_to(obj)
        raise TypeError('We do not know how to convert {} of type {} for json serialization. Please pass the default_mapping parameter with proper mapping of the object to a basic python type.'.format(obj, type(obj)))

    return _convertor


def add_to_frozen_set(parents_ids, item_id):
    """Return a new set that is the union of parents_ids and {item_id}."""
    return parents_ids | {item_id}


def convert_item_or_items_into_set_else_none(items):
    """
    Normalize items into a set.

    A single string (str or bytes) becomes a one item set, any other truthy
    iterable is passed to set() and a falsy value becomes None.
    """
    if items:
        if isinstance(items, strings):
            items = {items}
        else:
            items = set(items)
    else:
        items = None
    return items


RE_COMPILED_TYPE = type(re.compile(''))


def convert_item_or_items_into_compiled_regexes_else_none(items):
    """
    Normalize items into a list of compiled regular expressions.

    A single string or compiled pattern is wrapped in a list first. Items
    that are already compiled are kept as they are, everything else is passed
    to re.compile. A falsy value becomes None.
    """
    if items:
        if isinstance(items, (strings, RE_COMPILED_TYPE)):
            items = [items]
        items = [i if isinstance(i, RE_COMPILED_TYPE) else re.compile(i) for i in items]
    else:
        items = None
    return items


def get_id(obj):
    """
    Adding some characters to id so they are not just integers to reduce the risk of collision.
    """
    return "{}{}".format(ID_PREFIX, id(obj))


def get_type(obj):
    """
    Get the type of object or if it is a class, return the class itself.
    For a Numpy array, the type of its dtype is returned instead.
    """
    if isinstance(obj, np_ndarray):
        return obj.dtype.type
    return obj if type(obj) is type else type(obj)


def numpy_dtype_string_to_type(dtype_str):
    """Return the type registered under dtype_str. Raises KeyError for unknown names."""
    return numpy_dtype_str_to_type[dtype_str]


def type_in_type_group(item, type_group):
    """Whether get_type(item) is a member of type_group."""
    return get_type(item) in type_group


def type_is_subclass_of_type_group(item, type_group):
    """
    Whether item is an instance of, a subclass of, or (via get_type) a member of type_group.
    """
    return isinstance(item, type_group) \
        or (isinstance(item, type) and issubclass(item, type_group)) \
        or type_in_type_group(item, type_group)


def get_doc(doc_filename):
    """
    Read doc_filename from the ``../docs/`` folder relative to this module.
    Any failure is swallowed on purpose and a pointer to the online docs is returned instead.
    """
    try:
        with open(os.path.join(current_dir, '../docs/', doc_filename), 'r') as doc_file:
            doc = doc_file.read()
    except Exception:  # pragma: no cover
        doc = 'Failed to load the docstrings. Please visit: https://zepworks.com/deepdiff/current/'  # pragma: no cover
    return doc


number_formatting = {
    "f": r'{:.%sf}',
    "e": r'{:.%se}',
}


def number_to_string(number, significant_digits, number_format_notation="f"):
    """
    Convert numbers to string considering significant digits.

    **parameters**

    number: the value to convert. Anything that is not an instance of
        ``numbers`` is returned unchanged.
    significant_digits: number of digits after the decimal point.
    number_format_notation: "f" for fixed point or "e" for scientific notation.

    **raises**

    ValueError: when number_format_notation is neither "f" nor "e".
    """
    try:
        using = number_formatting[number_format_notation]
    except KeyError:
        raise ValueError("number_format_notation got invalid value of {}. The valid values are 'f' and 'e'".format(number_format_notation)) from None
    if isinstance(number, Decimal):
        # NaN and Infinity have no numeric exponent and can not be quantized.
        # They are passed to the formatter as they are.
        if number.is_finite():
            tup = number.as_tuple()
            # Digits in front of the decimal point. It is at least one, since
            # len(digits) + exponent is zero or negative for numbers such as 0.0001
            integer_digits = max(len(tup.digits) + tup.exponent, 1)
            with localcontext() as ctx:
                # The extra digit leaves room for a carry when rounding up
                # (for example 9.999 becomes 10.00). quantize raises
                # InvalidOperation when the result does not fit in the precision.
                ctx.prec = integer_digits + significant_digits + 1
                number = number.quantize(Decimal('0.' + '0' * significant_digits))
    elif not isinstance(number, numbers):
        return number
    result = (using % significant_digits).format(number)
    # Special case for 0: "-0.00" should compare equal to "0.00"
    if set(result) <= ZERO_DECIMAL_CHARACTERS:
        result = "0.00"
    # https://bugs.python.org/issue36622
    if number_format_notation == 'e' and isinstance(number, float):
        result = result.replace('+0', '+')
    return result


class DeepDiffDeprecationWarning(DeprecationWarning):
    """
    Use this warning instead of DeprecationWarning
    """
    pass


def cartesian_product(a, b):
    """
    Get the Cartesian product of two iterables

    **parameters**

    a: iterable of tuples
    b: iterable to do the Cartesian product

    Yields every tuple of a extended by every item of b.
    Note that b is iterated once per item of a, so it needs to be re-iterable.
    """

    for i in a:
        for j in b:
            yield i + (j,)


def cartesian_product_of_shape(dimentions, result=None):
    """
    Cartesian product of a dimensions iterable.
    This is mainly used to traverse Numpy ndarrays.

    Each array has dimensions that are defined in ndarray.shape
    """
    if result is None:
        result = ((),)  # a tuple with an empty tuple
    for dimension in dimentions:
        result = cartesian_product(result, range(dimension))
    return result


def get_numpy_ndarray_rows(obj, shape=None):
    """
    Convert a multi dimensional numpy array to list of rows

    Yields (path_tuple, row) pairs where path_tuple holds one index for
    every dimension except the last one.
    """
    if shape is None:
        shape = obj.shape

    dimentions = shape[:-1]
    for path_tuple in cartesian_product_of_shape(dimentions):
        result = obj
        for index in path_tuple:
            result = result[index]
        yield path_tuple, result


class _NotFound:
    """Marker object that never compares equal to anything, including itself."""

    def __eq__(self, other):
        return False

    # NOTE: ``__req__`` is not a Python special method so this alias has no
    # effect on comparisons. It is only kept so the class attributes stay the same.
    __req__ = __eq__

    def __repr__(self):
        return 'not found'

    __str__ = __repr__


not_found = _NotFound()

warnings.simplefilter('once', DeepDiffDeprecationWarning)


class OrderedSetPlus(OrderedSet):

    def lpop(self):
        """
        Remove and return the first element from the set.
        Raises KeyError if the set is empty.
        Example:
            >>> oset = OrderedSet([1, 2, 3])
            >>> oset.lpop()
            1
        """
        # NOTE(review): relies on the ``items`` and ``map`` attributes of
        # OrderedSet. Confirm against the installed ordered_set version.
        if not self.items:
            raise KeyError('lpop from an empty set')

        elem = self.items[0]
        del self.items[0]
        del self.map[elem]
        return elem

    def __repr__(self):
        return str(list(self))

    __str__ = __repr__


class RepeatedTimer:
    """
    Threaded Repeated Timer by MestreLion
    https://stackoverflow.com/a/38317060/1497443

    Calls function(*args, **kwargs) every interval seconds until stop() is
    called. The timer starts itself on creation. A ``duration`` keyword
    (whole seconds since creation) is added to kwargs on every start.
    """

    def __init__(self, interval, function, *args, **kwargs):
        self._timer = None
        self.interval = interval
        self.function = function
        self.args = args
        self.start_time = time.time()
        self.kwargs = kwargs
        self.is_running = False
        self.start()

    def _get_duration_sec(self):
        return int(time.time() - self.start_time)

    def _run(self):
        # Schedule the next run first, then call the function.
        self.is_running = False
        self.start()
        self.function(*self.args, **self.kwargs)

    def start(self):
        self.kwargs.update(duration=self._get_duration_sec())
        if not self.is_running:
            self._timer = Timer(self.interval, self._run)
            self._timer.start()
            self.is_running = True

    def stop(self):
        """Cancel the pending timer and return the seconds passed since creation."""
        duration = self._get_duration_sec()
        self._timer.cancel()
        self.is_running = False
        return duration


def _eval_decimal(params):
    return Decimal(params)


def _eval_datetime(params):
    params = f'({params})'
    params = literal_eval(params)
    return datetime.datetime(*params)


def _eval_date(params):
    params = f'({params})'
    params = literal_eval(params)
    return datetime.date(*params)


# (prefix, suffix, converter) for the expressions that literal_eval_extended
# supports on top of what ast.literal_eval can parse.
LITERAL_EVAL_PRE_PROCESS = [
    ('Decimal(', ')', _eval_decimal),
    ('datetime.datetime(', ')', _eval_datetime),
    ('datetime.date(', ')', _eval_date),
]


def literal_eval_extended(item):
    """
    An extended version of literal_eval

    When ast.literal_eval fails with SyntaxError or ValueError, the string
    forms listed in LITERAL_EVAL_PRE_PROCESS are tried. If none of them
    matches, the original exception is raised again.
    """
    try:
        return literal_eval(item)
    except (SyntaxError, ValueError):
        for begin, end, func in LITERAL_EVAL_PRE_PROCESS:
            if item.startswith(begin) and item.endswith(end):
                # Extracting and removing extra quotes so for example "Decimal('10.1')" becomes "'10.1'" and then '10.1'
                params = item[len(begin): -len(end)].strip('\'\"')
                return func(params)
        raise


def time_to_seconds(t):
    """Seconds since midnight for t, using its hour, minute and second only."""
    return (t.hour * 60 + t.minute) * 60 + t.second


def datetime_normalize(truncate_datetime, obj):
    """
    Truncate obj to the given unit ('second', 'minute', 'hour' or 'day'),
    if any. Afterwards the tzinfo of a datetime is replaced by UTC and a
    time is converted into seconds since midnight.
    """
    if truncate_datetime:
        if truncate_datetime == 'second':
            obj = obj.replace(microsecond=0)
        elif truncate_datetime == 'minute':
            obj = obj.replace(second=0, microsecond=0)
        elif truncate_datetime == 'hour':
            obj = obj.replace(minute=0, second=0, microsecond=0)
        elif truncate_datetime == 'day':
            obj = obj.replace(hour=0, minute=0, second=0, microsecond=0)
    if isinstance(obj, datetime.datetime):
        obj = obj.replace(tzinfo=datetime.timezone.utc)
    elif isinstance(obj, datetime.time):
        obj = time_to_seconds(obj)
    return obj


def get_truncate_datetime(truncate_datetime):
    """
    Validates truncate_datetime value
    """
    if truncate_datetime not in {None, 'second', 'minute', 'hour', 'day'}:
        raise ValueError("truncate_datetime must be second, minute, hour or day")
    return truncate_datetime


def cartesian_product_numpy(*arrays):
    """
    Cartesian product of Numpy arrays by Paul Panzer
    https://stackoverflow.com/a/49445693/1497443
    """
    la = len(arrays)
    dtype = np.result_type(*arrays)
    arr = np.empty((la, *map(len, arrays)), dtype=dtype)
    idx = slice(None), *repeat(None, la)
    for i, a in enumerate(arrays):
        arr[i, ...] = a[idx[:la - i]]
    return arr.reshape(la, -1).T


def diff_numpy_array(A, B):
    """
    Numpy Array A - B
    return items in A that are not in B
    By Divakar
    https://stackoverflow.com/a/52417967/1497443
    """
    # np.isin replaces the deprecated np.in1d. The mask is the same for 1D input.
    return A[~np.isin(A, B)]


PYTHON_TYPE_TO_NUMPY_TYPE = {
    int: np_int64,
    float: np_float64,
    Decimal: np_float64
}


def get_homogeneous_numpy_compatible_type_of_seq(seq):
    """
    Return with the numpy dtype if the array can be converted to a non-object numpy array.
    Originally written by mgilson https://stackoverflow.com/a/13252348/1497443
    This is the modified version.

    Returns False when seq is empty, when its items are not all of exactly
    the same type, or when that type is not int, float or Decimal.
    """
    iseq = iter(seq)
    try:
        first_type = type(next(iseq))
    except StopIteration:
        # An empty sequence has no item type to report.
        return False
    if first_type in {int, float, Decimal}:
        type_ = first_type if all((type(x) is first_type) for x in iseq) else False
        return PYTHON_TYPE_TO_NUMPY_TYPE.get(type_, False)
    else:
        return False