1#!/usr/bin/env python
2
3# In order to run the docstrings:
4# python3 -m deepdiff.diff
5# You might need to run it many times since dictionaries come in different orders
6# every time you run the docstrings.
7# However the docstring expects it in a specific order in order to pass!
8import difflib
9import logging
10from copy import deepcopy
11from math import isclose as is_close
12from collections.abc import Mapping, Iterable
13from collections import defaultdict
14from itertools import zip_longest
15from ordered_set import OrderedSet
16from deepdiff.helper import (strings, bytes_type, numbers, times, ListItemRemovedOrAdded, notpresent,
17                             IndexedHash, unprocessed, add_to_frozen_set,
18                             convert_item_or_items_into_set_else_none, get_type,
19                             convert_item_or_items_into_compiled_regexes_else_none,
20                             type_is_subclass_of_type_group, type_in_type_group, get_doc,
21                             number_to_string, datetime_normalize, KEY_TO_VAL_STR, booleans,
22                             np_ndarray, get_numpy_ndarray_rows, OrderedSetPlus, RepeatedTimer,
23                             TEXT_VIEW, TREE_VIEW, DELTA_VIEW,
24                             np, get_truncate_datetime, dict_, CannotCompare)
25from deepdiff.serialization import SerializationMixin
26from deepdiff.distance import DistanceMixin
27from deepdiff.model import (
28    RemapDict, ResultDict, TextResult, TreeResult, DiffLevel,
29    DictRelationship, AttributeRelationship,
30    SubscriptableIterableRelationship, NonSubscriptableIterableRelationship,
31    SetRelationship, NumpyArrayRelationship, CUSTOM_FIELD)
32from deepdiff.deephash import DeepHash, combine_hashes_lists
33from deepdiff.base import Base
34from deepdiff.lfucache import LFUCache, DummyLFU
35
36logger = logging.getLogger(__name__)
37
38MAX_PASSES_REACHED_MSG = (
39    'DeepDiff has reached the max number of passes of {}. '
40    'You can possibly get more accurate results by increasing the max_passes parameter.')
41
42MAX_DIFFS_REACHED_MSG = (
43    'DeepDiff has reached the max number of diffs of {}. '
44    'You can possibly get more accurate results by increasing the max_diffs parameter.')
45
46
47notpresent_indexed = IndexedHash(indexes=[0], item=notpresent)
48
49doc = get_doc('diff_doc.rst')
50
51
52PROGRESS_MSG = "DeepDiff {} seconds in progress. Pass #{}, Diff #{}"
53
54
55def _report_progress(_stats, progress_logger, duration):
56    """
57    Report the progress every few seconds.
58    """
59    progress_logger(PROGRESS_MSG.format(duration, _stats[PASSES_COUNT], _stats[DIFF_COUNT]))
60
61
62DISTANCE_CACHE_HIT_COUNT = 'DISTANCE CACHE HIT COUNT'
63DIFF_COUNT = 'DIFF COUNT'
64PASSES_COUNT = 'PASSES COUNT'
65MAX_PASS_LIMIT_REACHED = 'MAX PASS LIMIT REACHED'
66MAX_DIFF_LIMIT_REACHED = 'MAX DIFF LIMIT REACHED'
67DISTANCE_CACHE_ENABLED = 'DISTANCE CACHE ENABLED'
68PREVIOUS_DIFF_COUNT = 'PREVIOUS DIFF COUNT'
69PREVIOUS_DISTANCE_CACHE_HIT_COUNT = 'PREVIOUS DISTANCE CACHE HIT COUNT'
70CANT_FIND_NUMPY_MSG = 'Unable to import numpy. This must be a bug in DeepDiff since a numpy array is detected.'
71INVALID_VIEW_MSG = 'The only valid values for the view parameter are text and tree. But {} was passed.'
72CUTOFF_RANGE_ERROR_MSG = 'cutoff_distance_for_pairs needs to be a positive float max 1.'
73VERBOSE_LEVEL_RANGE_MSG = 'verbose_level should be 0, 1, or 2.'
74PURGE_LEVEL_RANGE_MSG = 'cache_purge_level should be 0, 1, or 2.'
75_ENABLE_CACHE_EVERY_X_DIFF = '_ENABLE_CACHE_EVERY_X_DIFF'
76
77# What is the threshold to consider 2 items to be pairs. Only used when ignore_order = True.
78CUTOFF_DISTANCE_FOR_PAIRS_DEFAULT = 0.3
79
80# What is the threshold to calculate pairs of items between 2 iterables.
81# For example 2 iterables that have nothing in common, do not need their pairs to be calculated.
82CUTOFF_INTERSECTION_FOR_PAIRS_DEFAULT = 0.7
83
84DEEPHASH_PARAM_KEYS = (
85    'exclude_types',
86    'exclude_paths',
87    'exclude_regex_paths',
88    'hasher',
89    'significant_digits',
90    'number_format_notation',
91    'ignore_string_type_changes',
92    'ignore_numeric_type_changes',
93    'ignore_type_in_groups',
94    'ignore_type_subclasses',
95    'ignore_string_case',
96    'exclude_obj_callback',
97    'ignore_private_variables',)
98
99
100class DeepDiff(ResultDict, SerializationMixin, DistanceMixin, Base):
101    __doc__ = doc
102
103    CACHE_AUTO_ADJUST_THRESHOLD = 0.25
104
105    def __init__(self,
106                 t1,
107                 t2,
108                 cutoff_distance_for_pairs=CUTOFF_DISTANCE_FOR_PAIRS_DEFAULT,
109                 cutoff_intersection_for_pairs=CUTOFF_INTERSECTION_FOR_PAIRS_DEFAULT,
110                 cache_size=0,
111                 cache_tuning_sample_size=0,
112                 cache_purge_level=1,
113                 exclude_paths=None,
114                 exclude_regex_paths=None,
115                 exclude_types=None,
116                 exclude_obj_callback=None,
117                 get_deep_distance=False,
118                 group_by=None,
119                 hasher=None,
120                 hashes=None,
121                 ignore_order=False,
122                 ignore_order_func=None,
123                 ignore_type_in_groups=None,
124                 ignore_string_type_changes=False,
125                 ignore_numeric_type_changes=False,
126                 ignore_type_subclasses=False,
127                 ignore_string_case=False,
128                 ignore_nan_inequality=False,
129                 ignore_private_variables=True,
130                 log_frequency_in_sec=0,
131                 math_epsilon=None,
132                 max_passes=10000000,
133                 max_diffs=None,
134                 number_format_notation="f",
135                 number_to_string_func=None,
136                 progress_logger=logger.info,
137                 report_repetition=False,
138                 significant_digits=None,
139                 truncate_datetime=None,
140                 verbose_level=1,
141                 view=TEXT_VIEW,
142                 iterable_compare_func=None,
143                 custom_operators=None,
144                 _original_type=None,
145                 _parameters=None,
146                 _shared_parameters=None,
147                 **kwargs):
148        super().__init__()
149        if kwargs:
150            raise ValueError((
151                "The following parameter(s) are not valid: %s\n"
152                "The valid parameters are ignore_order, report_repetition, significant_digits, "
153                "number_format_notation, exclude_paths, exclude_types, exclude_regex_paths, ignore_type_in_groups, "
154                "ignore_string_type_changes, ignore_numeric_type_changes, ignore_type_subclasses, truncate_datetime, "
155                "ignore_private_variables, ignore_nan_inequality, number_to_string_func, verbose_level, "
156                "view, hasher, hashes, max_passes, max_diffs, "
157                "cutoff_distance_for_pairs, cutoff_intersection_for_pairs, log_frequency_in_sec, cache_size, "
158                "cache_tuning_sample_size, get_deep_distance, group_by, cache_purge_level, "
159                "math_epsilon, iterable_compare_func, _original_type, "
160                "ignore_order_func, custom_operators, "
161                "_parameters and _shared_parameters.") % ', '.join(kwargs.keys()))
162
163        if _parameters:
164            self.__dict__.update(_parameters)
165        else:
166            self.custom_operators = custom_operators or []
167            self.ignore_order = ignore_order
168
169            self.ignore_order_func = ignore_order_func or (lambda *_args, **_kwargs: ignore_order)
170
171            ignore_type_in_groups = ignore_type_in_groups or []
172            if numbers == ignore_type_in_groups or numbers in ignore_type_in_groups:
173                ignore_numeric_type_changes = True
174            self.ignore_numeric_type_changes = ignore_numeric_type_changes
175            if strings == ignore_type_in_groups or strings in ignore_type_in_groups:
176                ignore_string_type_changes = True
177            self.ignore_string_type_changes = ignore_string_type_changes
178            self.ignore_type_in_groups = self.get_ignore_types_in_groups(
179                ignore_type_in_groups=ignore_type_in_groups,
180                ignore_string_type_changes=ignore_string_type_changes,
181                ignore_numeric_type_changes=ignore_numeric_type_changes,
182                ignore_type_subclasses=ignore_type_subclasses)
183            self.report_repetition = report_repetition
184            self.exclude_paths = convert_item_or_items_into_set_else_none(exclude_paths)
185            self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths)
186            self.exclude_types = set(exclude_types) if exclude_types else None
187            self.exclude_types_tuple = tuple(exclude_types) if exclude_types else None  # we need tuple for checking isinstance
188            self.ignore_type_subclasses = ignore_type_subclasses
189            self.type_check_func = type_is_subclass_of_type_group if ignore_type_subclasses else type_in_type_group
190            self.ignore_string_case = ignore_string_case
191            self.exclude_obj_callback = exclude_obj_callback
192            self.number_to_string = number_to_string_func or number_to_string
193            self.iterable_compare_func = iterable_compare_func
194            self.ignore_private_variables = ignore_private_variables
195            self.ignore_nan_inequality = ignore_nan_inequality
196            self.hasher = hasher
197            self.cache_tuning_sample_size = cache_tuning_sample_size
198            self.group_by = group_by
199
200            self.significant_digits = self.get_significant_digits(significant_digits, ignore_numeric_type_changes)
201            self.math_epsilon = math_epsilon
202            if self.math_epsilon is not None and self.ignore_order:
203                logger.warning("math_epsilon will be ignored. It cannot be used when ignore_order is True.")
204            self.truncate_datetime = get_truncate_datetime(truncate_datetime)
205            self.number_format_notation = number_format_notation
206            if verbose_level in {0, 1, 2}:
207                self.verbose_level = verbose_level
208            else:
209                raise ValueError(VERBOSE_LEVEL_RANGE_MSG)
210            if cache_purge_level not in {0, 1, 2}:
211                raise ValueError(PURGE_LEVEL_RANGE_MSG)
212            self.view = view
213            # Setting up the cache for dynamic programming. One dictionary per instance of root of DeepDiff running.
214            self.max_passes = max_passes
215            self.max_diffs = max_diffs
216            self.cutoff_distance_for_pairs = float(cutoff_distance_for_pairs)
217            self.cutoff_intersection_for_pairs = float(cutoff_intersection_for_pairs)
218            if self.cutoff_distance_for_pairs < 0 or self.cutoff_distance_for_pairs > 1:
219                raise ValueError(CUTOFF_RANGE_ERROR_MSG)
220            # _Parameters are the clean _parameters to initialize DeepDiff with so we avoid all the above
221            # cleaning functionalities when running DeepDiff recursively.
222            # However DeepHash has its own set of _parameters that are slightly different than DeepDIff.
223            # DeepDiff _parameters are transformed to DeepHash _parameters via _get_deephash_params method.
224            self.progress_logger = progress_logger
225            self.cache_size = cache_size
226            _parameters = self.__dict__.copy()
227
228        # Non-Root
229        if _shared_parameters:
230            self.is_root = False
231            self._shared_parameters = _shared_parameters
232            self.__dict__.update(_shared_parameters)
233            # We are in some pass other than root
234            progress_timer = None
235        # Root
236        else:
237            self.is_root = True
238            # Caching the DeepDiff results for dynamic programming
239            self._distance_cache = LFUCache(cache_size) if cache_size else DummyLFU()
240            self._stats = {
241                PASSES_COUNT: 0,
242                DIFF_COUNT: 0,
243                DISTANCE_CACHE_HIT_COUNT: 0,
244                PREVIOUS_DIFF_COUNT: 0,
245                PREVIOUS_DISTANCE_CACHE_HIT_COUNT: 0,
246                MAX_PASS_LIMIT_REACHED: False,
247                MAX_DIFF_LIMIT_REACHED: False,
248                DISTANCE_CACHE_ENABLED: bool(cache_size),
249            }
250            self.hashes = dict_() if hashes is None else hashes
251            self._numpy_paths = dict_()  # if _numpy_paths is None else _numpy_paths
252            self._shared_parameters = {
253                'hashes': self.hashes,
254                '_stats': self._stats,
255                '_distance_cache': self._distance_cache,
256                '_numpy_paths': self._numpy_paths,
257                _ENABLE_CACHE_EVERY_X_DIFF: self.cache_tuning_sample_size * 10,
258            }
259            if log_frequency_in_sec:
260                # Creating a progress log reporter that runs in a separate thread every log_frequency_in_sec seconds.
261                progress_timer = RepeatedTimer(log_frequency_in_sec, _report_progress, self._stats, progress_logger)
262            else:
263                progress_timer = None
264
265        self._parameters = _parameters
266        self.deephash_parameters = self._get_deephash_params()
267        self.tree = TreeResult()
268        if group_by and self.is_root:
269            try:
270                original_t1 = t1
271                t1 = self._group_iterable_to_dict(t1, group_by, item_name='t1')
272            except (KeyError, ValueError):
273                pass
274            else:
275                try:
276                    t2 = self._group_iterable_to_dict(t2, group_by, item_name='t2')
277                except (KeyError, ValueError):
278                    t1 = original_t1
279
280        self.t1 = t1
281        self.t2 = t2
282
283        try:
284            root = DiffLevel(t1, t2, verbose_level=self.verbose_level)
285            # _original_type is only used to pass the original type of the data. Currently only used for numpy arrays.
286            # The reason is that we convert the numpy array to python list and then later for distance calculations
287            # we convert only the the last dimension of it into numpy arrays.
288            self._diff(root, parents_ids=frozenset({id(t1)}), _original_type=_original_type)
289
290            if get_deep_distance and view in {TEXT_VIEW, TREE_VIEW}:
291                self.tree['deep_distance'] = self._get_rough_distance()
292
293            self.tree.remove_empty_keys()
294            view_results = self._get_view_results(self.view)
295            self.update(view_results)
296        finally:
297            if self.is_root:
298                if cache_purge_level:
299                    del self._distance_cache
300                    del self.hashes
301                del self._shared_parameters
302                del self._parameters
303                for key in (PREVIOUS_DIFF_COUNT, PREVIOUS_DISTANCE_CACHE_HIT_COUNT,
304                            DISTANCE_CACHE_ENABLED):
305                    del self._stats[key]
306                if progress_timer:
307                    duration = progress_timer.stop()
308                    self._stats['DURATION SEC'] = duration
309                    logger.info('stats {}'.format(self.get_stats()))
310                if cache_purge_level == 2:
311                    self.__dict__.clear()
312
313    def _get_deephash_params(self):
314        result = {key: self._parameters[key] for key in DEEPHASH_PARAM_KEYS}
315        result['ignore_repetition'] = not self.report_repetition
316        result['number_to_string_func'] = self.number_to_string
317        return result
318
319    def _report_result(self, report_type, level):
320        """
321        Add a detected change to the reference-style result dictionary.
322        report_type will be added to level.
323        (We'll create the text-style report from there later.)
324        :param report_type: A well defined string key describing the type of change.
325                            Examples: "set_item_added", "values_changed"
326        :param parent: A DiffLevel object describing the objects in question in their
327                       before-change and after-change object structure.
328
329        :rtype: None
330        """
331
332        if not self._skip_this(level):
333            level.report_type = report_type
334            self.tree[report_type].add(level)
335
336    def custom_report_result(self, report_type, level, extra_info=None):
337        """
338        Add a detected change to the reference-style result dictionary.
339        report_type will be added to level.
340        (We'll create the text-style report from there later.)
341        :param report_type: A well defined string key describing the type of change.
342                            Examples: "set_item_added", "values_changed"
343        :param parent: A DiffLevel object describing the objects in question in their
344                       before-change and after-change object structure.
345        :param extra_info: A dict that describe this result
346        :rtype: None
347        """
348
349        if not self._skip_this(level):
350            level.report_type = report_type
351            level.additional[CUSTOM_FIELD] = extra_info
352            self.tree[report_type].add(level)
353
354    @staticmethod
355    def _dict_from_slots(object):
356        def unmangle(attribute):
357            if attribute.startswith('__') and attribute != '__weakref__':
358                return '_{type}{attribute}'.format(
359                    type=type(object).__name__,
360                    attribute=attribute
361                )
362            return attribute
363
364        all_slots = []
365
366        if isinstance(object, type):
367            mro = object.__mro__  # pragma: no cover. I have not been able to write a test for this case. But we still check for it.
368        else:
369            mro = object.__class__.__mro__
370
371        for type_in_mro in mro:
372            slots = getattr(type_in_mro, '__slots__', None)
373            if slots:
374                if isinstance(slots, strings):
375                    all_slots.append(slots)
376                else:
377                    all_slots.extend(slots)
378
379        return {i: getattr(object, unmangle(i)) for i in all_slots}
380
381    def _diff_obj(self, level, parents_ids=frozenset(),
382                  is_namedtuple=False):
383        """Difference of 2 objects"""
384        try:
385            if is_namedtuple:
386                t1 = level.t1._asdict()
387                t2 = level.t2._asdict()
388            else:
389                t1 = level.t1.__dict__
390                t2 = level.t2.__dict__
391        except AttributeError:
392            try:
393                t1 = self._dict_from_slots(level.t1)
394                t2 = self._dict_from_slots(level.t2)
395            except AttributeError:
396                self._report_result('unprocessed', level)
397                return
398
399        self._diff_dict(
400            level,
401            parents_ids,
402            print_as_attribute=True,
403            override=True,
404            override_t1=t1,
405            override_t2=t2)
406
407    def _skip_this(self, level):
408        """
409        Check whether this comparison should be skipped because one of the objects to compare meets exclusion criteria.
410        :rtype: bool
411        """
412        skip = False
413        if self.exclude_paths and level.path() in self.exclude_paths:
414            skip = True
415        elif self.exclude_regex_paths and any(
416                [exclude_regex_path.search(level.path()) for exclude_regex_path in self.exclude_regex_paths]):
417            skip = True
418        elif self.exclude_types_tuple and \
419                (isinstance(level.t1, self.exclude_types_tuple) or isinstance(level.t2, self.exclude_types_tuple)):
420            skip = True
421        elif self.exclude_obj_callback and \
422                (self.exclude_obj_callback(level.t1, level.path()) or self.exclude_obj_callback(level.t2, level.path())):
423            skip = True
424
425        return skip
426
427    def _get_clean_to_keys_mapping(self, keys, level):
428        """
429        Get a dictionary of cleaned value of keys to the keys themselves.
430        This is mainly used to transform the keys when the type changes of keys should be ignored.
431
432        TODO: needs also some key conversion for groups of types other than the built-in strings and numbers.
433        """
434        result = dict_()
435        for key in keys:
436            if self.ignore_string_type_changes and isinstance(key, bytes):
437                clean_key = key.decode('utf-8')
438            elif isinstance(key, numbers):
439                type_ = "number" if self.ignore_numeric_type_changes else key.__class__.__name__
440                clean_key = self.number_to_string(key, significant_digits=self.significant_digits,
441                                                  number_format_notation=self.number_format_notation)
442                clean_key = KEY_TO_VAL_STR.format(type_, clean_key)
443            else:
444                clean_key = key
445            if clean_key in result:
446                logger.warning(('{} and {} in {} become the same key when ignore_numeric_type_changes'
447                                'or ignore_numeric_type_changes are set to be true.').format(
448                                    key, result[clean_key], level.path()))
449            else:
450                result[clean_key] = key
451        return result
452
453    def _diff_dict(self,
454                    level,
455                    parents_ids=frozenset([]),
456                    print_as_attribute=False,
457                    override=False,
458                    override_t1=None,
459                    override_t2=None):
460        """Difference of 2 dictionaries"""
461        if override:
462            # for special stuff like custom objects and named tuples we receive preprocessed t1 and t2
463            # but must not spoil the chain (=level) with it
464            t1 = override_t1
465            t2 = override_t2
466        else:
467            t1 = level.t1
468            t2 = level.t2
469
470        if print_as_attribute:
471            item_added_key = "attribute_added"
472            item_removed_key = "attribute_removed"
473            rel_class = AttributeRelationship
474        else:
475            item_added_key = "dictionary_item_added"
476            item_removed_key = "dictionary_item_removed"
477            rel_class = DictRelationship
478
479        if self.ignore_private_variables:
480            t1_keys = OrderedSet([key for key in t1 if not(isinstance(key, str) and key.startswith('__'))])
481            t2_keys = OrderedSet([key for key in t2 if not(isinstance(key, str) and key.startswith('__'))])
482        else:
483            t1_keys = OrderedSet(t1.keys())
484            t2_keys = OrderedSet(t2.keys())
485        if self.ignore_string_type_changes or self.ignore_numeric_type_changes:
486            t1_clean_to_keys = self._get_clean_to_keys_mapping(keys=t1_keys, level=level)
487            t2_clean_to_keys = self._get_clean_to_keys_mapping(keys=t2_keys, level=level)
488            t1_keys = OrderedSet(t1_clean_to_keys.keys())
489            t2_keys = OrderedSet(t2_clean_to_keys.keys())
490        else:
491            t1_clean_to_keys = t2_clean_to_keys = None
492
493        t_keys_intersect = t2_keys.intersection(t1_keys)
494
495        t_keys_added = t2_keys - t_keys_intersect
496        t_keys_removed = t1_keys - t_keys_intersect
497
498        for key in t_keys_added:
499            if self._count_diff() is StopIteration:
500                return
501
502            key = t2_clean_to_keys[key] if t2_clean_to_keys else key
503            change_level = level.branch_deeper(
504                notpresent,
505                t2[key],
506                child_relationship_class=rel_class,
507                child_relationship_param=key)
508            self._report_result(item_added_key, change_level)
509
510        for key in t_keys_removed:
511            if self._count_diff() is StopIteration:
512                return  # pragma: no cover. This is already covered for addition.
513
514            key = t1_clean_to_keys[key] if t1_clean_to_keys else key
515            change_level = level.branch_deeper(
516                t1[key],
517                notpresent,
518                child_relationship_class=rel_class,
519                child_relationship_param=key)
520            self._report_result(item_removed_key, change_level)
521
522        for key in t_keys_intersect:  # key present in both dicts - need to compare values
523            if self._count_diff() is StopIteration:
524                return  # pragma: no cover. This is already covered for addition.
525
526            key1 = t1_clean_to_keys[key] if t1_clean_to_keys else key
527            key2 = t2_clean_to_keys[key] if t2_clean_to_keys else key
528            item_id = id(t1[key1])
529            if parents_ids and item_id in parents_ids:
530                continue
531            parents_ids_added = add_to_frozen_set(parents_ids, item_id)
532
533            # Go one level deeper
534            next_level = level.branch_deeper(
535                t1[key1],
536                t2[key2],
537                child_relationship_class=rel_class,
538                child_relationship_param=key)
539            self._diff(next_level, parents_ids_added)
540
541    def _diff_set(self, level):
542        """Difference of sets"""
543        t1_hashtable = self._create_hashtable(level, 't1')
544        t2_hashtable = self._create_hashtable(level, 't2')
545
546        t1_hashes = set(t1_hashtable.keys())
547        t2_hashes = set(t2_hashtable.keys())
548
549        hashes_added = t2_hashes - t1_hashes
550        hashes_removed = t1_hashes - t2_hashes
551
552        items_added = [t2_hashtable[i].item for i in hashes_added]
553        items_removed = [t1_hashtable[i].item for i in hashes_removed]
554
555        for item in items_added:
556            if self._count_diff() is StopIteration:
557                return  # pragma: no cover. This is already covered for addition.
558
559            change_level = level.branch_deeper(
560                notpresent, item, child_relationship_class=SetRelationship)
561            self._report_result('set_item_added', change_level)
562
563        for item in items_removed:
564            if self._count_diff() is StopIteration:
565                return  # pragma: no cover. This is already covered for addition.
566
567            change_level = level.branch_deeper(
568                item, notpresent, child_relationship_class=SetRelationship)
569            self._report_result('set_item_removed', change_level)
570
571    @staticmethod
572    def _iterables_subscriptable(t1, t2):
573        try:
574            if getattr(t1, '__getitem__') and getattr(t2, '__getitem__'):
575                return True
576            else:  # pragma: no cover
577                return False  # should never happen
578        except AttributeError:
579            return False
580
581    def _diff_iterable(self, level, parents_ids=frozenset(), _original_type=None):
582        """Difference of iterables"""
583        if self.ignore_order_func(level):
584            self._diff_iterable_with_deephash(level, parents_ids, _original_type=_original_type)
585        else:
586            self._diff_iterable_in_order(level, parents_ids, _original_type=_original_type)
587
588    def _compare_in_order(self, level):
589        """
590        Default compare if `iterable_compare_func` is not provided.
591        This will compare in sequence order.
592        """
593
594        return [((i, i), (x, y)) for i, (x, y) in enumerate(
595            zip_longest(
596                level.t1, level.t2, fillvalue=ListItemRemovedOrAdded))]
597
598    def _get_matching_pairs(self, level):
599        """
600        Given a level get matching pairs. This returns list of two tuples in the form:
601        [
602          (t1 index, t2 index), (t1 item, t2 item)
603        ]
604
605        This will compare using the passed in `iterable_compare_func` if available.
606        Default it to compare in order
607        """
608
609        if(self.iterable_compare_func is None):
610            # Match in order if there is no compare function provided
611            return self._compare_in_order(level)
612        try:
613            matches = []
614            y_matched = set()
615            y_index_matched = set()
616            for i, x in enumerate(level.t1):
617                x_found = False
618                for j, y in enumerate(level.t2):
619
620                    if(j in y_index_matched):
621                        # This ensures a one-to-one relationship of matches from t1 to t2.
622                        # If y this index in t2 has already been matched to another x
623                        # it cannot have another match, so just continue.
624                        continue
625
626                    if(self.iterable_compare_func(x, y, level)):
627                        deep_hash = DeepHash(y,
628                                             hashes=self.hashes,
629                                             apply_hash=True,
630                                             **self.deephash_parameters,
631                                             )
632                        y_index_matched.add(j)
633                        y_matched.add(deep_hash[y])
634                        matches.append(((i, j), (x, y)))
635                        x_found = True
636                        break
637
638                if(not x_found):
639                    matches.append(((i, -1), (x, ListItemRemovedOrAdded)))
640            for j, y in enumerate(level.t2):
641
642                deep_hash = DeepHash(y,
643                                     hashes=self.hashes,
644                                     apply_hash=True,
645                                     **self.deephash_parameters,
646                                     )
647                if(deep_hash[y] not in y_matched):
648                    matches.append(((-1, j), (ListItemRemovedOrAdded, y)))
649            return matches
650        except CannotCompare:
651            return self._compare_in_order(level)
652
653    def _diff_iterable_in_order(self, level, parents_ids=frozenset(), _original_type=None):
654        # We're handling both subscriptable and non-subscriptable iterables. Which one is it?
655        subscriptable = self._iterables_subscriptable(level.t1, level.t2)
656        if subscriptable:
657            child_relationship_class = SubscriptableIterableRelationship
658        else:
659            child_relationship_class = NonSubscriptableIterableRelationship
660
661        for (i, j), (x, y) in self._get_matching_pairs(level):
662            if self._count_diff() is StopIteration:
663                return  # pragma: no cover. This is already covered for addition.
664
665            if y is ListItemRemovedOrAdded:  # item removed completely
666                change_level = level.branch_deeper(
667                    x,
668                    notpresent,
669                    child_relationship_class=child_relationship_class,
670                    child_relationship_param=i)
671                self._report_result('iterable_item_removed', change_level)
672
673            elif x is ListItemRemovedOrAdded:  # new item added
674                change_level = level.branch_deeper(
675                    notpresent,
676                    y,
677                    child_relationship_class=child_relationship_class,
678                    child_relationship_param=j)
679                self._report_result('iterable_item_added', change_level)
680
681            else:  # check if item value has changed
682
683                if (i != j):
684                    # Item moved
685                    change_level = level.branch_deeper(
686                        x,
687                        y,
688                        child_relationship_class=child_relationship_class,
689                        child_relationship_param=i,
690                        child_relationship_param2=j
691                    )
692                    self._report_result('iterable_item_moved', change_level)
693
694                item_id = id(x)
695                if parents_ids and item_id in parents_ids:
696                    continue
697                parents_ids_added = add_to_frozen_set(parents_ids, item_id)
698
699                # Go one level deeper
700                next_level = level.branch_deeper(
701                    x,
702                    y,
703                    child_relationship_class=child_relationship_class,
704                    child_relationship_param=i)
705                self._diff(next_level, parents_ids_added)
706
707    def _diff_str(self, level):
708        """Compare strings"""
709        if self.ignore_string_case:
710            level.t1 = level.t1.lower()
711            level.t2 = level.t2.lower()
712
713        if type(level.t1) == type(level.t2) and level.t1 == level.t2:  # NOQA
714            return
715
716        # do we add a diff for convenience?
717        do_diff = True
718        t1_str = level.t1
719        t2_str = level.t2
720
721        if isinstance(level.t1, bytes_type):
722            try:
723                t1_str = level.t1.decode('ascii')
724            except UnicodeDecodeError:
725                do_diff = False
726
727        if isinstance(level.t2, bytes_type):
728            try:
729                t2_str = level.t2.decode('ascii')
730            except UnicodeDecodeError:
731                do_diff = False
732
733        if t1_str == t2_str:
734            return
735
736        if do_diff:
737            if '\n' in t1_str or '\n' in t2_str:
738                diff = difflib.unified_diff(
739                    t1_str.splitlines(), t2_str.splitlines(), lineterm='')
740                diff = list(diff)
741                if diff:
742                    level.additional['diff'] = '\n'.join(diff)
743
744        self._report_result('values_changed', level)
745
746    def _diff_tuple(self, level, parents_ids):
747        # Checking to see if it has _fields. Which probably means it is a named
748        # tuple.
749        try:
750            level.t1._asdict
751        # It must be a normal tuple
752        except AttributeError:
753            self._diff_iterable(level, parents_ids)
754        # We assume it is a namedtuple then
755        else:
756            self._diff_obj(level, parents_ids, is_namedtuple=True)
757
758    def _add_hash(self, hashes, item_hash, item, i):
759        if item_hash in hashes:
760            hashes[item_hash].indexes.append(i)
761        else:
762            hashes[item_hash] = IndexedHash(indexes=[i], item=item)
763
764    def _create_hashtable(self, level, t):
765        """Create hashtable of {item_hash: (indexes, item)}"""
766        obj = getattr(level, t)
767
768        local_hashes = dict_()
769        for (i, item) in enumerate(obj):
770            try:
771                parent = "{}[{}]".format(level.path(), i)
772                # Note: in the DeepDiff we only calculate the hash of items when we have to.
773                # So self.hashes does not include hashes of all objects in t1 and t2.
774                # It only includes the ones needed when comparing iterables.
775                # The self.hashes dictionary gets shared between different runs of DeepHash
776                # So that any object that is already calculated to have a hash is not re-calculated.
777                deep_hash = DeepHash(item,
778                                     hashes=self.hashes,
779                                     parent=parent,
780                                     apply_hash=True,
781                                     **self.deephash_parameters,
782                                     )
783                item_hash = deep_hash[item]
784            except Exception as e:  # pragma: no cover
785                logger.error("Can not produce a hash for %s."
786                             "Not counting this object.\n %s" %
787                             (level.path(), e))
788            else:
789                if item_hash is unprocessed:  # pragma: no cover
790                    logger.warning("Item %s was not processed while hashing "
791                                   "thus not counting this object." %
792                                   level.path())
793                else:
794                    self._add_hash(hashes=local_hashes, item_hash=item_hash, item=item, i=i)
795
796        # Also we hash the iterables themselves too so that we can later create cache keys from those hashes.
797        try:
798            DeepHash(
799                obj,
800                hashes=self.hashes,
801                parent=level.path(),
802                apply_hash=True,
803                **self.deephash_parameters,
804            )
805        except Exception as e:  # pragma: no cover
806            logger.error("Can not produce a hash for iterable %s. %s" %
807                         (level.path(), e))
808        return local_hashes
809
810    @staticmethod
811    def _get_distance_cache_key(added_hash, removed_hash):
812        key1, key2 = (added_hash, removed_hash) if added_hash > removed_hash else (removed_hash, added_hash)
813        if isinstance(key1, int):
814            # If the hash function produces integers we convert them to hex values.
815            # This was used when the default hash function was Murmur3 128bit which produces integers.
816            key1 = hex(key1).encode('utf-8')
817            key2 = hex(key2).encode('utf-8')
818        elif isinstance(key1, str):
819            key1 = key1.encode('utf-8')
820            key2 = key2.encode('utf-8')
821        return key1 + b'--' + key2 + b'dc'
822
823    def _get_rough_distance_of_hashed_objs(
824            self, added_hash, removed_hash, added_hash_obj, removed_hash_obj, _original_type=None):
825        # We need the rough distance between the 2 objects to see if they qualify to be pairs or not
826        _distance = cache_key = None
827        if self._stats[DISTANCE_CACHE_ENABLED]:
828            cache_key = self._get_distance_cache_key(added_hash, removed_hash)
829            if cache_key in self._distance_cache:
830                self._stats[DISTANCE_CACHE_HIT_COUNT] += 1
831                _distance = self._distance_cache.get(cache_key)
832        if _distance is None:
833            # We can only cache the rough distance and not the actual diff result for reuse.
834            # The reason is that we have modified the parameters explicitly so they are different and can't
835            # be used for diff reporting
836            diff = DeepDiff(
837                removed_hash_obj.item, added_hash_obj.item,
838                _parameters=self._parameters,
839                _shared_parameters=self._shared_parameters,
840                view=DELTA_VIEW,
841                _original_type=_original_type,
842                iterable_compare_func=self.iterable_compare_func,
843            )
844            _distance = diff._get_rough_distance()
845            if cache_key and self._stats[DISTANCE_CACHE_ENABLED]:
846                self._distance_cache.set(cache_key, value=_distance)
847        return _distance
848
849    def _get_most_in_common_pairs_in_iterables(
850            self, hashes_added, hashes_removed, t1_hashtable, t2_hashtable, parents_ids, _original_type):
851        """
852        Get the closest pairs between items that are removed and items that are added.
853
854        returns a dictionary of hashes that are closest to each other.
855        The dictionary is going to be symmetrical so any key will be a value too and otherwise.
856
857        Note that due to the current reporting structure in DeepDiff, we don't compare an item that
858        was added to an item that is in both t1 and t2.
859
860        For example
861
862        [{1, 2}, {4, 5, 6}]
863        [{1, 2}, {1, 2, 3}]
864
865        is only compared between {4, 5, 6} and {1, 2, 3} even though technically {1, 2, 3} is
866        just one item different than {1, 2}
867
868        Perhaps in future we can have a report key that is item duplicated and modified instead of just added.
869        """
870        cache_key = None
871        if self._stats[DISTANCE_CACHE_ENABLED]:
872            cache_key = combine_hashes_lists(items=[hashes_added, hashes_removed], prefix='pairs_cache')
873            if cache_key in self._distance_cache:
874                return self._distance_cache.get(cache_key).copy()
875
876        # A dictionary of hashes to distances and each distance to an ordered set of hashes.
877        # It tells us about the distance of each object from other objects.
878        # And the objects with the same distances are grouped together in an ordered set.
879        # It also includes a "max" key that is just the value of the biggest current distance in the
880        # most_in_common_pairs dictionary.
881        most_in_common_pairs = defaultdict(lambda: defaultdict(OrderedSetPlus))
882        pairs = dict_()
883
884        pre_calced_distances = None
885
886        if hashes_added and hashes_removed and np and len(hashes_added) > 1 and len(hashes_removed) > 1:
887            # pre-calculates distances ONLY for 1D arrays whether an _original_type
888            # was explicitly passed or a homogeneous array is detected.
889            # Numpy is needed for this optimization.
890            pre_calced_distances = self._precalculate_numpy_arrays_distance(
891                hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type)
892
893        if hashes_added and hashes_removed and self.iterable_compare_func and len(hashes_added) > 1 and len(hashes_removed) > 1:
894            pre_calced_distances = self._precalculate_distance_by_custom_compare_func(
895                hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type)
896
897        for added_hash in hashes_added:
898            for removed_hash in hashes_removed:
899                added_hash_obj = t2_hashtable[added_hash]
900                removed_hash_obj = t1_hashtable[removed_hash]
901
902                # Loop is detected
903                if id(removed_hash_obj.item) in parents_ids:
904                    continue
905
906                _distance = None
907                if pre_calced_distances:
908                    _distance = pre_calced_distances.get("{}--{}".format(added_hash, removed_hash))
909                if _distance is None:
910                    _distance = self._get_rough_distance_of_hashed_objs(
911                        added_hash, removed_hash, added_hash_obj, removed_hash_obj, _original_type)
912                # Left for future debugging
913                # print(f'{Fore.RED}distance of {added_hash_obj.item} and {removed_hash_obj.item}: {_distance}{Style.RESET_ALL}')
914                # Discard potential pairs that are too far.
915                if _distance >= self.cutoff_distance_for_pairs:
916                    continue
917                pairs_of_item = most_in_common_pairs[added_hash]
918                pairs_of_item[_distance].add(removed_hash)
919        used_to_hashes = set()
920
921        distances_to_from_hashes = defaultdict(OrderedSetPlus)
922        for from_hash, distances_to_to_hashes in most_in_common_pairs.items():
923            # del distances_to_to_hashes['max']
924            for dist in distances_to_to_hashes:
925                distances_to_from_hashes[dist].add(from_hash)
926
927        for dist in sorted(distances_to_from_hashes.keys()):
928            from_hashes = distances_to_from_hashes[dist]
929            while from_hashes:
930                from_hash = from_hashes.lpop()
931                if from_hash not in used_to_hashes:
932                    to_hashes = most_in_common_pairs[from_hash][dist]
933                    while to_hashes:
934                        to_hash = to_hashes.lpop()
935                        if to_hash not in used_to_hashes:
936                            used_to_hashes.add(from_hash)
937                            used_to_hashes.add(to_hash)
938                            # Left for future debugging:
939                            # print(f'{bcolors.FAIL}Adding {t2_hashtable[from_hash].item} as a pairs of {t1_hashtable[to_hash].item} with distance of {dist}{bcolors.ENDC}')
940                            pairs[from_hash] = to_hash
941
942        inverse_pairs = {v: k for k, v in pairs.items()}
943        pairs.update(inverse_pairs)
944        if cache_key and self._stats[DISTANCE_CACHE_ENABLED]:
945            self._distance_cache.set(cache_key, value=pairs)
946        return pairs.copy()
947
948    def _diff_iterable_with_deephash(self, level, parents_ids, _original_type=None):
949        """Diff of hashable or unhashable iterables. Only used when ignoring the order."""
950
951        full_t1_hashtable = self._create_hashtable(level, 't1')
952        full_t2_hashtable = self._create_hashtable(level, 't2')
953        t1_hashes = OrderedSetPlus(full_t1_hashtable.keys())
954        t2_hashes = OrderedSetPlus(full_t2_hashtable.keys())
955        hashes_added = t2_hashes - t1_hashes
956        hashes_removed = t1_hashes - t2_hashes
957
958        # Deciding whether to calculate pairs or not.
959        if (len(hashes_added) + len(hashes_removed)) / (len(full_t1_hashtable) + len(full_t2_hashtable) + 1) > self.cutoff_intersection_for_pairs:
960            get_pairs = False
961        else:
962            get_pairs = True
963
964        # reduce the size of hashtables
965        if self.report_repetition:
966            t1_hashtable = full_t1_hashtable
967            t2_hashtable = full_t2_hashtable
968        else:
969            t1_hashtable = {k: v for k, v in full_t1_hashtable.items() if k in hashes_removed}
970            t2_hashtable = {k: v for k, v in full_t2_hashtable.items() if k in hashes_added}
971
972        if self._stats[PASSES_COUNT] < self.max_passes and get_pairs:
973            self._stats[PASSES_COUNT] += 1
974            pairs = self._get_most_in_common_pairs_in_iterables(
975                hashes_added, hashes_removed, t1_hashtable, t2_hashtable, parents_ids, _original_type)
976        elif get_pairs:
977            if not self._stats[MAX_PASS_LIMIT_REACHED]:
978                self._stats[MAX_PASS_LIMIT_REACHED] = True
979                logger.warning(MAX_PASSES_REACHED_MSG.format(self.max_passes))
980            pairs = dict_()
981        else:
982            pairs = dict_()
983
984        def get_other_pair(hash_value, in_t1=True):
985            """
986            Gets the other paired indexed hash item to the hash_value in the pairs dictionary
987            in_t1: are we looking for the other pair in t1 or t2?
988            """
989            if in_t1:
990                hashtable = t1_hashtable
991                the_other_hashes = hashes_removed
992            else:
993                hashtable = t2_hashtable
994                the_other_hashes = hashes_added
995            other = pairs.pop(hash_value, notpresent)
996            if other is notpresent:
997                other = notpresent_indexed
998            else:
999                # The pairs are symmetrical.
1000                # removing the other direction of pair
1001                # so it does not get used.
1002                del pairs[other]
1003                the_other_hashes.remove(other)
1004                other = hashtable[other]
1005            return other
1006
1007        if self.report_repetition:
1008            for hash_value in hashes_added:
1009                if self._count_diff() is StopIteration:
1010                    return  # pragma: no cover. This is already covered for addition (when report_repetition=False).
1011                other = get_other_pair(hash_value)
1012                item_id = id(other.item)
1013                indexes = t2_hashtable[hash_value].indexes if other.item is notpresent else other.indexes
1014                for i in indexes:
1015                    change_level = level.branch_deeper(
1016                        other.item,
1017                        t2_hashtable[hash_value].item,
1018                        child_relationship_class=SubscriptableIterableRelationship,
1019                        child_relationship_param=i
1020                    )
1021                    if other.item is notpresent:
1022                        self._report_result('iterable_item_added', change_level)
1023                    else:
1024                        parents_ids_added = add_to_frozen_set(parents_ids, item_id)
1025                        self._diff(change_level, parents_ids_added)
1026            for hash_value in hashes_removed:
1027                if self._count_diff() is StopIteration:
1028                    return  # pragma: no cover. This is already covered for addition.
1029                other = get_other_pair(hash_value, in_t1=False)
1030                item_id = id(other.item)
1031                for i in t1_hashtable[hash_value].indexes:
1032                    change_level = level.branch_deeper(
1033                        t1_hashtable[hash_value].item,
1034                        other.item,
1035                        child_relationship_class=SubscriptableIterableRelationship,
1036                        child_relationship_param=i)
1037                    if other.item is notpresent:
1038                        self._report_result('iterable_item_removed', change_level)
1039                    else:
1040                        # I was not able to make a test case for the following 2 lines since the cases end up
1041                        # getting resolved above in the hashes_added calcs. However I am leaving these 2 lines
1042                        # in case things change in future.
1043                        parents_ids_added = add_to_frozen_set(parents_ids, item_id)  # pragma: no cover.
1044                        self._diff(change_level, parents_ids_added)  # pragma: no cover.
1045
1046            items_intersect = t2_hashes.intersection(t1_hashes)
1047
1048            for hash_value in items_intersect:
1049                t1_indexes = t1_hashtable[hash_value].indexes
1050                t2_indexes = t2_hashtable[hash_value].indexes
1051                t1_indexes_len = len(t1_indexes)
1052                t2_indexes_len = len(t2_indexes)
1053                if t1_indexes_len != t2_indexes_len:  # this is a repetition change!
1054                    # create "change" entry, keep current level untouched to handle further changes
1055                    repetition_change_level = level.branch_deeper(
1056                        t1_hashtable[hash_value].item,
1057                        t2_hashtable[hash_value].item,  # nb: those are equal!
1058                        child_relationship_class=SubscriptableIterableRelationship,
1059                        child_relationship_param=t1_hashtable[hash_value]
1060                        .indexes[0])
1061                    repetition_change_level.additional['repetition'] = RemapDict(
1062                        old_repeat=t1_indexes_len,
1063                        new_repeat=t2_indexes_len,
1064                        old_indexes=t1_indexes,
1065                        new_indexes=t2_indexes)
1066                    self._report_result('repetition_change',
1067                                         repetition_change_level)
1068
1069        else:
1070            for hash_value in hashes_added:
1071                if self._count_diff() is StopIteration:
1072                    return
1073                other = get_other_pair(hash_value)
1074                item_id = id(other.item)
1075                index = t2_hashtable[hash_value].indexes[0] if other.item is notpresent else other.indexes[0]
1076                change_level = level.branch_deeper(
1077                    other.item,
1078                    t2_hashtable[hash_value].item,
1079                    child_relationship_class=SubscriptableIterableRelationship,
1080                    child_relationship_param=index)
1081                if other.item is notpresent:
1082                    self._report_result('iterable_item_added', change_level)
1083                else:
1084                    parents_ids_added = add_to_frozen_set(parents_ids, item_id)
1085                    self._diff(change_level, parents_ids_added)
1086
1087            for hash_value in hashes_removed:
1088                if self._count_diff() is StopIteration:
1089                    return  # pragma: no cover. This is already covered for addition.
1090                other = get_other_pair(hash_value, in_t1=False)
1091                item_id = id(other.item)
1092                change_level = level.branch_deeper(
1093                    t1_hashtable[hash_value].item,
1094                    other.item,
1095                    child_relationship_class=SubscriptableIterableRelationship,
1096                    child_relationship_param=t1_hashtable[hash_value].indexes[
1097                        0])
1098                if other.item is notpresent:
1099                    self._report_result('iterable_item_removed', change_level)
1100                else:
1101                    # Just like the case when report_repetition = True, these lines never run currently.
1102                    # However they will stay here in case things change in future.
1103                    parents_ids_added = add_to_frozen_set(parents_ids, item_id)  # pragma: no cover.
1104                    self._diff(change_level, parents_ids_added)  # pragma: no cover.
1105
1106    def _diff_booleans(self, level):
1107        if level.t1 != level.t2:
1108            self._report_result('values_changed', level)
1109
1110    def _diff_numbers(self, level):
1111        """Diff Numbers"""
1112        t1_type = "number" if self.ignore_numeric_type_changes else level.t1.__class__.__name__
1113        t2_type = "number" if self.ignore_numeric_type_changes else level.t2.__class__.__name__
1114
1115        if self.math_epsilon is not None:
1116            if not is_close(level.t1, level.t2, abs_tol=self.math_epsilon):
1117                self._report_result('values_changed', level)
1118        elif self.significant_digits is None:
1119            if level.t1 != level.t2:
1120                self._report_result('values_changed', level)
1121        else:
1122            # Bernhard10: I use string formatting for comparison, to be consistent with usecases where
1123            # data is read from files that were previousely written from python and
1124            # to be consistent with on-screen representation of numbers.
1125            # Other options would be abs(t1-t2)<10**-self.significant_digits
1126            # or math.is_close (python3.5+)
1127            # Note that abs(3.25-3.251) = 0.0009999999999998899 < 0.001
1128            # Note also that "{:.3f}".format(1.1135) = 1.113, but "{:.3f}".format(1.11351) = 1.114
1129            # For Decimals, format seems to round 2.5 to 2 and 3.5 to 4 (to closest even number)
1130            t1_s = self.number_to_string(level.t1,
1131                                         significant_digits=self.significant_digits,
1132                                         number_format_notation=self.number_format_notation)
1133            t2_s = self.number_to_string(level.t2,
1134                                         significant_digits=self.significant_digits,
1135                                         number_format_notation=self.number_format_notation)
1136
1137            t1_s = KEY_TO_VAL_STR.format(t1_type, t1_s)
1138            t2_s = KEY_TO_VAL_STR.format(t2_type, t2_s)
1139            if t1_s != t2_s:
1140                self._report_result('values_changed', level)
1141
1142    def _diff_datetimes(self, level):
1143        """Diff DateTimes"""
1144        if self.truncate_datetime:
1145            level.t1 = datetime_normalize(self.truncate_datetime, level.t1)
1146            level.t2 = datetime_normalize(self.truncate_datetime, level.t2)
1147
1148        if level.t1 != level.t2:
1149            self._report_result('values_changed', level)
1150
1151    def _diff_numpy_array(self, level, parents_ids=frozenset()):
1152        """Diff numpy arrays"""
1153        if level.path() not in self._numpy_paths:
1154            self._numpy_paths[level.path()] = get_type(level.t2).__name__
1155        if np is None:
1156            # This line should never be run. If it is ever called means the type check detected a numpy array
1157            # which means numpy module needs to be available. So np can't be None.
1158            raise ImportError(CANT_FIND_NUMPY_MSG)  # pragma: no cover
1159
1160        if not self.ignore_order_func(level):
1161            # fast checks
1162            if self.significant_digits is None:
1163                if np.array_equal(level.t1, level.t2):
1164                    return  # all good
1165            else:
1166                try:
1167                    np.testing.assert_almost_equal(level.t1, level.t2, decimal=self.significant_digits)
1168                    return  # all good
1169                except AssertionError:
1170                    pass    # do detailed checking below
1171
1172        # compare array meta-data
1173        _original_type = level.t1.dtype
1174        if level.t1.shape != level.t2.shape:
1175            # arrays are converted to python lists so that certain features of DeepDiff can apply on them easier.
1176            # They will be converted back to Numpy at their final dimension.
1177            level.t1 = level.t1.tolist()
1178            level.t2 = level.t2.tolist()
1179            self._diff_iterable(level, parents_ids, _original_type=_original_type)
1180        else:
1181            # metadata same -- the difference is in the content
1182            shape = level.t1.shape
1183            dimensions = len(shape)
1184            if dimensions == 1:
1185                self._diff_iterable(level, parents_ids, _original_type=_original_type)
1186            elif self.ignore_order_func(level):
1187                # arrays are converted to python lists so that certain features of DeepDiff can apply on them easier.
1188                # They will be converted back to Numpy at their final dimension.
1189                level.t1 = level.t1.tolist()
1190                level.t2 = level.t2.tolist()
1191                self._diff_iterable_with_deephash(level, parents_ids, _original_type=_original_type)
1192            else:
1193                for (t1_path, t1_row), (t2_path, t2_row) in zip(
1194                        get_numpy_ndarray_rows(level.t1, shape),
1195                        get_numpy_ndarray_rows(level.t2, shape)):
1196
1197                    new_level = level.branch_deeper(
1198                        t1_row,
1199                        t2_row,
1200                        child_relationship_class=NumpyArrayRelationship,
1201                        child_relationship_param=t1_path)
1202
1203                    self._diff_iterable_in_order(new_level, parents_ids, _original_type=_original_type)
1204
1205    def _diff_types(self, level):
1206        """Diff types"""
1207        level.report_type = 'type_changes'
1208        self._report_result('type_changes', level)
1209
1210    def _count_diff(self):
1211        if (self.max_diffs is not None and self._stats[DIFF_COUNT] > self.max_diffs):
1212            if not self._stats[MAX_DIFF_LIMIT_REACHED]:
1213                self._stats[MAX_DIFF_LIMIT_REACHED] = True
1214                logger.warning(MAX_DIFFS_REACHED_MSG.format(self.max_diffs))
1215            return StopIteration
1216        self._stats[DIFF_COUNT] += 1
1217        if self.cache_size and self.cache_tuning_sample_size:
1218            self._auto_tune_cache()
1219
1220    def _auto_tune_cache(self):
1221        take_sample = (self._stats[DIFF_COUNT] % self.cache_tuning_sample_size == 0)
1222        if self.cache_tuning_sample_size:
1223            if self._stats[DISTANCE_CACHE_ENABLED]:
1224                if take_sample:
1225                    self._auto_off_cache()
1226            # Turn on the cache once in a while
1227            elif self._stats[DIFF_COUNT] % self._shared_parameters[_ENABLE_CACHE_EVERY_X_DIFF] == 0:
1228                self.progress_logger('Re-enabling the distance and level caches.')
1229                # decreasing the sampling frequency
1230                self._shared_parameters[_ENABLE_CACHE_EVERY_X_DIFF] *= 10
1231                self._stats[DISTANCE_CACHE_ENABLED] = True
1232        if take_sample:
1233            for key in (PREVIOUS_DIFF_COUNT, PREVIOUS_DISTANCE_CACHE_HIT_COUNT):
1234                self._stats[key] = self._stats[key[9:]]
1235
1236    def _auto_off_cache(self):
1237        """
1238        Auto adjust the cache based on the usage
1239        """
1240        if self._stats[DISTANCE_CACHE_ENABLED]:
1241            angle = (self._stats[DISTANCE_CACHE_HIT_COUNT] - self._stats['PREVIOUS {}'.format(DISTANCE_CACHE_HIT_COUNT)]) / (self._stats[DIFF_COUNT] - self._stats[PREVIOUS_DIFF_COUNT])
1242            if angle < self.CACHE_AUTO_ADJUST_THRESHOLD:
1243                self._stats[DISTANCE_CACHE_ENABLED] = False
1244                self.progress_logger('Due to minimal cache hits, {} is disabled.'.format('distance cache'))
1245
1246    def _use_custom_operator(self, level):
1247        """
1248        For each level we check all custom operators.
1249        If any one of them was a match for the level, we run the diff of the operator.
1250        If the operator returned True, the operator must have decided these objects should not
1251        be compared anymore. It might have already reported their results.
1252        In that case the report will appear in the final results of this diff.
1253        Otherwise basically the 2 objects in the level are being omitted from the results.
1254        """
1255
1256        # used = False
1257
1258        # for operator in self.custom_operators:
1259        #     if operator.match(level):
1260        #         prevent_default = operator.diff(level, self)
1261        #         used = True if prevent_default is None else prevent_default
1262
1263        # return used
1264
1265        for operator in self.custom_operators:
1266            if operator.match(level):
1267                prevent_default = operator.give_up_diffing(level=level, diff_instance=self)
1268                if prevent_default:
1269                    return True
1270
1271        return False
1272
1273    def _diff(self, level, parents_ids=frozenset(), _original_type=None):
1274        """
1275        The main diff method
1276
1277        **parameters**
1278
1279        level: the tree level or tree node
1280        parents_ids: the ids of all the parent objects in the tree from the current node.
1281        _original_type: If the objects had an original type that was different than what currently exists in the level.t1 and t2
1282        """
1283        if self._count_diff() is StopIteration:
1284            return
1285
1286        if self._use_custom_operator(level):
1287            return
1288
1289        if level.t1 is level.t2:
1290            return
1291
1292        if self._skip_this(level):
1293            return
1294
1295        if get_type(level.t1) != get_type(level.t2):
1296            report_type_change = True
1297            for type_group in self.ignore_type_in_groups:
1298                if self.type_check_func(level.t1, type_group) and self.type_check_func(level.t2, type_group):
1299                    report_type_change = False
1300                    break
1301            if report_type_change:
1302                self._diff_types(level)
1303                return
1304            # This is an edge case where t1=None or t2=None and None is in the ignore type group.
1305            if level.t1 is None or level.t2 is None:
1306                self._report_result('values_changed', level)
1307                return
1308
1309        if self.ignore_nan_inequality and isinstance(level.t1, float) and str(level.t1) == str(level.t2) == 'nan':
1310            return
1311
1312        if isinstance(level.t1, booleans):
1313            self._diff_booleans(level)
1314
1315        if isinstance(level.t1, strings):
1316            self._diff_str(level)
1317
1318        elif isinstance(level.t1, times):
1319            self._diff_datetimes(level)
1320
1321        elif isinstance(level.t1, numbers):
1322            self._diff_numbers(level)
1323
1324        elif isinstance(level.t1, Mapping):
1325            self._diff_dict(level, parents_ids)
1326
1327        elif isinstance(level.t1, tuple):
1328            self._diff_tuple(level, parents_ids)
1329
1330        elif isinstance(level.t1, (set, frozenset, OrderedSet)):
1331            self._diff_set(level)
1332
1333        elif isinstance(level.t1, np_ndarray):
1334            self._diff_numpy_array(level, parents_ids)
1335
1336        elif isinstance(level.t1, Iterable):
1337            self._diff_iterable(level, parents_ids, _original_type=_original_type)
1338
1339        else:
1340            self._diff_obj(level, parents_ids)
1341
1342    def _get_view_results(self, view):
1343        """
1344        Get the results based on the view
1345        """
1346        result = self.tree
1347        if not self.report_repetition:  # and self.is_root:
1348            result.mutual_add_removes_to_become_value_changes()
1349        if view == TREE_VIEW:
1350            pass
1351        elif view == TEXT_VIEW:
1352            result = TextResult(tree_results=self.tree, verbose_level=self.verbose_level)
1353            result.remove_empty_keys()
1354        elif view == DELTA_VIEW:
1355            result = self._to_delta_dict(report_repetition_required=False)
1356        else:
1357            raise ValueError(INVALID_VIEW_MSG.format(view))
1358        return result
1359
1360    def _group_iterable_to_dict(self, item, group_by, item_name):
1361        """
1362        Convert a list of dictionaries into a dictionary of dictionaries
1363        where the key is the value of the group_by key in each dictionary.
1364        """
1365        if isinstance(item, Iterable) and not isinstance(item, Mapping):
1366            result = {}
1367            item_copy = deepcopy(item)
1368            for row in item_copy:
1369                if isinstance(row, Mapping):
1370                    try:
1371                        key = row.pop(group_by)
1372                    except KeyError:
1373                        logger.error("Unable to group {} by {}. The key is missing in {}".format(item_name, group_by, row))
1374                        raise
1375                    result[key] = row
1376                else:
1377                    msg = "Unable to group {} by {} since the item {} is not a dictionary.".format(item_name, group_by, row)
1378                    logger.error(msg)
1379                    raise ValueError(msg)
1380            return result
1381        msg = "Unable to group {} by {}".format(item_name, group_by)
1382        logger.error(msg)
1383        raise ValueError(msg)
1384
1385    def get_stats(self):
1386        """
1387        Get some stats on internals of the DeepDiff run.
1388        """
1389        return self._stats
1390
1391
1392if __name__ == "__main__":  # pragma: no cover
1393    import doctest
1394    doctest.testmod()
1395