1"""
2 Calculate the difference between two dictionaries as:
3    (1) items added
4    (2) items removed
5    (3) keys same in both but changed values
6    (4) keys same in both and unchanged values
7
8  Originally posted at http://stackoverflow.com/questions/1165352/fast-comparison-between-two-python-dictionary/1165552#1165552
9  Available at repository: https://github.com/hughdbrown/dictdiffer
10
11  Added the ability to recursively compare dictionaries
12"""
13
14import copy
15from collections.abc import Mapping
16
17
18def diff(current_dict, past_dict):
19    return DictDiffer(current_dict, past_dict)
20
21
22class DictDiffer:
23    """
24    Calculate the difference between two dictionaries as:
25    (1) items added
26    (2) items removed
27    (3) keys same in both but changed values
28    (4) keys same in both and unchanged values
29    """
30
31    def __init__(self, current_dict, past_dict):
32        self.current_dict, self.past_dict = current_dict, past_dict
33        self.set_current, self.set_past = set(list(current_dict)), set(list(past_dict))
34        self.intersect = self.set_current.intersection(self.set_past)
35
36    def added(self):
37        return self.set_current - self.intersect
38
39    def removed(self):
40        return self.set_past - self.intersect
41
42    def changed(self):
43        return {o for o in self.intersect if self.past_dict[o] != self.current_dict[o]}
44
45    def unchanged(self):
46        return {o for o in self.intersect if self.past_dict[o] == self.current_dict[o]}
47
48
49def deep_diff(old, new, ignore=None):
50    ignore = ignore or []
51    res = {}
52    old = copy.deepcopy(old)
53    new = copy.deepcopy(new)
54    stack = [(old, new, False)]
55
56    while stack:
57        tmps = []
58        tmp_old, tmp_new, reentrant = stack.pop()
59        for key in set(list(tmp_old) + list(tmp_new)):
60            if key in tmp_old and key in tmp_new and tmp_old[key] == tmp_new[key]:
61                del tmp_old[key]
62                del tmp_new[key]
63                continue
64            if not reentrant:
65                if key in tmp_old and key in ignore:
66                    del tmp_old[key]
67                if key in tmp_new and key in ignore:
68                    del tmp_new[key]
69                if isinstance(tmp_old.get(key), Mapping) and isinstance(
70                    tmp_new.get(key), Mapping
71                ):
72                    tmps.append((tmp_old[key], tmp_new[key], False))
73        if tmps:
74            stack.extend([(tmp_old, tmp_new, True)] + tmps)
75    if old:
76        res["old"] = old
77    if new:
78        res["new"] = new
79    return res
80
81
82def recursive_diff(past_dict, current_dict, ignore_missing_keys=True):
83    """
84    Returns a RecursiveDictDiffer object that computes the recursive diffs
85    between two dictionaries
86
87    past_dict
88            Past dictionary
89
90    current_dict
91        Current dictionary
92
93    ignore_missing_keys
94        Flag specifying whether to ignore keys that no longer exist in the
95        current_dict, but exist in the past_dict. If true, the diff will
96        not contain the missing keys.
97        Default is True.
98    """
99    return RecursiveDictDiffer(past_dict, current_dict, ignore_missing_keys)
100
101
102class RecursiveDictDiffer(DictDiffer):
103    """
104    Calculates a recursive diff between the current_dict and the past_dict
105    creating a diff in the format
106
107    {'new': new_value, 'old': old_value}
108
109    It recursively searches differences in common keys whose values are
110    dictionaries creating a diff dict in the format
111
112    {'common_key' : {'new': new_value, 'old': old_value}
113
114    The class overrides all DictDiffer methods, returning lists of keys and
115    subkeys using the . notation (i.e 'common_key1.common_key2.changed_key')
116
117    The class provides access to:
118        (1) the added, removed, changes keys and subkeys (using the . notation)
119               ``added``, ``removed``, ``changed`` methods
120        (2) the diffs in the format above (diff property)
121                ``diffs`` property
122        (3) a dict with the new changed values only (new_values property)
123                ``new_values`` property
124        (4) a dict with the old changed values only (old_values property)
125                ``old_values`` property
126        (5) a string representation of the changes in the format:
127                ``changes_str`` property
128
129    Note:
130        The <_null_> value is a reserved value
131
132    .. code-block:: text
133
134        common_key1:
135          common_key2:
136            changed_key1 from '<old_str>' to '<new_str>'
137            changed_key2 from '[<old_elem1>, ..]' to '[<new_elem1>, ..]'
138        common_key3:
139          changed_key3 from <old_int> to <new_int>
140
141    """
142
143    NONE_VALUE = "<_null_>"
144
145    def __init__(self, past_dict, current_dict, ignore_missing_keys):
146        """
147        past_dict
148            Past dictionary.
149
150        current_dict
151            Current dictionary.
152
153        ignore_missing_keys
154            Flag specifying whether to ignore keys that no longer exist in the
155            current_dict, but exist in the past_dict. If true, the diff will
156            not contain the missing keys.
157        """
158        super().__init__(current_dict, past_dict)
159        self._diffs = self._get_diffs(
160            self.current_dict, self.past_dict, ignore_missing_keys
161        )
162        # Ignores unet values when assessing the changes
163        self.ignore_unset_values = True
164
165    @classmethod
166    def _get_diffs(cls, dict1, dict2, ignore_missing_keys):
167        """
168        Returns a dict with the differences between dict1 and dict2
169
170        Notes:
171            Keys that only exist in dict2 are not included in the diff if
172            ignore_missing_keys is True, otherwise they are
173            Simple compares are done on lists
174        """
175        ret_dict = {}
176        for p in dict1.keys():
177            if p not in dict2:
178                ret_dict.update({p: {"new": dict1[p], "old": cls.NONE_VALUE}})
179            elif dict1[p] != dict2[p]:
180                if isinstance(dict1[p], dict) and isinstance(dict2[p], dict):
181                    sub_diff_dict = cls._get_diffs(
182                        dict1[p], dict2[p], ignore_missing_keys
183                    )
184                    if sub_diff_dict:
185                        ret_dict.update({p: sub_diff_dict})
186                else:
187                    ret_dict.update({p: {"new": dict1[p], "old": dict2[p]}})
188        if not ignore_missing_keys:
189            for p in dict2.keys():
190                if p not in dict1.keys():
191                    ret_dict.update({p: {"new": cls.NONE_VALUE, "old": dict2[p]}})
192        return ret_dict
193
194    @classmethod
195    def _get_values(cls, diff_dict, type="new"):
196        """
197        Returns a dictionaries with the 'new' values in a diff dict.
198
199        type
200            Which values to return, 'new' or 'old'
201        """
202        ret_dict = {}
203        for p in diff_dict.keys():
204            if type in diff_dict[p].keys():
205                ret_dict.update({p: diff_dict[p][type]})
206            else:
207                ret_dict.update({p: cls._get_values(diff_dict[p], type=type)})
208        return ret_dict
209
210    @classmethod
211    def _get_changes(cls, diff_dict):
212        """
213        Returns a list of string message with the differences in a diff dict.
214
215        Each inner difference is tabulated two space deeper
216        """
217        changes_strings = []
218        for p in sorted(diff_dict.keys()):
219            if sorted(diff_dict[p].keys()) == ["new", "old"]:
220                # Some string formatting
221                old_value = diff_dict[p]["old"]
222                if diff_dict[p]["old"] == cls.NONE_VALUE:
223                    old_value = "nothing"
224                elif isinstance(diff_dict[p]["old"], str):
225                    old_value = "'{}'".format(diff_dict[p]["old"])
226                elif isinstance(diff_dict[p]["old"], list):
227                    old_value = "'{}'".format(", ".join(diff_dict[p]["old"]))
228                new_value = diff_dict[p]["new"]
229                if diff_dict[p]["new"] == cls.NONE_VALUE:
230                    new_value = "nothing"
231                elif isinstance(diff_dict[p]["new"], str):
232                    new_value = "'{}'".format(diff_dict[p]["new"])
233                elif isinstance(diff_dict[p]["new"], list):
234                    new_value = "'{}'".format(", ".join(diff_dict[p]["new"]))
235                changes_strings.append(
236                    "{} from {} to {}".format(p, old_value, new_value)
237                )
238            else:
239                sub_changes = cls._get_changes(diff_dict[p])
240                if sub_changes:
241                    changes_strings.append("{}:".format(p))
242                    changes_strings.extend(["  {}".format(c) for c in sub_changes])
243        return changes_strings
244
245    def added(self):
246        """
247        Returns all keys that have been added.
248
249        If the keys are in child dictionaries they will be represented with
250        . notation
251        """
252
253        def _added(diffs, prefix):
254            keys = []
255            for key in diffs.keys():
256                if isinstance(diffs[key], dict) and "old" not in diffs[key]:
257                    keys.extend(_added(diffs[key], prefix="{}{}.".format(prefix, key)))
258                elif diffs[key]["old"] == self.NONE_VALUE:
259                    if isinstance(diffs[key]["new"], dict):
260                        keys.extend(
261                            _added(
262                                diffs[key]["new"], prefix="{}{}.".format(prefix, key)
263                            )
264                        )
265                    else:
266                        keys.append("{}{}".format(prefix, key))
267            return keys
268
269        return sorted(_added(self._diffs, prefix=""))
270
271    def removed(self):
272        """
273        Returns all keys that have been removed.
274
275        If the keys are in child dictionaries they will be represented with
276        . notation
277        """
278
279        def _removed(diffs, prefix):
280            keys = []
281            for key in diffs.keys():
282                if isinstance(diffs[key], dict) and "old" not in diffs[key]:
283                    keys.extend(
284                        _removed(diffs[key], prefix="{}{}.".format(prefix, key))
285                    )
286                elif diffs[key]["new"] == self.NONE_VALUE:
287                    keys.append("{}{}".format(prefix, key))
288                elif isinstance(diffs[key]["new"], dict):
289                    keys.extend(
290                        _removed(diffs[key]["new"], prefix="{}{}.".format(prefix, key))
291                    )
292            return keys
293
294        return sorted(_removed(self._diffs, prefix=""))
295
296    def changed(self):
297        """
298        Returns all keys that have been changed.
299
300        If the keys are in child dictionaries they will be represented with
301        . notation
302        """
303
304        def _changed(diffs, prefix):
305            keys = []
306            for key in diffs.keys():
307                if not isinstance(diffs[key], dict):
308                    continue
309
310                if isinstance(diffs[key], dict) and "old" not in diffs[key]:
311                    keys.extend(
312                        _changed(diffs[key], prefix="{}{}.".format(prefix, key))
313                    )
314                    continue
315                if self.ignore_unset_values:
316                    if (
317                        "old" in diffs[key]
318                        and "new" in diffs[key]
319                        and diffs[key]["old"] != self.NONE_VALUE
320                        and diffs[key]["new"] != self.NONE_VALUE
321                    ):
322                        if isinstance(diffs[key]["new"], dict):
323                            keys.extend(
324                                _changed(
325                                    diffs[key]["new"],
326                                    prefix="{}{}.".format(prefix, key),
327                                )
328                            )
329                        else:
330                            keys.append("{}{}".format(prefix, key))
331                    elif isinstance(diffs[key], dict):
332                        keys.extend(
333                            _changed(diffs[key], prefix="{}{}.".format(prefix, key))
334                        )
335                else:
336                    if "old" in diffs[key] and "new" in diffs[key]:
337                        if isinstance(diffs[key]["new"], dict):
338                            keys.extend(
339                                _changed(
340                                    diffs[key]["new"],
341                                    prefix="{}{}.".format(prefix, key),
342                                )
343                            )
344                        else:
345                            keys.append("{}{}".format(prefix, key))
346                    elif isinstance(diffs[key], dict):
347                        keys.extend(
348                            _changed(diffs[key], prefix="{}{}.".format(prefix, key))
349                        )
350
351            return keys
352
353        return sorted(_changed(self._diffs, prefix=""))
354
355    def unchanged(self):
356        """
357        Returns all keys that have been unchanged.
358
359        If the keys are in child dictionaries they will be represented with
360        . notation
361        """
362
363        def _unchanged(current_dict, diffs, prefix):
364            keys = []
365            for key in current_dict.keys():
366                if key not in diffs:
367                    keys.append("{}{}".format(prefix, key))
368                elif isinstance(current_dict[key], dict):
369                    if "new" in diffs[key]:
370                        # There is a diff
371                        continue
372                    else:
373                        keys.extend(
374                            _unchanged(
375                                current_dict[key],
376                                diffs[key],
377                                prefix="{}{}.".format(prefix, key),
378                            )
379                        )
380
381            return keys
382
383        return sorted(_unchanged(self.current_dict, self._diffs, prefix=""))
384
385    @property
386    def diffs(self):
387        """Returns a dict with the recursive diffs current_dict - past_dict"""
388        return self._diffs
389
390    @property
391    def new_values(self):
392        """Returns a dictionary with the new values"""
393        return self._get_values(self._diffs, type="new")
394
395    @property
396    def old_values(self):
397        """Returns a dictionary with the old values"""
398        return self._get_values(self._diffs, type="old")
399
400    @property
401    def changes_str(self):
402        """Returns a string describing the changes"""
403        return "\n".join(self._get_changes(self._diffs))
404