1""" 2 Calculate the difference between two dictionaries as: 3 (1) items added 4 (2) items removed 5 (3) keys same in both but changed values 6 (4) keys same in both and unchanged values 7 8 Originally posted at http://stackoverflow.com/questions/1165352/fast-comparison-between-two-python-dictionary/1165552#1165552 9 Available at repository: https://github.com/hughdbrown/dictdiffer 10 11 Added the ability to recursively compare dictionaries 12""" 13 14import copy 15from collections.abc import Mapping 16 17 18def diff(current_dict, past_dict): 19 return DictDiffer(current_dict, past_dict) 20 21 22class DictDiffer: 23 """ 24 Calculate the difference between two dictionaries as: 25 (1) items added 26 (2) items removed 27 (3) keys same in both but changed values 28 (4) keys same in both and unchanged values 29 """ 30 31 def __init__(self, current_dict, past_dict): 32 self.current_dict, self.past_dict = current_dict, past_dict 33 self.set_current, self.set_past = set(list(current_dict)), set(list(past_dict)) 34 self.intersect = self.set_current.intersection(self.set_past) 35 36 def added(self): 37 return self.set_current - self.intersect 38 39 def removed(self): 40 return self.set_past - self.intersect 41 42 def changed(self): 43 return {o for o in self.intersect if self.past_dict[o] != self.current_dict[o]} 44 45 def unchanged(self): 46 return {o for o in self.intersect if self.past_dict[o] == self.current_dict[o]} 47 48 49def deep_diff(old, new, ignore=None): 50 ignore = ignore or [] 51 res = {} 52 old = copy.deepcopy(old) 53 new = copy.deepcopy(new) 54 stack = [(old, new, False)] 55 56 while stack: 57 tmps = [] 58 tmp_old, tmp_new, reentrant = stack.pop() 59 for key in set(list(tmp_old) + list(tmp_new)): 60 if key in tmp_old and key in tmp_new and tmp_old[key] == tmp_new[key]: 61 del tmp_old[key] 62 del tmp_new[key] 63 continue 64 if not reentrant: 65 if key in tmp_old and key in ignore: 66 del tmp_old[key] 67 if key in tmp_new and key in ignore: 68 del tmp_new[key] 69 if isinstance(tmp_old.get(key), Mapping) and isinstance( 70 tmp_new.get(key), Mapping 71 ): 72 tmps.append((tmp_old[key], tmp_new[key], False)) 73 if tmps: 74 stack.extend([(tmp_old, tmp_new, True)] + tmps) 75 if old: 76 res["old"] = old 77 if new: 78 res["new"] = new 79 return res 80 81 82def recursive_diff(past_dict, current_dict, ignore_missing_keys=True): 83 """ 84 Returns a RecursiveDictDiffer object that computes the recursive diffs 85 between two dictionaries 86 87 past_dict 88 Past dictionary 89 90 current_dict 91 Current dictionary 92 93 ignore_missing_keys 94 Flag specifying whether to ignore keys that no longer exist in the 95 current_dict, but exist in the past_dict. If true, the diff will 96 not contain the missing keys. 97 Default is True. 98 """ 99 return RecursiveDictDiffer(past_dict, current_dict, ignore_missing_keys) 100 101 102class RecursiveDictDiffer(DictDiffer): 103 """ 104 Calculates a recursive diff between the current_dict and the past_dict 105 creating a diff in the format 106 107 {'new': new_value, 'old': old_value} 108 109 It recursively searches differences in common keys whose values are 110 dictionaries creating a diff dict in the format 111 112 {'common_key' : {'new': new_value, 'old': old_value} 113 114 The class overrides all DictDiffer methods, returning lists of keys and 115 subkeys using the . notation (i.e 'common_key1.common_key2.changed_key') 116 117 The class provides access to: 118 (1) the added, removed, changes keys and subkeys (using the . notation) 119 ``added``, ``removed``, ``changed`` methods 120 (2) the diffs in the format above (diff property) 121 ``diffs`` property 122 (3) a dict with the new changed values only (new_values property) 123 ``new_values`` property 124 (4) a dict with the old changed values only (old_values property) 125 ``old_values`` property 126 (5) a string representation of the changes in the format: 127 ``changes_str`` property 128 129 Note: 130 The <_null_> value is a reserved value 131 132 .. code-block:: text 133 134 common_key1: 135 common_key2: 136 changed_key1 from '<old_str>' to '<new_str>' 137 changed_key2 from '[<old_elem1>, ..]' to '[<new_elem1>, ..]' 138 common_key3: 139 changed_key3 from <old_int> to <new_int> 140 141 """ 142 143 NONE_VALUE = "<_null_>" 144 145 def __init__(self, past_dict, current_dict, ignore_missing_keys): 146 """ 147 past_dict 148 Past dictionary. 149 150 current_dict 151 Current dictionary. 152 153 ignore_missing_keys 154 Flag specifying whether to ignore keys that no longer exist in the 155 current_dict, but exist in the past_dict. If true, the diff will 156 not contain the missing keys. 157 """ 158 super().__init__(current_dict, past_dict) 159 self._diffs = self._get_diffs( 160 self.current_dict, self.past_dict, ignore_missing_keys 161 ) 162 # Ignores unet values when assessing the changes 163 self.ignore_unset_values = True 164 165 @classmethod 166 def _get_diffs(cls, dict1, dict2, ignore_missing_keys): 167 """ 168 Returns a dict with the differences between dict1 and dict2 169 170 Notes: 171 Keys that only exist in dict2 are not included in the diff if 172 ignore_missing_keys is True, otherwise they are 173 Simple compares are done on lists 174 """ 175 ret_dict = {} 176 for p in dict1.keys(): 177 if p not in dict2: 178 ret_dict.update({p: {"new": dict1[p], "old": cls.NONE_VALUE}}) 179 elif dict1[p] != dict2[p]: 180 if isinstance(dict1[p], dict) and isinstance(dict2[p], dict): 181 sub_diff_dict = cls._get_diffs( 182 dict1[p], dict2[p], ignore_missing_keys 183 ) 184 if sub_diff_dict: 185 ret_dict.update({p: sub_diff_dict}) 186 else: 187 ret_dict.update({p: {"new": dict1[p], "old": dict2[p]}}) 188 if not ignore_missing_keys: 189 for p in dict2.keys(): 190 if p not in dict1.keys(): 191 ret_dict.update({p: {"new": cls.NONE_VALUE, "old": dict2[p]}}) 192 return ret_dict 193 194 @classmethod 195 def _get_values(cls, diff_dict, type="new"): 196 """ 197 Returns a dictionaries with the 'new' values in a diff dict. 198 199 type 200 Which values to return, 'new' or 'old' 201 """ 202 ret_dict = {} 203 for p in diff_dict.keys(): 204 if type in diff_dict[p].keys(): 205 ret_dict.update({p: diff_dict[p][type]}) 206 else: 207 ret_dict.update({p: cls._get_values(diff_dict[p], type=type)}) 208 return ret_dict 209 210 @classmethod 211 def _get_changes(cls, diff_dict): 212 """ 213 Returns a list of string message with the differences in a diff dict. 214 215 Each inner difference is tabulated two space deeper 216 """ 217 changes_strings = [] 218 for p in sorted(diff_dict.keys()): 219 if sorted(diff_dict[p].keys()) == ["new", "old"]: 220 # Some string formatting 221 old_value = diff_dict[p]["old"] 222 if diff_dict[p]["old"] == cls.NONE_VALUE: 223 old_value = "nothing" 224 elif isinstance(diff_dict[p]["old"], str): 225 old_value = "'{}'".format(diff_dict[p]["old"]) 226 elif isinstance(diff_dict[p]["old"], list): 227 old_value = "'{}'".format(", ".join(diff_dict[p]["old"])) 228 new_value = diff_dict[p]["new"] 229 if diff_dict[p]["new"] == cls.NONE_VALUE: 230 new_value = "nothing" 231 elif isinstance(diff_dict[p]["new"], str): 232 new_value = "'{}'".format(diff_dict[p]["new"]) 233 elif isinstance(diff_dict[p]["new"], list): 234 new_value = "'{}'".format(", ".join(diff_dict[p]["new"])) 235 changes_strings.append( 236 "{} from {} to {}".format(p, old_value, new_value) 237 ) 238 else: 239 sub_changes = cls._get_changes(diff_dict[p]) 240 if sub_changes: 241 changes_strings.append("{}:".format(p)) 242 changes_strings.extend([" {}".format(c) for c in sub_changes]) 243 return changes_strings 244 245 def added(self): 246 """ 247 Returns all keys that have been added. 248 249 If the keys are in child dictionaries they will be represented with 250 . notation 251 """ 252 253 def _added(diffs, prefix): 254 keys = [] 255 for key in diffs.keys(): 256 if isinstance(diffs[key], dict) and "old" not in diffs[key]: 257 keys.extend(_added(diffs[key], prefix="{}{}.".format(prefix, key))) 258 elif diffs[key]["old"] == self.NONE_VALUE: 259 if isinstance(diffs[key]["new"], dict): 260 keys.extend( 261 _added( 262 diffs[key]["new"], prefix="{}{}.".format(prefix, key) 263 ) 264 ) 265 else: 266 keys.append("{}{}".format(prefix, key)) 267 return keys 268 269 return sorted(_added(self._diffs, prefix="")) 270 271 def removed(self): 272 """ 273 Returns all keys that have been removed. 274 275 If the keys are in child dictionaries they will be represented with 276 . notation 277 """ 278 279 def _removed(diffs, prefix): 280 keys = [] 281 for key in diffs.keys(): 282 if isinstance(diffs[key], dict) and "old" not in diffs[key]: 283 keys.extend( 284 _removed(diffs[key], prefix="{}{}.".format(prefix, key)) 285 ) 286 elif diffs[key]["new"] == self.NONE_VALUE: 287 keys.append("{}{}".format(prefix, key)) 288 elif isinstance(diffs[key]["new"], dict): 289 keys.extend( 290 _removed(diffs[key]["new"], prefix="{}{}.".format(prefix, key)) 291 ) 292 return keys 293 294 return sorted(_removed(self._diffs, prefix="")) 295 296 def changed(self): 297 """ 298 Returns all keys that have been changed. 299 300 If the keys are in child dictionaries they will be represented with 301 . notation 302 """ 303 304 def _changed(diffs, prefix): 305 keys = [] 306 for key in diffs.keys(): 307 if not isinstance(diffs[key], dict): 308 continue 309 310 if isinstance(diffs[key], dict) and "old" not in diffs[key]: 311 keys.extend( 312 _changed(diffs[key], prefix="{}{}.".format(prefix, key)) 313 ) 314 continue 315 if self.ignore_unset_values: 316 if ( 317 "old" in diffs[key] 318 and "new" in diffs[key] 319 and diffs[key]["old"] != self.NONE_VALUE 320 and diffs[key]["new"] != self.NONE_VALUE 321 ): 322 if isinstance(diffs[key]["new"], dict): 323 keys.extend( 324 _changed( 325 diffs[key]["new"], 326 prefix="{}{}.".format(prefix, key), 327 ) 328 ) 329 else: 330 keys.append("{}{}".format(prefix, key)) 331 elif isinstance(diffs[key], dict): 332 keys.extend( 333 _changed(diffs[key], prefix="{}{}.".format(prefix, key)) 334 ) 335 else: 336 if "old" in diffs[key] and "new" in diffs[key]: 337 if isinstance(diffs[key]["new"], dict): 338 keys.extend( 339 _changed( 340 diffs[key]["new"], 341 prefix="{}{}.".format(prefix, key), 342 ) 343 ) 344 else: 345 keys.append("{}{}".format(prefix, key)) 346 elif isinstance(diffs[key], dict): 347 keys.extend( 348 _changed(diffs[key], prefix="{}{}.".format(prefix, key)) 349 ) 350 351 return keys 352 353 return sorted(_changed(self._diffs, prefix="")) 354 355 def unchanged(self): 356 """ 357 Returns all keys that have been unchanged. 358 359 If the keys are in child dictionaries they will be represented with 360 . notation 361 """ 362 363 def _unchanged(current_dict, diffs, prefix): 364 keys = [] 365 for key in current_dict.keys(): 366 if key not in diffs: 367 keys.append("{}{}".format(prefix, key)) 368 elif isinstance(current_dict[key], dict): 369 if "new" in diffs[key]: 370 # There is a diff 371 continue 372 else: 373 keys.extend( 374 _unchanged( 375 current_dict[key], 376 diffs[key], 377 prefix="{}{}.".format(prefix, key), 378 ) 379 ) 380 381 return keys 382 383 return sorted(_unchanged(self.current_dict, self._diffs, prefix="")) 384 385 @property 386 def diffs(self): 387 """Returns a dict with the recursive diffs current_dict - past_dict""" 388 return self._diffs 389 390 @property 391 def new_values(self): 392 """Returns a dictionary with the new values""" 393 return self._get_values(self._diffs, type="new") 394 395 @property 396 def old_values(self): 397 """Returns a dictionary with the old values""" 398 return self._get_values(self._diffs, type="old") 399 400 @property 401 def changes_str(self): 402 """Returns a string describing the changes""" 403 return "\n".join(self._get_changes(self._diffs)) 404