#!/usr/bin/env python
"""Tests for DeepHash: hashing, the pre-hash "prep" strings, SHA hashers and helpers."""
import datetime
import logging
import re
from collections import namedtuple
from enum import Enum
from functools import partial

import pytest
from deepdiff import DeepHash
from deepdiff.deephash import (
    prepare_string_for_hashing,
    unprocessed,
    UNPROCESSED_KEY,
    BoolObj,
    HASH_LOOKUP_ERR_MSG,
    combine_hashes_lists,
)
from deepdiff.helper import pypy3, get_id, number_to_string, np

from tests import CustomClass2

# Silence all logging output while the tests run.
logging.disable(logging.CRITICAL)


# Fixture class with one class attribute and two instance attributes.
# NOTE: deliberately documented with comments only (no docstring), because the
# class object itself is hashed in test_hash_class.
class ClassC:
    class_attr = 0

    def __init__(self, a, b=None):
        self.a = a
        self.b = b

    def __str__(self):
        return "({}, {})".format(self.a, self.b)

    __repr__ = __str__


# Only the prep part of DeepHash. We don't need to test the actual hash function.
DeepHashPrep = partial(DeepHash, apply_hash=False)


def prep_str(obj, ignore_string_type_changes=True):
    """Return the prep string the tests expect for a string object.

    When ``ignore_string_type_changes`` is true the object is returned
    unchanged; otherwise it is prefixed with ``'str:'``.
    """
    return obj if ignore_string_type_changes else 'str:{}'.format(obj)


class TestDeepHash:
    """Tests that run DeepHash with its default (real) hasher."""

    def test_dictionary(self):
        """The result is keyed by the hashable items and by the id of the dict."""
        obj = {1: 1}
        result = DeepHash(obj)
        assert set(result.keys()) == {1, get_id(obj)}

    def test_get_hash_by_obj_is_the_same_as_by_obj_get_id(self):
        """A nested item's hash can be looked up by the item itself."""
        a = "a"
        obj = {1: a}
        result = DeepHash(obj)
        assert result[a]

    def test_deephash_repr(self):
        """repr() of the result looks like a plain dict of object -> hash."""
        obj = "a"
        result = DeepHash(obj)
        assert "{'a': '980410da9522db17c3ab8743541f192a5ab27772a6154dbc7795ee909e653a5c'}" == repr(result)

    def test_deephash_values(self):
        """values() yields the hashes."""
        obj = "a"
        result = list(DeepHash(obj).values())
        assert ['980410da9522db17c3ab8743541f192a5ab27772a6154dbc7795ee909e653a5c'] == result

    def test_deephash_keys(self):
        """keys() yields the hashed objects."""
        obj = "a"
        result = list(DeepHash(obj).keys())
        assert ["a"] == result

    def test_deephash_items(self):
        """items() yields (object, hash) pairs."""
        obj = "a"
        result = list(DeepHash(obj).items())
        assert [('a', '980410da9522db17c3ab8743541f192a5ab27772a6154dbc7795ee909e653a5c')] == result

    def test_get_hash_by_obj_when_does_not_exist(self):
        """Looking up an object that was never hashed raises KeyError."""
        a = "a"
        obj = {1: a}
        result = DeepHash(obj)
        with pytest.raises(KeyError):
            result[2]

    def test_datetime(self):
        """The same datetime object hashes to the same value in two runs."""
        now = datetime.datetime.now()
        a = b = now
        a_hash = DeepHash(a)
        b_hash = DeepHash(b)
        assert a_hash[a] == b_hash[b]

    def test_datetime_truncate(self):
        """Datetimes that agree up to the truncation unit hash equally."""
        a = datetime.datetime(2020, 5, 17, 22, 15, 34, 913070)
        b = datetime.datetime(2020, 5, 17, 22, 15, 39, 296583)
        c = datetime.datetime(2020, 5, 17, 22, 15, 34, 500000)

        # a and b differ only in seconds and microseconds.
        a_hash = DeepHash(a, truncate_datetime='minute')
        b_hash = DeepHash(b, truncate_datetime='minute')
        assert a_hash[a] == b_hash[b]

        # a and c differ only in microseconds.
        a_hash = DeepHash(a, truncate_datetime='second')
        c_hash = DeepHash(c, truncate_datetime='second')
        assert a_hash[a] == c_hash[c]

    def test_get_reserved_keyword(self):
        """_getitem returns the first tuple element for normal keys, the raw value for reserved keys."""
        hashes = {UNPROCESSED_KEY: 'full item', 'key1': ('item', 'count')}
        result = DeepHash._getitem(hashes, obj='key1')
        assert 'item' == result
        # For reserved keys, it should just grab the object instead of grabbing an item in the tuple object.
        result = DeepHash._getitem(hashes, obj=UNPROCESSED_KEY)
        assert 'full item' == result

    def test_get_key(self):
        """get_key returns the supplied default when the key is missing."""
        hashes = {'key1': ('item', 'count')}
        result = DeepHash.get_key(hashes, key='key2', default='banana')
        assert 'banana' == result

    def test_list_of_sets(self):
        """Unhashable containers are keyed by id; their hashable members by value."""
        a = {1}
        b = {2}
        obj = [a, b]
        result = DeepHash(obj)
        expected_result = {1, 2, get_id(a), get_id(b), get_id(obj)}
        assert set(result.keys()) == expected_result

    def test_bad_attribute(self):
        """An object whose attributes cannot be read is reported as unprocessed."""
        # Declares slots but raises AttributeError on every attribute lookup that
        # falls through to __getattr__.
        class Bad:
            __slots__ = ['x', 'y']

            def __getattr__(self, key):
                raise AttributeError("Bad item")

            def __str__(self):
                return "Bad Object"

            def __repr__(self):
                return "<Bad obj id {}>".format(id(self))

        t1 = Bad()

        result = DeepHash(t1)
        expected_result = {t1: unprocessed, UNPROCESSED_KEY: [t1]}
        assert expected_result == result

    def test_built_in_hash_not_sensitive_to_bytecode_vs_unicode(self):
        """With ignore_string_type_changes, str and bytes hash equally (default hasher)."""
        a = 'hello'
        b = b'hello'
        a_hash = DeepHash(a, ignore_string_type_changes=True)[a]
        b_hash = DeepHash(b, ignore_string_type_changes=True)[b]
        assert a_hash == b_hash

    def test_sha1_hash_not_sensitive_to_bytecode_vs_unicode(self):
        """With ignore_string_type_changes, str and bytes hash equally (sha1 hasher)."""
        a = 'hello'
        b = b'hello'
        a_hash = DeepHash(a, ignore_string_type_changes=True, hasher=DeepHash.sha1hex)[a]
        b_hash = DeepHash(b, ignore_string_type_changes=True, hasher=DeepHash.sha1hex)[b]
        assert a_hash == b_hash


class TestDeepHashPrep:
    """DeepHashPrep Tests covering object serialization."""

    def test_prep_bool_vs_num1(self):
        """True and 1 produce different keys and different prep strings."""
        assert {BoolObj.TRUE: 'bool:true'} == DeepHashPrep(True)
        assert {1: 'int:1'} == DeepHashPrep(1)

    def test_prep_bool_vs_num2(self):
        """Dict key order does not matter when values mix True and 1."""
        item1 = {
            "Value One": True,
            "Value Two": 1,
        }
        item2 = {
            "Value Two": 1,
            "Value One": True,
        }
        assert DeepHashPrep(item1)[item1] == DeepHashPrep(item2)[item2]

    def test_prep_str(self):
        """A string preps to itself, or to 'str:<value>' when string type changes matter."""
        obj = "a"
        expected_result = {obj: prep_str(obj)}
        result = DeepHashPrep(obj, ignore_string_type_changes=True)
        assert expected_result == result
        expected_result = {obj: prep_str(obj, ignore_string_type_changes=False)}
        result = DeepHashPrep(obj, ignore_string_type_changes=False)
        assert expected_result == result

    def test_dictionary_key_type_change(self):
        """str and bytes dict keys prep equally when string type changes are ignored."""
        obj1 = {"b": 10}
        obj2 = {b"b": 10}

        result1 = DeepHashPrep(obj1, ignore_string_type_changes=True)
        result2 = DeepHashPrep(obj2, ignore_string_type_changes=True)
        assert result1[obj1] == result2[obj2]
        assert result1["b"] == result2[b"b"]

    def test_number_type_change(self):
        """10 and 10.0 differ by default and match with ignore_numeric_type_changes."""
        obj1 = 10
        obj2 = 10.0

        result1 = DeepHashPrep(obj1)
        result2 = DeepHashPrep(obj2)
        assert result1[obj1] != result2[obj2]

        result1 = DeepHashPrep(obj1, ignore_numeric_type_changes=True)
        result2 = DeepHashPrep(obj2, ignore_numeric_type_changes=True)
        assert result1[obj1] == result2[obj2]

    def test_prep_str_fail_if_deephash_leaks_results(self):
        """
        This test fails if DeepHash is getting a mutable copy of hashes
        which means each init of the DeepHash will have hashes from
        the previous init.
        """
        obj1 = "a"
        expected_result = {obj1: prep_str(obj1)}
        result = DeepHashPrep(obj1, ignore_string_type_changes=True)
        assert expected_result == result
        obj2 = "b"
        result = DeepHashPrep(obj2, ignore_string_type_changes=True)
        assert obj1 not in result

    def test_dict_in_dict(self):
        """Both the key and the nested dict are present in the result."""
        obj2 = {2: 3}
        obj = {'a': obj2}
        result = DeepHashPrep(obj, ignore_string_type_changes=True)
        assert 'a' in result
        assert obj2 in result

    def do_list_or_tuple(self, func, func_str):
        """Shared check for list and tuple prep output.

        ``func`` is the container constructor (``list`` or ``tuple``) and
        ``func_str`` is the type prefix expected in the prep string.
        """
        string1 = "a"
        obj = func([string1, 10, 20])
        # Lists are unhashable, so they are keyed by id; tuples are keyed by value.
        if func is list:
            obj_id = get_id(obj)
        else:
            obj_id = obj
        string1_prepped = prep_str(string1)
        expected_result = {
            10: 'int:10',
            20: 'int:20',
            string1: string1_prepped,
            obj_id: '{}:{},int:10,int:20'.format(func_str, string1_prepped),
        }
        result = DeepHashPrep(obj, ignore_string_type_changes=True)
        assert expected_result == result

    def test_list_and_tuple(self):
        """Lists and tuples prep to '<type>:<sorted items>'."""
        for func, func_str in ((list, 'list'), (tuple, 'tuple')):
            self.do_list_or_tuple(func, func_str)

    def test_named_tuples(self):
        """A namedtuple preps to 'nt<Name>:{field:value}'."""
        # When pypy3 runs this test, a difference in its string interning
        # implementation changes the id of x inside the named tuple, so only
        # the namedtuple entry itself is checked there.
        x = "x"
        x_prep = prep_str(x)
        Point = namedtuple('Point', [x])
        obj = Point(x=11)
        result = DeepHashPrep(obj, ignore_string_type_changes=True)
        if pypy3:
            assert result[obj] == "ntPoint:{%s:int:11}" % x
        else:
            expected_result = {
                x: x_prep,
                obj: "ntPoint:{%s:int:11}" % x,
                11: 'int:11',
            }
            assert expected_result == result

    def test_enum(self):
        """Enum members prep by name and value and differ from their raw name/value."""
        class MyEnum(Enum):
            A = 1
            B = 2

        assert DeepHashPrep(MyEnum.A)[MyEnum.A] == r'objMyEnum:{str:_name_:str:A;str:_value_:int:1}'
        assert DeepHashPrep(MyEnum.A) == DeepHashPrep(MyEnum(1))
        assert DeepHashPrep(MyEnum.A) != DeepHashPrep(MyEnum.A.name)
        assert DeepHashPrep(MyEnum.A) != DeepHashPrep(MyEnum.A.value)
        assert DeepHashPrep(MyEnum.A) != DeepHashPrep(MyEnum.B)

    def test_dict_hash(self):
        """A dict preps to 'dict:{key:value;...}' with entries for every key and value."""
        string1 = "a"
        string1_prepped = prep_str(string1)
        key1 = "key1"
        key1_prepped = prep_str(key1)
        obj = {key1: string1, 1: 10, 2: 20}
        expected_result = {
            1: 'int:1',
            10: 'int:10',
            2: 'int:2',
            20: 'int:20',
            key1: key1_prepped,
            string1: string1_prepped,
            get_id(obj): 'dict:{{int:1:int:10;int:2:int:20;{}:{}}}'.format(key1, string1)
        }
        result = DeepHashPrep(obj, ignore_string_type_changes=True)
        assert expected_result == result

    def test_dict_in_list(self):
        """A dict nested in a list is prepped on its own and as part of the list."""
        string1 = "a"
        key1 = "key1"
        dict1 = {key1: string1, 1: 10, 2: 20}
        obj = [0, dict1]
        expected_result = {
            0: 'int:0',
            1: 'int:1',
            10: 'int:10',
            2: 'int:2',
            20: 'int:20',
            key1: key1,
            string1: string1,
            get_id(dict1): 'dict:{int:1:int:10;int:2:int:20;%s:%s}' %
            (key1, string1),
            get_id(obj):
            'list:dict:{int:1:int:10;int:2:int:20;%s:%s},int:0' %
            (key1, string1)
        }
        result = DeepHashPrep(obj, ignore_string_type_changes=True)
        assert expected_result == result

    def test_nested_lists_same_hash(self):
        """Item order inside nested lists does not affect the prep string."""
        t1 = [1, 2, [3, 4]]
        t2 = [[4, 3], 2, 1]
        t1_hash = DeepHashPrep(t1)
        t2_hash = DeepHashPrep(t2)

        assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)]

    def test_nested_lists_same_hash2(self):
        """Order independence holds for lists nested two levels deep."""
        t1 = [1, 2, [3, [4, 5]]]
        t2 = [[[5, 4], 3], 2, 1]
        t1_hash = DeepHashPrep(t1)
        t2_hash = DeepHashPrep(t2)

        assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)]

    def test_nested_lists_same_hash3(self):
        """Order independence holds for lists nested inside dict values."""
        t1 = [{1: [2, 3], 4: [5, [6, 7]]}]
        t2 = [{4: [[7, 6], 5], 1: [3, 2]}]
        t1_hash = DeepHashPrep(t1)
        t2_hash = DeepHashPrep(t2)

        assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)]

    def test_nested_lists_in_dictionary_same_hash(self):
        """A list of dicts preps the same regardless of the order of the dicts."""
        t1 = [{"c": 4}, {"c": 3}]
        t2 = [{"c": 3}, {"c": 4}]
        t1_hash = DeepHashPrep(t1)
        t2_hash = DeepHashPrep(t2)

        assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)]

    def test_same_sets_same_hash(self):
        """Equal sets written in different order prep equally."""
        t1 = {1, 3, 2}
        t2 = {2, 3, 1}
        t1_hash = DeepHashPrep(t1)
        t2_hash = DeepHashPrep(t2)

        assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)]

    @pytest.mark.parametrize("t1, t2, significant_digits, number_format_notation, result", [
        ({0.012, 0.98}, {0.013, 0.99}, 1, "f", 'set:float:0.00,float:1.0'),
        (100000, 100021, 3, "e", 'int:1.000e+05'),
    ])
    def test_similar_significant_hash(self, t1, t2, significant_digits,
                                      number_format_notation, result):
        """Numbers equal up to the requested significant digits prep to ``result``.

        A falsy ``result`` means the two prep strings are expected to differ.
        """
        t1_hash = DeepHashPrep(t1, significant_digits=significant_digits,
                               number_format_notation=number_format_notation)
        t2_hash = DeepHashPrep(t2, significant_digits=significant_digits,
                               number_format_notation=number_format_notation)

        if result:
            assert result == t1_hash[t1] == t2_hash[t2]
        else:
            assert t1_hash[t1] != t2_hash[t2]

    def test_number_to_string_func(self):
        """A custom number_to_string_func is used when prepping numbers."""
        def custom_number_to_string(number, *args, **kwargs):
            # Clamp everything below 100 to 100 before formatting.
            number = 100 if number < 100 else number
            return number_to_string(number, *args, **kwargs)

        t1 = [10, 12, 100000]
        t2 = [50, 63, 100021]
        t1_hash = DeepHashPrep(t1, significant_digits=4, number_format_notation="e",
                               number_to_string_func=custom_number_to_string)
        t2_hash = DeepHashPrep(t2, significant_digits=4, number_format_notation="e",
                               number_to_string_func=custom_number_to_string)

        assert t1_hash[10] == t2_hash[50] == t1_hash[12] == t2_hash[63] != t1_hash[100000]

    def test_same_sets_in_lists_same_hash(self):
        """Order of list items and of set members does not affect the prep string."""
        t1 = ["a", {1, 3, 2}]
        t2 = [{2, 3, 1}, "a"]
        t1_hash = DeepHashPrep(t1)
        t2_hash = DeepHashPrep(t2)

        assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)]

    def test_unknown_parameters(self):
        """An unknown keyword argument raises ValueError."""
        with pytest.raises(ValueError):
            DeepHashPrep(1, wrong_param=2)

    def test_bad_attribute_prep(self):
        """An object whose attributes cannot be read is reported as unprocessed."""
        # Declares slots but raises AttributeError on every attribute lookup that
        # falls through to __getattr__.
        class Bad:
            __slots__ = ['x', 'y']

            def __getattr__(self, key):
                raise AttributeError("Bad item")

            def __str__(self):
                return "Bad Object"

        t1 = Bad()

        result = DeepHashPrep(t1)
        expected_result = {t1: unprocessed, UNPROCESSED_KEY: [t1]}
        assert expected_result == result

    # Fixture classes for test_objects_with_same_content.
    # Burrito and Taco are unrelated classes with identical attributes.
    class Burrito:
        bread = 'flour'

        def __init__(self):
            self.spicy = True

    class Taco:
        bread = 'flour'

        def __init__(self):
            self.spicy = True

    # ClassA and ClassB are unrelated classes with identical attributes;
    # ClassC (shadowing the module-level ClassC inside this class body) subclasses ClassB.
    class ClassA:
        def __init__(self, x, y):
            self.x = x
            self.y = y

    class ClassB:
        def __init__(self, x, y):
            self.x = x
            self.y = y

    class ClassC(ClassB):
        pass

    obj_a = ClassA(1, 2)
    obj_b = ClassB(1, 2)
    obj_c = ClassC(1, 2)

    burrito = Burrito()
    taco = Taco()

    @pytest.mark.parametrize("t1, t2, ignore_type_in_groups, ignore_type_subclasses, is_qual", [
        (taco, burrito, [], False, False),
        (taco, burrito, [(Taco, Burrito)], False, True),
        ([taco], [burrito], [(Taco, Burrito)], False, True),
        ([obj_a], [obj_c], [(ClassA, ClassB)], False, False),
        ([obj_a], [obj_c], [(ClassA, ClassB)], True, True),
        ([obj_b], [obj_c], [(ClassB, )], True, True),
    ])
    def test_objects_with_same_content(self, t1, t2, ignore_type_in_groups, ignore_type_subclasses, is_qual):
        """Objects with equal content prep equally only when their types are grouped.

        ``is_qual`` is the expected outcome of comparing the two prep strings.
        """
        t1_result = DeepHashPrep(t1, ignore_type_in_groups=ignore_type_in_groups,
                                 ignore_type_subclasses=ignore_type_subclasses)
        t2_result = DeepHashPrep(t2, ignore_type_in_groups=ignore_type_in_groups,
                                 ignore_type_subclasses=ignore_type_subclasses)
        assert is_qual == (t1_result[t1] == t2_result[t2])

    def test_custom_object(self):
        """Custom objects prep to 'obj<ClassName>:{attr:value;...}'."""
        cc_a = CustomClass2(prop1=["a"], prop2=["b"])
        t1 = [cc_a, CustomClass2(prop1=["c"], prop2=["d"])]
        t1_result = DeepHashPrep(t1)
        expected = 'list:objCustomClass2:{str:prop1:list:str:a;str:prop2:list:str:b},objCustomClass2:{str:prop1:list:str:c;str:prop2:list:str:d}'  # NOQA
        assert expected == t1_result[t1]

    def test_repetition_by_default_does_not_effect(self):
        """By default repeated items in a list do not change the prep string."""
        list1 = [3, 4]
        list1_id = get_id(list1)
        a = [1, 2, list1]
        a_id = get_id(a)

        list2 = [4, 3, 3]
        list2_id = get_id(list2)
        b = [list2, 2, 1]
        b_id = get_id(b)

        hash_a = DeepHashPrep(a)
        hash_b = DeepHashPrep(b)

        assert hash_a[list1_id] == hash_b[list2_id]
        assert hash_a[a_id] == hash_b[b_id]

    def test_setting_repetition_off_unequal_hash(self):
        """With ignore_repetition=False repeated items change the prep string."""
        list1 = [3, 4]
        list1_id = get_id(list1)
        a = [1, 2, list1]
        a_id = get_id(a)

        list2 = [4, 3, 3]
        list2_id = get_id(list2)
        b = [list2, 2, 1]
        b_id = get_id(b)

        hash_a = DeepHashPrep(a, ignore_repetition=False)
        hash_b = DeepHashPrep(b, ignore_repetition=False)

        assert not hash_a[list1_id] == hash_b[list2_id]
        assert not hash_a[a_id] == hash_b[b_id]

        # The two prep strings differ only in the repetition count recorded for 3.
        assert hash_a[list1_id].replace('3|1', '3|2') == hash_b[list2_id]

    def test_already_calculated_hash_wont_be_recalculated(self):
        """Hashes passed in via ``hashes=`` are reused instead of recomputed."""
        hashes = (i for i in range(10))

        # Returns a new value on every call, so a recomputation is detectable.
        def hasher(obj):
            return str(next(hashes))
        obj = "a"
        expected_result = {obj: '0'}
        result = DeepHash(obj, hasher=hasher)
        assert expected_result == result

        # we simply feed the last result to DeepHash
        # So it can re-use the results.
        result2 = DeepHash(obj, hasher=hasher, hashes=result)
        # if hashes are not cached and re-used,
        # then the next time hasher runs, it returns
        # number 1 instead of 0.
        assert expected_result == result2

        result3 = DeepHash(obj, hasher=hasher)
        expected_result = {obj: '1'}
        assert expected_result == result3

    def test_skip_type(self):
        """A dict value of an excluded type gets no entry in the result."""
        l1 = logging.getLogger("test")
        obj = {"log": l1, 2: 1337}
        result = DeepHashPrep(obj, exclude_types={logging.Logger})
        assert get_id(l1) not in result

    def test_skip_type2(self):
        """Hashing a root object of an excluded type gives an empty result."""
        l1 = logging.getLogger("test")
        result = DeepHashPrep(l1, exclude_types={logging.Logger})
        assert not result

    def test_prep_dic_with_loop(self):
        """A dict that contains itself is prepped without the self-reference."""
        obj = {2: 1337}
        obj[1] = obj
        result = DeepHashPrep(obj)
        expected_result = {get_id(obj): 'dict:{int:2:int:1337}', 1: 'int:1', 2: 'int:2', 1337: 'int:1337'}
        assert expected_result == result

    def test_prep_iterable_with_loop(self):
        """A list that contains itself is prepped without the self-reference."""
        obj = [1]
        obj.append(obj)
        result = DeepHashPrep(obj)
        expected_result = {get_id(obj): 'list:int:1', 1: 'int:1'}
        assert expected_result == result

    def test_prep_iterable_with_excluded_type(self):
        """A list item of an excluded type gets no entry in the result."""
        l1 = logging.getLogger("test")
        obj = [1, l1]
        result = DeepHashPrep(obj, exclude_types={logging.Logger})
        assert get_id(l1) not in result

    def test_skip_str_type_in_dict_on_list(self):
        """A dict whose only value is of an excluded type preps like an empty dict."""
        dic1 = {1: "a"}
        t1 = [dic1]
        dic2 = {}
        t2 = [dic2]
        t1_hash = DeepHashPrep(t1, exclude_types=[str])
        t2_hash = DeepHashPrep(t2, exclude_types=[str])
        assert 1 in t1_hash
        assert t1_hash[dic1] == t2_hash[dic2]

    def test_skip_path(self):
        """exclude_paths accepts a list or a single string and skips that subtree."""
        dic1 = {1: "a"}
        t1 = [dic1, 2]
        dic2 = {}
        t2 = [dic2, 2]
        t1_hash = DeepHashPrep(t1, exclude_paths=['root[0]'])
        t2_hash = DeepHashPrep(t2, exclude_paths='root[0]')
        assert 1 not in t1_hash
        assert 2 in t1_hash
        assert t1_hash[2] == t2_hash[2]

    def test_skip_path2(self):
        """Objects that differ only at excluded paths prep equally."""
        obj10 = {'a': 1, 'b': 'f', 'e': "1111", 'foo': {'bar': 'baz'}}
        obj11 = {'c': 1, 'd': 'f', 'e': 'Cool'}

        obj20 = {'a': 1, 'b': 'f', 'e': 'Cool', 'foo': {'bar': 'baz2'}}
        obj21 = {'c': 1, 'd': 'f', 'e': "2222"}

        t1 = [obj10, obj11]
        t2 = [obj20, obj21]

        exclude_paths = ["root[0]['e']", "root[1]['e']", "root[0]['foo']['bar']"]

        t1_hash = DeepHashPrep(t1, exclude_paths=exclude_paths)
        t2_hash = DeepHashPrep(t2, exclude_paths=exclude_paths)
        assert t1_hash[t1] == t2_hash[t2]

    def test_skip_regex_path(self):
        """exclude_regex_paths accepts a pattern string or a list of compiled patterns."""
        dic1 = {1: "a"}
        t1 = [dic1, 2]
        exclude_re = re.compile(r'\[0\]')
        t1_hash = DeepHashPrep(t1, exclude_regex_paths=r'\[0\]')
        t2_hash = DeepHashPrep(t1, exclude_regex_paths=[exclude_re])
        assert 1 not in t1_hash
        assert 2 in t1_hash
        assert t1_hash[2] == t2_hash[2]

    def test_skip_hash_exclude_obj_callback(self):
        """Items for which exclude_obj_callback returns True are left out of the prep."""
        def exclude_obj_callback(obj, parent):
            # Exclude the item at path root[0]['x'] and any object equal to 2.
            return parent == "root[0]['x']" or obj == 2

        dic1 = {"x": 1, "y": 2, "z": 3}
        t1 = [dic1]
        t1_hash = DeepHashPrep(t1, exclude_obj_callback=exclude_obj_callback)
        assert t1_hash == {'y': 'str:y', 'z': 'str:z', 3: 'int:3',
                           get_id(dic1): 'dict:{str:z:int:3}', get_id(t1): 'list:dict:{str:z:int:3}'}
        dic2 = {"z": 3}
        t2 = [dic2]
        t2_hash = DeepHashPrep(t2, exclude_obj_callback=exclude_obj_callback)
        assert t1_hash[t1] == t2_hash[t2]

    def test_string_case(self):
        """ignore_string_case lower-cases the prep string but not the key."""
        t1 = "Hello"

        t1_hash = DeepHashPrep(t1)
        assert t1_hash == {'Hello': 'str:Hello'}

        t1_hash = DeepHashPrep(t1, ignore_string_case=True)
        assert t1_hash == {'Hello': 'str:hello'}

    def test_hash_class(self):
        """A class object preps from its public class attributes."""
        t1 = ClassC
        t1_hash = DeepHashPrep(t1)
        assert t1_hash['class_attr'] == 'str:class_attr'
        assert t1_hash[0] == 'int:0'
        # Note: we ignore private names in calculating hashes now. So you don't see __init__ here for example.
        assert t1_hash[t1] == r'objClassC:{str:class_attr:int:0}'

    def test_hash_set_in_list(self):
        """Sets nested in a list prep to 'set:<items>' inside the list prep."""
        t1 = [{1, 2, 3}, {4, 5}]
        t1_hash = DeepHashPrep(t1)
        assert t1_hash[t1] == 'list:set:int:1,int:2,int:3,set:int:4,int:5'

    def test_hash_numpy_array1(self):
        """Two int8 arrays with the same items in different order prep equally."""
        t1 = np.array([[1, 2]], np.int8)
        t2 = np.array([[2, 1]], np.int8)
        t1_hash = DeepHashPrep(t1)
        t2_hash = DeepHashPrep(t2)
        assert t1_hash[t1] == 'ndarray:ndarray:int8:1,int8:2'
        assert t2_hash[t2] == t1_hash[t1]

    def test_hash_numpy_array_ignore_numeric_type_changes(self):
        """With ignore_numeric_type_changes array items prep as generic numbers."""
        t1 = np.array([[1, 2]], np.int8)
        t1_hash = DeepHashPrep(t1, ignore_numeric_type_changes=True)
        assert t1_hash[t1] == 'ndarray:ndarray:number:1.000000000000,number:2.000000000000'

    def test_hash_numpy_array2_multi_dimensional_can_not_retrieve_individual_array_item_hashes(self):
        """
        This is a very interesting case. When DeepHash extracts t1[0] to create a hash for it,
        Numpy creates an array. But that array will only be technically available during the DeepHash run.
        Once DeepHash is run, the array is marked to be deleted by the garbage collector.
        However depending on the version of the python and the machine that runs it, by the time we get
        to the line that is t1_hash[t1[0]], the t1[0] may or may not be still in memory.
        If it is still in the memory, t1_hash[t1[0]] works without a problem.
        If it is already garbage collected, t1_hash[t1[0]] will throw a key error since there will be
        a new t1[0] by the time t1_hash[t1[0]] is called. Hence it will have a new ID and thus it
        will not be available anymore in t1_hash. Remember that since Numpy arrays are not hashable,
        the ID of the array is stored in t1_hash as a key and not the object itself.
        """
        t1 = np.array([[1, 2, 3, 4], [4, 2, 2, 1]], np.int8)
        t1_hash = DeepHashPrep(t1)
        # Both outcomes are acceptable (see docstring); only the error message
        # is checked when the lookup fails.
        try:
            t1_hash[t1[0]]
        except Exception as e:
            assert str(e).strip("'") == HASH_LOOKUP_ERR_MSG.format(t1[0])


class TestDeepHashSHA:
    """DeepHash with SHA Tests."""

    def test_str_sha1(self):
        """A str hashes to the expected sha1 hex digest."""
        obj = "a"
        expected_result = {
            obj: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8'
        }
        result = DeepHash(obj, ignore_string_type_changes=True, hasher=DeepHash.sha1hex)
        assert expected_result == result

    def test_str_sha256(self):
        """A str hashes to the expected sha256 hex digest."""
        obj = "a"
        expected_result = {
            obj: 'ca978112ca1bbdcafac231b39a23dc4da786eff8147c4e72b9807785afee48bb'
        }
        result = DeepHash(obj, ignore_string_type_changes=True, hasher=DeepHash.sha256hex)
        assert expected_result == result

    def test_prep_str_sha1_fail_if_mutable(self):
        """
        This test fails if DeepHash is getting a mutable copy of hashes
        which means each init of the DeepHash will have hashes from
        the previous init.
        """
        obj1 = "a"
        expected_result = {
            obj1: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8'
        }
        result = DeepHash(obj1, ignore_string_type_changes=True, hasher=DeepHash.sha1hex)
        assert expected_result == result
        obj2 = "b"
        result = DeepHash(obj2, ignore_string_type_changes=True, hasher=DeepHash.sha1hex)
        assert obj1 not in result

    def test_bytecode(self):
        """bytes hash to the same sha1 digest as the equivalent str."""
        obj = b"a"
        expected_result = {
            obj: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8'
        }
        result = DeepHash(obj, ignore_string_type_changes=True, hasher=DeepHash.sha1hex)
        assert expected_result == result

    def test_list1(self):
        """sha1 hashes for a list and each of its items."""
        string1 = "a"
        obj = [string1, 10, 20]
        expected_result = {
            string1: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8',
            get_id(obj): 'eac61cbd194e5e03c210a3dce67b9bfd6a7b7acb',
            10: DeepHash.sha1hex('int:10'),
            20: DeepHash.sha1hex('int:20'),
        }
        result = DeepHash(obj, ignore_string_type_changes=True, hasher=DeepHash.sha1hex)
        assert expected_result == result

    def test_dict1(self):
        """sha1 hashes for a dict and each of its keys and values."""
        string1 = "a"
        key1 = "key1"
        obj = {key1: string1, 1: 10, 2: 20}
        expected_result = {
            1: DeepHash.sha1hex('int:1'),
            10: DeepHash.sha1hex('int:10'),
            2: DeepHash.sha1hex('int:2'),
            20: DeepHash.sha1hex('int:20'),
            key1: '1073ab6cda4b991cd29f9e83a307f34004ae9327',
            string1: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8',
            get_id(obj): '11e23f096df81b1ccab0c309cdf8b4ba5a0a6895'
        }
        result = DeepHash(obj, ignore_string_type_changes=True, hasher=DeepHash.sha1hex)
        assert expected_result == result


class TestCleaningString:
    """Tests for prepare_string_for_hashing."""

    @pytest.mark.parametrize("text, ignore_string_type_changes, expected_result", [
        (b'hello', True, 'hello'),
        (b'hello', False, 'bytes:hello'),
        ('hello', True, 'hello'),
        ('hello', False, 'str:hello'),
    ])
    def test_clean_type(self, text, ignore_string_type_changes, expected_result):
        """The type prefix is added only when string type changes are not ignored."""
        result = prepare_string_for_hashing(text, ignore_string_type_changes=ignore_string_type_changes)
        assert expected_result == result


class TestCounts:
    """Tests for the object count stored next to each hash."""

    @pytest.mark.parametrize('obj, expected_count', [
        (
            {1: 1, 2: 3},
            5
        ),
        (
            {"key": {1: 1, 2: 4}, "key2": ["a", "b"]},
            11
        ),
        (
            [{1}],
            3
        ),
        (
            [ClassC(a=10, b=11)],
            6
        )
    ])
    def test_dict_count(self, obj, expected_count):
        """
        How many objects went into building this object?
        """
        # extract_index=1 selects the count instead of the hash.
        result = DeepHash(obj).get(obj, extract_index=1)
        assert expected_count == result


class TestOtherHashFuncs:
    """Tests for standalone hashing helpers."""

    @pytest.mark.parametrize('items, prefix, expected', [
        ([[1], [2]], 'pre', 'pre583852d84b3482edf53408b64724a37289d7af458c44bb989a8abbffe24d2d2b'),
        ([[1], [2]], b'pre', 'pre583852d84b3482edf53408b64724a37289d7af458c44bb989a8abbffe24d2d2b'),
    ])
    def test_combine_hashes_lists(self, items, prefix, expected):
        """A str prefix and a bytes prefix give the same combined hash."""
        result = combine_hashes_lists(items, prefix)
        assert expected == result