1#!/usr/bin/env python
2import re
3import pytest
4import logging
5import datetime
6from collections import namedtuple
7from functools import partial
8from enum import Enum
9from deepdiff import DeepHash
10from deepdiff.deephash import (
11    prepare_string_for_hashing, unprocessed, UNPROCESSED_KEY, BoolObj, HASH_LOOKUP_ERR_MSG, combine_hashes_lists)
12from deepdiff.helper import pypy3, get_id, number_to_string, np
13from tests import CustomClass2
14
# Suppress every log record at CRITICAL severity and below while this test module runs.
logging.disable(logging.CRITICAL)
16
17
# Small fixture object with one class-level attribute and two instance attributes.
# Documented with comments rather than a class docstring so the class object that
# test_hash_class hashes keeps exactly the attributes it had before.
class ClassC:
    class_attr = 0

    def __init__(self, a, b=None):
        self.a, self.b = a, b

    def __str__(self):
        """Render the instance as ``(a, b)``."""
        template = "({}, {})"
        return template.format(self.a, self.b)

    # repr() and str() intentionally share a single implementation.
    __repr__ = __str__
29
30
# DeepHash with apply_hash=False: exercises only the preparation step, so tests can
# assert on the prepared strings. We don't need to test the actual hash function.
DeepHashPrep = partial(DeepHash, apply_hash=False)
33
34
def prep_str(obj, ignore_string_type_changes=True):
    """Build the prepared form the tests expect for a string-like ``obj``.

    When ``ignore_string_type_changes`` is truthy, ``obj`` is returned
    untouched; otherwise it is rendered with a ``str:`` prefix.
    """
    if ignore_string_type_changes:
        return obj
    return 'str:{}'.format(obj)
37
38
class TestDeepHash:
    """Checks on the DeepHash result object: key lookup, dict-style views and options."""

    def test_dictionary(self):
        """Hashing a dict records an entry for its member and for the dict's own id."""
        source = {1: 1}
        hashed = DeepHash(source)
        expected_keys = {1, get_id(source)}
        assert set(hashed.keys()) == expected_keys

    def test_get_hash_by_obj_is_the_same_as_by_obj_get_id(self):
        """A value nested inside a dict can be looked up by the value itself."""
        letter = "a"
        container = {1: letter}
        hashed = DeepHash(container)
        assert hashed[letter]

    def test_deephash_repr(self):
        """repr() of the result looks like a plain dict of object -> hash."""
        text = "a"
        hashed = DeepHash(text)
        assert "{'a': '980410da9522db17c3ab8743541f192a5ab27772a6154dbc7795ee909e653a5c'}" == repr(hashed)

    def test_deephash_values(self):
        """values() yields only the hash strings."""
        text = "a"
        hash_values = list(DeepHash(text).values())
        assert ['980410da9522db17c3ab8743541f192a5ab27772a6154dbc7795ee909e653a5c'] == hash_values

    def test_deephash_keys(self):
        """keys() yields the hashed objects."""
        text = "a"
        hash_keys = list(DeepHash(text).keys())
        assert ["a"] == hash_keys

    def test_deephash_items(self):
        """items() yields (object, hash) pairs."""
        text = "a"
        hash_items = list(DeepHash(text).items())
        assert [('a', '980410da9522db17c3ab8743541f192a5ab27772a6154dbc7795ee909e653a5c')] == hash_items

    def test_get_hash_by_obj_when_does_not_exist(self):
        """Looking up an object that was never hashed raises KeyError."""
        letter = "a"
        container = {1: letter}
        hashed = DeepHash(container)
        with pytest.raises(KeyError):
            hashed[2]

    def test_datetime(self):
        """Two names bound to the same datetime produce the same hash."""
        moment = datetime.datetime.now()
        first = second = moment
        first_hash = DeepHash(first)
        second_hash = DeepHash(second)
        assert first_hash[first] == second_hash[second]

    def test_datetime_truncate(self):
        """truncate_datetime makes datetimes equal once the finer fields are dropped."""
        base = datetime.datetime(2020, 5, 17, 22, 15, 34, 913070)
        same_minute = datetime.datetime(2020, 5, 17, 22, 15, 39, 296583)
        same_second = datetime.datetime(2020, 5, 17, 22, 15, 34, 500000)

        # Same minute, different seconds.
        base_hash = DeepHash(base, truncate_datetime='minute')
        same_minute_hash = DeepHash(same_minute, truncate_datetime='minute')
        assert base_hash[base] == same_minute_hash[same_minute]

        # Same second, different microseconds.
        base_hash = DeepHash(base, truncate_datetime='second')
        same_second_hash = DeepHash(same_second, truncate_datetime='second')
        assert base_hash[base] == same_second_hash[same_second]

    def test_get_reserved_keyword(self):
        """_getitem unpacks tuple entries but returns reserved-key entries whole."""
        lookup = {UNPROCESSED_KEY: 'full item', 'key1': ('item', 'count')}
        found = DeepHash._getitem(lookup, obj='key1')
        assert 'item' == found
        # A reserved key must return the stored object itself, not an element of a tuple.
        found = DeepHash._getitem(lookup, obj=UNPROCESSED_KEY)
        assert 'full item' == found

    def test_get_key(self):
        """get_key falls back to the supplied default for a missing key."""
        lookup = {'key1': ('item', 'count')}
        found = DeepHash.get_key(lookup, key='key2', default='banana')
        assert 'banana' == found

    def test_list_of_sets(self):
        """A list of sets is keyed by each member plus the id of every container."""
        first_set = {1}
        second_set = {2}
        container = [first_set, second_set]
        hashed = DeepHash(container)
        expected_keys = {1, 2, get_id(first_set), get_id(second_set), get_id(container)}
        assert set(hashed.keys()) == expected_keys

    def test_bad_attribute(self):
        """An object whose attributes cannot be read is reported as unprocessed."""
        class Bad:
            __slots__ = ['x', 'y']

            # Every attribute lookup that reaches __getattr__ fails.
            def __getattr__(self, key):
                raise AttributeError("Bad item")

            def __str__(self):
                return "Bad Object"

            def __repr__(self):
                return "<Bad obj id {}>".format(id(self))

        broken = Bad()

        hashed = DeepHash(broken)
        expected = {broken: unprocessed, UNPROCESSED_KEY: [broken]}
        assert expected == hashed

    def test_built_in_hash_not_sensitive_to_bytecode_vs_unicode(self):
        """With the default hasher, str and bytes hash alike when string type is ignored."""
        as_text = 'hello'
        as_bytes = b'hello'
        text_hash = DeepHash(as_text, ignore_string_type_changes=True)[as_text]
        bytes_hash = DeepHash(as_bytes, ignore_string_type_changes=True)[as_bytes]
        assert text_hash == bytes_hash

    def test_sha1_hash_not_sensitive_to_bytecode_vs_unicode(self):
        """With the SHA-1 hasher, str and bytes hash alike when string type is ignored."""
        as_text = 'hello'
        as_bytes = b'hello'
        text_hash = DeepHash(as_text, ignore_string_type_changes=True, hasher=DeepHash.sha1hex)[as_text]
        bytes_hash = DeepHash(as_bytes, ignore_string_type_changes=True, hasher=DeepHash.sha1hex)[as_bytes]
        assert text_hash == bytes_hash
153
154
class TestDeepHashPrep:
    """DeepHashPrep tests covering object serialization.

    Every test runs DeepHash through the module-level ``DeepHashPrep`` partial
    (``apply_hash=False``) and asserts on the prepared strings.
    """

    def test_prep_bool_vs_num1(self):
        """True and 1 are stored under different keys with different type prefixes."""
        assert {BoolObj.TRUE: 'bool:true'} == DeepHashPrep(True)
        assert {1: 'int:1'} == DeepHashPrep(1)

    def test_prep_bool_vs_num2(self):
        """Key order of a dict mixing True and 1 does not change its prepared value."""
        item1 = {
            "Value One": True,
            "Value Two": 1,
        }
        item2 = {
            "Value Two": 1,
            "Value One": True,
        }
        assert DeepHashPrep(item1)[item1] == DeepHashPrep(item2)[item2]

    def test_prep_str(self):
        """A string is prepared with or without the ``str:`` prefix depending on the option."""
        obj = "a"
        expected_result = {obj: prep_str(obj)}
        result = DeepHashPrep(obj, ignore_string_type_changes=True)
        assert expected_result == result
        expected_result = {obj: prep_str(obj, ignore_string_type_changes=False)}
        result = DeepHashPrep(obj, ignore_string_type_changes=False)
        assert expected_result == result

    def test_dictionary_key_type_change(self):
        """str and bytes dict keys prepare identically when string type changes are ignored."""
        obj1 = {"b": 10}
        obj2 = {b"b": 10}

        result1 = DeepHashPrep(obj1, ignore_string_type_changes=True)
        result2 = DeepHashPrep(obj2, ignore_string_type_changes=True)
        assert result1[obj1] == result2[obj2]
        assert result1["b"] == result2[b"b"]

    def test_number_type_change(self):
        """10 and 10.0 differ by default and match with ignore_numeric_type_changes."""
        obj1 = 10
        obj2 = 10.0

        result1 = DeepHashPrep(obj1)
        result2 = DeepHashPrep(obj2)
        assert result1[obj1] != result2[obj2]

        result1 = DeepHashPrep(obj1, ignore_numeric_type_changes=True)
        result2 = DeepHashPrep(obj2, ignore_numeric_type_changes=True)
        assert result1[obj1] == result2[obj2]

    def test_prep_str_fail_if_deephash_leaks_results(self):
        """
        This test fails if DeepHash is getting a mutable copy of hashes,
        which would mean each init of DeepHash carries over hashes from
        the previous init.
        """
        obj1 = "a"
        expected_result = {obj1: prep_str(obj1)}
        result = DeepHashPrep(obj1, ignore_string_type_changes=True)
        assert expected_result == result
        obj2 = "b"
        result = DeepHashPrep(obj2, ignore_string_type_changes=True)
        assert obj1 not in result

    def test_dict_in_dict(self):
        """Both the outer key and the nested dict are present in the result."""
        obj2 = {2: 3}
        obj = {'a': obj2}
        result = DeepHashPrep(obj, ignore_string_type_changes=True)
        assert 'a' in result
        assert obj2 in result

    def do_list_or_tuple(self, func, func_str):
        """Shared check for sequences.

        ``func`` builds the container (``list`` or ``tuple``) and ``func_str``
        is the type prefix expected in its prepared string.
        """
        string1 = "a"
        obj = func([string1, 10, 20])
        # A list is keyed by its id; the other container (a tuple) is keyed by itself.
        if func is list:
            obj_id = get_id(obj)
        else:
            obj_id = obj
        string1_prepped = prep_str(string1)
        expected_result = {
            10: 'int:10',
            20: 'int:20',
            string1: string1_prepped,
            obj_id: '{}:{},int:10,int:20'.format(func_str, string1_prepped),
        }
        result = DeepHashPrep(obj, ignore_string_type_changes=True)
        assert expected_result == result

    def test_list_and_tuple(self):
        """Run the shared sequence check for both list and tuple."""
        for func, func_str in ((list, 'list'), (tuple, 'tuple')):
            self.do_list_or_tuple(func, func_str)

    def test_named_tuples(self):
        """A named tuple is prepared as ``nt<Name>:{field:value}``."""
        # On pypy3 the string interning implementation differs, so the id of x
        # inside the named tuple changes. Only the named tuple's own entry is
        # checked there; elsewhere the whole result is compared.
        x = "x"
        x_prep = prep_str(x)
        Point = namedtuple('Point', [x])
        obj = Point(x=11)
        result = DeepHashPrep(obj, ignore_string_type_changes=True)
        if pypy3:
            assert result[obj] == "ntPoint:{%s:int:11}" % x_prep
        else:
            expected_result = {
                x: x_prep,
                obj: "ntPoint:{%s:int:11}" % x_prep,
                11: 'int:11',
            }
            assert expected_result == result

    def test_enum(self):
        """Enum members are prepared from their name and value and differ from both."""
        class MyEnum(Enum):
            A = 1
            B = 2

        assert DeepHashPrep(MyEnum.A)[MyEnum.A] == r'objMyEnum:{str:_name_:str:A;str:_value_:int:1}'
        assert DeepHashPrep(MyEnum.A) == DeepHashPrep(MyEnum(1))
        assert DeepHashPrep(MyEnum.A) != DeepHashPrep(MyEnum.A.name)
        assert DeepHashPrep(MyEnum.A) != DeepHashPrep(MyEnum.A.value)
        assert DeepHashPrep(MyEnum.A) != DeepHashPrep(MyEnum.B)

    def test_dict_hash(self):
        """A flat dict yields entries for every key, every value and the dict itself."""
        string1 = "a"
        string1_prepped = prep_str(string1)
        key1 = "key1"
        key1_prepped = prep_str(key1)
        obj = {key1: string1, 1: 10, 2: 20}
        expected_result = {
            1: 'int:1',
            10: 'int:10',
            2: 'int:2',
            20: 'int:20',
            key1: key1_prepped,
            string1: string1_prepped,
            # Built from the prepped strings, matching the per-item entries above.
            get_id(obj): 'dict:{{int:1:int:10;int:2:int:20;{}:{}}}'.format(key1_prepped, string1_prepped)
        }
        result = DeepHashPrep(obj, ignore_string_type_changes=True)
        assert expected_result == result

    def test_dict_in_list(self):
        """A dict nested in a list shows up both on its own and inside the list's string."""
        string1 = "a"
        key1 = "key1"
        dict1 = {key1: string1, 1: 10, 2: 20}
        obj = [0, dict1]
        expected_result = {
            0: 'int:0',
            1: 'int:1',
            10: 'int:10',
            2: 'int:2',
            20: 'int:20',
            key1: key1,
            string1: string1,
            get_id(dict1): 'dict:{int:1:int:10;int:2:int:20;%s:%s}' %
            (key1, string1),
            get_id(obj):
            'list:dict:{int:1:int:10;int:2:int:20;%s:%s},int:0' %
            (key1, string1)
        }
        result = DeepHashPrep(obj, ignore_string_type_changes=True)
        assert expected_result == result

    def test_nested_lists_same_hash(self):
        """Element order inside nested lists does not change the prepared value."""
        t1 = [1, 2, [3, 4]]
        t2 = [[4, 3], 2, 1]
        t1_hash = DeepHashPrep(t1)
        t2_hash = DeepHashPrep(t2)

        assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)]

    def test_nested_lists_same_hash2(self):
        """Order independence also holds two levels deep."""
        t1 = [1, 2, [3, [4, 5]]]
        t2 = [[[5, 4], 3], 2, 1]
        t1_hash = DeepHashPrep(t1)
        t2_hash = DeepHashPrep(t2)

        assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)]

    def test_nested_lists_same_hash3(self):
        """Order independence holds for lists nested inside dict values."""
        t1 = [{1: [2, 3], 4: [5, [6, 7]]}]
        t2 = [{4: [[7, 6], 5], 1: [3, 2]}]
        t1_hash = DeepHashPrep(t1)
        t2_hash = DeepHashPrep(t2)

        assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)]

    def test_nested_lists_in_dictionary_same_hash(self):
        """A list of dicts prepares the same regardless of the dicts' order."""
        t1 = [{"c": 4}, {"c": 3}]
        t2 = [{"c": 3}, {"c": 4}]
        t1_hash = DeepHashPrep(t1)
        t2_hash = DeepHashPrep(t2)

        assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)]

    def test_same_sets_same_hash(self):
        """Equal sets written in a different order prepare identically."""
        t1 = {1, 3, 2}
        t2 = {2, 3, 1}
        t1_hash = DeepHashPrep(t1)
        t2_hash = DeepHashPrep(t2)

        assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)]

    @pytest.mark.parametrize("t1, t2, significant_digits, number_format_notation, result", [
        ({0.012, 0.98}, {0.013, 0.99}, 1, "f", 'set:float:0.00,float:1.0'),
        (100000, 100021, 3, "e", 'int:1.000e+05'),
    ])
    def test_similar_significant_hash(self, t1, t2, significant_digits,
                                      number_format_notation, result):
        """Numbers that agree to ``significant_digits`` share one prepared value.

        A truthy ``result`` is the exact prepared string both inputs must
        produce; a falsy one means the two prepared values must differ.
        """
        t1_hash = DeepHashPrep(t1, significant_digits=significant_digits,
                               number_format_notation=number_format_notation)
        t2_hash = DeepHashPrep(t2, significant_digits=significant_digits,
                               number_format_notation=number_format_notation)

        if result:
            assert result == t1_hash[t1] == t2_hash[t2]
        else:
            assert t1_hash[t1] != t2_hash[t2]

    def test_number_to_string_func(self):
        """A custom number_to_string_func is applied to every number before preparing."""
        def custom_number_to_string(number, *args, **kwargs):
            # Clamp everything below 100 up to 100 so the small numbers collide.
            number = 100 if number < 100 else number
            return number_to_string(number, *args, **kwargs)

        t1 = [10, 12, 100000]
        t2 = [50, 63, 100021]
        t1_hash = DeepHashPrep(t1, significant_digits=4, number_format_notation="e",
                               number_to_string_func=custom_number_to_string)
        t2_hash = DeepHashPrep(t2, significant_digits=4, number_format_notation="e",
                               number_to_string_func=custom_number_to_string)

        assert t1_hash[10] == t2_hash[50] == t1_hash[12] == t2_hash[63] != t1_hash[100000]

    def test_same_sets_in_lists_same_hash(self):
        """A list holding a string and a set prepares the same in either order."""
        t1 = ["a", {1, 3, 2}]
        t2 = [{2, 3, 1}, "a"]
        t1_hash = DeepHashPrep(t1)
        t2_hash = DeepHashPrep(t2)

        assert t1_hash[get_id(t1)] == t2_hash[get_id(t2)]

    def test_unknown_parameters(self):
        """An unrecognised keyword argument raises ValueError."""
        with pytest.raises(ValueError):
            DeepHashPrep(1, wrong_param=2)

    def test_bad_attribute_prep(self):
        """An object whose attributes cannot be read is reported as unprocessed."""
        class Bad:
            __slots__ = ['x', 'y']

            def __getattr__(self, key):
                raise AttributeError("Bad item")

            def __str__(self):
                return "Bad Object"

        t1 = Bad()

        result = DeepHashPrep(t1)
        expected_result = {t1: unprocessed, UNPROCESSED_KEY: [t1]}
        assert expected_result == result

    # Fixture classes for test_objects_with_same_content.
    # Burrito and Taco are unrelated classes with identical content.
    class Burrito:
        bread = 'flour'

        def __init__(self):
            self.spicy = True

    class Taco:
        bread = 'flour'

        def __init__(self):
            self.spicy = True

    # ClassA and ClassB are unrelated but structurally identical; ClassC subclasses ClassB.
    class ClassA:
        def __init__(self, x, y):
            self.x = x
            self.y = y

    class ClassB:
        def __init__(self, x, y):
            self.x = x
            self.y = y

    class ClassC(ClassB):
        pass

    obj_a = ClassA(1, 2)
    obj_b = ClassB(1, 2)
    obj_c = ClassC(1, 2)

    burrito = Burrito()
    taco = Taco()

    @pytest.mark.parametrize("t1, t2, ignore_type_in_groups, ignore_type_subclasses, is_qual", [
        (taco, burrito, [], False, False),
        (taco, burrito, [(Taco, Burrito)], False, True),
        ([taco], [burrito], [(Taco, Burrito)], False, True),
        ([obj_a], [obj_c], [(ClassA, ClassB)], False, False),
        ([obj_a], [obj_c], [(ClassA, ClassB)], True, True),
        ([obj_b], [obj_c], [(ClassB, )], True, True),
    ])
    def test_objects_with_same_content(self, t1, t2, ignore_type_in_groups, ignore_type_subclasses, is_qual):
        """Type groups (and optionally subclasses) decide whether equal content prepares equally.

        ``is_qual`` is the expected outcome of comparing the two prepared values.
        """

        t1_result = DeepHashPrep(t1, ignore_type_in_groups=ignore_type_in_groups,
                                 ignore_type_subclasses=ignore_type_subclasses)
        t2_result = DeepHashPrep(t2, ignore_type_in_groups=ignore_type_in_groups,
                                 ignore_type_subclasses=ignore_type_subclasses)
        assert is_qual == (t1_result[t1] == t2_result[t2])

    def test_custom_object(self):
        """Custom objects are prepared as ``obj<ClassName>:{attr:value;...}``."""
        cc_a = CustomClass2(prop1=["a"], prop2=["b"])
        t1 = [cc_a, CustomClass2(prop1=["c"], prop2=["d"])]
        t1_result = DeepHashPrep(t1)
        expected = 'list:objCustomClass2:{str:prop1:list:str:a;str:prop2:list:str:b},objCustomClass2:{str:prop1:list:str:c;str:prop2:list:str:d}'  # NOQA
        assert expected == t1_result[t1]

    def test_repetition_by_default_does_not_effect(self):
        """By default a repeated element does not change the prepared value."""
        list1 = [3, 4]
        list1_id = get_id(list1)
        a = [1, 2, list1]
        a_id = get_id(a)

        list2 = [4, 3, 3]
        list2_id = get_id(list2)
        b = [list2, 2, 1]
        b_id = get_id(b)

        hash_a = DeepHashPrep(a)
        hash_b = DeepHashPrep(b)

        assert hash_a[list1_id] == hash_b[list2_id]
        assert hash_a[a_id] == hash_b[b_id]

    def test_setting_repetition_off_unequal_hash(self):
        """With ignore_repetition=False the repeated 3 makes the prepared values differ."""
        list1 = [3, 4]
        list1_id = get_id(list1)
        a = [1, 2, list1]
        a_id = get_id(a)

        list2 = [4, 3, 3]
        list2_id = get_id(list2)
        b = [list2, 2, 1]
        b_id = get_id(b)

        hash_a = DeepHashPrep(a, ignore_repetition=False)
        hash_b = DeepHashPrep(b, ignore_repetition=False)

        assert hash_a[list1_id] != hash_b[list2_id]
        assert hash_a[a_id] != hash_b[b_id]

        # The only difference between the inner lists is the count recorded for 3.
        assert hash_a[list1_id].replace('3|1', '3|2') == hash_b[list2_id]

    def test_already_calculated_hash_wont_be_recalculated(self):
        """Hashes passed in through ``hashes=`` are reused instead of recomputed."""
        hashes = (i for i in range(10))

        def hasher(obj):
            # Returns '0', '1', ... on successive calls, so every call is observable.
            return str(next(hashes))

        obj = "a"
        expected_result = {obj: '0'}
        result = DeepHash(obj, hasher=hasher)
        assert expected_result == result

        # We simply feed the last result to DeepHash
        # so it can re-use the results.
        result2 = DeepHash(obj, hasher=hasher, hashes=result)
        # If hashes are not cached and re-used,
        # then the next time hasher runs, it returns
        # number 1 instead of 0.
        assert expected_result == result2

        result3 = DeepHash(obj, hasher=hasher)
        expected_result = {obj: '1'}
        assert expected_result == result3

    def test_skip_type(self):
        """A dict value of an excluded type gets no entry in the result."""
        l1 = logging.getLogger("test")
        obj = {"log": l1, 2: 1337}
        result = DeepHashPrep(obj, exclude_types={logging.Logger})
        assert get_id(l1) not in result

    def test_skip_type2(self):
        """Hashing an object whose own type is excluded gives an empty result."""
        l1 = logging.getLogger("test")
        result = DeepHashPrep(l1, exclude_types={logging.Logger})
        assert not result

    def test_prep_dic_with_loop(self):
        """A dict that contains itself is prepared without the self-referencing item."""
        obj = {2: 1337}
        obj[1] = obj
        result = DeepHashPrep(obj)
        expected_result = {get_id(obj): 'dict:{int:2:int:1337}', 1: 'int:1', 2: 'int:2', 1337: 'int:1337'}
        assert expected_result == result

    def test_prep_iterable_with_loop(self):
        """A list that contains itself is prepared without the self-referencing item."""
        obj = [1]
        obj.append(obj)
        result = DeepHashPrep(obj)
        expected_result = {get_id(obj): 'list:int:1', 1: 'int:1'}
        assert expected_result == result

    def test_prep_iterable_with_excluded_type(self):
        """A list item of an excluded type gets no entry in the result."""
        l1 = logging.getLogger("test")
        obj = [1, l1]
        result = DeepHashPrep(obj, exclude_types={logging.Logger})
        assert get_id(l1) not in result

    def test_skip_str_type_in_dict_on_list(self):
        """Excluding str makes a dict with only a str value prepare like an empty dict."""
        dic1 = {1: "a"}
        t1 = [dic1]
        dic2 = {}
        t2 = [dic2]
        t1_hash = DeepHashPrep(t1, exclude_types=[str])
        t2_hash = DeepHashPrep(t2, exclude_types=[str])
        assert 1 in t1_hash
        assert t1_hash[dic1] == t2_hash[dic2]

    def test_skip_path(self):
        """exclude_paths accepts a list or a single string and skips that path."""
        dic1 = {1: "a"}
        t1 = [dic1, 2]
        dic2 = {}
        t2 = [dic2, 2]
        t1_hash = DeepHashPrep(t1, exclude_paths=['root[0]'])
        t2_hash = DeepHashPrep(t2, exclude_paths='root[0]')
        assert 1 not in t1_hash
        assert 2 in t1_hash
        assert t1_hash[2] == t2_hash[2]

    def test_skip_path2(self):
        """Objects that differ only at excluded paths prepare identically."""

        obj10 = {'a': 1, 'b': 'f', 'e': "1111", 'foo': {'bar': 'baz'}}
        obj11 = {'c': 1, 'd': 'f', 'e': 'Cool'}

        obj20 = {'a': 1, 'b': 'f', 'e': 'Cool', 'foo': {'bar': 'baz2'}}
        obj21 = {'c': 1, 'd': 'f', 'e': "2222"}

        t1 = [obj10, obj11]
        t2 = [obj20, obj21]

        exclude_paths = ["root[0]['e']", "root[1]['e']", "root[0]['foo']['bar']"]

        t1_hash = DeepHashPrep(t1, exclude_paths=exclude_paths)
        t2_hash = DeepHashPrep(t2, exclude_paths=exclude_paths)
        assert t1_hash[t1] == t2_hash[t2]

    def test_skip_regex_path(self):
        """exclude_regex_paths accepts a raw pattern string or a list of compiled patterns."""
        dic1 = {1: "a"}
        t1 = [dic1, 2]
        exclude_re = re.compile(r'\[0\]')
        t1_hash = DeepHashPrep(t1, exclude_regex_paths=r'\[0\]')
        t2_hash = DeepHashPrep(t1, exclude_regex_paths=[exclude_re])
        assert 1 not in t1_hash
        assert 2 in t1_hash
        assert t1_hash[2] == t2_hash[2]

    def test_skip_hash_exclude_obj_callback(self):
        """exclude_obj_callback can drop items by their path or by their value."""
        def exclude_obj_callback(obj, parent):
            # Skip whatever lives at root[0]['x'] and any object equal to 2.
            return parent == "root[0]['x']" or obj == 2

        dic1 = {"x": 1, "y": 2, "z": 3}
        t1 = [dic1]
        t1_hash = DeepHashPrep(t1, exclude_obj_callback=exclude_obj_callback)
        assert t1_hash == {'y': 'str:y', 'z': 'str:z', 3: 'int:3',
                           get_id(dic1): 'dict:{str:z:int:3}', get_id(t1): 'list:dict:{str:z:int:3}'}
        dic2 = {"z": 3}
        t2 = [dic2]
        t2_hash = DeepHashPrep(t2, exclude_obj_callback=exclude_obj_callback)
        assert t1_hash[t1] == t2_hash[t2]

    def test_string_case(self):
        """ignore_string_case lowercases the prepared value but keeps the original key."""
        t1 = "Hello"

        t1_hash = DeepHashPrep(t1)
        assert t1_hash == {'Hello': 'str:Hello'}

        t1_hash = DeepHashPrep(t1, ignore_string_case=True)
        assert t1_hash == {'Hello': 'str:hello'}

    def test_hash_class(self):
        """Hashing a class object prepares only its public class attributes."""
        t1 = ClassC
        t1_hash = DeepHashPrep(t1)
        assert t1_hash['class_attr'] == 'str:class_attr'
        assert t1_hash[0] == 'int:0'
        # Note: private names are now ignored when calculating hashes, so __init__,
        # for example, does not appear here.
        assert t1_hash[t1] == r'objClassC:{str:class_attr:int:0}'

    def test_hash_set_in_list(self):
        """Sets nested in a list are each prepared with a ``set:`` prefix."""
        t1 = [{1, 2, 3}, {4, 5}]
        t1_hash = DeepHashPrep(t1)
        assert t1_hash[t1] == 'list:set:int:1,int:2,int:3,set:int:4,int:5'

    def test_hash_numpy_array1(self):
        """A 2-D int8 array is prepared per row, and element order does not matter."""
        t1 = np.array([[1, 2]], np.int8)
        t2 = np.array([[2, 1]], np.int8)
        t1_hash = DeepHashPrep(t1)
        t2_hash = DeepHashPrep(t2)
        assert t1_hash[t1] == 'ndarray:ndarray:int8:1,int8:2'
        assert t2_hash[t2] == t1_hash[t1]

    def test_hash_numpy_array_ignore_numeric_type_changes(self):
        """With ignore_numeric_type_changes, array items are prepared as generic numbers."""
        t1 = np.array([[1, 2]], np.int8)
        t1_hash = DeepHashPrep(t1, ignore_numeric_type_changes=True)
        assert t1_hash[t1] == 'ndarray:ndarray:number:1.000000000000,number:2.000000000000'

    def test_hash_numpy_array2_multi_dimensional_can_not_retrieve_individual_array_item_hashes(self):
        """
        This is a very interesting case. When DeepHash extracts t1[0] to create a hash for it,
        Numpy creates an array. But that array will only be technically available during the DeepHash run.
        Once DeepHash is run, the array is marked to be deleted by the garbage collector.
        However, depending on the version of Python and the machine that runs it, by the time we get
        to the line that is t1_hash[t1[0]], the t1[0] may or may not still be in memory.
        If it is still in memory, t1_hash[t1[0]] works without a problem.
        If it is already garbage collected, t1_hash[t1[0]] will throw a key error since there will be
        a new t1[0] by the time t1_hash[t1[0]] is called. Hence it will have a new ID and thus it
        will not be available anymore in t1_hash. Remember that since Numpy arrays are not hashable,
        the ID of the array is stored in t1_hash as a key and not the object itself.
        """
        t1 = np.array([[1, 2, 3, 4], [4, 2, 2, 1]], np.int8)
        t1_hash = DeepHashPrep(t1)
        # Either outcome is acceptable; only a failed lookup has something to verify.
        # Catch just the lookup error so any unrelated exception surfaces as itself.
        try:
            t1_hash[t1[0]]
        except KeyError as e:
            assert str(e).strip("'") == HASH_LOOKUP_ERR_MSG.format(t1[0])
673
674
class TestDeepHashSHA:
    """DeepHash run with the explicit SHA-1 and SHA-256 hashers."""

    def test_str_sha1(self):
        """A one-character string hashes to its known SHA-1 digest."""
        text = "a"
        wanted = {text: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8'}
        digests = DeepHash(text, ignore_string_type_changes=True, hasher=DeepHash.sha1hex)
        assert wanted == digests

    def test_str_sha256(self):
        """A one-character string hashes to its known SHA-256 digest."""
        text = "a"
        wanted = {text: 'ca978112ca1bbdcafac231b39a23dc4da786eff8147c4e72b9807785afee48bb'}
        digests = DeepHash(text, ignore_string_type_changes=True, hasher=DeepHash.sha256hex)
        assert wanted == digests

    def test_prep_str_sha1_fail_if_mutable(self):
        """
        Fails when DeepHash shares one mutable hashes mapping between
        instances, because a later instance would then still contain
        entries produced by an earlier one.
        """
        first = "a"
        wanted = {first: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8'}
        digests = DeepHash(first, ignore_string_type_changes=True, hasher=DeepHash.sha1hex)
        assert wanted == digests
        second = "b"
        digests = DeepHash(second, ignore_string_type_changes=True, hasher=DeepHash.sha1hex)
        assert first not in digests

    def test_bytecode(self):
        """Bytes hash to the same SHA-1 digest as the equivalent text."""
        raw = b"a"
        wanted = {raw: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8'}
        digests = DeepHash(raw, ignore_string_type_changes=True, hasher=DeepHash.sha1hex)
        assert wanted == digests

    def test_list1(self):
        """A list yields a digest per item plus one for the list, keyed by its id."""
        first = "a"
        items = [first, 10, 20]
        sha1 = DeepHash.sha1hex
        wanted = {
            first: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8',
            get_id(items): 'eac61cbd194e5e03c210a3dce67b9bfd6a7b7acb',
            10: sha1('int:10'),
            20: sha1('int:20'),
        }
        digests = DeepHash(items, ignore_string_type_changes=True, hasher=sha1)
        assert wanted == digests

    def test_dict1(self):
        """A dict yields a digest per key and value plus one for the dict, keyed by its id."""
        value = "a"
        name = "key1"
        mapping = {name: value, 1: 10, 2: 20}
        sha1 = DeepHash.sha1hex
        wanted = {
            1: sha1('int:1'),
            10: sha1('int:10'),
            2: sha1('int:2'),
            20: sha1('int:20'),
            name: '1073ab6cda4b991cd29f9e83a307f34004ae9327',
            value: '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8',
            get_id(mapping): '11e23f096df81b1ccab0c309cdf8b4ba5a0a6895',
        }
        digests = DeepHash(mapping, ignore_string_type_changes=True, hasher=sha1)
        assert wanted == digests
745
746
class TestCleaningString:
    """prepare_string_for_hashing applied to str and bytes input."""

    @pytest.mark.parametrize("text, ignore_string_type_changes, expected_result", [
        (b'hello', True, 'hello'),
        (b'hello', False, 'bytes:hello'),
        ('hello', True, 'hello'),
        ('hello', False, 'str:hello'),
    ])
    def test_clean_type(self, text, ignore_string_type_changes, expected_result):
        """The type prefix is added only when string type changes are not ignored."""
        cleaned = prepare_string_for_hashing(
            text,
            ignore_string_type_changes=ignore_string_type_changes,
        )
        assert expected_result == cleaned
758
759
class TestCounts:
    """Object counts stored next to each hash."""

    @pytest.mark.parametrize('obj, expected_count', [
        ({1: 1, 2: 3}, 5),
        ({"key": {1: 1, 2: 4}, "key2": ["a", "b"]}, 11),
        ([{1}], 3),
        ([ClassC(a=10, b=11)], 6),
    ])
    def test_dict_count(self, obj, expected_count):
        """
        How many objects went into building this object?

        Compares the value fetched with ``extract_index=1`` against the
        expected count for each parametrized object.
        """
        counted = DeepHash(obj).get(obj, extract_index=1)
        assert expected_count == counted
787
788
class TestOtherHashFuncs:
    """Helper functions imported directly from deepdiff.deephash."""

    @pytest.mark.parametrize('items, prefix, expected', [
        ([[1], [2]], 'pre', 'pre583852d84b3482edf53408b64724a37289d7af458c44bb989a8abbffe24d2d2b'),
        ([[1], [2]], b'pre', 'pre583852d84b3482edf53408b64724a37289d7af458c44bb989a8abbffe24d2d2b'),
    ])
    def test_combine_hashes_lists(self, items, prefix, expected):
        """A str prefix and a bytes prefix produce the same combined hash string."""
        combined = combine_hashes_lists(items, prefix)
        assert expected == combined
798