from test import support, seq_tests
import unittest

import gc
import pickle

# For tuple hashes, we normally run only a test ensuring that we get
# the same results across platforms in a handful of cases.  If that's
# so, there's no real point to running more.  Set RUN_ALL_HASH_TESTS to
# run more anyway.  That's usually of real interest only when analyzing,
# or changing, the hash algorithm, in which case it's usually also
# most useful to set JUST_SHOW_HASH_RESULTS, to see all the results
# instead of wrestling with test "failures".  See the bottom of the
# file for extensive notes on what we're testing here and why.
RUN_ALL_HASH_TESTS = False
JUST_SHOW_HASH_RESULTS = False  # if RUN_ALL_HASH_TESTS, just display

class TupleTest(seq_tests.CommonTest):
    type2test = tuple

    def test_getitem_error(self):
        t = ()
        msg = "tuple indices must be integers or slices"
        with self.assertRaisesRegex(TypeError, msg):
            t['a']

    def test_constructors(self):
        super().test_constructors()
        # calling built-in types without argument must return empty
        self.assertEqual(tuple(), ())
        t0_3 = (0, 1, 2, 3)
        t0_3_bis = tuple(t0_3)
        self.assertIs(t0_3, t0_3_bis)
        self.assertEqual(tuple([]), ())
        self.assertEqual(tuple([0, 1, 2, 3]), (0, 1, 2, 3))
        self.assertEqual(tuple(''), ())
        self.assertEqual(tuple('spam'), ('s', 'p', 'a', 'm'))
        self.assertEqual(tuple(x for x in range(10) if x % 2),
                         (1, 3, 5, 7, 9))

    def test_keyword_args(self):
        with self.assertRaisesRegex(TypeError, 'keyword argument'):
            tuple(sequence=())

    def test_truth(self):
        super().test_truth()
        self.assertFalse(())
        self.assertTrue((42,))

    def test_len(self):
        super().test_len()
        self.assertEqual(len(()), 0)
        self.assertEqual(len((0,)), 1)
        self.assertEqual(len((0, 1, 2)), 3)

    def test_iadd(self):
        super().test_iadd()
        u = (0, 1)
        u2 = u
        u += (2, 3)
        self.assertIsNot(u, u2)

    def test_imul(self):
        super().test_imul()
        u = (0, 1)
        u2 = u
        u *= 3
        self.assertIsNot(u, u2)

    def test_tupleresizebug(self):
        # Check that a specific bug in _PyTuple_Resize() is squashed.
        def f():
            for i in range(1000):
                yield i
        self.assertEqual(list(tuple(f())), list(range(1000)))

    # We expect tuples whose base components have deterministic hashes to
    # have deterministic hashes too - and, indeed, the same hashes across
    # platforms with hash codes of the same bit width.
    def test_hash_exact(self):
        def check_one_exact(t, e32, e64):
            got = hash(t)
            expected = e32 if support.NHASHBITS == 32 else e64
            if got != expected:
                msg = f"FAIL hash({t!r}) == {got} != {expected}"
                self.fail(msg)

        check_one_exact((), 750394483, 5740354900026072187)
        check_one_exact((0,), 1214856301, -8753497827991233192)
        check_one_exact((0, 0), -168982784, -8458139203682520985)
        check_one_exact((0.5,), 2077348973, -408149959306781352)
        check_one_exact((0.5, (), (-2, 3, (4, 6))), 714642271,
                        -1845940830829704396)

    # Various tests for hashing of tuples to check that we get few collisions.
    # Does something only if RUN_ALL_HASH_TESTS is true.
    #
    # Earlier versions of the tuple hash algorithm had massive collisions
    # reported at:
    # - https://bugs.python.org/issue942952
    # - https://bugs.python.org/issue34751
    def test_hash_optional(self):
        from itertools import product

        if not RUN_ALL_HASH_TESTS:
            return

        # If specified, `expected` is a 2-tuple of expected
        # (number_of_collisions, pileup) values, and the test fails if
        # those aren't the values we get.  Also if specified, the test
        # fails if z > `zlimit`.
        def tryone_inner(tag, nbins, hashes, expected=None, zlimit=None):
            from collections import Counter

            nballs = len(hashes)
            mean, sdev = support.collision_stats(nbins, nballs)
            c = Counter(hashes)
            collisions = nballs - len(c)
            z = (collisions - mean) / sdev
            pileup = max(c.values()) - 1
            del c
            got = (collisions, pileup)
            failed = False
            prefix = ""
            if zlimit is not None and z > zlimit:
                failed = True
                prefix = f"FAIL z > {zlimit}; "
            if expected is not None and got != expected:
                failed = True
                prefix += f"FAIL {got} != {expected}; "
            if failed or JUST_SHOW_HASH_RESULTS:
                msg = f"{prefix}{tag}; pileup {pileup:,} mean {mean:.1f} "
                msg += f"coll {collisions:,} z {z:+.1f}"
                if JUST_SHOW_HASH_RESULTS:
                    import sys
                    print(msg, file=sys.__stdout__)
                else:
                    self.fail(msg)

        def tryone(tag, xs,
                   native32=None, native64=None, hi32=None, lo32=None,
                   zlimit=None):
            NHASHBITS = support.NHASHBITS
            hashes = list(map(hash, xs))
            tryone_inner(tag + f"; {NHASHBITS}-bit hash codes",
                         1 << NHASHBITS,
                         hashes,
                         native32 if NHASHBITS == 32 else native64,
                         zlimit)

            if NHASHBITS > 32:
                shift = NHASHBITS - 32
                tryone_inner(tag + "; 32-bit upper hash codes",
                             1 << 32,
                             [h >> shift for h in hashes],
                             hi32,
                             zlimit)

                mask = (1 << 32) - 1
                tryone_inner(tag + "; 32-bit lower hash codes",
                             1 << 32,
                             [h & mask for h in hashes],
                             lo32,
                             zlimit)

        # Tuples of smallish positive integers are common - nice if we
        # get "better than random" for these.
        tryone("range(100) by 3", list(product(range(100), repeat=3)),
               (0, 0), (0, 0), (4, 1), (0, 0))

        # A previous hash had systematic problems when mixing integers of
        # similar magnitude but opposite sign, obscurely related to that
        # j ^ -2 == -j when j is odd.
        cands = list(range(-10, -1)) + list(range(9))

        # Note:  -1 is omitted because hash(-1) == hash(-2) == -2, and
        # there's nothing the tuple hash can do to avoid collisions
        # inherited from collisions in the tuple components' hashes.
        tryone("-10 .. 8 by 4", list(product(cands, repeat=4)),
               (0, 0), (0, 0), (0, 0), (0, 0))
        del cands

        # The hashes here are a weird mix of values where all the
        # variation is in the lowest bits and across a single high-order
        # bit - the middle bits are all zeroes. A decent hash has to
        # both propagate low bits to the left and high bits to the
        # right.  This is also complicated a bit in that there are
        # collisions among the hashes of the integers in L alone.
        L = [n << 60 for n in range(100)]
        tryone("0..99 << 60 by 3", list(product(L, repeat=3)),
               (0, 0), (0, 0), (0, 0), (324, 1))
        del L

        # Used to suffer a massive number of collisions.
        tryone("[-3, 3] by 18", list(product([-3, 3], repeat=18)),
               (7, 1), (0, 0), (7, 1), (6, 1))

        # And even worse.  hash(0.5) has only a single bit set, at the
        # high end. A decent hash needs to propagate high bits right.
        tryone("[0, 0.5] by 18", list(product([0, 0.5], repeat=18)),
               (5, 1), (0, 0), (9, 1), (12, 1))

        # Hashes of ints and floats are the same across platforms.
        # String hashes vary even on a single platform across runs, due
        # to hash randomization for strings.  So we can't say exactly
        # what this should do.  Instead we insist that the number of
        # collisions is no more than 4 sdevs above the theoretically
        # random mean.  Even if the tuple hash can't achieve that on its
        # own, the string hash is trying to be decently pseudo-random
        # (in all bit positions) on _its_ own.  We can at least test
        # that the tuple hash doesn't systematically ruin that.
        tryone("4-char tuples",
               list(product("abcdefghijklmnopqrstuvwxyz", repeat=4)),
               zlimit=4.0)

        # The "old tuple test".  See https://bugs.python.org/issue942952.
        # Ensures, for example, that the hash:
        #   is non-commutative
        #   spreads closely spaced values
        #   doesn't exhibit cancellation in tuples like (x,(x,y))
        N = 50
        base = list(range(N))
        xp = list(product(base, repeat=2))
        inps = base + list(product(base, xp)) + \
                     list(product(xp, base)) + xp + list(zip(base))
        tryone("old tuple test", inps,
               (2, 1), (0, 0), (52, 49), (7, 1))
        del base, xp, inps

        # The "new tuple test".  See https://bugs.python.org/issue34751.
        # Even more tortured nesting, and a mix of signed ints of very
        # small magnitude.
        n = 5
        A = [x for x in range(-n, n+1) if x != -1]
        B = A + [(a,) for a in A]
        L2 = list(product(A, repeat=2))
        L3 = L2 + list(product(A, repeat=3))
        L4 = L3 + list(product(A, repeat=4))
        # T = list of testcases. These consist of all (possibly nested
        # at most 2 levels deep) tuples containing at most 4 items from
        # the set A.
        T = A
        T += [(a,) for a in B + L4]
        T += product(L3, B)
        T += product(L2, repeat=2)
        T += product(B, L3)
        T += product(B, B, L2)
        T += product(B, L2, B)
        T += product(L2, B, B)
        T += product(B, repeat=4)
        assert len(T) == 345130
        tryone("new tuple test", T,
               (9, 1), (0, 0), (21, 5), (6, 1))

    def test_repr(self):
        l0 = tuple()
        l2 = (0, 1, 2)
        a0 = self.type2test(l0)
        a2 = self.type2test(l2)

        self.assertEqual(str(a0), repr(l0))
        self.assertEqual(str(a2), repr(l2))
        self.assertEqual(repr(a0), "()")
        self.assertEqual(repr(a2), "(0, 1, 2)")

    def _not_tracked(self, t):
        # Nested tuples can take several collections to untrack
        gc.collect()
        gc.collect()
        self.assertFalse(gc.is_tracked(t), t)

    def _tracked(self, t):
        self.assertTrue(gc.is_tracked(t), t)
        gc.collect()
        gc.collect()
        self.assertTrue(gc.is_tracked(t), t)

    @support.cpython_only
    def test_track_literals(self):
        # Test GC-optimization of tuple literals
        x, y, z = 1.5, "a", []

        self._not_tracked(())
        self._not_tracked((1,))
        self._not_tracked((1, 2))
        self._not_tracked((1, 2, "a"))
        self._not_tracked((1, 2, (None, True, False, ()), int))
        self._not_tracked((object(),))
        self._not_tracked(((1, x), y, (2, 3)))

        # Tuples with mutable elements are always tracked, even if those
        # elements are not tracked right now.
        self._tracked(([],))
        self._tracked(([1],))
        self._tracked(({},))
        self._tracked((set(),))
        self._tracked((x, y, z))

    def check_track_dynamic(self, tp, always_track):
        x, y, z = 1.5, "a", []

        check = self._tracked if always_track else self._not_tracked
        check(tp())
        check(tp([]))
        check(tp(set()))
        check(tp([1, x, y]))
        check(tp(obj for obj in [1, x, y]))
        check(tp(set([1, x, y])))
        check(tp(tuple([obj]) for obj in [1, x, y]))
        check(tuple(tp([obj]) for obj in [1, x, y]))

        self._tracked(tp([z]))
        self._tracked(tp([[x, y]]))
        self._tracked(tp([{x: y}]))
        self._tracked(tp(obj for obj in [x, y, z]))
        self._tracked(tp(tuple([obj]) for obj in [x, y, z]))
        self._tracked(tuple(tp([obj]) for obj in [x, y, z]))

    @support.cpython_only
    def test_track_dynamic(self):
        # Test GC-optimization of dynamically constructed tuples.
        self.check_track_dynamic(tuple, False)

    @support.cpython_only
    def test_track_subtypes(self):
        # Tuple subtypes must always be tracked
        class MyTuple(tuple):
            pass
        self.check_track_dynamic(MyTuple, True)

    @support.cpython_only
    def test_bug7466(self):
        # Trying to untrack an unfinished tuple could crash Python
        self._not_tracked(tuple(gc.collect() for i in range(101)))

    def test_repr_large(self):
        # Check the repr of large tuple objects
        def check(n):
            l = (0,) * n
            s = repr(l)
            self.assertEqual(s,
                '(' + ', '.join(['0'] * n) + ')')
        check(10)       # check our checking code
        check(1000000)

    def test_iterator_pickle(self):
        # Tuple iterators should survive pickling, both fresh and
        # partially consumed, under every pickle protocol.
        data = self.type2test([4, 5, 6, 7])
        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            itorg = iter(data)
            d = pickle.dumps(itorg, proto)
            it = pickle.loads(d)
            self.assertEqual(type(itorg), type(it))
            self.assertEqual(self.type2test(it), self.type2test(data))

            it = pickle.loads(d)
            next(it)
            d = pickle.dumps(it, proto)
            self.assertEqual(self.type2test(it), self.type2test(data)[1:])

    def test_reversed_pickle(self):
        data = self.type2test([4, 5, 6, 7])
        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            itorg = reversed(data)
            d = pickle.dumps(itorg, proto)
            it = pickle.loads(d)
            self.assertEqual(type(itorg), type(it))
            self.assertEqual(self.type2test(it), self.type2test(reversed(data)))

            it = pickle.loads(d)
            next(it)
            d = pickle.dumps(it, proto)
            self.assertEqual(self.type2test(it), self.type2test(reversed(data))[1:])

    def test_no_comdat_folding(self):
        # Issue 8847: In the PGO build, the MSVC linker's COMDAT folding
        # optimization causes failures in code that relies on distinct
        # function addresses.
        class T(tuple): pass
        with self.assertRaises(TypeError):
            [3,] + T((1,2))

    def test_lexicographic_ordering(self):
        # Issue 21100
        a = self.type2test([1, 2])
        b = self.type2test([1, 2, 0])
        c = self.type2test([1, 3])
        self.assertLess(a, b)
        self.assertLess(b, c)

# Notes on testing hash codes.  The primary thing is that Python doesn't
# care about "random" hash codes.  To the contrary, we like them to be
# very regular when possible, so that the low-order bits are as evenly
# distributed as possible.  For integers this is easy: hash(i) == i for
# all not-huge i except i==-1.
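#
# For example (CPython behavior; -1 can't be its own hash because -1 is
# reserved as the error return value for hash functions at the C level):
#
#     >>> [hash(i) for i in (0, 1, 2, 42)]
#     [0, 1, 2, 42]
#     >>> hash(-1) == hash(-2) == -2
#     True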
#
# For tuples of mixed type there's really no hope of that, so we want
# "randomish" here instead.  But getting close to pseudo-random in all
# bit positions is more expensive than we've been willing to pay for.
#
# We can tolerate large deviations from random - what we don't want is
# catastrophic pileups on a relative handful of hash codes.  The dict
# and set lookup routines remain effective provided that full-width hash
# codes for not-equal objects are distinct.
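#
# To see why pileups are what matter, consider this hypothetical sketch
# (not part of the test):  when every key shares one hash code, each
# dict insertion has to probe past all previously stored colliding keys,
# so building the dict below does quadratic work overall:
#
#     >>> class AllSameHash:
#     ...     def __hash__(self):
#     ...         return 42        # every instance piles up on one code
#     ...
#     >>> d = {AllSameHash(): None for _ in range(10_000)}  # crawls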
#
# So we compute various statistics here based on what a "truly random"
# hash would do, but don't automate "pass or fail" based on those
# results.  Instead those are viewed as inputs to human judgment, and the
# automated tests merely ensure we get the _same_ results across
# platforms.  In fact, we normally don't bother to run them at all -
# set RUN_ALL_HASH_TESTS to force it.
#
# When global JUST_SHOW_HASH_RESULTS is True, the tuple hash statistics
# are just displayed to stdout.  A typical output line looks like:
#
# old tuple test; 32-bit upper hash codes; \
#             pileup 49 mean 7.4 coll 52 z +16.4
#
# "old tuple test" is just a string name for the test being run.
#
# "32-bit upper hash codes" means this was run under a 64-bit build and
# we've shifted away the lower 32 bits of the hash codes.
#
# "pileup" is 0 if there were no collisions across those hash codes.
# It's 1 less than the maximum number of times any single hash code was
# seen.  So in this case, there was (at least) one hash code that was
# seen 50 times:  that hash code "piled up" 49 more times than ideal.
#
# "mean" is the number of collisions a perfectly random hash function
# would have yielded, on average.
#
# "coll" is the number of collisions actually seen.
#
# "z" is "coll - mean" divided by the standard deviation of the number
# of collisions a perfectly random hash function would suffer.  A
# positive value is "worse than random", and a negative value "better
# than random".  Anything of magnitude greater than 3 would be highly
# suspect for a hash function that claimed to be random.  It's
# essentially impossible that a truly random function would deliver a
# result 16.4 sdevs "worse than random".
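#
# As a tiny worked example of those definitions, mirroring exactly what
# tryone_inner() computes above:
#
#     >>> from collections import Counter
#     >>> hashes = [3, 7, 3, 3, 9]     # 5 balls
#     >>> c = Counter(hashes)          # Counter({3: 3, 7: 1, 9: 1})
#     >>> len(hashes) - len(c)         # "coll": 5 balls, 3 distinct codes
#     2
#     >>> max(c.values()) - 1          # "pileup": code 3 was seen 3 times
#     2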
#
# But we don't care here!  That's why the test isn't coded to fail.
# Knowing something about how the high-order hash code bits behave
# provides insight, but is irrelevant to how the dict and set lookup
# code performs.  The low-order bits are much more important to that,
# and on the same test those did "just like random":
#
# old tuple test; 32-bit lower hash codes; \
#            pileup 1 mean 7.4 coll 7 z -0.2
#
# So there are always tradeoffs to consider.  For another:
#
# 0..99 << 60 by 3; 32-bit hash codes; \
#            pileup 0 mean 116.4 coll 0 z -10.8
#
# That was run under a 32-bit build, and is spectacularly "better than
# random".  On a 64-bit build the wider hash codes are fine too:
#
# 0..99 << 60 by 3; 64-bit hash codes; \
#             pileup 0 mean 0.0 coll 0 z -0.0
#
# but their lower 32 bits are poor:
#
# 0..99 << 60 by 3; 32-bit lower hash codes; \
#             pileup 1 mean 116.4 coll 324 z +19.2
#
# In a statistical sense that's waaaaay too many collisions, but (a) 324
# collisions out of a million hash codes isn't anywhere near being a
# real problem; and, (b) the worst pileup on a single hash code is a measly
# 1 extra.  It's a relatively poor case for the tuple hash, but still
# fine for practical use.
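#
# (For calibration:  the "mean 116.4" is the standard balls-into-bins
# expectation.  Throwing n balls uniformly at random into B bins yields
# about n**2/(2*B) collisions when n is much smaller than B - presumably
# the kind of computation support.collision_stats() does.  With a
# million hashes and 2**32 bins:
#
#     >>> n, B = 100**3, 2**32
#     >>> round(n**2 / (2*B), 1)
#     116.4
#
# and the sdev is about sqrt(116.4) ~= 10.8, which is why 0 collisions
# scored z -10.8 above.)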
#
# That one is tolerable.  What Python 3.7.1 produced for the hashes of
# itertools.product([0, 0.5], repeat=18) is not:  even with a fat 64-bit
# hashcode, the highest pileup was over 16,000 - making a dict/set
# lookup on one of the colliding values thousands of times slower (on
# average) than we expect.
#
# [0, 0.5] by 18; 64-bit hash codes; \
#            pileup 16,383 mean 0.0 coll 262,128 z +6073641856.9
# [0, 0.5] by 18; 32-bit lower hash codes; \
#            pileup 262,143 mean 8.0 coll 262,143 z +92683.6
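#
# To put those numbers in perspective:  there are 2**18 == 262,144
# inputs, so "coll 262,128" means they produced only 16 distinct 64-bit
# hash codes, and "pileup 16,383" means the most popular of those codes
# was shared by 16,384 inputs.  In the lower 32 bits it's total
# breakdown:  "coll 262,143" means all 262,144 inputs collapsed onto a
# single 32-bit code.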

if __name__ == "__main__":
    unittest.main()