1// Copyright 2013 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package runtime_test
6
7import (
8	"fmt"
9	"math"
10	"math/rand"
11	. "runtime"
12	"strings"
13	"testing"
14	"unsafe"
15)
16
17func TestMemHash32Equality(t *testing.T) {
18	if *UseAeshash {
19		t.Skip("skipping since AES hash implementation is used")
20	}
21	var b [4]byte
22	r := rand.New(rand.NewSource(1234))
23	seed := uintptr(r.Uint64())
24	for i := 0; i < 100; i++ {
25		randBytes(r, b[:])
26		got := MemHash32(unsafe.Pointer(&b), seed)
27		want := MemHash(unsafe.Pointer(&b), seed, 4)
28		if got != want {
29			t.Errorf("MemHash32(%x, %v) = %v; want %v", b, seed, got, want)
30		}
31	}
32}
33
34func TestMemHash64Equality(t *testing.T) {
35	if *UseAeshash {
36		t.Skip("skipping since AES hash implementation is used")
37	}
38	var b [8]byte
39	r := rand.New(rand.NewSource(1234))
40	seed := uintptr(r.Uint64())
41	for i := 0; i < 100; i++ {
42		randBytes(r, b[:])
43		got := MemHash64(unsafe.Pointer(&b), seed)
44		want := MemHash(unsafe.Pointer(&b), seed, 8)
45		if got != want {
46			t.Errorf("MemHash64(%x, %v) = %v; want %v", b, seed, got, want)
47		}
48	}
49}
50
51// Smhasher is a torture test for hash functions.
52// https://code.google.com/p/smhasher/
53// This code is a port of some of the Smhasher tests to Go.
54//
55// The current AES hash function passes Smhasher. Our fallback
56// hash functions don't, so we only enable the difficult tests when
57// we know the AES implementation is available.
58
59// Sanity checks.
60// hash should not depend on values outside key.
61// hash should not depend on alignment.
62func TestSmhasherSanity(t *testing.T) {
63	r := rand.New(rand.NewSource(1234))
64	const REP = 10
65	const KEYMAX = 128
66	const PAD = 16
67	const OFFMAX = 16
68	for k := 0; k < REP; k++ {
69		for n := 0; n < KEYMAX; n++ {
70			for i := 0; i < OFFMAX; i++ {
71				var b [KEYMAX + OFFMAX + 2*PAD]byte
72				var c [KEYMAX + OFFMAX + 2*PAD]byte
73				randBytes(r, b[:])
74				randBytes(r, c[:])
75				copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n])
76				if BytesHash(b[PAD:PAD+n], 0) != BytesHash(c[PAD+i:PAD+i+n], 0) {
77					t.Errorf("hash depends on bytes outside key")
78				}
79			}
80		}
81	}
82}
83
// HashSet accumulates hash values so that collision statistics can be
// computed afterwards: n counts every value added, while m keeps only the
// distinct ones, so n-len(m) is the number of collisions observed.
type HashSet struct {
	m map[uintptr]struct{} // set of hashes added
	n int                  // number of hashes added
}
88
89func newHashSet() *HashSet {
90	return &HashSet{make(map[uintptr]struct{}), 0}
91}
92func (s *HashSet) add(h uintptr) {
93	s.m[h] = struct{}{}
94	s.n++
95}
96func (s *HashSet) addS(x string) {
97	s.add(StringHash(x, 0))
98}
99func (s *HashSet) addB(x []byte) {
100	s.add(BytesHash(x, 0))
101}
102func (s *HashSet) addS_seed(x string, seed uintptr) {
103	s.add(StringHash(x, seed))
104}
105func (s *HashSet) check(t *testing.T) {
106	const SLOP = 50.0
107	collisions := s.n - len(s.m)
108	pairs := int64(s.n) * int64(s.n-1) / 2
109	expected := float64(pairs) / math.Pow(2.0, float64(hashSize))
110	stddev := math.Sqrt(expected)
111	if float64(collisions) > expected+SLOP*(3*stddev+1) {
112		t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f threshold=%f", collisions, expected, stddev, expected+SLOP*(3*stddev+1))
113	}
114}
115
116// a string plus adding zeros must make distinct hashes
117func TestSmhasherAppendedZeros(t *testing.T) {
118	s := "hello" + strings.Repeat("\x00", 256)
119	h := newHashSet()
120	for i := 0; i <= len(s); i++ {
121		h.addS(s[:i])
122	}
123	h.check(t)
124}
125
126// All 0-3 byte strings have distinct hashes.
127func TestSmhasherSmallKeys(t *testing.T) {
128	h := newHashSet()
129	var b [3]byte
130	for i := 0; i < 256; i++ {
131		b[0] = byte(i)
132		h.addB(b[:1])
133		for j := 0; j < 256; j++ {
134			b[1] = byte(j)
135			h.addB(b[:2])
136			if !testing.Short() {
137				for k := 0; k < 256; k++ {
138					b[2] = byte(k)
139					h.addB(b[:3])
140				}
141			}
142		}
143	}
144	h.check(t)
145}
146
147// Different length strings of all zeros have distinct hashes.
148func TestSmhasherZeros(t *testing.T) {
149	N := 256 * 1024
150	if testing.Short() {
151		N = 1024
152	}
153	h := newHashSet()
154	b := make([]byte, N)
155	for i := 0; i <= N; i++ {
156		h.addB(b[:i])
157	}
158	h.check(t)
159}
160
161// Strings with up to two nonzero bytes all have distinct hashes.
162func TestSmhasherTwoNonzero(t *testing.T) {
163	if GOARCH == "wasm" {
164		t.Skip("Too slow on wasm")
165	}
166	if testing.Short() {
167		t.Skip("Skipping in short mode")
168	}
169	h := newHashSet()
170	for n := 2; n <= 16; n++ {
171		twoNonZero(h, n)
172	}
173	h.check(t)
174}
175func twoNonZero(h *HashSet, n int) {
176	b := make([]byte, n)
177
178	// all zero
179	h.addB(b)
180
181	// one non-zero byte
182	for i := 0; i < n; i++ {
183		for x := 1; x < 256; x++ {
184			b[i] = byte(x)
185			h.addB(b)
186			b[i] = 0
187		}
188	}
189
190	// two non-zero bytes
191	for i := 0; i < n; i++ {
192		for x := 1; x < 256; x++ {
193			b[i] = byte(x)
194			for j := i + 1; j < n; j++ {
195				for y := 1; y < 256; y++ {
196					b[j] = byte(y)
197					h.addB(b)
198					b[j] = 0
199				}
200			}
201			b[i] = 0
202		}
203	}
204}
205
206// Test strings with repeats, like "abcdabcdabcdabcd..."
207func TestSmhasherCyclic(t *testing.T) {
208	if testing.Short() {
209		t.Skip("Skipping in short mode")
210	}
211	r := rand.New(rand.NewSource(1234))
212	const REPEAT = 8
213	const N = 1000000
214	for n := 4; n <= 12; n++ {
215		h := newHashSet()
216		b := make([]byte, REPEAT*n)
217		for i := 0; i < N; i++ {
218			b[0] = byte(i * 79 % 97)
219			b[1] = byte(i * 43 % 137)
220			b[2] = byte(i * 151 % 197)
221			b[3] = byte(i * 199 % 251)
222			randBytes(r, b[4:n])
223			for j := n; j < n*REPEAT; j++ {
224				b[j] = b[j-n]
225			}
226			h.addB(b)
227		}
228		h.check(t)
229	}
230}
231
232// Test strings with only a few bits set
233func TestSmhasherSparse(t *testing.T) {
234	if GOARCH == "wasm" {
235		t.Skip("Too slow on wasm")
236	}
237	if testing.Short() {
238		t.Skip("Skipping in short mode")
239	}
240	sparse(t, 32, 6)
241	sparse(t, 40, 6)
242	sparse(t, 48, 5)
243	sparse(t, 56, 5)
244	sparse(t, 64, 5)
245	sparse(t, 96, 4)
246	sparse(t, 256, 3)
247	sparse(t, 2048, 2)
248}
249func sparse(t *testing.T, n int, k int) {
250	b := make([]byte, n/8)
251	h := newHashSet()
252	setbits(h, b, 0, k)
253	h.check(t)
254}
255
256// set up to k bits at index i and greater
257func setbits(h *HashSet, b []byte, i int, k int) {
258	h.addB(b)
259	if k == 0 {
260		return
261	}
262	for j := i; j < len(b)*8; j++ {
263		b[j/8] |= byte(1 << uint(j&7))
264		setbits(h, b, j+1, k-1)
265		b[j/8] &= byte(^(1 << uint(j&7)))
266	}
267}
268
269// Test all possible combinations of n blocks from the set s.
270// "permutation" is a bad name here, but it is what Smhasher uses.
271func TestSmhasherPermutation(t *testing.T) {
272	if GOARCH == "wasm" {
273		t.Skip("Too slow on wasm")
274	}
275	if testing.Short() {
276		t.Skip("Skipping in short mode")
277	}
278	permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8)
279	permutation(t, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8)
280	permutation(t, []uint32{0, 1}, 20)
281	permutation(t, []uint32{0, 1 << 31}, 20)
282	permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6)
283}
284func permutation(t *testing.T, s []uint32, n int) {
285	b := make([]byte, n*4)
286	h := newHashSet()
287	genPerm(h, b, s, 0)
288	h.check(t)
289}
290func genPerm(h *HashSet, b []byte, s []uint32, n int) {
291	h.addB(b[:n])
292	if n == len(b) {
293		return
294	}
295	for _, v := range s {
296		b[n] = byte(v)
297		b[n+1] = byte(v >> 8)
298		b[n+2] = byte(v >> 16)
299		b[n+3] = byte(v >> 24)
300		genPerm(h, b, s, n+4)
301	}
302}
303
// Key abstracts a hashable key so the statistical tests below can be run
// over several key representations: byte slices, fixed-width integers,
// and interface values.
type Key interface {
	clear()              // set bits all to 0
	random(r *rand.Rand) // set key to something random
	bits() int           // how many bits key has
	flipBit(i int)       // flip bit i of the key
	hash() uintptr       // hash the key
	name() string        // for error reporting
}
312
// BytesKey is a Key backed by a byte slice of fixed length.
type BytesKey struct {
	b []byte
}
316
317func (k *BytesKey) clear() {
318	for i := range k.b {
319		k.b[i] = 0
320	}
321}
// random fills the key with pseudo-random bytes from r.
func (k *BytesKey) random(r *rand.Rand) {
	randBytes(r, k.b)
}
325func (k *BytesKey) bits() int {
326	return len(k.b) * 8
327}
328func (k *BytesKey) flipBit(i int) {
329	k.b[i>>3] ^= byte(1 << uint(i&7))
330}
// hash returns the runtime hash of the key bytes with seed 0.
func (k *BytesKey) hash() uintptr {
	return BytesHash(k.b, 0)
}
// name identifies this key type (including its length) in failure messages.
func (k *BytesKey) name() string {
	return fmt.Sprintf("bytes%d", len(k.b))
}
337
// Int32Key is a Key backed by a single uint32.
type Int32Key struct {
	i uint32
}
341
// clear resets the key to all-zero bits.
func (k *Int32Key) clear() {
	k.i = 0
}
// random sets the key to a uniformly random 32-bit value.
func (k *Int32Key) random(r *rand.Rand) {
	k.i = r.Uint32()
}
// bits reports the key width in bits.
func (k *Int32Key) bits() int {
	return 32
}
351func (k *Int32Key) flipBit(i int) {
352	k.i ^= 1 << uint(i)
353}
// hash returns the runtime 32-bit integer hash of the key with seed 0.
func (k *Int32Key) hash() uintptr {
	return Int32Hash(k.i, 0)
}
// name identifies this key type in failure messages.
func (k *Int32Key) name() string {
	return "int32"
}
360
// Int64Key is a Key backed by a single uint64.
type Int64Key struct {
	i uint64
}
364
// clear resets the key to all-zero bits.
func (k *Int64Key) clear() {
	k.i = 0
}
368func (k *Int64Key) random(r *rand.Rand) {
369	k.i = uint64(r.Uint32()) + uint64(r.Uint32())<<32
370}
// bits reports the key width in bits.
func (k *Int64Key) bits() int {
	return 64
}
374func (k *Int64Key) flipBit(i int) {
375	k.i ^= 1 << uint(i)
376}
// hash returns the runtime 64-bit integer hash of the key with seed 0.
func (k *Int64Key) hash() uintptr {
	return Int64Hash(k.i, 0)
}
// name identifies this key type in failure messages.
func (k *Int64Key) name() string {
	return "int64"
}
383
// EfaceKey is a Key stored in an empty interface (any), exercising the
// runtime's empty-interface hashing path.
type EfaceKey struct {
	i any
}
387
// clear resets the key to the nil interface value.
func (k *EfaceKey) clear() {
	k.i = nil
}
// random sets the key to a random uint64 boxed in an interface.
// NOTE(review): Int63 yields only 63 random bits even though bits()
// reports 64; bit 63 is never randomized here, though flipBit can still
// toggle it.
func (k *EfaceKey) random(r *rand.Rand) {
	k.i = uint64(r.Int63())
}
// bits reports the key width used by the avalanche/windowed tests.
func (k *EfaceKey) bits() int {
	// use 64 bits. This tests inlined interfaces
	// on 64-bit targets and indirect interfaces on
	// 32-bit targets.
	return 64
}
400func (k *EfaceKey) flipBit(i int) {
401	k.i = k.i.(uint64) ^ uint64(1)<<uint(i)
402}
// hash returns the runtime empty-interface hash of the key with seed 0.
func (k *EfaceKey) hash() uintptr {
	return EfaceHash(k.i, 0)
}
// name identifies this key type in failure messages.
func (k *EfaceKey) name() string {
	return "Eface"
}
409
// IfaceKey is a Key stored in a non-empty interface, exercising the
// runtime's itab-based interface hashing path.
type IfaceKey struct {
	i interface {
		F()
	}
}
// fInter is a uint64 that satisfies IfaceKey's interface.
type fInter uint64

// F is a no-op; it exists only so fInter implements the interface.
func (x fInter) F() {
}
419
// clear resets the key to the nil interface value.
func (k *IfaceKey) clear() {
	k.i = nil
}
// random sets the key to a random fInter value boxed in the interface.
// NOTE(review): Int63 yields only 63 random bits even though bits()
// reports 64; bit 63 is never randomized here, though flipBit can still
// toggle it.
func (k *IfaceKey) random(r *rand.Rand) {
	k.i = fInter(r.Int63())
}
// bits reports the key width used by the avalanche/windowed tests.
func (k *IfaceKey) bits() int {
	// use 64 bits. This tests inlined interfaces
	// on 64-bit targets and indirect interfaces on
	// 32-bit targets.
	return 64
}
432func (k *IfaceKey) flipBit(i int) {
433	k.i = k.i.(fInter) ^ fInter(1)<<uint(i)
434}
// hash returns the runtime interface hash of the key with seed 0.
func (k *IfaceKey) hash() uintptr {
	return IfaceHash(k.i, 0)
}
// name identifies this key type in failure messages.
func (k *IfaceKey) name() string {
	return "Iface"
}
441
442// Flipping a single bit of a key should flip each output bit with 50% probability.
443func TestSmhasherAvalanche(t *testing.T) {
444	if GOARCH == "wasm" {
445		t.Skip("Too slow on wasm")
446	}
447	if testing.Short() {
448		t.Skip("Skipping in short mode")
449	}
450	avalancheTest1(t, &BytesKey{make([]byte, 2)})
451	avalancheTest1(t, &BytesKey{make([]byte, 4)})
452	avalancheTest1(t, &BytesKey{make([]byte, 8)})
453	avalancheTest1(t, &BytesKey{make([]byte, 16)})
454	avalancheTest1(t, &BytesKey{make([]byte, 32)})
455	avalancheTest1(t, &BytesKey{make([]byte, 200)})
456	avalancheTest1(t, &Int32Key{})
457	avalancheTest1(t, &Int64Key{})
458	avalancheTest1(t, &EfaceKey{})
459	avalancheTest1(t, &IfaceKey{})
460}
// avalancheTest1 runs the avalanche test on a single key type: it flips
// each input bit of a random key REP times, counts how often each output
// bit changes, and fails if any count is statistically inconsistent with
// a fair coin (with generous slack).
func avalancheTest1(t *testing.T, k Key) {
	const REP = 100000
	r := rand.New(rand.NewSource(1234))
	n := k.bits()

	// grid[i][j] is a count of whether flipping
	// input bit i affects output bit j.
	grid := make([][hashSize]int, n)

	for z := 0; z < REP; z++ {
		// pick a random key, hash it
		k.random(r)
		h := k.hash()

		// flip each bit, hash & compare the results
		for i := 0; i < n; i++ {
			k.flipBit(i)
			d := h ^ k.hash()
			k.flipBit(i) // restore the key for the next bit

			// record the effects of that bit flip
			g := &grid[i]
			for j := 0; j < hashSize; j++ {
				g[j] += int(d & 1)
				d >>= 1
			}
		}
	}

	// Each entry in the grid should be about REP/2.
	// More precisely, we did N = k.bits() * hashSize experiments where
	// each is the sum of REP coin flips. We want to find bounds on the
	// sum of coin flips such that a truly random experiment would have
	// all sums inside those bounds with 99% probability.
	N := n * hashSize
	var c float64
	// find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999
	for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 {
	}
	c *= 4.0 // allowed slack - we don't need to be perfectly random
	mean := .5 * REP
	stddev := .5 * math.Sqrt(REP)
	low := int(mean - c*stddev)
	high := int(mean + c*stddev)
	for i := 0; i < n; i++ {
		for j := 0; j < hashSize; j++ {
			x := grid[i][j]
			if x < low || x > high {
				t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP)
			}
		}
	}
}
514
515// All bit rotations of a set of distinct keys
516func TestSmhasherWindowed(t *testing.T) {
517	t.Logf("32 bit keys")
518	windowed(t, &Int32Key{})
519	t.Logf("64 bit keys")
520	windowed(t, &Int64Key{})
521	t.Logf("string keys")
522	windowed(t, &BytesKey{make([]byte, 128)})
523}
524func windowed(t *testing.T, k Key) {
525	if GOARCH == "wasm" {
526		t.Skip("Too slow on wasm")
527	}
528	if testing.Short() {
529		t.Skip("Skipping in short mode")
530	}
531	const BITS = 16
532
533	for r := 0; r < k.bits(); r++ {
534		h := newHashSet()
535		for i := 0; i < 1<<BITS; i++ {
536			k.clear()
537			for j := 0; j < BITS; j++ {
538				if i>>uint(j)&1 != 0 {
539					k.flipBit((j + r) % k.bits())
540				}
541			}
542			h.add(k.hash())
543		}
544		h.check(t)
545	}
546}
547
548// All keys of the form prefix + [A-Za-z0-9]*N + suffix.
549func TestSmhasherText(t *testing.T) {
550	if testing.Short() {
551		t.Skip("Skipping in short mode")
552	}
553	text(t, "Foo", "Bar")
554	text(t, "FooBar", "")
555	text(t, "", "FooBar")
556}
557func text(t *testing.T, prefix, suffix string) {
558	const N = 4
559	const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789"
560	const L = len(S)
561	b := make([]byte, len(prefix)+N+len(suffix))
562	copy(b, prefix)
563	copy(b[len(prefix)+N:], suffix)
564	h := newHashSet()
565	c := b[len(prefix):]
566	for i := 0; i < L; i++ {
567		c[0] = S[i]
568		for j := 0; j < L; j++ {
569			c[1] = S[j]
570			for k := 0; k < L; k++ {
571				c[2] = S[k]
572				for x := 0; x < L; x++ {
573					c[3] = S[x]
574					h.addB(b)
575				}
576			}
577		}
578	}
579	h.check(t)
580}
581
582// Make sure different seed values generate different hashes.
583func TestSmhasherSeed(t *testing.T) {
584	h := newHashSet()
585	const N = 100000
586	s := "hello"
587	for i := 0; i < N; i++ {
588		h.addS_seed(s, uintptr(i))
589	}
590	h.check(t)
591}
592
// size of the hash output (32 or 64 bits).
// ^uintptr(0)>>63 is 1 on 64-bit platforms and 0 on 32-bit ones, so the
// shifted term adds 32 only on 64-bit targets.
const hashSize = 32 + int(^uintptr(0)>>63<<5)
595
596func randBytes(r *rand.Rand, b []byte) {
597	for i := range b {
598		b[i] = byte(r.Uint32())
599	}
600}
601
602func benchmarkHash(b *testing.B, n int) {
603	s := strings.Repeat("A", n)
604
605	for i := 0; i < b.N; i++ {
606		StringHash(s, 0)
607	}
608	b.SetBytes(int64(n))
609}
610
// Benchmarks of StringHash across a range of key sizes.
func BenchmarkHash5(b *testing.B)     { benchmarkHash(b, 5) }
func BenchmarkHash16(b *testing.B)    { benchmarkHash(b, 16) }
func BenchmarkHash64(b *testing.B)    { benchmarkHash(b, 64) }
func BenchmarkHash1024(b *testing.B)  { benchmarkHash(b, 1024) }
func BenchmarkHash65536(b *testing.B) { benchmarkHash(b, 65536) }
616
// TestArrayHash checks that string-array keys containing empty strings
// still hash well, using map overflow-bucket allocations as the proxy.
func TestArrayHash(t *testing.T) {
	// Make sure that "" in arrays hash correctly. The hash
	// should at least scramble the input seed so that, e.g.,
	// {"","foo"} and {"foo",""} have different hashes.

	// If the hash is bad, then all (8 choose 4) = 70 keys
	// have the same hash. If so, we allocate 70/8 = 8
	// overflow buckets. If the hash is good we don't
	// normally allocate any overflow buckets, and the
	// probability of even one or two overflows goes down rapidly.
	// (There is always 1 allocation of the bucket array. The map
	// header is allocated on the stack.)
	f := func() {
		// Make the key type at most 128 bytes. Otherwise,
		// we get an allocation per key.
		type key [8]string
		m := make(map[key]bool, 70)

		// fill m with keys that have 4 "foo"s and 4 ""s.
		for i := 0; i < 256; i++ {
			var k key
			cnt := 0
			// Bit j of i decides whether element j is "foo" or "".
			for j := uint(0); j < 8; j++ {
				if i>>j&1 != 0 {
					k[j] = "foo"
					cnt++
				}
			}
			if cnt == 4 {
				m[k] = true
			}
		}
		if len(m) != 70 {
			t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m))
		}
	}
	// Allocation count is the hash-quality signal; see the reasoning above.
	if n := testing.AllocsPerRun(10, f); n > 6 {
		t.Errorf("too many allocs %f - hash not balanced", n)
	}
}
// TestStructHash is the struct-key analogue of TestArrayHash: a struct of
// eight strings, four set to "foo" and four empty, must hash well enough
// that the map allocates essentially no overflow buckets.
func TestStructHash(t *testing.T) {
	// See the comment in TestArrayHash.
	f := func() {
		type key struct {
			a, b, c, d, e, f, g, h string
		}
		m := make(map[key]bool, 70)

		// fill m with keys that have 4 "foo"s and 4 ""s.
		// Each bit of i selects one field, mirroring the array version.
		for i := 0; i < 256; i++ {
			var k key
			cnt := 0
			if i&1 != 0 {
				k.a = "foo"
				cnt++
			}
			if i&2 != 0 {
				k.b = "foo"
				cnt++
			}
			if i&4 != 0 {
				k.c = "foo"
				cnt++
			}
			if i&8 != 0 {
				k.d = "foo"
				cnt++
			}
			if i&16 != 0 {
				k.e = "foo"
				cnt++
			}
			if i&32 != 0 {
				k.f = "foo"
				cnt++
			}
			if i&64 != 0 {
				k.g = "foo"
				cnt++
			}
			if i&128 != 0 {
				k.h = "foo"
				cnt++
			}
			if cnt == 4 {
				m[k] = true
			}
		}
		if len(m) != 70 {
			t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m))
		}
	}
	// Allocation count is the hash-quality signal; see TestArrayHash.
	if n := testing.AllocsPerRun(10, f); n > 6 {
		t.Errorf("too many allocs %f - hash not balanced", n)
	}
}
713
// sink is a package-level destination for benchmark results so the
// compiler cannot optimize the measured loads away.
var sink uint64
715
716func BenchmarkAlignedLoad(b *testing.B) {
717	var buf [16]byte
718	p := unsafe.Pointer(&buf[0])
719	var s uint64
720	for i := 0; i < b.N; i++ {
721		s += ReadUnaligned64(p)
722	}
723	sink = s
724}
725
726func BenchmarkUnalignedLoad(b *testing.B) {
727	var buf [16]byte
728	p := unsafe.Pointer(&buf[1])
729	var s uint64
730	for i := 0; i < b.N; i++ {
731		s += ReadUnaligned64(p)
732	}
733	sink = s
734}
735
736func TestCollisions(t *testing.T) {
737	if testing.Short() {
738		t.Skip("Skipping in short mode")
739	}
740	for i := 0; i < 16; i++ {
741		for j := 0; j < 16; j++ {
742			if j == i {
743				continue
744			}
745			var a [16]byte
746			m := make(map[uint16]struct{}, 1<<16)
747			for n := 0; n < 1<<16; n++ {
748				a[i] = byte(n)
749				a[j] = byte(n >> 8)
750				m[uint16(BytesHash(a[:], 0))] = struct{}{}
751			}
752			if len(m) <= 1<<15 {
753				t.Errorf("too many collisions i=%d j=%d outputs=%d out of 65536\n", i, j, len(m))
754			}
755		}
756	}
757}
758