1// Copyright 2013 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package runtime_test
6
7import (
8	"fmt"
9	"math"
10	"math/rand"
11	. "runtime"
12	"strings"
13	"testing"
14	"unsafe"
15)
16
17func TestMemHash32Equality(t *testing.T) {
18	if *UseAeshash {
19		t.Skip("skipping since AES hash implementation is used")
20	}
21	var b [4]byte
22	r := rand.New(rand.NewSource(1234))
23	seed := uintptr(r.Uint64())
24	for i := 0; i < 100; i++ {
25		randBytes(r, b[:])
26		got := MemHash32(unsafe.Pointer(&b), seed)
27		want := MemHash(unsafe.Pointer(&b), seed, 4)
28		if got != want {
29			t.Errorf("MemHash32(%x, %v) = %v; want %v", b, seed, got, want)
30		}
31	}
32}
33
34func TestMemHash64Equality(t *testing.T) {
35	if *UseAeshash {
36		t.Skip("skipping since AES hash implementation is used")
37	}
38	var b [8]byte
39	r := rand.New(rand.NewSource(1234))
40	seed := uintptr(r.Uint64())
41	for i := 0; i < 100; i++ {
42		randBytes(r, b[:])
43		got := MemHash64(unsafe.Pointer(&b), seed)
44		want := MemHash(unsafe.Pointer(&b), seed, 8)
45		if got != want {
46			t.Errorf("MemHash64(%x, %v) = %v; want %v", b, seed, got, want)
47		}
48	}
49}
50
51// Smhasher is a torture test for hash functions.
52// https://code.google.com/p/smhasher/
53// This code is a port of some of the Smhasher tests to Go.
54//
55// The current AES hash function passes Smhasher. Our fallback
56// hash functions don't, so we only enable the difficult tests when
57// we know the AES implementation is available.
58
59// Sanity checks.
60// hash should not depend on values outside key.
61// hash should not depend on alignment.
62func TestSmhasherSanity(t *testing.T) {
63	r := rand.New(rand.NewSource(1234))
64	const REP = 10
65	const KEYMAX = 128
66	const PAD = 16
67	const OFFMAX = 16
68	for k := 0; k < REP; k++ {
69		for n := 0; n < KEYMAX; n++ {
70			for i := 0; i < OFFMAX; i++ {
71				var b [KEYMAX + OFFMAX + 2*PAD]byte
72				var c [KEYMAX + OFFMAX + 2*PAD]byte
73				randBytes(r, b[:])
74				randBytes(r, c[:])
75				copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n])
76				if BytesHash(b[PAD:PAD+n], 0) != BytesHash(c[PAD+i:PAD+i+n], 0) {
77					t.Errorf("hash depends on bytes outside key")
78				}
79			}
80		}
81	}
82}
83
84type HashSet struct {
85	m map[uintptr]struct{} // set of hashes added
86	n int                  // number of hashes added
87}
88
89func newHashSet() *HashSet {
90	return &HashSet{make(map[uintptr]struct{}), 0}
91}
92func (s *HashSet) add(h uintptr) {
93	s.m[h] = struct{}{}
94	s.n++
95}
96func (s *HashSet) addS(x string) {
97	s.add(StringHash(x, 0))
98}
99func (s *HashSet) addB(x []byte) {
100	s.add(BytesHash(x, 0))
101}
102func (s *HashSet) addS_seed(x string, seed uintptr) {
103	s.add(StringHash(x, seed))
104}
105func (s *HashSet) check(t *testing.T) {
106	const SLOP = 10.0
107	collisions := s.n - len(s.m)
108	//fmt.Printf("%d/%d\n", len(s.m), s.n)
109	pairs := int64(s.n) * int64(s.n-1) / 2
110	expected := float64(pairs) / math.Pow(2.0, float64(hashSize))
111	stddev := math.Sqrt(expected)
112	if float64(collisions) > expected+SLOP*(3*stddev+1) {
113		t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f", collisions, expected, stddev)
114	}
115}
116
117// a string plus adding zeros must make distinct hashes
118func TestSmhasherAppendedZeros(t *testing.T) {
119	s := "hello" + strings.Repeat("\x00", 256)
120	h := newHashSet()
121	for i := 0; i <= len(s); i++ {
122		h.addS(s[:i])
123	}
124	h.check(t)
125}
126
127// All 0-3 byte strings have distinct hashes.
128func TestSmhasherSmallKeys(t *testing.T) {
129	h := newHashSet()
130	var b [3]byte
131	for i := 0; i < 256; i++ {
132		b[0] = byte(i)
133		h.addB(b[:1])
134		for j := 0; j < 256; j++ {
135			b[1] = byte(j)
136			h.addB(b[:2])
137			if !testing.Short() {
138				for k := 0; k < 256; k++ {
139					b[2] = byte(k)
140					h.addB(b[:3])
141				}
142			}
143		}
144	}
145	h.check(t)
146}
147
148// Different length strings of all zeros have distinct hashes.
149func TestSmhasherZeros(t *testing.T) {
150	N := 256 * 1024
151	if testing.Short() {
152		N = 1024
153	}
154	h := newHashSet()
155	b := make([]byte, N)
156	for i := 0; i <= N; i++ {
157		h.addB(b[:i])
158	}
159	h.check(t)
160}
161
162// Strings with up to two nonzero bytes all have distinct hashes.
163func TestSmhasherTwoNonzero(t *testing.T) {
164	if testing.Short() {
165		t.Skip("Skipping in short mode")
166	}
167	h := newHashSet()
168	for n := 2; n <= 16; n++ {
169		twoNonZero(h, n)
170	}
171	h.check(t)
172}
173func twoNonZero(h *HashSet, n int) {
174	b := make([]byte, n)
175
176	// all zero
177	h.addB(b[:])
178
179	// one non-zero byte
180	for i := 0; i < n; i++ {
181		for x := 1; x < 256; x++ {
182			b[i] = byte(x)
183			h.addB(b[:])
184			b[i] = 0
185		}
186	}
187
188	// two non-zero bytes
189	for i := 0; i < n; i++ {
190		for x := 1; x < 256; x++ {
191			b[i] = byte(x)
192			for j := i + 1; j < n; j++ {
193				for y := 1; y < 256; y++ {
194					b[j] = byte(y)
195					h.addB(b[:])
196					b[j] = 0
197				}
198			}
199			b[i] = 0
200		}
201	}
202}
203
204// Test strings with repeats, like "abcdabcdabcdabcd..."
205func TestSmhasherCyclic(t *testing.T) {
206	if testing.Short() {
207		t.Skip("Skipping in short mode")
208	}
209	r := rand.New(rand.NewSource(1234))
210	const REPEAT = 8
211	const N = 1000000
212	for n := 4; n <= 12; n++ {
213		h := newHashSet()
214		b := make([]byte, REPEAT*n)
215		for i := 0; i < N; i++ {
216			b[0] = byte(i * 79 % 97)
217			b[1] = byte(i * 43 % 137)
218			b[2] = byte(i * 151 % 197)
219			b[3] = byte(i * 199 % 251)
220			randBytes(r, b[4:n])
221			for j := n; j < n*REPEAT; j++ {
222				b[j] = b[j-n]
223			}
224			h.addB(b)
225		}
226		h.check(t)
227	}
228}
229
230// Test strings with only a few bits set
231func TestSmhasherSparse(t *testing.T) {
232	if testing.Short() {
233		t.Skip("Skipping in short mode")
234	}
235	sparse(t, 32, 6)
236	sparse(t, 40, 6)
237	sparse(t, 48, 5)
238	sparse(t, 56, 5)
239	sparse(t, 64, 5)
240	sparse(t, 96, 4)
241	sparse(t, 256, 3)
242	sparse(t, 2048, 2)
243}
244func sparse(t *testing.T, n int, k int) {
245	b := make([]byte, n/8)
246	h := newHashSet()
247	setbits(h, b, 0, k)
248	h.check(t)
249}
250
251// set up to k bits at index i and greater
252func setbits(h *HashSet, b []byte, i int, k int) {
253	h.addB(b)
254	if k == 0 {
255		return
256	}
257	for j := i; j < len(b)*8; j++ {
258		b[j/8] |= byte(1 << uint(j&7))
259		setbits(h, b, j+1, k-1)
260		b[j/8] &= byte(^(1 << uint(j&7)))
261	}
262}
263
264// Test all possible combinations of n blocks from the set s.
265// "permutation" is a bad name here, but it is what Smhasher uses.
266func TestSmhasherPermutation(t *testing.T) {
267	if testing.Short() {
268		t.Skip("Skipping in short mode")
269	}
270	permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8)
271	permutation(t, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8)
272	permutation(t, []uint32{0, 1}, 20)
273	permutation(t, []uint32{0, 1 << 31}, 20)
274	permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6)
275}
276func permutation(t *testing.T, s []uint32, n int) {
277	b := make([]byte, n*4)
278	h := newHashSet()
279	genPerm(h, b, s, 0)
280	h.check(t)
281}
282func genPerm(h *HashSet, b []byte, s []uint32, n int) {
283	h.addB(b[:n])
284	if n == len(b) {
285		return
286	}
287	for _, v := range s {
288		b[n] = byte(v)
289		b[n+1] = byte(v >> 8)
290		b[n+2] = byte(v >> 16)
291		b[n+3] = byte(v >> 24)
292		genPerm(h, b, s, n+4)
293	}
294}
295
296type Key interface {
297	clear()              // set bits all to 0
298	random(r *rand.Rand) // set key to something random
299	bits() int           // how many bits key has
300	flipBit(i int)       // flip bit i of the key
301	hash() uintptr       // hash the key
302	name() string        // for error reporting
303}
304
305type BytesKey struct {
306	b []byte
307}
308
309func (k *BytesKey) clear() {
310	for i := range k.b {
311		k.b[i] = 0
312	}
313}
314func (k *BytesKey) random(r *rand.Rand) {
315	randBytes(r, k.b)
316}
317func (k *BytesKey) bits() int {
318	return len(k.b) * 8
319}
320func (k *BytesKey) flipBit(i int) {
321	k.b[i>>3] ^= byte(1 << uint(i&7))
322}
323func (k *BytesKey) hash() uintptr {
324	return BytesHash(k.b, 0)
325}
326func (k *BytesKey) name() string {
327	return fmt.Sprintf("bytes%d", len(k.b))
328}
329
330type Int32Key struct {
331	i uint32
332}
333
334func (k *Int32Key) clear() {
335	k.i = 0
336}
337func (k *Int32Key) random(r *rand.Rand) {
338	k.i = r.Uint32()
339}
340func (k *Int32Key) bits() int {
341	return 32
342}
343func (k *Int32Key) flipBit(i int) {
344	k.i ^= 1 << uint(i)
345}
346func (k *Int32Key) hash() uintptr {
347	return Int32Hash(k.i, 0)
348}
349func (k *Int32Key) name() string {
350	return "int32"
351}
352
353type Int64Key struct {
354	i uint64
355}
356
357func (k *Int64Key) clear() {
358	k.i = 0
359}
360func (k *Int64Key) random(r *rand.Rand) {
361	k.i = uint64(r.Uint32()) + uint64(r.Uint32())<<32
362}
363func (k *Int64Key) bits() int {
364	return 64
365}
366func (k *Int64Key) flipBit(i int) {
367	k.i ^= 1 << uint(i)
368}
369func (k *Int64Key) hash() uintptr {
370	return Int64Hash(k.i, 0)
371}
372func (k *Int64Key) name() string {
373	return "int64"
374}
375
376type EfaceKey struct {
377	i interface{}
378}
379
380func (k *EfaceKey) clear() {
381	k.i = nil
382}
383func (k *EfaceKey) random(r *rand.Rand) {
384	k.i = uint64(r.Int63())
385}
386func (k *EfaceKey) bits() int {
387	// use 64 bits. This tests inlined interfaces
388	// on 64-bit targets and indirect interfaces on
389	// 32-bit targets.
390	return 64
391}
392func (k *EfaceKey) flipBit(i int) {
393	k.i = k.i.(uint64) ^ uint64(1)<<uint(i)
394}
395func (k *EfaceKey) hash() uintptr {
396	return EfaceHash(k.i, 0)
397}
398func (k *EfaceKey) name() string {
399	return "Eface"
400}
401
402type IfaceKey struct {
403	i interface {
404		F()
405	}
406}
407type fInter uint64
408
409func (x fInter) F() {
410}
411
412func (k *IfaceKey) clear() {
413	k.i = nil
414}
415func (k *IfaceKey) random(r *rand.Rand) {
416	k.i = fInter(r.Int63())
417}
418func (k *IfaceKey) bits() int {
419	// use 64 bits. This tests inlined interfaces
420	// on 64-bit targets and indirect interfaces on
421	// 32-bit targets.
422	return 64
423}
424func (k *IfaceKey) flipBit(i int) {
425	k.i = k.i.(fInter) ^ fInter(1)<<uint(i)
426}
427func (k *IfaceKey) hash() uintptr {
428	return IfaceHash(k.i, 0)
429}
430func (k *IfaceKey) name() string {
431	return "Iface"
432}
433
434// Flipping a single bit of a key should flip each output bit with 50% probability.
435func TestSmhasherAvalanche(t *testing.T) {
436	if testing.Short() {
437		t.Skip("Skipping in short mode")
438	}
439	avalancheTest1(t, &BytesKey{make([]byte, 2)})
440	avalancheTest1(t, &BytesKey{make([]byte, 4)})
441	avalancheTest1(t, &BytesKey{make([]byte, 8)})
442	avalancheTest1(t, &BytesKey{make([]byte, 16)})
443	avalancheTest1(t, &BytesKey{make([]byte, 32)})
444	avalancheTest1(t, &BytesKey{make([]byte, 200)})
445	avalancheTest1(t, &Int32Key{})
446	avalancheTest1(t, &Int64Key{})
447	avalancheTest1(t, &EfaceKey{})
448	avalancheTest1(t, &IfaceKey{})
449}
450func avalancheTest1(t *testing.T, k Key) {
451	const REP = 100000
452	r := rand.New(rand.NewSource(1234))
453	n := k.bits()
454
455	// grid[i][j] is a count of whether flipping
456	// input bit i affects output bit j.
457	grid := make([][hashSize]int, n)
458
459	for z := 0; z < REP; z++ {
460		// pick a random key, hash it
461		k.random(r)
462		h := k.hash()
463
464		// flip each bit, hash & compare the results
465		for i := 0; i < n; i++ {
466			k.flipBit(i)
467			d := h ^ k.hash()
468			k.flipBit(i)
469
470			// record the effects of that bit flip
471			g := &grid[i]
472			for j := 0; j < hashSize; j++ {
473				g[j] += int(d & 1)
474				d >>= 1
475			}
476		}
477	}
478
479	// Each entry in the grid should be about REP/2.
480	// More precisely, we did N = k.bits() * hashSize experiments where
481	// each is the sum of REP coin flips. We want to find bounds on the
482	// sum of coin flips such that a truly random experiment would have
483	// all sums inside those bounds with 99% probability.
484	N := n * hashSize
485	var c float64
486	// find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999
487	for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 {
488	}
489	c *= 4.0 // allowed slack - we don't need to be perfectly random
490	mean := .5 * REP
491	stddev := .5 * math.Sqrt(REP)
492	low := int(mean - c*stddev)
493	high := int(mean + c*stddev)
494	for i := 0; i < n; i++ {
495		for j := 0; j < hashSize; j++ {
496			x := grid[i][j]
497			if x < low || x > high {
498				t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP)
499			}
500		}
501	}
502}
503
504// All bit rotations of a set of distinct keys
505func TestSmhasherWindowed(t *testing.T) {
506	windowed(t, &Int32Key{})
507	windowed(t, &Int64Key{})
508	windowed(t, &BytesKey{make([]byte, 128)})
509}
510func windowed(t *testing.T, k Key) {
511	if testing.Short() {
512		t.Skip("Skipping in short mode")
513	}
514	const BITS = 16
515
516	for r := 0; r < k.bits(); r++ {
517		h := newHashSet()
518		for i := 0; i < 1<<BITS; i++ {
519			k.clear()
520			for j := 0; j < BITS; j++ {
521				if i>>uint(j)&1 != 0 {
522					k.flipBit((j + r) % k.bits())
523				}
524			}
525			h.add(k.hash())
526		}
527		h.check(t)
528	}
529}
530
531// All keys of the form prefix + [A-Za-z0-9]*N + suffix.
532func TestSmhasherText(t *testing.T) {
533	if testing.Short() {
534		t.Skip("Skipping in short mode")
535	}
536	text(t, "Foo", "Bar")
537	text(t, "FooBar", "")
538	text(t, "", "FooBar")
539}
540func text(t *testing.T, prefix, suffix string) {
541	const N = 4
542	const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789"
543	const L = len(S)
544	b := make([]byte, len(prefix)+N+len(suffix))
545	copy(b, prefix)
546	copy(b[len(prefix)+N:], suffix)
547	h := newHashSet()
548	c := b[len(prefix):]
549	for i := 0; i < L; i++ {
550		c[0] = S[i]
551		for j := 0; j < L; j++ {
552			c[1] = S[j]
553			for k := 0; k < L; k++ {
554				c[2] = S[k]
555				for x := 0; x < L; x++ {
556					c[3] = S[x]
557					h.addB(b)
558				}
559			}
560		}
561	}
562	h.check(t)
563}
564
565// Make sure different seed values generate different hashes.
566func TestSmhasherSeed(t *testing.T) {
567	h := newHashSet()
568	const N = 100000
569	s := "hello"
570	for i := 0; i < N; i++ {
571		h.addS_seed(s, uintptr(i))
572	}
573	h.check(t)
574}
575
576// size of the hash output (32 or 64 bits)
577const hashSize = 32 + int(^uintptr(0)>>63<<5)
578
579func randBytes(r *rand.Rand, b []byte) {
580	for i := range b {
581		b[i] = byte(r.Uint32())
582	}
583}
584
585func benchmarkHash(b *testing.B, n int) {
586	s := strings.Repeat("A", n)
587
588	for i := 0; i < b.N; i++ {
589		StringHash(s, 0)
590	}
591	b.SetBytes(int64(n))
592}
593
594func BenchmarkHash5(b *testing.B)     { benchmarkHash(b, 5) }
595func BenchmarkHash16(b *testing.B)    { benchmarkHash(b, 16) }
596func BenchmarkHash64(b *testing.B)    { benchmarkHash(b, 64) }
597func BenchmarkHash1024(b *testing.B)  { benchmarkHash(b, 1024) }
598func BenchmarkHash65536(b *testing.B) { benchmarkHash(b, 65536) }
599
600func TestArrayHash(t *testing.T) {
601	if Compiler == "gccgo" {
602		t.Skip("does not work on gccgo without better escape analysis")
603	}
604
605	// Make sure that "" in arrays hash correctly. The hash
606	// should at least scramble the input seed so that, e.g.,
607	// {"","foo"} and {"foo",""} have different hashes.
608
609	// If the hash is bad, then all (8 choose 4) = 70 keys
610	// have the same hash. If so, we allocate 70/8 = 8
611	// overflow buckets. If the hash is good we don't
612	// normally allocate any overflow buckets, and the
613	// probability of even one or two overflows goes down rapidly.
614	// (There is always 1 allocation of the bucket array. The map
615	// header is allocated on the stack.)
616	f := func() {
617		// Make the key type at most 128 bytes. Otherwise,
618		// we get an allocation per key.
619		type key [8]string
620		m := make(map[key]bool, 70)
621
622		// fill m with keys that have 4 "foo"s and 4 ""s.
623		for i := 0; i < 256; i++ {
624			var k key
625			cnt := 0
626			for j := uint(0); j < 8; j++ {
627				if i>>j&1 != 0 {
628					k[j] = "foo"
629					cnt++
630				}
631			}
632			if cnt == 4 {
633				m[k] = true
634			}
635		}
636		if len(m) != 70 {
637			t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m))
638		}
639	}
640	if n := testing.AllocsPerRun(10, f); n > 6 {
641		t.Errorf("too many allocs %f - hash not balanced", n)
642	}
643}
644func TestStructHash(t *testing.T) {
645	// See the comment in TestArrayHash.
646	f := func() {
647		type key struct {
648			a, b, c, d, e, f, g, h string
649		}
650		m := make(map[key]bool, 70)
651
652		// fill m with keys that have 4 "foo"s and 4 ""s.
653		for i := 0; i < 256; i++ {
654			var k key
655			cnt := 0
656			if i&1 != 0 {
657				k.a = "foo"
658				cnt++
659			}
660			if i&2 != 0 {
661				k.b = "foo"
662				cnt++
663			}
664			if i&4 != 0 {
665				k.c = "foo"
666				cnt++
667			}
668			if i&8 != 0 {
669				k.d = "foo"
670				cnt++
671			}
672			if i&16 != 0 {
673				k.e = "foo"
674				cnt++
675			}
676			if i&32 != 0 {
677				k.f = "foo"
678				cnt++
679			}
680			if i&64 != 0 {
681				k.g = "foo"
682				cnt++
683			}
684			if i&128 != 0 {
685				k.h = "foo"
686				cnt++
687			}
688			if cnt == 4 {
689				m[k] = true
690			}
691		}
692		if len(m) != 70 {
693			t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m))
694		}
695	}
696	if n := testing.AllocsPerRun(10, f); n > 6 {
697		t.Errorf("too many allocs %f - hash not balanced", n)
698	}
699}
700
701var sink uint64
702
703func BenchmarkAlignedLoad(b *testing.B) {
704	var buf [16]byte
705	p := unsafe.Pointer(&buf[0])
706	var s uint64
707	for i := 0; i < b.N; i++ {
708		s += ReadUnaligned64(p)
709	}
710	sink = s
711}
712
713func BenchmarkUnalignedLoad(b *testing.B) {
714	var buf [16]byte
715	p := unsafe.Pointer(&buf[1])
716	var s uint64
717	for i := 0; i < b.N; i++ {
718		s += ReadUnaligned64(p)
719	}
720	sink = s
721}
722
723func TestCollisions(t *testing.T) {
724	if testing.Short() {
725		t.Skip("Skipping in short mode")
726	}
727	for i := 0; i < 16; i++ {
728		for j := 0; j < 16; j++ {
729			if j == i {
730				continue
731			}
732			var a [16]byte
733			m := make(map[uint16]struct{}, 1<<16)
734			for n := 0; n < 1<<16; n++ {
735				a[i] = byte(n)
736				a[j] = byte(n >> 8)
737				m[uint16(BytesHash(a[:], 0))] = struct{}{}
738			}
739			if len(m) <= 1<<15 {
740				t.Errorf("too many collisions i=%d j=%d outputs=%d out of 65536\n", i, j, len(m))
741			}
742		}
743	}
744}
745