1// Copyright 2013 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package runtime_test
6
7import (
8	"fmt"
9	"math"
10	"math/rand"
11	. "runtime"
12	"strings"
13	"testing"
14	"unsafe"
15)
16
17// Smhasher is a torture test for hash functions.
18// https://code.google.com/p/smhasher/
19// This code is a port of some of the Smhasher tests to Go.
20//
21// The current AES hash function passes Smhasher. Our fallback
22// hash functions don't, so we only enable the difficult tests when
23// we know the AES implementation is available.
24
25// Sanity checks.
26// hash should not depend on values outside key.
27// hash should not depend on alignment.
28func TestSmhasherSanity(t *testing.T) {
29	r := rand.New(rand.NewSource(1234))
30	const REP = 10
31	const KEYMAX = 128
32	const PAD = 16
33	const OFFMAX = 16
34	for k := 0; k < REP; k++ {
35		for n := 0; n < KEYMAX; n++ {
36			for i := 0; i < OFFMAX; i++ {
37				var b [KEYMAX + OFFMAX + 2*PAD]byte
38				var c [KEYMAX + OFFMAX + 2*PAD]byte
39				randBytes(r, b[:])
40				randBytes(r, c[:])
41				copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n])
42				if BytesHash(b[PAD:PAD+n], 0) != BytesHash(c[PAD+i:PAD+i+n], 0) {
43					t.Errorf("hash depends on bytes outside key")
44				}
45			}
46		}
47	}
48}
49
50type HashSet struct {
51	m map[uintptr]struct{} // set of hashes added
52	n int                  // number of hashes added
53}
54
55func newHashSet() *HashSet {
56	return &HashSet{make(map[uintptr]struct{}), 0}
57}
58func (s *HashSet) add(h uintptr) {
59	s.m[h] = struct{}{}
60	s.n++
61}
62func (s *HashSet) addS(x string) {
63	s.add(StringHash(x, 0))
64}
65func (s *HashSet) addB(x []byte) {
66	s.add(BytesHash(x, 0))
67}
68func (s *HashSet) addS_seed(x string, seed uintptr) {
69	s.add(StringHash(x, seed))
70}
71func (s *HashSet) check(t *testing.T) {
72	const SLOP = 10.0
73	collisions := s.n - len(s.m)
74	//fmt.Printf("%d/%d\n", len(s.m), s.n)
75	pairs := int64(s.n) * int64(s.n-1) / 2
76	expected := float64(pairs) / math.Pow(2.0, float64(hashSize))
77	stddev := math.Sqrt(expected)
78	if float64(collisions) > expected+SLOP*(3*stddev+1) {
79		t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f", collisions, expected, stddev)
80	}
81}
82
83// a string plus adding zeros must make distinct hashes
84func TestSmhasherAppendedZeros(t *testing.T) {
85	s := "hello" + strings.Repeat("\x00", 256)
86	h := newHashSet()
87	for i := 0; i <= len(s); i++ {
88		h.addS(s[:i])
89	}
90	h.check(t)
91}
92
93// All 0-3 byte strings have distinct hashes.
94func TestSmhasherSmallKeys(t *testing.T) {
95	h := newHashSet()
96	var b [3]byte
97	for i := 0; i < 256; i++ {
98		b[0] = byte(i)
99		h.addB(b[:1])
100		for j := 0; j < 256; j++ {
101			b[1] = byte(j)
102			h.addB(b[:2])
103			if !testing.Short() {
104				for k := 0; k < 256; k++ {
105					b[2] = byte(k)
106					h.addB(b[:3])
107				}
108			}
109		}
110	}
111	h.check(t)
112}
113
114// Different length strings of all zeros have distinct hashes.
115func TestSmhasherZeros(t *testing.T) {
116	N := 256 * 1024
117	if testing.Short() {
118		N = 1024
119	}
120	h := newHashSet()
121	b := make([]byte, N)
122	for i := 0; i <= N; i++ {
123		h.addB(b[:i])
124	}
125	h.check(t)
126}
127
128// Strings with up to two nonzero bytes all have distinct hashes.
129func TestSmhasherTwoNonzero(t *testing.T) {
130	if testing.Short() {
131		t.Skip("Skipping in short mode")
132	}
133	h := newHashSet()
134	for n := 2; n <= 16; n++ {
135		twoNonZero(h, n)
136	}
137	h.check(t)
138}
139func twoNonZero(h *HashSet, n int) {
140	b := make([]byte, n)
141
142	// all zero
143	h.addB(b[:])
144
145	// one non-zero byte
146	for i := 0; i < n; i++ {
147		for x := 1; x < 256; x++ {
148			b[i] = byte(x)
149			h.addB(b[:])
150			b[i] = 0
151		}
152	}
153
154	// two non-zero bytes
155	for i := 0; i < n; i++ {
156		for x := 1; x < 256; x++ {
157			b[i] = byte(x)
158			for j := i + 1; j < n; j++ {
159				for y := 1; y < 256; y++ {
160					b[j] = byte(y)
161					h.addB(b[:])
162					b[j] = 0
163				}
164			}
165			b[i] = 0
166		}
167	}
168}
169
170// Test strings with repeats, like "abcdabcdabcdabcd..."
171func TestSmhasherCyclic(t *testing.T) {
172	if testing.Short() {
173		t.Skip("Skipping in short mode")
174	}
175	r := rand.New(rand.NewSource(1234))
176	const REPEAT = 8
177	const N = 1000000
178	for n := 4; n <= 12; n++ {
179		h := newHashSet()
180		b := make([]byte, REPEAT*n)
181		for i := 0; i < N; i++ {
182			b[0] = byte(i * 79 % 97)
183			b[1] = byte(i * 43 % 137)
184			b[2] = byte(i * 151 % 197)
185			b[3] = byte(i * 199 % 251)
186			randBytes(r, b[4:n])
187			for j := n; j < n*REPEAT; j++ {
188				b[j] = b[j-n]
189			}
190			h.addB(b)
191		}
192		h.check(t)
193	}
194}
195
196// Test strings with only a few bits set
197func TestSmhasherSparse(t *testing.T) {
198	if testing.Short() {
199		t.Skip("Skipping in short mode")
200	}
201	sparse(t, 32, 6)
202	sparse(t, 40, 6)
203	sparse(t, 48, 5)
204	sparse(t, 56, 5)
205	sparse(t, 64, 5)
206	sparse(t, 96, 4)
207	sparse(t, 256, 3)
208	sparse(t, 2048, 2)
209}
210func sparse(t *testing.T, n int, k int) {
211	b := make([]byte, n/8)
212	h := newHashSet()
213	setbits(h, b, 0, k)
214	h.check(t)
215}
216
217// set up to k bits at index i and greater
218func setbits(h *HashSet, b []byte, i int, k int) {
219	h.addB(b)
220	if k == 0 {
221		return
222	}
223	for j := i; j < len(b)*8; j++ {
224		b[j/8] |= byte(1 << uint(j&7))
225		setbits(h, b, j+1, k-1)
226		b[j/8] &= byte(^(1 << uint(j&7)))
227	}
228}
229
230// Test all possible combinations of n blocks from the set s.
231// "permutation" is a bad name here, but it is what Smhasher uses.
232func TestSmhasherPermutation(t *testing.T) {
233	if testing.Short() {
234		t.Skip("Skipping in short mode")
235	}
236	permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8)
237	permutation(t, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8)
238	permutation(t, []uint32{0, 1}, 20)
239	permutation(t, []uint32{0, 1 << 31}, 20)
240	permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6)
241}
242func permutation(t *testing.T, s []uint32, n int) {
243	b := make([]byte, n*4)
244	h := newHashSet()
245	genPerm(h, b, s, 0)
246	h.check(t)
247}
248func genPerm(h *HashSet, b []byte, s []uint32, n int) {
249	h.addB(b[:n])
250	if n == len(b) {
251		return
252	}
253	for _, v := range s {
254		b[n] = byte(v)
255		b[n+1] = byte(v >> 8)
256		b[n+2] = byte(v >> 16)
257		b[n+3] = byte(v >> 24)
258		genPerm(h, b, s, n+4)
259	}
260}
261
262type Key interface {
263	clear()              // set bits all to 0
264	random(r *rand.Rand) // set key to something random
265	bits() int           // how many bits key has
266	flipBit(i int)       // flip bit i of the key
267	hash() uintptr       // hash the key
268	name() string        // for error reporting
269}
270
271type BytesKey struct {
272	b []byte
273}
274
275func (k *BytesKey) clear() {
276	for i := range k.b {
277		k.b[i] = 0
278	}
279}
280func (k *BytesKey) random(r *rand.Rand) {
281	randBytes(r, k.b)
282}
283func (k *BytesKey) bits() int {
284	return len(k.b) * 8
285}
286func (k *BytesKey) flipBit(i int) {
287	k.b[i>>3] ^= byte(1 << uint(i&7))
288}
289func (k *BytesKey) hash() uintptr {
290	return BytesHash(k.b, 0)
291}
292func (k *BytesKey) name() string {
293	return fmt.Sprintf("bytes%d", len(k.b))
294}
295
296type Int32Key struct {
297	i uint32
298}
299
300func (k *Int32Key) clear() {
301	k.i = 0
302}
303func (k *Int32Key) random(r *rand.Rand) {
304	k.i = r.Uint32()
305}
306func (k *Int32Key) bits() int {
307	return 32
308}
309func (k *Int32Key) flipBit(i int) {
310	k.i ^= 1 << uint(i)
311}
312func (k *Int32Key) hash() uintptr {
313	return Int32Hash(k.i, 0)
314}
315func (k *Int32Key) name() string {
316	return "int32"
317}
318
319type Int64Key struct {
320	i uint64
321}
322
323func (k *Int64Key) clear() {
324	k.i = 0
325}
326func (k *Int64Key) random(r *rand.Rand) {
327	k.i = uint64(r.Uint32()) + uint64(r.Uint32())<<32
328}
329func (k *Int64Key) bits() int {
330	return 64
331}
332func (k *Int64Key) flipBit(i int) {
333	k.i ^= 1 << uint(i)
334}
335func (k *Int64Key) hash() uintptr {
336	return Int64Hash(k.i, 0)
337}
338func (k *Int64Key) name() string {
339	return "int64"
340}
341
342type EfaceKey struct {
343	i interface{}
344}
345
346func (k *EfaceKey) clear() {
347	k.i = nil
348}
349func (k *EfaceKey) random(r *rand.Rand) {
350	k.i = uint64(r.Int63())
351}
352func (k *EfaceKey) bits() int {
353	// use 64 bits. This tests inlined interfaces
354	// on 64-bit targets and indirect interfaces on
355	// 32-bit targets.
356	return 64
357}
358func (k *EfaceKey) flipBit(i int) {
359	k.i = k.i.(uint64) ^ uint64(1)<<uint(i)
360}
361func (k *EfaceKey) hash() uintptr {
362	return EfaceHash(k.i, 0)
363}
364func (k *EfaceKey) name() string {
365	return "Eface"
366}
367
368type IfaceKey struct {
369	i interface {
370		F()
371	}
372}
373type fInter uint64
374
375func (x fInter) F() {
376}
377
378func (k *IfaceKey) clear() {
379	k.i = nil
380}
381func (k *IfaceKey) random(r *rand.Rand) {
382	k.i = fInter(r.Int63())
383}
384func (k *IfaceKey) bits() int {
385	// use 64 bits. This tests inlined interfaces
386	// on 64-bit targets and indirect interfaces on
387	// 32-bit targets.
388	return 64
389}
390func (k *IfaceKey) flipBit(i int) {
391	k.i = k.i.(fInter) ^ fInter(1)<<uint(i)
392}
393func (k *IfaceKey) hash() uintptr {
394	return IfaceHash(k.i, 0)
395}
396func (k *IfaceKey) name() string {
397	return "Iface"
398}
399
400// Flipping a single bit of a key should flip each output bit with 50% probability.
401func TestSmhasherAvalanche(t *testing.T) {
402	if testing.Short() {
403		t.Skip("Skipping in short mode")
404	}
405	avalancheTest1(t, &BytesKey{make([]byte, 2)})
406	avalancheTest1(t, &BytesKey{make([]byte, 4)})
407	avalancheTest1(t, &BytesKey{make([]byte, 8)})
408	avalancheTest1(t, &BytesKey{make([]byte, 16)})
409	avalancheTest1(t, &BytesKey{make([]byte, 32)})
410	avalancheTest1(t, &BytesKey{make([]byte, 200)})
411	avalancheTest1(t, &Int32Key{})
412	avalancheTest1(t, &Int64Key{})
413	avalancheTest1(t, &EfaceKey{})
414	avalancheTest1(t, &IfaceKey{})
415}
416func avalancheTest1(t *testing.T, k Key) {
417	const REP = 100000
418	r := rand.New(rand.NewSource(1234))
419	n := k.bits()
420
421	// grid[i][j] is a count of whether flipping
422	// input bit i affects output bit j.
423	grid := make([][hashSize]int, n)
424
425	for z := 0; z < REP; z++ {
426		// pick a random key, hash it
427		k.random(r)
428		h := k.hash()
429
430		// flip each bit, hash & compare the results
431		for i := 0; i < n; i++ {
432			k.flipBit(i)
433			d := h ^ k.hash()
434			k.flipBit(i)
435
436			// record the effects of that bit flip
437			g := &grid[i]
438			for j := 0; j < hashSize; j++ {
439				g[j] += int(d & 1)
440				d >>= 1
441			}
442		}
443	}
444
445	// Each entry in the grid should be about REP/2.
446	// More precisely, we did N = k.bits() * hashSize experiments where
447	// each is the sum of REP coin flips. We want to find bounds on the
448	// sum of coin flips such that a truly random experiment would have
449	// all sums inside those bounds with 99% probability.
450	N := n * hashSize
451	var c float64
452	// find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999
453	for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 {
454	}
455	c *= 4.0 // allowed slack - we don't need to be perfectly random
456	mean := .5 * REP
457	stddev := .5 * math.Sqrt(REP)
458	low := int(mean - c*stddev)
459	high := int(mean + c*stddev)
460	for i := 0; i < n; i++ {
461		for j := 0; j < hashSize; j++ {
462			x := grid[i][j]
463			if x < low || x > high {
464				t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP)
465			}
466		}
467	}
468}
469
470// All bit rotations of a set of distinct keys
471func TestSmhasherWindowed(t *testing.T) {
472	windowed(t, &Int32Key{})
473	windowed(t, &Int64Key{})
474	windowed(t, &BytesKey{make([]byte, 128)})
475}
476func windowed(t *testing.T, k Key) {
477	if testing.Short() {
478		t.Skip("Skipping in short mode")
479	}
480	const BITS = 16
481
482	for r := 0; r < k.bits(); r++ {
483		h := newHashSet()
484		for i := 0; i < 1<<BITS; i++ {
485			k.clear()
486			for j := 0; j < BITS; j++ {
487				if i>>uint(j)&1 != 0 {
488					k.flipBit((j + r) % k.bits())
489				}
490			}
491			h.add(k.hash())
492		}
493		h.check(t)
494	}
495}
496
497// All keys of the form prefix + [A-Za-z0-9]*N + suffix.
498func TestSmhasherText(t *testing.T) {
499	if testing.Short() {
500		t.Skip("Skipping in short mode")
501	}
502	text(t, "Foo", "Bar")
503	text(t, "FooBar", "")
504	text(t, "", "FooBar")
505}
506func text(t *testing.T, prefix, suffix string) {
507	const N = 4
508	const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789"
509	const L = len(S)
510	b := make([]byte, len(prefix)+N+len(suffix))
511	copy(b, prefix)
512	copy(b[len(prefix)+N:], suffix)
513	h := newHashSet()
514	c := b[len(prefix):]
515	for i := 0; i < L; i++ {
516		c[0] = S[i]
517		for j := 0; j < L; j++ {
518			c[1] = S[j]
519			for k := 0; k < L; k++ {
520				c[2] = S[k]
521				for x := 0; x < L; x++ {
522					c[3] = S[x]
523					h.addB(b)
524				}
525			}
526		}
527	}
528	h.check(t)
529}
530
531// Make sure different seed values generate different hashes.
532func TestSmhasherSeed(t *testing.T) {
533	h := newHashSet()
534	const N = 100000
535	s := "hello"
536	for i := 0; i < N; i++ {
537		h.addS_seed(s, uintptr(i))
538	}
539	h.check(t)
540}
541
542// size of the hash output (32 or 64 bits)
543const hashSize = 32 + int(^uintptr(0)>>63<<5)
544
545func randBytes(r *rand.Rand, b []byte) {
546	for i := range b {
547		b[i] = byte(r.Uint32())
548	}
549}
550
551func benchmarkHash(b *testing.B, n int) {
552	s := strings.Repeat("A", n)
553
554	for i := 0; i < b.N; i++ {
555		StringHash(s, 0)
556	}
557	b.SetBytes(int64(n))
558}
559
560func BenchmarkHash5(b *testing.B)     { benchmarkHash(b, 5) }
561func BenchmarkHash16(b *testing.B)    { benchmarkHash(b, 16) }
562func BenchmarkHash64(b *testing.B)    { benchmarkHash(b, 64) }
563func BenchmarkHash1024(b *testing.B)  { benchmarkHash(b, 1024) }
564func BenchmarkHash65536(b *testing.B) { benchmarkHash(b, 65536) }
565
566func TestArrayHash(t *testing.T) {
567	// Make sure that "" in arrays hash correctly. The hash
568	// should at least scramble the input seed so that, e.g.,
569	// {"","foo"} and {"foo",""} have different hashes.
570
571	// If the hash is bad, then all (8 choose 4) = 70 keys
572	// have the same hash. If so, we allocate 70/8 = 8
573	// overflow buckets. If the hash is good we don't
574	// normally allocate any overflow buckets, and the
575	// probability of even one or two overflows goes down rapidly.
576	// (There is always 1 allocation of the bucket array. The map
577	// header is allocated on the stack.)
578	f := func() {
579		// Make the key type at most 128 bytes. Otherwise,
580		// we get an allocation per key.
581		type key [8]string
582		m := make(map[key]bool, 70)
583
584		// fill m with keys that have 4 "foo"s and 4 ""s.
585		for i := 0; i < 256; i++ {
586			var k key
587			cnt := 0
588			for j := uint(0); j < 8; j++ {
589				if i>>j&1 != 0 {
590					k[j] = "foo"
591					cnt++
592				}
593			}
594			if cnt == 4 {
595				m[k] = true
596			}
597		}
598		if len(m) != 70 {
599			t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m))
600		}
601	}
602	if n := testing.AllocsPerRun(10, f); n > 6 {
603		t.Errorf("too many allocs %f - hash not balanced", n)
604	}
605}
606func TestStructHash(t *testing.T) {
607	// See the comment in TestArrayHash.
608	f := func() {
609		type key struct {
610			a, b, c, d, e, f, g, h string
611		}
612		m := make(map[key]bool, 70)
613
614		// fill m with keys that have 4 "foo"s and 4 ""s.
615		for i := 0; i < 256; i++ {
616			var k key
617			cnt := 0
618			if i&1 != 0 {
619				k.a = "foo"
620				cnt++
621			}
622			if i&2 != 0 {
623				k.b = "foo"
624				cnt++
625			}
626			if i&4 != 0 {
627				k.c = "foo"
628				cnt++
629			}
630			if i&8 != 0 {
631				k.d = "foo"
632				cnt++
633			}
634			if i&16 != 0 {
635				k.e = "foo"
636				cnt++
637			}
638			if i&32 != 0 {
639				k.f = "foo"
640				cnt++
641			}
642			if i&64 != 0 {
643				k.g = "foo"
644				cnt++
645			}
646			if i&128 != 0 {
647				k.h = "foo"
648				cnt++
649			}
650			if cnt == 4 {
651				m[k] = true
652			}
653		}
654		if len(m) != 70 {
655			t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m))
656		}
657	}
658	if n := testing.AllocsPerRun(10, f); n > 6 {
659		t.Errorf("too many allocs %f - hash not balanced", n)
660	}
661}
662
663var sink uint64
664
665func BenchmarkAlignedLoad(b *testing.B) {
666	var buf [16]byte
667	p := unsafe.Pointer(&buf[0])
668	var s uint64
669	for i := 0; i < b.N; i++ {
670		s += ReadUnaligned64(p)
671	}
672	sink = s
673}
674
675func BenchmarkUnalignedLoad(b *testing.B) {
676	var buf [16]byte
677	p := unsafe.Pointer(&buf[1])
678	var s uint64
679	for i := 0; i < b.N; i++ {
680		s += ReadUnaligned64(p)
681	}
682	sink = s
683}
684
685func TestCollisions(t *testing.T) {
686	if testing.Short() {
687		t.Skip("Skipping in short mode")
688	}
689	for i := 0; i < 16; i++ {
690		for j := 0; j < 16; j++ {
691			if j == i {
692				continue
693			}
694			var a [16]byte
695			m := make(map[uint16]struct{}, 1<<16)
696			for n := 0; n < 1<<16; n++ {
697				a[i] = byte(n)
698				a[j] = byte(n >> 8)
699				m[uint16(BytesHash(a[:], 0))] = struct{}{}
700			}
701			if len(m) <= 1<<15 {
702				t.Errorf("too many collisions i=%d j=%d outputs=%d out of 65536\n", i, j, len(m))
703			}
704		}
705	}
706}
707