1// Copyright 2013 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package runtime_test 6 7import ( 8 "fmt" 9 "math" 10 "math/rand" 11 . "runtime" 12 "strings" 13 "testing" 14 "unsafe" 15) 16 17func TestMemHash32Equality(t *testing.T) { 18 if *UseAeshash { 19 t.Skip("skipping since AES hash implementation is used") 20 } 21 var b [4]byte 22 r := rand.New(rand.NewSource(1234)) 23 seed := uintptr(r.Uint64()) 24 for i := 0; i < 100; i++ { 25 randBytes(r, b[:]) 26 got := MemHash32(unsafe.Pointer(&b), seed) 27 want := MemHash(unsafe.Pointer(&b), seed, 4) 28 if got != want { 29 t.Errorf("MemHash32(%x, %v) = %v; want %v", b, seed, got, want) 30 } 31 } 32} 33 34func TestMemHash64Equality(t *testing.T) { 35 if *UseAeshash { 36 t.Skip("skipping since AES hash implementation is used") 37 } 38 var b [8]byte 39 r := rand.New(rand.NewSource(1234)) 40 seed := uintptr(r.Uint64()) 41 for i := 0; i < 100; i++ { 42 randBytes(r, b[:]) 43 got := MemHash64(unsafe.Pointer(&b), seed) 44 want := MemHash(unsafe.Pointer(&b), seed, 8) 45 if got != want { 46 t.Errorf("MemHash64(%x, %v) = %v; want %v", b, seed, got, want) 47 } 48 } 49} 50 51// Smhasher is a torture test for hash functions. 52// https://code.google.com/p/smhasher/ 53// This code is a port of some of the Smhasher tests to Go. 54// 55// The current AES hash function passes Smhasher. Our fallback 56// hash functions don't, so we only enable the difficult tests when 57// we know the AES implementation is available. 58 59// Sanity checks. 60// hash should not depend on values outside key. 61// hash should not depend on alignment. 62func TestSmhasherSanity(t *testing.T) { 63 r := rand.New(rand.NewSource(1234)) 64 const REP = 10 65 const KEYMAX = 128 66 const PAD = 16 67 const OFFMAX = 16 68 for k := 0; k < REP; k++ { 69 for n := 0; n < KEYMAX; n++ { 70 for i := 0; i < OFFMAX; i++ { 71 var b [KEYMAX + OFFMAX + 2*PAD]byte 72 var c [KEYMAX + OFFMAX + 2*PAD]byte 73 randBytes(r, b[:]) 74 randBytes(r, c[:]) 75 copy(c[PAD+i:PAD+i+n], b[PAD:PAD+n]) 76 if BytesHash(b[PAD:PAD+n], 0) != BytesHash(c[PAD+i:PAD+i+n], 0) { 77 t.Errorf("hash depends on bytes outside key") 78 } 79 } 80 } 81 } 82} 83 84type HashSet struct { 85 m map[uintptr]struct{} // set of hashes added 86 n int // number of hashes added 87} 88 89func newHashSet() *HashSet { 90 return &HashSet{make(map[uintptr]struct{}), 0} 91} 92func (s *HashSet) add(h uintptr) { 93 s.m[h] = struct{}{} 94 s.n++ 95} 96func (s *HashSet) addS(x string) { 97 s.add(StringHash(x, 0)) 98} 99func (s *HashSet) addB(x []byte) { 100 s.add(BytesHash(x, 0)) 101} 102func (s *HashSet) addS_seed(x string, seed uintptr) { 103 s.add(StringHash(x, seed)) 104} 105func (s *HashSet) check(t *testing.T) { 106 const SLOP = 10.0 107 collisions := s.n - len(s.m) 108 //fmt.Printf("%d/%d\n", len(s.m), s.n) 109 pairs := int64(s.n) * int64(s.n-1) / 2 110 expected := float64(pairs) / math.Pow(2.0, float64(hashSize)) 111 stddev := math.Sqrt(expected) 112 if float64(collisions) > expected+SLOP*(3*stddev+1) { 113 t.Errorf("unexpected number of collisions: got=%d mean=%f stddev=%f", collisions, expected, stddev) 114 } 115} 116 117// a string plus adding zeros must make distinct hashes 118func TestSmhasherAppendedZeros(t *testing.T) { 119 s := "hello" + strings.Repeat("\x00", 256) 120 h := newHashSet() 121 for i := 0; i <= len(s); i++ { 122 h.addS(s[:i]) 123 } 124 h.check(t) 125} 126 127// All 0-3 byte strings have distinct hashes. 128func TestSmhasherSmallKeys(t *testing.T) { 129 h := newHashSet() 130 var b [3]byte 131 for i := 0; i < 256; i++ { 132 b[0] = byte(i) 133 h.addB(b[:1]) 134 for j := 0; j < 256; j++ { 135 b[1] = byte(j) 136 h.addB(b[:2]) 137 if !testing.Short() { 138 for k := 0; k < 256; k++ { 139 b[2] = byte(k) 140 h.addB(b[:3]) 141 } 142 } 143 } 144 } 145 h.check(t) 146} 147 148// Different length strings of all zeros have distinct hashes. 149func TestSmhasherZeros(t *testing.T) { 150 N := 256 * 1024 151 if testing.Short() { 152 N = 1024 153 } 154 h := newHashSet() 155 b := make([]byte, N) 156 for i := 0; i <= N; i++ { 157 h.addB(b[:i]) 158 } 159 h.check(t) 160} 161 162// Strings with up to two nonzero bytes all have distinct hashes. 163func TestSmhasherTwoNonzero(t *testing.T) { 164 if testing.Short() { 165 t.Skip("Skipping in short mode") 166 } 167 h := newHashSet() 168 for n := 2; n <= 16; n++ { 169 twoNonZero(h, n) 170 } 171 h.check(t) 172} 173func twoNonZero(h *HashSet, n int) { 174 b := make([]byte, n) 175 176 // all zero 177 h.addB(b[:]) 178 179 // one non-zero byte 180 for i := 0; i < n; i++ { 181 for x := 1; x < 256; x++ { 182 b[i] = byte(x) 183 h.addB(b[:]) 184 b[i] = 0 185 } 186 } 187 188 // two non-zero bytes 189 for i := 0; i < n; i++ { 190 for x := 1; x < 256; x++ { 191 b[i] = byte(x) 192 for j := i + 1; j < n; j++ { 193 for y := 1; y < 256; y++ { 194 b[j] = byte(y) 195 h.addB(b[:]) 196 b[j] = 0 197 } 198 } 199 b[i] = 0 200 } 201 } 202} 203 204// Test strings with repeats, like "abcdabcdabcdabcd..." 205func TestSmhasherCyclic(t *testing.T) { 206 if testing.Short() { 207 t.Skip("Skipping in short mode") 208 } 209 r := rand.New(rand.NewSource(1234)) 210 const REPEAT = 8 211 const N = 1000000 212 for n := 4; n <= 12; n++ { 213 h := newHashSet() 214 b := make([]byte, REPEAT*n) 215 for i := 0; i < N; i++ { 216 b[0] = byte(i * 79 % 97) 217 b[1] = byte(i * 43 % 137) 218 b[2] = byte(i * 151 % 197) 219 b[3] = byte(i * 199 % 251) 220 randBytes(r, b[4:n]) 221 for j := n; j < n*REPEAT; j++ { 222 b[j] = b[j-n] 223 } 224 h.addB(b) 225 } 226 h.check(t) 227 } 228} 229 230// Test strings with only a few bits set 231func TestSmhasherSparse(t *testing.T) { 232 if testing.Short() { 233 t.Skip("Skipping in short mode") 234 } 235 sparse(t, 32, 6) 236 sparse(t, 40, 6) 237 sparse(t, 48, 5) 238 sparse(t, 56, 5) 239 sparse(t, 64, 5) 240 sparse(t, 96, 4) 241 sparse(t, 256, 3) 242 sparse(t, 2048, 2) 243} 244func sparse(t *testing.T, n int, k int) { 245 b := make([]byte, n/8) 246 h := newHashSet() 247 setbits(h, b, 0, k) 248 h.check(t) 249} 250 251// set up to k bits at index i and greater 252func setbits(h *HashSet, b []byte, i int, k int) { 253 h.addB(b) 254 if k == 0 { 255 return 256 } 257 for j := i; j < len(b)*8; j++ { 258 b[j/8] |= byte(1 << uint(j&7)) 259 setbits(h, b, j+1, k-1) 260 b[j/8] &= byte(^(1 << uint(j&7))) 261 } 262} 263 264// Test all possible combinations of n blocks from the set s. 265// "permutation" is a bad name here, but it is what Smhasher uses. 266func TestSmhasherPermutation(t *testing.T) { 267 if testing.Short() { 268 t.Skip("Skipping in short mode") 269 } 270 permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7}, 8) 271 permutation(t, []uint32{0, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 8) 272 permutation(t, []uint32{0, 1}, 20) 273 permutation(t, []uint32{0, 1 << 31}, 20) 274 permutation(t, []uint32{0, 1, 2, 3, 4, 5, 6, 7, 1 << 29, 2 << 29, 3 << 29, 4 << 29, 5 << 29, 6 << 29, 7 << 29}, 6) 275} 276func permutation(t *testing.T, s []uint32, n int) { 277 b := make([]byte, n*4) 278 h := newHashSet() 279 genPerm(h, b, s, 0) 280 h.check(t) 281} 282func genPerm(h *HashSet, b []byte, s []uint32, n int) { 283 h.addB(b[:n]) 284 if n == len(b) { 285 return 286 } 287 for _, v := range s { 288 b[n] = byte(v) 289 b[n+1] = byte(v >> 8) 290 b[n+2] = byte(v >> 16) 291 b[n+3] = byte(v >> 24) 292 genPerm(h, b, s, n+4) 293 } 294} 295 296type Key interface { 297 clear() // set bits all to 0 298 random(r *rand.Rand) // set key to something random 299 bits() int // how many bits key has 300 flipBit(i int) // flip bit i of the key 301 hash() uintptr // hash the key 302 name() string // for error reporting 303} 304 305type BytesKey struct { 306 b []byte 307} 308 309func (k *BytesKey) clear() { 310 for i := range k.b { 311 k.b[i] = 0 312 } 313} 314func (k *BytesKey) random(r *rand.Rand) { 315 randBytes(r, k.b) 316} 317func (k *BytesKey) bits() int { 318 return len(k.b) * 8 319} 320func (k *BytesKey) flipBit(i int) { 321 k.b[i>>3] ^= byte(1 << uint(i&7)) 322} 323func (k *BytesKey) hash() uintptr { 324 return BytesHash(k.b, 0) 325} 326func (k *BytesKey) name() string { 327 return fmt.Sprintf("bytes%d", len(k.b)) 328} 329 330type Int32Key struct { 331 i uint32 332} 333 334func (k *Int32Key) clear() { 335 k.i = 0 336} 337func (k *Int32Key) random(r *rand.Rand) { 338 k.i = r.Uint32() 339} 340func (k *Int32Key) bits() int { 341 return 32 342} 343func (k *Int32Key) flipBit(i int) { 344 k.i ^= 1 << uint(i) 345} 346func (k *Int32Key) hash() uintptr { 347 return Int32Hash(k.i, 0) 348} 349func (k *Int32Key) name() string { 350 return "int32" 351} 352 353type Int64Key struct { 354 i uint64 355} 356 357func (k *Int64Key) clear() { 358 k.i = 0 359} 360func (k *Int64Key) random(r *rand.Rand) { 361 k.i = uint64(r.Uint32()) + uint64(r.Uint32())<<32 362} 363func (k *Int64Key) bits() int { 364 return 64 365} 366func (k *Int64Key) flipBit(i int) { 367 k.i ^= 1 << uint(i) 368} 369func (k *Int64Key) hash() uintptr { 370 return Int64Hash(k.i, 0) 371} 372func (k *Int64Key) name() string { 373 return "int64" 374} 375 376type EfaceKey struct { 377 i interface{} 378} 379 380func (k *EfaceKey) clear() { 381 k.i = nil 382} 383func (k *EfaceKey) random(r *rand.Rand) { 384 k.i = uint64(r.Int63()) 385} 386func (k *EfaceKey) bits() int { 387 // use 64 bits. This tests inlined interfaces 388 // on 64-bit targets and indirect interfaces on 389 // 32-bit targets. 390 return 64 391} 392func (k *EfaceKey) flipBit(i int) { 393 k.i = k.i.(uint64) ^ uint64(1)<<uint(i) 394} 395func (k *EfaceKey) hash() uintptr { 396 return EfaceHash(k.i, 0) 397} 398func (k *EfaceKey) name() string { 399 return "Eface" 400} 401 402type IfaceKey struct { 403 i interface { 404 F() 405 } 406} 407type fInter uint64 408 409func (x fInter) F() { 410} 411 412func (k *IfaceKey) clear() { 413 k.i = nil 414} 415func (k *IfaceKey) random(r *rand.Rand) { 416 k.i = fInter(r.Int63()) 417} 418func (k *IfaceKey) bits() int { 419 // use 64 bits. This tests inlined interfaces 420 // on 64-bit targets and indirect interfaces on 421 // 32-bit targets. 422 return 64 423} 424func (k *IfaceKey) flipBit(i int) { 425 k.i = k.i.(fInter) ^ fInter(1)<<uint(i) 426} 427func (k *IfaceKey) hash() uintptr { 428 return IfaceHash(k.i, 0) 429} 430func (k *IfaceKey) name() string { 431 return "Iface" 432} 433 434// Flipping a single bit of a key should flip each output bit with 50% probability. 435func TestSmhasherAvalanche(t *testing.T) { 436 if testing.Short() { 437 t.Skip("Skipping in short mode") 438 } 439 avalancheTest1(t, &BytesKey{make([]byte, 2)}) 440 avalancheTest1(t, &BytesKey{make([]byte, 4)}) 441 avalancheTest1(t, &BytesKey{make([]byte, 8)}) 442 avalancheTest1(t, &BytesKey{make([]byte, 16)}) 443 avalancheTest1(t, &BytesKey{make([]byte, 32)}) 444 avalancheTest1(t, &BytesKey{make([]byte, 200)}) 445 avalancheTest1(t, &Int32Key{}) 446 avalancheTest1(t, &Int64Key{}) 447 avalancheTest1(t, &EfaceKey{}) 448 avalancheTest1(t, &IfaceKey{}) 449} 450func avalancheTest1(t *testing.T, k Key) { 451 const REP = 100000 452 r := rand.New(rand.NewSource(1234)) 453 n := k.bits() 454 455 // grid[i][j] is a count of whether flipping 456 // input bit i affects output bit j. 457 grid := make([][hashSize]int, n) 458 459 for z := 0; z < REP; z++ { 460 // pick a random key, hash it 461 k.random(r) 462 h := k.hash() 463 464 // flip each bit, hash & compare the results 465 for i := 0; i < n; i++ { 466 k.flipBit(i) 467 d := h ^ k.hash() 468 k.flipBit(i) 469 470 // record the effects of that bit flip 471 g := &grid[i] 472 for j := 0; j < hashSize; j++ { 473 g[j] += int(d & 1) 474 d >>= 1 475 } 476 } 477 } 478 479 // Each entry in the grid should be about REP/2. 480 // More precisely, we did N = k.bits() * hashSize experiments where 481 // each is the sum of REP coin flips. We want to find bounds on the 482 // sum of coin flips such that a truly random experiment would have 483 // all sums inside those bounds with 99% probability. 484 N := n * hashSize 485 var c float64 486 // find c such that Prob(mean-c*stddev < x < mean+c*stddev)^N > .9999 487 for c = 0.0; math.Pow(math.Erf(c/math.Sqrt(2)), float64(N)) < .9999; c += .1 { 488 } 489 c *= 4.0 // allowed slack - we don't need to be perfectly random 490 mean := .5 * REP 491 stddev := .5 * math.Sqrt(REP) 492 low := int(mean - c*stddev) 493 high := int(mean + c*stddev) 494 for i := 0; i < n; i++ { 495 for j := 0; j < hashSize; j++ { 496 x := grid[i][j] 497 if x < low || x > high { 498 t.Errorf("bad bias for %s bit %d -> bit %d: %d/%d\n", k.name(), i, j, x, REP) 499 } 500 } 501 } 502} 503 504// All bit rotations of a set of distinct keys 505func TestSmhasherWindowed(t *testing.T) { 506 windowed(t, &Int32Key{}) 507 windowed(t, &Int64Key{}) 508 windowed(t, &BytesKey{make([]byte, 128)}) 509} 510func windowed(t *testing.T, k Key) { 511 if testing.Short() { 512 t.Skip("Skipping in short mode") 513 } 514 const BITS = 16 515 516 for r := 0; r < k.bits(); r++ { 517 h := newHashSet() 518 for i := 0; i < 1<<BITS; i++ { 519 k.clear() 520 for j := 0; j < BITS; j++ { 521 if i>>uint(j)&1 != 0 { 522 k.flipBit((j + r) % k.bits()) 523 } 524 } 525 h.add(k.hash()) 526 } 527 h.check(t) 528 } 529} 530 531// All keys of the form prefix + [A-Za-z0-9]*N + suffix. 532func TestSmhasherText(t *testing.T) { 533 if testing.Short() { 534 t.Skip("Skipping in short mode") 535 } 536 text(t, "Foo", "Bar") 537 text(t, "FooBar", "") 538 text(t, "", "FooBar") 539} 540func text(t *testing.T, prefix, suffix string) { 541 const N = 4 542 const S = "ABCDEFGHIJKLMNOPQRSTabcdefghijklmnopqrst0123456789" 543 const L = len(S) 544 b := make([]byte, len(prefix)+N+len(suffix)) 545 copy(b, prefix) 546 copy(b[len(prefix)+N:], suffix) 547 h := newHashSet() 548 c := b[len(prefix):] 549 for i := 0; i < L; i++ { 550 c[0] = S[i] 551 for j := 0; j < L; j++ { 552 c[1] = S[j] 553 for k := 0; k < L; k++ { 554 c[2] = S[k] 555 for x := 0; x < L; x++ { 556 c[3] = S[x] 557 h.addB(b) 558 } 559 } 560 } 561 } 562 h.check(t) 563} 564 565// Make sure different seed values generate different hashes. 566func TestSmhasherSeed(t *testing.T) { 567 h := newHashSet() 568 const N = 100000 569 s := "hello" 570 for i := 0; i < N; i++ { 571 h.addS_seed(s, uintptr(i)) 572 } 573 h.check(t) 574} 575 576// size of the hash output (32 or 64 bits) 577const hashSize = 32 + int(^uintptr(0)>>63<<5) 578 579func randBytes(r *rand.Rand, b []byte) { 580 for i := range b { 581 b[i] = byte(r.Uint32()) 582 } 583} 584 585func benchmarkHash(b *testing.B, n int) { 586 s := strings.Repeat("A", n) 587 588 for i := 0; i < b.N; i++ { 589 StringHash(s, 0) 590 } 591 b.SetBytes(int64(n)) 592} 593 594func BenchmarkHash5(b *testing.B) { benchmarkHash(b, 5) } 595func BenchmarkHash16(b *testing.B) { benchmarkHash(b, 16) } 596func BenchmarkHash64(b *testing.B) { benchmarkHash(b, 64) } 597func BenchmarkHash1024(b *testing.B) { benchmarkHash(b, 1024) } 598func BenchmarkHash65536(b *testing.B) { benchmarkHash(b, 65536) } 599 600func TestArrayHash(t *testing.T) { 601 if Compiler == "gccgo" { 602 t.Skip("does not work on gccgo without better escape analysis") 603 } 604 605 // Make sure that "" in arrays hash correctly. The hash 606 // should at least scramble the input seed so that, e.g., 607 // {"","foo"} and {"foo",""} have different hashes. 608 609 // If the hash is bad, then all (8 choose 4) = 70 keys 610 // have the same hash. If so, we allocate 70/8 = 8 611 // overflow buckets. If the hash is good we don't 612 // normally allocate any overflow buckets, and the 613 // probability of even one or two overflows goes down rapidly. 614 // (There is always 1 allocation of the bucket array. The map 615 // header is allocated on the stack.) 616 f := func() { 617 // Make the key type at most 128 bytes. Otherwise, 618 // we get an allocation per key. 619 type key [8]string 620 m := make(map[key]bool, 70) 621 622 // fill m with keys that have 4 "foo"s and 4 ""s. 623 for i := 0; i < 256; i++ { 624 var k key 625 cnt := 0 626 for j := uint(0); j < 8; j++ { 627 if i>>j&1 != 0 { 628 k[j] = "foo" 629 cnt++ 630 } 631 } 632 if cnt == 4 { 633 m[k] = true 634 } 635 } 636 if len(m) != 70 { 637 t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m)) 638 } 639 } 640 if n := testing.AllocsPerRun(10, f); n > 6 { 641 t.Errorf("too many allocs %f - hash not balanced", n) 642 } 643} 644func TestStructHash(t *testing.T) { 645 // See the comment in TestArrayHash. 646 f := func() { 647 type key struct { 648 a, b, c, d, e, f, g, h string 649 } 650 m := make(map[key]bool, 70) 651 652 // fill m with keys that have 4 "foo"s and 4 ""s. 653 for i := 0; i < 256; i++ { 654 var k key 655 cnt := 0 656 if i&1 != 0 { 657 k.a = "foo" 658 cnt++ 659 } 660 if i&2 != 0 { 661 k.b = "foo" 662 cnt++ 663 } 664 if i&4 != 0 { 665 k.c = "foo" 666 cnt++ 667 } 668 if i&8 != 0 { 669 k.d = "foo" 670 cnt++ 671 } 672 if i&16 != 0 { 673 k.e = "foo" 674 cnt++ 675 } 676 if i&32 != 0 { 677 k.f = "foo" 678 cnt++ 679 } 680 if i&64 != 0 { 681 k.g = "foo" 682 cnt++ 683 } 684 if i&128 != 0 { 685 k.h = "foo" 686 cnt++ 687 } 688 if cnt == 4 { 689 m[k] = true 690 } 691 } 692 if len(m) != 70 { 693 t.Errorf("bad test: (8 choose 4) should be 70, not %d", len(m)) 694 } 695 } 696 if n := testing.AllocsPerRun(10, f); n > 6 { 697 t.Errorf("too many allocs %f - hash not balanced", n) 698 } 699} 700 701var sink uint64 702 703func BenchmarkAlignedLoad(b *testing.B) { 704 var buf [16]byte 705 p := unsafe.Pointer(&buf[0]) 706 var s uint64 707 for i := 0; i < b.N; i++ { 708 s += ReadUnaligned64(p) 709 } 710 sink = s 711} 712 713func BenchmarkUnalignedLoad(b *testing.B) { 714 var buf [16]byte 715 p := unsafe.Pointer(&buf[1]) 716 var s uint64 717 for i := 0; i < b.N; i++ { 718 s += ReadUnaligned64(p) 719 } 720 sink = s 721} 722 723func TestCollisions(t *testing.T) { 724 if testing.Short() { 725 t.Skip("Skipping in short mode") 726 } 727 for i := 0; i < 16; i++ { 728 for j := 0; j < 16; j++ { 729 if j == i { 730 continue 731 } 732 var a [16]byte 733 m := make(map[uint16]struct{}, 1<<16) 734 for n := 0; n < 1<<16; n++ { 735 a[i] = byte(n) 736 a[j] = byte(n >> 8) 737 m[uint16(BytesHash(a[:], 0))] = struct{}{} 738 } 739 if len(m) <= 1<<15 { 740 t.Errorf("too many collisions i=%d j=%d outputs=%d out of 65536\n", i, j, len(m)) 741 } 742 } 743 } 744} 745