1{-# LANGUAGE BangPatterns #-} 2{-# LANGUAGE CPP #-} 3{-# LANGUAGE MagicHash #-} 4 5{-| 6 7A hash table using the cuckoo strategy. (See 8<http://en.wikipedia.org/wiki/Cuckoo_hashing>). Use this hash table if you... 9 10 * want the fastest possible inserts, and very fast lookups. 11 12 * are conscious of memory usage; this table has less space overhead than 13 "Data.HashTable.ST.Basic" or "Data.HashTable.ST.Linear". 14 15 * don't care that a table resize might pause for a long time to rehash all 16 of the key-value mappings. 17 18 19/Details:/ 20 21The basic idea of cuckoo hashing, first introduced by Pagh and Rodler in 2001, 22is to use /d/ hash functions instead of only one; in this implementation d=2 23and the strategy we use is to split up a flat array of slots into @k@ buckets, 24each cache-line-sized: 25 26@ 27+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+----------+ 28|x0|x1|x2|x3|x4|x5|x6|x7|y0|y1|y2|y3|y4|y5|y6|y7|z0|z1|z2........| 29+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+----------+ 30[ ^^^ bucket 0 ^^^ ][ ^^^ bucket 1 ^^^ ]... 31@ 32 33There are actually three parallel arrays: one unboxed array of 'Int's for hash 34codes, one boxed array for keys, and one boxed array for values. When looking 35up a key-value mapping, we hash the key using two hash functions and look in 36both buckets in the hash code array for the key. Each bucket is cache-line 37sized, with its keys in no particular order. Because the hash code array is 38unboxed, we can search it for the key using a highly-efficient branchless 39strategy in C code, using SSE instructions if available. 40 41On insert, if both buckets are full, we knock out a randomly-selected entry 42from one of the buckets (using a random walk ensures that \"key cycles\" are 43broken with maximum probability) and try to repeat the insert procedure. This 44process may not succeed; if all items have not successfully found a home after 45some number of tries, we give up and rehash all of the elements into a larger 46table. 47 48/Space overhead: experimental results/ 49 50The implementation of cuckoo hash given here is almost as fast for lookups as 51the basic open-addressing hash table using linear probing, and on average is 52more space-efficient: in randomized testing on my 64-bit machine (see 53@test\/compute-overhead\/ComputeOverhead.hs@ in the source distribution), mean 54overhead is 0.77 machine words per key-value mapping, with a standard deviation 55of 0.29 words, and 1.23 words per mapping at the 95th percentile. 56 57/References:/ 58 59 * A. Pagh and F. Rodler. Cuckoo hashing. In /Proceedings of the 9th 60 Annual European Symposium on Algorithms/, pp. 121-133, 2001. 61-} 62 63 64module Data.HashTable.ST.Cuckoo 65 ( HashTable 66 , new 67 , newSized 68 , delete 69 , lookup 70 , insert 71 , mutate 72 , mutateST 73 , mapM_ 74 , foldM 75 , lookupIndex 76 , nextByIndex 77 ) where 78 79 80------------------------------------------------------------------------------ 81#if !MIN_VERSION_base(4,8,0) 82import Control.Applicative 83#endif 84import Control.Monad hiding 85 (foldM, 86 mapM_) 87import Control.Monad.ST (ST) 88import Data.Bits 89import Data.Hashable hiding 90 (hash) 91import qualified Data.Hashable as H 92import Data.Int 93import Data.Maybe 94import Data.Primitive.Array 95import Data.STRef 96import GHC.Exts 97import Prelude hiding 98 (lookup, 99 mapM_, 100 read) 101------------------------------------------------------------------------------ 102import qualified Data.HashTable.Class as C 103import Data.HashTable.Internal.CacheLine 104import Data.HashTable.Internal.CheapPseudoRandomBitStream 105import Data.HashTable.Internal.IntArray (Elem) 106import qualified Data.HashTable.Internal.IntArray as U 107import Data.HashTable.Internal.Utils 108 109#ifdef DEBUG 110import System.IO 111#endif 112 113 114------------------------------------------------------------------------------ 115-- | A cuckoo hash table. 116newtype HashTable s k v = HT (STRef s (HashTable_ s k v)) 117 118data HashTable_ s k v = HashTable 119 { _size :: {-# UNPACK #-} !Int -- ^ in buckets, total size is 120 -- numElemsInCacheLine * _size 121 , _rng :: {-# UNPACK #-} !(BitStream s) 122 , _hashes :: {-# UNPACK #-} !(U.IntArray s) 123 , _keys :: {-# UNPACK #-} !(MutableArray s k) 124 , _values :: {-# UNPACK #-} !(MutableArray s v) 125 , _maxAttempts :: {-# UNPACK #-} !Int 126 } 127 128 129------------------------------------------------------------------------------ 130instance C.HashTable HashTable where 131 new = new 132 newSized = newSized 133 insert = insert 134 delete = delete 135 lookup = lookup 136 foldM = foldM 137 mapM_ = mapM_ 138 lookupIndex = lookupIndex 139 nextByIndex = nextByIndex 140 computeOverhead = computeOverhead 141 mutate = mutate 142 mutateST = mutateST 143 144 145------------------------------------------------------------------------------ 146instance Show (HashTable s k v) where 147 show _ = "<HashTable>" 148 149 150------------------------------------------------------------------------------ 151-- | See the documentation for this function in 152-- 'Data.HashTable.Class.new'. 153new :: ST s (HashTable s k v) 154new = newSizedReal 2 >>= newRef 155{-# INLINE new #-} 156 157 158------------------------------------------------------------------------------ 159-- | See the documentation for this function in 160-- 'Data.HashTable.Class.newSized'. 161newSized :: Int -> ST s (HashTable s k v) 162newSized n = do 163 let n' = (n + numElemsInCacheLine - 1) `div` numElemsInCacheLine 164 let k = nextBestPrime $ ceiling $ fromIntegral n' / maxLoad 165 newSizedReal k >>= newRef 166{-# INLINE newSized #-} 167 168 169------------------------------------------------------------------------------ 170-- | See the documentation for this function in 171-- 'Data.HashTable.Class.insert'. 172insert :: (Eq k, Hashable k) => HashTable s k v -> k -> v -> ST s () 173insert ht !k !v = readRef ht >>= \h -> insert' h k v >>= writeRef ht 174 175 176------------------------------------------------------------------------------ 177mutate :: (Eq k, Hashable k) => 178 HashTable s k v 179 -> k 180 -> (Maybe v -> (Maybe v, a)) 181 -> ST s a 182mutate htRef !k !f = mutateST htRef k (pure . f) 183{-# INLINE mutate #-} 184 185 186------------------------------------------------------------------------------ 187mutateST :: (Eq k, Hashable k) => 188 HashTable s k v 189 -> k 190 -> (Maybe v -> ST s (Maybe v, a)) 191 -> ST s a 192mutateST htRef !k !f = do 193 ht <- readRef htRef 194 (newHt, a) <- mutate' ht k f 195 writeRef htRef newHt 196 return a 197{-# INLINE mutateST #-} 198 199 200------------------------------------------------------------------------------ 201-- | See the documentation for this function in 202-- 'Data.HashTable.Class.computeOverhead'. 203computeOverhead :: HashTable s k v -> ST s Double 204computeOverhead htRef = readRef htRef >>= work 205 where 206 work (HashTable sz _ _ _ _ _) = do 207 nFilled <- foldM f 0 htRef 208 209 let oh = (totSz `div` hashCodesPerWord) -- one half or quarter word 210 -- per element in hashes 211 + 2 * (totSz - nFilled) -- two words per non-filled entry 212 + 12 -- fixed overhead 213 214 return $! fromIntegral (oh::Int) / fromIntegral nFilled 215 216 where 217 hashCodesPerWord = (finiteBitSize (0 :: Int)) `div` 16 218 totSz = numElemsInCacheLine * sz 219 220 f !a _ = return $! a+1 221 222 223------------------------------------------------------------------------------ 224-- | See the documentation for this function in 225-- 'Data.HashTable.Class.delete'. 226delete :: (Hashable k, Eq k) => 227 HashTable s k v 228 -> k 229 -> ST s () 230delete htRef k = readRef htRef >>= go 231 where 232 go ht@(HashTable sz _ _ _ _ _) = do 233 _ <- delete' ht False k b1 b2 h1 h2 234 return () 235 236 where 237 h1 = hash1 k 238 h2 = hash2 k 239 240 b1 = whichLine h1 sz 241 b2 = whichLine h2 sz 242 243 244------------------------------------------------------------------------------ 245-- | See the documentation for this function in 246-- 'Data.HashTable.Class.lookup'. 247lookup :: (Eq k, Hashable k) => 248 HashTable s k v 249 -> k 250 -> ST s (Maybe v) 251lookup htRef k = do 252 ht <- readRef htRef 253 lookup' ht k 254{-# INLINE lookup #-} 255 256 257------------------------------------------------------------------------------ 258lookup' :: (Eq k, Hashable k) => 259 HashTable_ s k v 260 -> k 261 -> ST s (Maybe v) 262lookup' (HashTable sz _ hashes keys values _) !k = do 263 -- Unlike the write case, prefetch doesn't seem to help here for lookup. 264 -- prefetchRead hashes b2 265 idx1 <- searchOne keys hashes k b1 he1 266 267 if idx1 >= 0 268 then do 269 v <- readArray values idx1 270 return $! Just v 271 else do 272 idx2 <- searchOne keys hashes k b2 he2 273 if idx2 >= 0 274 then do 275 v <- readArray values idx2 276 return $! Just v 277 else 278 return Nothing 279 280 where 281 h1 = hash1 k 282 h2 = hash2 k 283 284 he1 = hashToElem h1 285 he2 = hashToElem h2 286 287 b1 = whichLine h1 sz 288 b2 = whichLine h2 sz 289{-# INLINE lookup' #-} 290 291 292------------------------------------------------------------------------------ 293searchOne :: (Eq k) => 294 MutableArray s k 295 -> U.IntArray s 296 -> k 297 -> Int 298 -> Elem 299 -> ST s Int 300searchOne !keys !hashes !k !b0 !h = go b0 301 where 302 go !b = do 303 debug $ "searchOne: go/" ++ show b ++ "/" ++ show h 304 idx <- cacheLineSearch hashes b h 305 debug $ "searchOne: cacheLineSearch returned " ++ show idx 306 307 case idx of 308 -1 -> return (-1) 309 _ -> do 310 k' <- readArray keys idx 311 if k == k' 312 then return idx 313 else do 314 let !idx' = idx + 1 315 if isCacheLineAligned idx' 316 then return (-1) 317 else go idx' 318{-# INLINE searchOne #-} 319 320 321 322------------------------------------------------------------------------------ 323-- | See the documentation for this function in 324-- 'Data.HashTable.Class.foldM'. 325foldM :: (a -> (k,v) -> ST s a) 326 -> a 327 -> HashTable s k v 328 -> ST s a 329foldM f seed0 htRef = readRef htRef >>= foldMWork f seed0 330{-# INLINE foldM #-} 331 332 333------------------------------------------------------------------------------ 334foldMWork :: (a -> (k,v) -> ST s a) 335 -> a 336 -> HashTable_ s k v 337 -> ST s a 338foldMWork f seed0 (HashTable sz _ hashes keys values _) = go 0 seed0 339 where 340 totSz = numElemsInCacheLine * sz 341 342 go !i !seed | i >= totSz = return seed 343 | otherwise = do 344 h <- U.readArray hashes i 345 if h /= emptyMarker 346 then do 347 k <- readArray keys i 348 v <- readArray values i 349 !seed' <- f seed (k,v) 350 go (i+1) seed' 351 352 else 353 go (i+1) seed 354{-# INLINE foldMWork #-} 355 356 357------------------------------------------------------------------------------ 358-- | See the documentation for this function in 359-- 'Data.HashTable.Class.mapM_'. 360mapM_ :: ((k,v) -> ST s a) 361 -> HashTable s k v 362 -> ST s () 363mapM_ f htRef = readRef htRef >>= mapMWork f 364{-# INLINE mapM_ #-} 365 366 367------------------------------------------------------------------------------ 368mapMWork :: ((k,v) -> ST s a) 369 -> HashTable_ s k v 370 -> ST s () 371mapMWork f (HashTable sz _ hashes keys values _) = go 0 372 where 373 totSz = numElemsInCacheLine * sz 374 375 go !i | i >= totSz = return () 376 | otherwise = do 377 h <- U.readArray hashes i 378 if h /= emptyMarker 379 then do 380 k <- readArray keys i 381 v <- readArray values i 382 _ <- f (k,v) 383 go (i+1) 384 else 385 go (i+1) 386{-# INLINE mapMWork #-} 387 388 389--------------------------------- 390-- Private declarations follow -- 391--------------------------------- 392 393 394------------------------------------------------------------------------------ 395newSizedReal :: Int -> ST s (HashTable_ s k v) 396newSizedReal nbuckets = do 397 let !ntotal = nbuckets * numElemsInCacheLine 398 let !maxAttempts = 12 + (log2 $ toEnum nbuckets) 399 400 debug $ "creating cuckoo hash table with " ++ 401 show nbuckets ++ " buckets having " ++ 402 show ntotal ++ " total slots" 403 404 rng <- newBitStream 405 hashes <- U.newArray ntotal 406 keys <- newArray ntotal undefined 407 values <- newArray ntotal undefined 408 409 return $! HashTable nbuckets rng hashes keys values maxAttempts 410 411 412insert' :: (Eq k, Hashable k) => 413 HashTable_ s k v 414 -> k 415 -> v 416 -> ST s (HashTable_ s k v) 417insert' ht k v = do 418 debug "insert': begin" 419 mbX <- updateOrFail ht k v 420 z <- maybe (return ht) 421 (\(k',v') -> grow ht k' v') 422 mbX 423 debug "insert': end" 424 return z 425{-# INLINE insert #-} 426 427 428------------------------------------------------------------------------------ 429mutate' :: (Eq k, Hashable k) => 430 HashTable_ s k v 431 -> k 432 -> (Maybe v -> ST s (Maybe v, a)) 433 -> ST s (HashTable_ s k v, a) 434mutate' ht@(HashTable sz _ hashes keys values _) !k !f = do 435 !(maybeVal, idx, _hashCode) <- lookupSlot 436 !fRes <- f maybeVal 437 case (maybeVal, fRes) of 438 (Nothing, (Nothing, a)) -> return (ht, a) 439 (Just _v, (Just v', a)) -> do 440 writeArray values idx v' 441 return (ht, a) 442 (Just _v, (Nothing, a)) -> do 443 deleteFromSlot ht idx 444 return (ht, a) 445 (Nothing, (Just v', a)) -> do 446 newHt <- insertNew v' 447 return (newHt, a) 448 449 where 450 h1 = hash1 k 451 h2 = hash2 k 452 453 b1 = whichLine h1 sz 454 b2 = whichLine h2 sz 455 456 he1 = hashToElem h1 457 he2 = hashToElem h2 458 459 lookupSlot = do 460 idx1 <- searchOne keys hashes k b1 he1 461 if idx1 >= 0 462 then do 463 v <- readArray values idx1 464 return (Just v, idx1, h1) 465 else do 466 idx2 <- searchOne keys hashes k b2 he2 467 if idx2 >= 0 468 then do 469 v <- readArray values idx2 470 return (Just v, idx2, h2) 471 else do 472 return (Nothing, -1, -1) 473 474 insertNew v = do 475 idxE1 <- cacheLineSearch hashes b1 emptyMarker 476 if idxE1 >= 0 477 then do 478 insertIntoSlot ht idxE1 he1 k v 479 return ht 480 else do 481 idxE2 <- cacheLineSearch hashes b2 emptyMarker 482 if idxE2 >= 0 483 then do 484 insertIntoSlot ht idxE2 he2 k v 485 return ht 486 else do 487 result <- cuckooOrFail ht h1 h2 b1 b2 k v 488 maybe (return ht) 489 (\(k', v') -> do 490 newHt <- grow ht k' v' 491 return newHt) 492 result 493{-# INLINE mutate' #-} 494 495 496------------------------------------------------------------------------------ 497deleteFromSlot :: (Eq k, Hashable k) => 498 HashTable_ s k v 499 -> Int 500 -> ST s () 501deleteFromSlot _ht@(HashTable _ _ hashes keys values _) idx = do 502 U.writeArray hashes idx emptyMarker 503 writeArray keys idx undefined 504 writeArray values idx undefined 505{-# INLINE deleteFromSlot #-} 506 507 508------------------------------------------------------------------------------ 509insertIntoSlot :: (Eq k, Hashable k) => 510 HashTable_ s k v 511 -> Int 512 -> Elem 513 -> k 514 -> v 515 -> ST s () 516insertIntoSlot _ht@(HashTable _ _ hashes keys values _) idx he k v = do 517 U.writeArray hashes idx he 518 writeArray keys idx k 519 writeArray values idx v 520{-# INLINE insertIntoSlot #-} 521 522 523 524------------------------------------------------------------------------------ 525updateOrFail :: (Eq k, Hashable k) => 526 HashTable_ s k v 527 -> k 528 -> v 529 -> ST s (Maybe (k,v)) 530updateOrFail ht@(HashTable sz _ hashes keys values _) k v = do 531 debug $ "updateOrFail: begin: sz = " ++ show sz 532 debug $ " h1=" ++ show h1 ++ ", h2=" ++ show h2 533 ++ ", b1=" ++ show b1 ++ ", b2=" ++ show b2 534 (didx, hashCode) <- delete' ht True k b1 b2 h1 h2 535 536 debug $ "delete' returned (" ++ show didx ++ "," ++ show hashCode ++ ")" 537 538 if didx >= 0 539 then do 540 U.writeArray hashes didx hashCode 541 writeArray keys didx k 542 writeArray values didx v 543 return Nothing 544 else cuckoo 545 546 where 547 h1 = hash1 k 548 h2 = hash2 k 549 550 b1 = whichLine h1 sz 551 b2 = whichLine h2 sz 552 553 cuckoo = do 554 debug "cuckoo: calling cuckooOrFail" 555 result <- cuckooOrFail ht h1 h2 b1 b2 k v 556 debug $ "cuckoo: cuckooOrFail returned " ++ 557 (if isJust result then "Just _" else "Nothing") 558 559 -- if cuckoo failed we need to grow the table. 560 maybe (return Nothing) 561 (return . Just) 562 result 563{-# INLINE updateOrFail #-} 564 565 566------------------------------------------------------------------------------ 567-- Returns either (-1, 0) (not found, and both buckets full ==> trigger 568-- cuckoo), or the slot in the array where it would be safe to write the given 569-- key, and the hashcode to use there 570delete' :: (Hashable k, Eq k) => 571 HashTable_ s k v -- ^ hash table 572 -> Bool -- ^ are we updating? 573 -> k -- ^ key 574 -> Int -- ^ cache line start address 1 575 -> Int -- ^ cache line start address 2 576 -> Int -- ^ hash1 577 -> Int -- ^ hash2 578 -> ST s (Int, Elem) 579delete' (HashTable _ _ hashes keys values _) !updating !k b1 b2 h1 h2 = do 580 debug $ "delete' b1=" ++ show b1 581 ++ " b2=" ++ show b2 582 ++ " h1=" ++ show h1 583 ++ " h2=" ++ show h2 584 prefetchWrite hashes b2 585 let !he1 = hashToElem h1 586 let !he2 = hashToElem h2 587 idx1 <- searchOne keys hashes k b1 he1 588 if idx1 < 0 589 then do 590 idx2 <- searchOne keys hashes k b2 he2 591 if idx2 < 0 592 then if updating 593 then do 594 debug $ "delete': looking for empty element" 595 -- if we're updating, we look for an empty element 596 idxE1 <- cacheLineSearch hashes b1 emptyMarker 597 debug $ "delete': idxE1 was " ++ show idxE1 598 if idxE1 >= 0 599 then return (idxE1, he1) 600 else do 601 idxE2 <- cacheLineSearch hashes b2 emptyMarker 602 debug $ "delete': idxE2 was " ++ show idxE1 603 if idxE2 >= 0 604 then return (idxE2, he2) 605 else return (-1, 0) 606 else return (-1, 0) 607 else deleteIt idx2 he2 608 else deleteIt idx1 he1 609 610 where 611 deleteIt !idx !h = do 612 if not updating 613 then do 614 U.writeArray hashes idx emptyMarker 615 writeArray keys idx undefined 616 writeArray values idx undefined 617 else return () 618 return $! (idx, h) 619{-# INLINE delete' #-} 620 621 622------------------------------------------------------------------------------ 623cuckooOrFail :: (Hashable k, Eq k) => 624 HashTable_ s k v -- ^ hash table 625 -> Int -- ^ hash code 1 626 -> Int -- ^ hash code 2 627 -> Int -- ^ cache line 1 628 -> Int -- ^ cache line 2 629 -> k -- ^ key 630 -> v -- ^ value 631 -> ST s (Maybe (k,v)) 632cuckooOrFail (HashTable sz rng hashes keys values maxAttempts0) 633 !h1_0 !h2_0 !b1_0 !b2_0 !k0 !v0 = do 634 -- at this point we know: 635 -- 636 -- * there is no empty slot in either cache line 637 -- 638 -- * the key doesn't already exist in the table 639 -- 640 -- next things to do: 641 -- 642 -- * decide which element to bump 643 -- 644 -- * read that element, and write (k,v) in there 645 -- 646 -- * attempt to write the bumped element into its other cache slot 647 -- 648 -- * if it fails, recurse. 649 650 debug $ "cuckooOrFail h1_0=" ++ show h1_0 651 ++ " h2_0=" ++ show h2_0 652 ++ " b1_0=" ++ show b1_0 653 ++ " b2_0=" ++ show b2_0 654 655 !lineChoice <- getNextBit rng 656 657 debug $ "chose line " ++ show lineChoice 658 let (!b, !h) = if lineChoice == 0 then (b1_0, h1_0) else (b2_0, h2_0) 659 go b h k0 v0 maxAttempts0 660 661 662 where 663 randomIdx !b = do 664 !z <- getNBits cacheLineIntBits rng 665 return $! b + fromIntegral z 666 667 bumpIdx !idx !h !k !v = do 668 let !he = hashToElem h 669 debug $ "bumpIdx idx=" ++ show idx ++ " h=" ++ show h 670 ++ " he=" ++ show he 671 !he' <- U.readArray hashes idx 672 debug $ "bumpIdx: he' was " ++ show he' 673 !k' <- readArray keys idx 674 v' <- readArray values idx 675 U.writeArray hashes idx he 676 writeArray keys idx k 677 writeArray values idx v 678 debug $ "bumped key with he'=" ++ show he' 679 return $! (he', k', v') 680 681 otherHash he k = if hashToElem h1 == he then h2 else h1 682 where 683 h1 = hash1 k 684 h2 = hash2 k 685 686 tryWrite !b !h k v maxAttempts = do 687 debug $ "tryWrite b=" ++ show b ++ " h=" ++ show h 688 idx <- cacheLineSearch hashes b emptyMarker 689 debug $ "cacheLineSearch returned " ++ show idx 690 691 if idx >= 0 692 then do 693 U.writeArray hashes idx $! hashToElem h 694 writeArray keys idx k 695 writeArray values idx v 696 return Nothing 697 else go b h k v $! maxAttempts - 1 698 699 go !b !h !k v !maxAttempts | maxAttempts == 0 = return $! Just (k,v) 700 | otherwise = do 701 idx <- randomIdx b 702 (!he0', !k', v') <- bumpIdx idx h k v 703 let !h' = otherHash he0' k' 704 let !b' = whichLine h' sz 705 706 tryWrite b' h' k' v' maxAttempts 707 708 709------------------------------------------------------------------------------ 710grow :: (Eq k, Hashable k) => 711 HashTable_ s k v 712 -> k 713 -> v 714 -> ST s (HashTable_ s k v) 715grow (HashTable sz _ hashes keys values _) k0 v0 = do 716 newHt <- grow' $! bumpSize bumpFactor sz 717 718 mbR <- updateOrFail newHt k0 v0 719 maybe (return newHt) 720 (\_ -> grow' $ bumpSize bumpFactor $ _size newHt) 721 mbR 722 723 where 724 grow' newSz = do 725 debug $ "growing table, oldsz = " ++ show sz ++ 726 ", newsz=" ++ show newSz 727 newHt <- newSizedReal newSz 728 rehash newSz newHt 729 730 731 rehash !newSz !newHt = go 0 732 where 733 totSz = numElemsInCacheLine * sz 734 735 go !i | i >= totSz = return newHt 736 | otherwise = do 737 h <- U.readArray hashes i 738 if (h /= emptyMarker) 739 then do 740 k <- readArray keys i 741 v <- readArray values i 742 743 mbR <- updateOrFail newHt k v 744 maybe (go $ i + 1) 745 (\_ -> grow' $ bumpSize bumpFactor newSz) 746 mbR 747 else go $ i + 1 748 749 750------------------------------------------------------------------------------ 751hashPrime :: Int 752hashPrime = if wordSize == 32 then hashPrime32 else hashPrime64 753 where 754 hashPrime32 = 0xedf2a025 755 hashPrime64 = 0x3971ca9c8b3722e9 756 757 758------------------------------------------------------------------------------ 759hash1 :: Hashable k => k -> Int 760hash1 = H.hash 761{-# INLINE hash1 #-} 762 763 764hash2 :: Hashable k => k -> Int 765hash2 = H.hashWithSalt hashPrime 766{-# INLINE hash2 #-} 767 768 769------------------------------------------------------------------------------ 770hashToElem :: Int -> Elem 771hashToElem !h = out 772 where 773 !(I# lo#) = h .&. U.elemMask 774 775 !m# = maskw# lo# 0# 776 !nm# = not# m# 777 778 !r# = ((int2Word# 1#) `and#` m#) `or#` (int2Word# lo# `and#` nm#) 779 !out = U.primWordToElem r# 780{-# INLINE hashToElem #-} 781 782 783------------------------------------------------------------------------------ 784emptyMarker :: Elem 785emptyMarker = 0 786 787 788------------------------------------------------------------------------------ 789maxLoad :: Double 790maxLoad = 0.88 791 792 793------------------------------------------------------------------------------ 794bumpFactor :: Double 795bumpFactor = 0.73 796 797 798------------------------------------------------------------------------------ 799debug :: String -> ST s () 800#ifdef DEBUG 801debug s = unsafeIOToST (putStrLn s >> hFlush stdout) 802#else 803debug _ = return () 804#endif 805{-# INLINE debug #-} 806 807 808------------------------------------------------------------------------------ 809whichLine :: Int -> Int -> Int 810whichLine !h !sz = whichBucket h sz `iShiftL` cacheLineIntBits 811{-# INLINE whichLine #-} 812 813 814------------------------------------------------------------------------------ 815newRef :: HashTable_ s k v -> ST s (HashTable s k v) 816newRef = liftM HT . newSTRef 817{-# INLINE newRef #-} 818 819writeRef :: HashTable s k v -> HashTable_ s k v -> ST s () 820writeRef (HT ref) ht = writeSTRef ref ht 821{-# INLINE writeRef #-} 822 823readRef :: HashTable s k v -> ST s (HashTable_ s k v) 824readRef (HT ref) = readSTRef ref 825{-# INLINE readRef #-} 826 827 828------------------------------------------------------------------------------ 829 830-- | Find index of given key in the hashtable. 831lookupIndex :: (Hashable k, Eq k) => HashTable s k v -> k -> ST s (Maybe Word) 832lookupIndex htRef k = 833 do HashTable sz _ hashes keys _ _ <- readRef htRef 834 835 let !h1 = hash1 k 836 !h2 = hash2 k 837 !he1 = hashToElem h1 838 !he2 = hashToElem h2 839 !b1 = whichLine h1 sz 840 !b2 = whichLine h2 sz 841 842 idx1 <- searchOne keys hashes k b1 he1 843 if idx1 >= 0 844 then return $! (Just $! fromIntegral idx1) 845 else do idx2 <- searchOne keys hashes k b2 he2 846 if idx2 >= 0 847 then return $! (Just $! fromIntegral idx2) 848 else return Nothing 849 850-- | Find the next entry in the hashtable starting at the given index. 851nextByIndex :: HashTable s k v -> Word -> ST s (Maybe (Word,k,v)) 852nextByIndex htRef i0 = 853 do HashTable sz _ hashes keys values _ <- readRef htRef 854 let totSz = numElemsInCacheLine * sz 855 go i 856 | i >= totSz = return Nothing 857 | otherwise = 858 do h <- U.readArray hashes i 859 if h == emptyMarker 860 then go (i+1) 861 else do k <- readArray keys i 862 v <- readArray values i 863 let !i' = fromIntegral i 864 return (Just (i',k,v)) 865 866 go (fromIntegral i0) 867