1{-# LANGUAGE BangPatterns #-}
2{-# LANGUAGE CPP          #-}
3{-# LANGUAGE MagicHash    #-}
4
5{-|
6
7A hash table using the cuckoo strategy. (See
8<http://en.wikipedia.org/wiki/Cuckoo_hashing>). Use this hash table if you...
9
10  * want the fastest possible inserts, and very fast lookups.
11
12  * are conscious of memory usage; this table has less space overhead than
13    "Data.HashTable.ST.Basic" or "Data.HashTable.ST.Linear".
14
15  * don't care that a table resize might pause for a long time to rehash all
16    of the key-value mappings.
17
18
19/Details:/
20
21The basic idea of cuckoo hashing, first introduced by Pagh and Rodler in 2001,
22is to use /d/ hash functions instead of only one; in this implementation d=2
23and the strategy we use is to split up a flat array of slots into @k@ buckets,
24each cache-line-sized:
25
26@
27+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+----------+
28|x0|x1|x2|x3|x4|x5|x6|x7|y0|y1|y2|y3|y4|y5|y6|y7|z0|z1|z2........|
29+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+----------+
30[  ^^^  bucket 0  ^^^  ][  ^^^  bucket 1  ^^^  ]...
31@
32
33There are actually three parallel arrays: one unboxed array of 'Int's for hash
34codes, one boxed array for keys, and one boxed array for values. When looking
35up a key-value mapping, we hash the key using two hash functions and look in
36both buckets in the hash code array for the key. Each bucket is cache-line
37sized, with its keys in no particular order. Because the hash code array is
38unboxed, we can search it for the key using a highly-efficient branchless
39strategy in C code, using SSE instructions if available.
40
41On insert, if both buckets are full, we knock out a randomly-selected entry
42from one of the buckets (using a random walk ensures that \"key cycles\" are
43broken with maximum probability) and try to repeat the insert procedure. This
44process may not succeed; if all items have not successfully found a home after
45some number of tries, we give up and rehash all of the elements into a larger
46table.
47
48/Space overhead: experimental results/
49
50The implementation of cuckoo hash given here is almost as fast for lookups as
51the basic open-addressing hash table using linear probing, and on average is
52more space-efficient: in randomized testing on my 64-bit machine (see
53@test\/compute-overhead\/ComputeOverhead.hs@ in the source distribution), mean
54overhead is 0.77 machine words per key-value mapping, with a standard deviation
55of 0.29 words, and 1.23 words per mapping at the 95th percentile.
56
57/References:/
58
59  * A. Pagh and F. Rodler. Cuckoo hashing. In /Proceedings of the 9th
60    Annual European Symposium on Algorithms/, pp. 121-133, 2001.
61-}
62
63
64module Data.HashTable.ST.Cuckoo
65  ( HashTable
66  , new
67  , newSized
68  , delete
69  , lookup
70  , insert
71  , mutate
72  , mutateST
73  , mapM_
74  , foldM
75  , lookupIndex
76  , nextByIndex
77  ) where
78
79
80------------------------------------------------------------------------------
81#if !MIN_VERSION_base(4,8,0)
82import           Control.Applicative
83#endif
84import           Control.Monad                                      hiding
85                                                                     (foldM,
86                                                                     mapM_)
87import           Control.Monad.ST                                   (ST)
88import           Data.Bits
89import           Data.Hashable                                      hiding
90                                                                     (hash)
91import qualified Data.Hashable                                      as H
92import           Data.Int
93import           Data.Maybe
94import           Data.Primitive.Array
95import           Data.STRef
96import           GHC.Exts
97import           Prelude                                            hiding
98                                                                     (lookup,
99                                                                     mapM_,
100                                                                     read)
101------------------------------------------------------------------------------
102import qualified Data.HashTable.Class                               as C
103import           Data.HashTable.Internal.CacheLine
104import           Data.HashTable.Internal.CheapPseudoRandomBitStream
105import           Data.HashTable.Internal.IntArray                   (Elem)
106import qualified Data.HashTable.Internal.IntArray                   as U
107import           Data.HashTable.Internal.Utils
108
109#ifdef DEBUG
110import           System.IO
111#endif
112
113
114------------------------------------------------------------------------------
115-- | A cuckoo hash table.
116newtype HashTable s k v = HT (STRef s (HashTable_ s k v))
117
118data HashTable_ s k v = HashTable
119    { _size        :: {-# UNPACK #-} !Int     -- ^ in buckets, total size is
120                                              --   numElemsInCacheLine * _size
121    , _rng         :: {-# UNPACK #-} !(BitStream s)
122    , _hashes      :: {-# UNPACK #-} !(U.IntArray s)
123    , _keys        :: {-# UNPACK #-} !(MutableArray s k)
124    , _values      :: {-# UNPACK #-} !(MutableArray s v)
125    , _maxAttempts :: {-# UNPACK #-} !Int
126    }
127
128
129------------------------------------------------------------------------------
130instance C.HashTable HashTable where
131    new             = new
132    newSized        = newSized
133    insert          = insert
134    delete          = delete
135    lookup          = lookup
136    foldM           = foldM
137    mapM_           = mapM_
138    lookupIndex     = lookupIndex
139    nextByIndex     = nextByIndex
140    computeOverhead = computeOverhead
141    mutate          = mutate
142    mutateST        = mutateST
143
144
145------------------------------------------------------------------------------
146instance Show (HashTable s k v) where
147    show _ = "<HashTable>"
148
149
150------------------------------------------------------------------------------
151-- | See the documentation for this function in
152-- 'Data.HashTable.Class.new'.
153new :: ST s (HashTable s k v)
154new = newSizedReal 2 >>= newRef
155{-# INLINE new #-}
156
157
158------------------------------------------------------------------------------
159-- | See the documentation for this function in
160-- 'Data.HashTable.Class.newSized'.
161newSized :: Int -> ST s (HashTable s k v)
162newSized n = do
163    let n' = (n + numElemsInCacheLine - 1) `div` numElemsInCacheLine
164    let k = nextBestPrime $ ceiling $ fromIntegral n' / maxLoad
165    newSizedReal k >>= newRef
166{-# INLINE newSized #-}
167
168
169------------------------------------------------------------------------------
170-- | See the documentation for this function in
171-- 'Data.HashTable.Class.insert'.
172insert :: (Eq k, Hashable k) => HashTable s k v -> k -> v -> ST s ()
173insert ht !k !v = readRef ht >>= \h -> insert' h k v >>= writeRef ht
174
175
176------------------------------------------------------------------------------
177mutate :: (Eq k, Hashable k) =>
178          HashTable s k v
179       -> k
180       -> (Maybe v -> (Maybe v, a))
181       -> ST s a
182mutate htRef !k !f = mutateST htRef k (pure . f)
183{-# INLINE mutate #-}
184
185
186------------------------------------------------------------------------------
187mutateST :: (Eq k, Hashable k) =>
188            HashTable s k v
189         -> k
190         -> (Maybe v -> ST s (Maybe v, a))
191         -> ST s a
192mutateST htRef !k !f = do
193    ht <- readRef htRef
194    (newHt, a) <- mutate' ht k f
195    writeRef htRef newHt
196    return a
197{-# INLINE mutateST #-}
198
199
200------------------------------------------------------------------------------
201-- | See the documentation for this function in
202-- 'Data.HashTable.Class.computeOverhead'.
203computeOverhead :: HashTable s k v -> ST s Double
204computeOverhead htRef = readRef htRef >>= work
205  where
206    work (HashTable sz _ _ _ _ _) = do
207        nFilled <- foldM f 0 htRef
208
209        let oh = (totSz `div` hashCodesPerWord)  -- one half or quarter word
210                                                 -- per element in hashes
211               + 2 * (totSz - nFilled)           -- two words per non-filled entry
212               + 12                              -- fixed overhead
213
214        return $! fromIntegral (oh::Int) / fromIntegral nFilled
215
216      where
217        hashCodesPerWord = (finiteBitSize (0 :: Int)) `div` 16
218        totSz = numElemsInCacheLine * sz
219
220        f !a _ = return $! a+1
221
222
223------------------------------------------------------------------------------
224-- | See the documentation for this function in
225-- 'Data.HashTable.Class.delete'.
226delete :: (Hashable k, Eq k) =>
227          HashTable s k v
228       -> k
229       -> ST s ()
230delete htRef k = readRef htRef >>= go
231  where
232    go ht@(HashTable sz _ _ _ _ _) = do
233        _ <- delete' ht False k b1 b2 h1 h2
234        return ()
235
236      where
237        h1 = hash1 k
238        h2 = hash2 k
239
240        b1 = whichLine h1 sz
241        b2 = whichLine h2 sz
242
243
244------------------------------------------------------------------------------
245-- | See the documentation for this function in
246-- 'Data.HashTable.Class.lookup'.
247lookup :: (Eq k, Hashable k) =>
248          HashTable s k v
249       -> k
250       -> ST s (Maybe v)
251lookup htRef k = do
252    ht <- readRef htRef
253    lookup' ht k
254{-# INLINE lookup #-}
255
256
257------------------------------------------------------------------------------
258lookup' :: (Eq k, Hashable k) =>
259           HashTable_ s k v
260        -> k
261        -> ST s (Maybe v)
262lookup' (HashTable sz _ hashes keys values _) !k = do
263    -- Unlike the write case, prefetch doesn't seem to help here for lookup.
264    -- prefetchRead hashes b2
265    idx1 <- searchOne keys hashes k b1 he1
266
267    if idx1 >= 0
268      then do
269        v <- readArray values idx1
270        return $! Just v
271      else do
272        idx2 <- searchOne keys hashes k b2 he2
273        if idx2 >= 0
274          then do
275            v <- readArray values idx2
276            return $! Just v
277          else
278            return Nothing
279
280  where
281    h1 = hash1 k
282    h2 = hash2 k
283
284    he1 = hashToElem h1
285    he2 = hashToElem h2
286
287    b1 = whichLine h1 sz
288    b2 = whichLine h2 sz
289{-# INLINE lookup' #-}
290
291
292------------------------------------------------------------------------------
293searchOne :: (Eq k) =>
294             MutableArray s k
295          -> U.IntArray s
296          -> k
297          -> Int
298          -> Elem
299          -> ST s Int
300searchOne !keys !hashes !k !b0 !h = go b0
301  where
302    go !b = do
303        debug $ "searchOne: go/" ++ show b ++ "/" ++ show h
304        idx <- cacheLineSearch hashes b h
305        debug $ "searchOne: cacheLineSearch returned " ++ show idx
306
307        case idx of
308          -1 -> return (-1)
309          _  -> do
310              k' <- readArray keys idx
311              if k == k'
312                then return idx
313                else do
314                  let !idx' = idx + 1
315                  if isCacheLineAligned idx'
316                    then return (-1)
317                    else go idx'
318{-# INLINE searchOne #-}
319
320
321
322------------------------------------------------------------------------------
323-- | See the documentation for this function in
324-- 'Data.HashTable.Class.foldM'.
325foldM :: (a -> (k,v) -> ST s a)
326      -> a
327      -> HashTable s k v
328      -> ST s a
329foldM f seed0 htRef = readRef htRef >>= foldMWork f seed0
330{-# INLINE foldM #-}
331
332
333------------------------------------------------------------------------------
334foldMWork :: (a -> (k,v) -> ST s a)
335          -> a
336          -> HashTable_ s k v
337          -> ST s a
338foldMWork f seed0 (HashTable sz _ hashes keys values _) = go 0 seed0
339  where
340    totSz = numElemsInCacheLine * sz
341
342    go !i !seed | i >= totSz = return seed
343                | otherwise  = do
344        h <- U.readArray hashes i
345        if h /= emptyMarker
346          then do
347            k <- readArray keys i
348            v <- readArray values i
349            !seed' <- f seed (k,v)
350            go (i+1) seed'
351
352          else
353            go (i+1) seed
354{-# INLINE foldMWork #-}
355
356
357------------------------------------------------------------------------------
358-- | See the documentation for this function in
359-- 'Data.HashTable.Class.mapM_'.
360mapM_ :: ((k,v) -> ST s a)
361      -> HashTable s k v
362      -> ST s ()
363mapM_ f htRef = readRef htRef >>= mapMWork f
364{-# INLINE mapM_ #-}
365
366
367------------------------------------------------------------------------------
368mapMWork :: ((k,v) -> ST s a)
369         -> HashTable_ s k v
370         -> ST s ()
371mapMWork f (HashTable sz _ hashes keys values _) = go 0
372  where
373    totSz = numElemsInCacheLine * sz
374
375    go !i | i >= totSz = return ()
376          | otherwise  = do
377        h <- U.readArray hashes i
378        if h /= emptyMarker
379          then do
380            k <- readArray keys i
381            v <- readArray values i
382            _ <- f (k,v)
383            go (i+1)
384          else
385            go (i+1)
386{-# INLINE mapMWork #-}
387
388
389---------------------------------
390-- Private declarations follow --
391---------------------------------
392
393
394------------------------------------------------------------------------------
395newSizedReal :: Int -> ST s (HashTable_ s k v)
396newSizedReal nbuckets = do
397    let !ntotal   = nbuckets * numElemsInCacheLine
398    let !maxAttempts = 12 + (log2 $ toEnum nbuckets)
399
400    debug $ "creating cuckoo hash table with " ++
401            show nbuckets ++ " buckets having " ++
402            show ntotal ++ " total slots"
403
404    rng    <- newBitStream
405    hashes <- U.newArray ntotal
406    keys   <- newArray ntotal undefined
407    values <- newArray ntotal undefined
408
409    return $! HashTable nbuckets rng hashes keys values maxAttempts
410
411
412insert' :: (Eq k, Hashable k) =>
413           HashTable_ s k v
414        -> k
415        -> v
416        -> ST s (HashTable_ s k v)
417insert' ht k v = do
418    debug "insert': begin"
419    mbX <- updateOrFail ht k v
420    z <- maybe (return ht)
421               (\(k',v') -> grow ht k' v')
422               mbX
423    debug "insert': end"
424    return z
425{-# INLINE insert #-}
426
427
428------------------------------------------------------------------------------
429mutate' :: (Eq k, Hashable k) =>
430           HashTable_ s k v
431        -> k
432        -> (Maybe v -> ST s (Maybe v, a))
433        -> ST s (HashTable_ s k v, a)
434mutate' ht@(HashTable sz _ hashes keys values _) !k !f = do
435    !(maybeVal, idx, _hashCode) <- lookupSlot
436    !fRes <- f maybeVal
437    case (maybeVal, fRes) of
438        (Nothing, (Nothing, a)) -> return (ht, a)
439        (Just _v, (Just v', a)) -> do
440            writeArray values idx v'
441            return (ht, a)
442        (Just _v, (Nothing, a)) -> do
443            deleteFromSlot ht idx
444            return (ht, a)
445        (Nothing, (Just v', a)) -> do
446            newHt <- insertNew v'
447            return (newHt, a)
448
449  where
450    h1 = hash1 k
451    h2 = hash2 k
452
453    b1 = whichLine h1 sz
454    b2 = whichLine h2 sz
455
456    he1 = hashToElem h1
457    he2 = hashToElem h2
458
459    lookupSlot = do
460        idx1 <- searchOne keys hashes k b1 he1
461        if idx1 >= 0
462          then do
463            v <- readArray values idx1
464            return (Just v, idx1, h1)
465          else do
466            idx2 <- searchOne keys hashes k b2 he2
467            if idx2 >= 0
468              then do
469                v <- readArray values idx2
470                return (Just v, idx2, h2)
471              else do
472                return (Nothing, -1, -1)
473
474    insertNew v = do
475        idxE1 <- cacheLineSearch hashes b1 emptyMarker
476        if idxE1 >= 0
477          then do
478            insertIntoSlot ht idxE1 he1 k v
479            return ht
480          else do
481            idxE2 <- cacheLineSearch hashes b2 emptyMarker
482            if idxE2 >= 0
483              then do
484                insertIntoSlot ht idxE2 he2 k v
485                return ht
486              else do
487                result <- cuckooOrFail ht h1 h2 b1 b2 k v
488                maybe (return ht)
489                      (\(k', v') -> do
490                          newHt <- grow ht k' v'
491                          return newHt)
492                      result
493{-# INLINE mutate' #-}
494
495
496------------------------------------------------------------------------------
497deleteFromSlot :: (Eq k, Hashable k) =>
498                  HashTable_ s k v
499               -> Int
500               -> ST s ()
501deleteFromSlot _ht@(HashTable _ _ hashes keys values _) idx = do
502    U.writeArray hashes idx emptyMarker
503    writeArray keys idx undefined
504    writeArray values idx undefined
505{-# INLINE deleteFromSlot #-}
506
507
508------------------------------------------------------------------------------
509insertIntoSlot :: (Eq k, Hashable k) =>
510                  HashTable_ s k v
511               -> Int
512               -> Elem
513               -> k
514               -> v
515               -> ST s ()
516insertIntoSlot _ht@(HashTable _ _ hashes keys values _) idx he k v = do
517    U.writeArray hashes idx he
518    writeArray keys idx k
519    writeArray values idx v
520{-# INLINE insertIntoSlot #-}
521
522
523
524------------------------------------------------------------------------------
525updateOrFail :: (Eq k, Hashable k) =>
526                HashTable_ s k v
527             -> k
528             -> v
529             -> ST s (Maybe (k,v))
530updateOrFail ht@(HashTable sz _ hashes keys values _) k v = do
531    debug $ "updateOrFail: begin: sz = " ++ show sz
532    debug $ "   h1=" ++ show h1 ++ ", h2=" ++ show h2
533            ++ ", b1=" ++ show b1 ++ ", b2=" ++ show b2
534    (didx, hashCode) <- delete' ht True k b1 b2 h1 h2
535
536    debug $ "delete' returned (" ++ show didx ++ "," ++ show hashCode ++ ")"
537
538    if didx >= 0
539      then do
540        U.writeArray hashes didx hashCode
541        writeArray keys didx k
542        writeArray values didx v
543        return Nothing
544      else cuckoo
545
546  where
547    h1 = hash1 k
548    h2 = hash2 k
549
550    b1 = whichLine h1 sz
551    b2 = whichLine h2 sz
552
553    cuckoo = do
554        debug "cuckoo: calling cuckooOrFail"
555        result <- cuckooOrFail ht h1 h2 b1 b2 k v
556        debug $ "cuckoo: cuckooOrFail returned " ++
557                  (if isJust result then "Just _" else "Nothing")
558
559        -- if cuckoo failed we need to grow the table.
560        maybe (return Nothing)
561              (return . Just)
562              result
563{-# INLINE updateOrFail #-}
564
565
566------------------------------------------------------------------------------
567-- Returns either (-1, 0) (not found, and both buckets full ==> trigger
568-- cuckoo), or the slot in the array where it would be safe to write the given
569-- key, and the hashcode to use there
570delete' :: (Hashable k, Eq k) =>
571           HashTable_ s k v     -- ^ hash table
572        -> Bool                 -- ^ are we updating?
573        -> k                    -- ^ key
574        -> Int                  -- ^ cache line start address 1
575        -> Int                  -- ^ cache line start address 2
576        -> Int                  -- ^ hash1
577        -> Int                  -- ^ hash2
578        -> ST s (Int, Elem)
579delete' (HashTable _ _ hashes keys values _) !updating !k b1 b2 h1 h2 = do
580    debug $ "delete' b1=" ++ show b1
581              ++ " b2=" ++ show b2
582              ++ " h1=" ++ show h1
583              ++ " h2=" ++ show h2
584    prefetchWrite hashes b2
585    let !he1 = hashToElem h1
586    let !he2 = hashToElem h2
587    idx1 <- searchOne keys hashes k b1 he1
588    if idx1 < 0
589      then do
590        idx2 <- searchOne keys hashes k b2 he2
591        if idx2 < 0
592          then if updating
593                 then do
594                   debug $ "delete': looking for empty element"
595                   -- if we're updating, we look for an empty element
596                   idxE1 <- cacheLineSearch hashes b1 emptyMarker
597                   debug $ "delete': idxE1 was " ++ show idxE1
598                   if idxE1 >= 0
599                     then return (idxE1, he1)
600                     else do
601                       idxE2 <- cacheLineSearch hashes b2 emptyMarker
602                       debug $ "delete': idxE2 was " ++ show idxE1
603                       if idxE2 >= 0
604                         then return (idxE2, he2)
605                         else return (-1, 0)
606                 else return (-1, 0)
607          else deleteIt idx2 he2
608      else deleteIt idx1 he1
609
610  where
611    deleteIt !idx !h = do
612        if not updating
613          then do
614            U.writeArray hashes idx emptyMarker
615            writeArray keys idx undefined
616            writeArray values idx undefined
617          else return ()
618        return $! (idx, h)
619{-# INLINE delete' #-}
620
621
622------------------------------------------------------------------------------
623cuckooOrFail :: (Hashable k, Eq k) =>
624                HashTable_ s k v  -- ^ hash table
625             -> Int               -- ^ hash code 1
626             -> Int               -- ^ hash code 2
627             -> Int               -- ^ cache line 1
628             -> Int               -- ^ cache line 2
629             -> k                 -- ^ key
630             -> v                 -- ^ value
631             -> ST s (Maybe (k,v))
632cuckooOrFail (HashTable sz rng hashes keys values maxAttempts0)
633                 !h1_0 !h2_0 !b1_0 !b2_0 !k0 !v0 = do
634    -- at this point we know:
635    --
636    --   * there is no empty slot in either cache line
637    --
638    --   * the key doesn't already exist in the table
639    --
640    -- next things to do:
641    --
642    --   * decide which element to bump
643    --
644    --   * read that element, and write (k,v) in there
645    --
646    --   * attempt to write the bumped element into its other cache slot
647    --
648    --   * if it fails, recurse.
649
650    debug $ "cuckooOrFail h1_0=" ++ show h1_0
651              ++ " h2_0=" ++ show h2_0
652              ++ " b1_0=" ++ show b1_0
653              ++ " b2_0=" ++ show b2_0
654
655    !lineChoice <- getNextBit rng
656
657    debug $ "chose line " ++ show lineChoice
658    let (!b, !h) = if lineChoice == 0 then (b1_0, h1_0) else (b2_0, h2_0)
659    go b h k0 v0 maxAttempts0
660
661
662  where
663    randomIdx !b = do
664        !z <- getNBits cacheLineIntBits rng
665        return $! b + fromIntegral z
666
667    bumpIdx !idx !h !k !v = do
668        let !he = hashToElem h
669        debug $ "bumpIdx idx=" ++ show idx ++ " h=" ++ show h
670                  ++ " he=" ++ show he
671        !he' <- U.readArray hashes idx
672        debug $ "bumpIdx: he' was " ++ show he'
673        !k' <- readArray keys idx
674        v'  <- readArray values idx
675        U.writeArray hashes idx he
676        writeArray keys idx k
677        writeArray values idx v
678        debug $ "bumped key with he'=" ++ show he'
679        return $! (he', k', v')
680
681    otherHash he k = if hashToElem h1 == he then h2 else h1
682      where
683        h1 = hash1 k
684        h2 = hash2 k
685
686    tryWrite !b !h k v maxAttempts = do
687        debug $ "tryWrite b=" ++ show b ++ " h=" ++ show h
688        idx <- cacheLineSearch hashes b emptyMarker
689        debug $ "cacheLineSearch returned " ++ show idx
690
691        if idx >= 0
692          then do
693            U.writeArray hashes idx $! hashToElem h
694            writeArray keys idx k
695            writeArray values idx v
696            return Nothing
697          else go b h k v $! maxAttempts - 1
698
699    go !b !h !k v !maxAttempts | maxAttempts == 0 = return $! Just (k,v)
700                               | otherwise = do
701        idx <- randomIdx b
702        (!he0', !k', v') <- bumpIdx idx h k v
703        let !h' = otherHash he0' k'
704        let !b' = whichLine h' sz
705
706        tryWrite b' h' k' v' maxAttempts
707
708
709------------------------------------------------------------------------------
710grow :: (Eq k, Hashable k) =>
711        HashTable_ s k v
712     -> k
713     -> v
714     -> ST s (HashTable_ s k v)
715grow (HashTable sz _ hashes keys values _) k0 v0 = do
716    newHt <- grow' $! bumpSize bumpFactor sz
717
718    mbR <- updateOrFail newHt k0 v0
719    maybe (return newHt)
720          (\_ -> grow' $ bumpSize bumpFactor $ _size newHt)
721          mbR
722
723  where
724    grow' newSz = do
725        debug $ "growing table, oldsz = " ++ show sz ++
726                ", newsz=" ++ show newSz
727        newHt <- newSizedReal newSz
728        rehash newSz newHt
729
730
731    rehash !newSz !newHt = go 0
732      where
733        totSz = numElemsInCacheLine * sz
734
735        go !i | i >= totSz = return newHt
736              | otherwise  = do
737            h <- U.readArray hashes i
738            if (h /= emptyMarker)
739              then do
740                k <- readArray keys i
741                v <- readArray values i
742
743                mbR <- updateOrFail newHt k v
744                maybe (go $ i + 1)
745                      (\_ -> grow' $ bumpSize bumpFactor newSz)
746                      mbR
747              else go $ i + 1
748
749
750------------------------------------------------------------------------------
751hashPrime :: Int
752hashPrime = if wordSize == 32 then hashPrime32 else hashPrime64
753  where
754    hashPrime32 = 0xedf2a025
755    hashPrime64 = 0x3971ca9c8b3722e9
756
757
758------------------------------------------------------------------------------
759hash1 :: Hashable k => k -> Int
760hash1 = H.hash
761{-# INLINE hash1 #-}
762
763
764hash2 :: Hashable k => k -> Int
765hash2 = H.hashWithSalt hashPrime
766{-# INLINE hash2 #-}
767
768
769------------------------------------------------------------------------------
770hashToElem :: Int -> Elem
771hashToElem !h = out
772  where
773    !(I# lo#) = h .&. U.elemMask
774
775    !m#  = maskw# lo# 0#
776    !nm# = not# m#
777
778    !r#  = ((int2Word# 1#) `and#` m#) `or#` (int2Word# lo# `and#` nm#)
779    !out = U.primWordToElem r#
780{-# INLINE hashToElem #-}
781
782
783------------------------------------------------------------------------------
784emptyMarker :: Elem
785emptyMarker = 0
786
787
788------------------------------------------------------------------------------
789maxLoad :: Double
790maxLoad = 0.88
791
792
793------------------------------------------------------------------------------
794bumpFactor :: Double
795bumpFactor = 0.73
796
797
798------------------------------------------------------------------------------
799debug :: String -> ST s ()
800#ifdef DEBUG
801debug s = unsafeIOToST (putStrLn s >> hFlush stdout)
802#else
803debug _ = return ()
804#endif
805{-# INLINE debug #-}
806
807
808------------------------------------------------------------------------------
809whichLine :: Int -> Int -> Int
810whichLine !h !sz = whichBucket h sz `iShiftL` cacheLineIntBits
811{-# INLINE whichLine #-}
812
813
814------------------------------------------------------------------------------
815newRef :: HashTable_ s k v -> ST s (HashTable s k v)
816newRef = liftM HT . newSTRef
817{-# INLINE newRef #-}
818
819writeRef :: HashTable s k v -> HashTable_ s k v -> ST s ()
820writeRef (HT ref) ht = writeSTRef ref ht
821{-# INLINE writeRef #-}
822
823readRef :: HashTable s k v -> ST s (HashTable_ s k v)
824readRef (HT ref) = readSTRef ref
825{-# INLINE readRef #-}
826
827
828------------------------------------------------------------------------------
829
830-- | Find index of given key in the hashtable.
831lookupIndex :: (Hashable k, Eq k) => HashTable s k v -> k -> ST s (Maybe Word)
832lookupIndex htRef k =
833  do HashTable sz _ hashes keys _ _ <- readRef htRef
834
835     let !h1  = hash1 k
836         !h2  = hash2 k
837         !he1 = hashToElem h1
838         !he2 = hashToElem h2
839         !b1  = whichLine h1 sz
840         !b2  = whichLine h2 sz
841
842     idx1 <- searchOne keys hashes k b1 he1
843     if idx1 >= 0
844       then return $! (Just $! fromIntegral idx1)
845       else do idx2 <- searchOne keys hashes k b2 he2
846               if idx2 >= 0
847                 then return $! (Just $! fromIntegral idx2)
848                 else return Nothing
849
850-- | Find the next entry in the hashtable starting at the given index.
851nextByIndex :: HashTable s k v -> Word -> ST s (Maybe (Word,k,v))
852nextByIndex htRef i0 =
853  do HashTable sz _ hashes keys values _ <- readRef htRef
854     let totSz = numElemsInCacheLine * sz
855         go i
856           | i >= totSz = return Nothing
857           | otherwise =
858               do h <- U.readArray hashes i
859                  if h == emptyMarker
860                    then go (i+1)
861                    else do k <- readArray keys i
862                            v <- readArray values i
863                            let !i' = fromIntegral i
864                            return (Just (i',k,v))
865
866     go (fromIntegral i0)
867