1{-# LANGUAGE BangPatterns #-}
2{-# LANGUAGE CPP          #-}
3{-# LANGUAGE MagicHash    #-}
4
5{-|
6
7A basic open-addressing hash table using linear probing. Use this hash table if
8you...
9
10  * want the fastest possible lookups, and very fast inserts.
11
12  * don't care about wasting a little bit of memory to get it.
13
14  * don't care that a table resize might pause for a long time to rehash all
15    of the key-value mappings.
16
17  * have a workload which is not heavy with deletes; deletes clutter the table
18    with deleted markers and force the table to be completely rehashed fairly
19    often.
20
21Of the hash tables in this collection, this hash table has the best lookup
22performance, while maintaining competitive insert performance.
23
24/Space overhead/
25
26This table is not especially memory-efficient; firstly, the table has a maximum
27load factor of 0.83 and will be resized if load exceeds this value. Secondly,
28to improve insert and lookup performance, we store a 16-bit hash code for each
29key in the table.
30
31Each hash table entry requires at least 2.25 words (on a 64-bit machine), two
32for the pointers to the key and value and one quarter word for the hash code.
33We don't count key and value pointers as overhead, because they have to be
34there -- so the overhead for a full slot is at least one quarter word -- but
35empty slots in the hash table count for a full 2.25 words of overhead. Define
36@m@ as the number of slots in the table, @n@ as the number of key value
37mappings, and @ws@ as the machine word size in /bytes/. If the load factor is
38@k=n\/m@, the amount of space /wasted/ per mapping in words is:
39
40@
41w(n) = (m*(2*ws + 2) - n*(2*ws)) / ws
42@
43
44Since @m=n\/k@,
45
46@
47w(n) = n\/k * (2*ws + 2) - n*(2*ws)
48     = (n * (2 + 2*ws*(1-k)) / k) / ws
49@
50
51Solving for @k=0.83@, the maximum load factor, gives a /minimum/ overhead of
520.71 words per mapping on a 64-bit machine, or 1.01 words per mapping on a
5332-bit machine. If @k=0.5@, which should be under normal usage the /maximum/
54overhead situation, then the overhead would be 2.5 words per mapping on a
5564-bit machine, or 3.0 words per mapping on a 32-bit machine.
56
57/Space overhead: experimental results/
58
59In randomized testing on a 64-bit machine (see
60@test\/compute-overhead\/ComputeOverhead.hs@ in the source distribution), mean
61overhead (that is, the number of words needed to store the key-value mapping
62over and above the two words necessary for the key and the value pointers) is
63approximately 1.24 machine words per key-value mapping with a standard
64deviation of about 0.30 words, and 1.70 words per mapping at the 95th
65percentile.
66
67/Expensive resizes/
68
69If enough elements are inserted into the table to make it exceed the maximum
70load factor, the table is resized. A resize involves a complete rehash of all
71the elements in the table, which means that any given call to 'insert' might
72take /O(n)/ time in the size of the table, with a large constant factor. If a
73long pause waiting for the table to resize is unacceptable for your
74application, you should choose the included linear hash table instead.
75
76
77/References:/
78
79  * Knuth, Donald E. /The Art of Computer Programming/, vol. 3 Sorting and
80    Searching. Addison-Wesley Publishing Company, 1973.
81-}
82
83module Data.HashTable.ST.Basic
84  ( HashTable
85  , new
86  , newSized
87  , delete
88  , lookup
89  , insert
90  , mutate
91  , mutateST
92  , mapM_
93  , foldM
94  , computeOverhead
95  ) where
96
97
98------------------------------------------------------------------------------
99#if !MIN_VERSION_base(4,8,0)
100import           Control.Applicative
101#endif
102import           Control.Exception                 (assert)
103import           Control.Monad                     hiding (foldM, mapM_)
104import           Control.Monad.ST                  (ST)
105import           Data.Bits
106import           Data.Hashable                     (Hashable)
107import qualified Data.Hashable                     as H
108import           Data.Maybe
109import           Data.Monoid
110#if MIN_VERSION_base(4,9,0) && !MIN_VERSION_base(4,11,0)
111import           Data.Semigroup
112#endif
113import qualified Data.Primitive.ByteArray          as A
114import           Data.STRef
115import           GHC.Exts
116import           Prelude                           hiding (lookup, mapM_, read)
117------------------------------------------------------------------------------
118import qualified Data.HashTable.Class              as C
119import           Data.HashTable.Internal.Array
120import           Data.HashTable.Internal.CacheLine
121import           Data.HashTable.Internal.IntArray  (Elem)
122import qualified Data.HashTable.Internal.IntArray  as U
123import           Data.HashTable.Internal.Utils
124
125
126------------------------------------------------------------------------------
127-- | An open addressing hash table using linear probing.
128newtype HashTable s k v = HT (STRef s (HashTable_ s k v))
129
130type SizeRefs s = A.MutableByteArray s
131
132intSz :: Int
133intSz = (finiteBitSize (0::Int) `div` 8)
134
135readLoad :: SizeRefs s -> ST s Int
136readLoad = flip A.readByteArray 0
137
138writeLoad :: SizeRefs s -> Int -> ST s ()
139writeLoad = flip A.writeByteArray 0
140
141readDelLoad :: SizeRefs s -> ST s Int
142readDelLoad = flip A.readByteArray 1
143
144writeDelLoad :: SizeRefs s -> Int -> ST s ()
145writeDelLoad = flip A.writeByteArray 1
146
147newSizeRefs :: ST s (SizeRefs s)
148newSizeRefs = do
149    let asz = 2 * intSz
150    a <- A.newAlignedPinnedByteArray asz intSz
151    A.fillByteArray a 0 asz 0
152    return a
153
154
155data HashTable_ s k v = HashTable
156    { _size   :: {-# UNPACK #-} !Int
157    , _load   :: !(SizeRefs s)   -- ^ 2-element array, stores how many entries
158                                  -- and deleted entries are in the table.
159    , _hashes :: !(U.IntArray s)
160    , _keys   :: {-# UNPACK #-} !(MutableArray s k)
161    , _values :: {-# UNPACK #-} !(MutableArray s v)
162    }
163
164
165------------------------------------------------------------------------------
166instance C.HashTable HashTable where
167    new             = new
168    newSized        = newSized
169    insert          = insert
170    delete          = delete
171    lookup          = lookup
172    foldM           = foldM
173    mapM_           = mapM_
174    lookupIndex     = lookupIndex
175    nextByIndex     = nextByIndex
176    computeOverhead = computeOverhead
177    mutate          = mutate
178    mutateST        = mutateST
179
180
181------------------------------------------------------------------------------
182instance Show (HashTable s k v) where
183    show _ = "<HashTable>"
184
185
186------------------------------------------------------------------------------
187-- | See the documentation for this function in
188-- 'Data.HashTable.Class.new'.
189new :: ST s (HashTable s k v)
190new = newSized 1
191{-# INLINE new #-}
192
193
194------------------------------------------------------------------------------
195-- | See the documentation for this function in
196-- 'Data.HashTable.Class.newSized'.
197newSized :: Int -> ST s (HashTable s k v)
198newSized n = do
199    debug $ "entering: newSized " ++ show n
200    let m = nextBestPrime $ ceiling (fromIntegral n / maxLoad)
201    ht <- newSizedReal m
202    newRef ht
203{-# INLINE newSized #-}
204
205
206------------------------------------------------------------------------------
207newSizedReal :: Int -> ST s (HashTable_ s k v)
208newSizedReal m = do
209    -- make sure the hash array is a multiple of cache-line sized so we can
210    -- always search a whole cache line at once
211    let m' = ((m + numElemsInCacheLine - 1) `div` numElemsInCacheLine)
212             * numElemsInCacheLine
213    h  <- U.newArray m'
214    k  <- newArray m undefined
215    v  <- newArray m undefined
216    ld <- newSizeRefs
217    return $! HashTable m ld h k v
218
219
220------------------------------------------------------------------------------
221-- | See the documentation for this function in
222-- 'Data.HashTable.Class.delete'.
223delete :: (Hashable k, Eq k) =>
224          (HashTable s k v)
225       -> k
226       -> ST s ()
227delete htRef k = do
228    ht <- readRef htRef
229    slots <- findSafeSlots ht k h
230    when (trueInt (_slotFound slots)) $ deleteFromSlot ht (_slotB1 slots)
231  where
232    !h = hash k
233{-# INLINE delete #-}
234
235
236------------------------------------------------------------------------------
237-- | See the documentation for this function in
238-- 'Data.HashTable.Class.lookup'.
239lookup :: (Eq k, Hashable k) => (HashTable s k v) -> k -> ST s (Maybe v)
240lookup htRef !k = do
241    ht <- readRef htRef
242    lookup' ht
243  where
244    lookup' (HashTable sz _ hashes keys values) = do
245        let !b = whichBucket h sz
246        debug $ "lookup h=" ++ show h ++ " sz=" ++ show sz ++ " b=" ++ show b
247        go b 0 sz
248
249      where
250        !h  = hash k
251        !he = hashToElem h
252
253        go !b !start !end = {-# SCC "lookup/go" #-} do
254            debug $ concat [ "lookup'/go: "
255                           , show b
256                           , "/"
257                           , show start
258                           , "/"
259                           , show end
260                           ]
261            idx <- forwardSearch2 hashes b end he emptyMarker
262            debug $ "forwardSearch2 returned " ++ show idx
263            if (idx < 0 || idx < start || idx >= end)
264               then return Nothing
265               else do
266                 h0  <- U.readArray hashes idx
267                 debug $ "h0 was " ++ show h0
268
269                 if recordIsEmpty h0
270                   then do
271                       debug $ "record empty, returning Nothing"
272                       return Nothing
273                   else do
274                     k' <- readArray keys idx
275                     if k == k'
276                       then do
277                         debug $ "value found at " ++ show idx
278                         v <- readArray values idx
279                         return $! Just v
280                       else do
281                         debug $ "value not found, recursing"
282                         if idx < b
283                           then go (idx + 1) (idx + 1) b
284                           else go (idx + 1) start end
285{-# INLINE lookup #-}
286
287
288------------------------------------------------------------------------------
289-- | See the documentation for this function in
290-- 'Data.HashTable.Class.insert'.
291insert :: (Eq k, Hashable k) =>
292          (HashTable s k v)
293       -> k
294       -> v
295       -> ST s ()
296insert htRef !k !v = do
297    ht <- readRef htRef
298    debug $ "insert: h=" ++ show h
299    slots@(SlotFindResponse foundInt b0 b1) <- findSafeSlots ht k h
300    let found = trueInt foundInt
301    debug $ "insert: findSafeSlots returned " ++ show slots
302    when (found && (b0 /= b1)) $ deleteFromSlot ht b1
303    insertIntoSlot ht b0 he k v
304    ht' <- checkOverflow ht
305    writeRef htRef ht'
306
307  where
308    !h = hash k
309    !he = hashToElem h
310{-# INLINE insert #-}
311
312
313------------------------------------------------------------------------------
314-- | See the documentation for this function in
315-- 'Data.HashTable.Class.mutate'.
316mutate :: (Eq k, Hashable k) =>
317          (HashTable s k v)
318       -> k
319       -> (Maybe v -> (Maybe v, a))
320       -> ST s a
321mutate htRef !k !f = mutateST htRef k (pure . f)
322{-# INLINE mutate #-}
323
324
325------------------------------------------------------------------------------
326-- | See the documentation for this function in
327-- 'Data.HashTable.Class.mutateST'.
328mutateST :: (Eq k, Hashable k) =>
329            (HashTable s k v)
330         -> k
331         -> (Maybe v -> ST s (Maybe v, a))
332         -> ST s a
333mutateST htRef !k !f = do
334    ht <- readRef htRef
335    let values = _values ht
336    debug $ "mutate h=" ++ show h
337    slots@(SlotFindResponse foundInt b0 b1) <- findSafeSlots ht k h
338    let found = trueInt foundInt
339    debug $ "findSafeSlots returned " ++ show slots
340    !mv <- if found
341              then fmap Just $ readArray values b1
342              else return Nothing
343    (!mv', !result) <- f mv
344    case (mv, mv') of
345        (Nothing, Nothing) -> return ()
346        (Just _, Nothing)  -> do
347            deleteFromSlot ht b1
348        (Nothing, Just v') -> do
349            insertIntoSlot ht b0 he k v'
350            ht' <- checkOverflow ht
351            writeRef htRef ht'
352        (Just _, Just v')  -> do
353            when (b0 /= b1) $
354                deleteFromSlot ht b1
355            insertIntoSlot ht b0 he k v'
356    return result
357  where
358    !h     = hash k
359    !he    = hashToElem h
360{-# INLINE mutateST #-}
361
362
363------------------------------------------------------------------------------
364-- | See the documentation for this function in
365-- 'Data.HashTable.Class.foldM'.
366foldM :: (a -> (k,v) -> ST s a) -> a -> HashTable s k v -> ST s a
367foldM f seed0 htRef = readRef htRef >>= work
368  where
369    work (HashTable sz _ hashes keys values) = go 0 seed0
370      where
371        go !i !seed | i >= sz = return seed
372                    | otherwise = do
373            h <- U.readArray hashes i
374            if recordIsEmpty h || recordIsDeleted h
375              then go (i+1) seed
376              else do
377                k <- readArray keys i
378                v <- readArray values i
379                !seed' <- f seed (k, v)
380                go (i+1) seed'
381
382
383------------------------------------------------------------------------------
384-- | See the documentation for this function in
385-- 'Data.HashTable.Class.mapM_'.
386mapM_ :: ((k,v) -> ST s b) -> HashTable s k v -> ST s ()
387mapM_ f htRef = readRef htRef >>= work
388  where
389    work (HashTable sz _ hashes keys values) = go 0
390      where
391        go !i | i >= sz = return ()
392              | otherwise = do
393            h <- U.readArray hashes i
394            if recordIsEmpty h || recordIsDeleted h
395              then go (i+1)
396              else do
397                k <- readArray keys i
398                v <- readArray values i
399                _ <- f (k, v)
400                go (i+1)
401
402
403------------------------------------------------------------------------------
404-- | See the documentation for this function in
405-- 'Data.HashTable.Class.computeOverhead'.
406computeOverhead :: HashTable s k v -> ST s Double
407computeOverhead htRef = readRef htRef >>= work
408  where
409    work (HashTable sz' loadRef _ _ _) = do
410        !ld <- readLoad loadRef
411        let k = fromIntegral ld / sz
412        return $ constOverhead/sz + (2 + 2*ws*(1-k)) / (k * ws)
413      where
414        ws = fromIntegral $! finiteBitSize (0::Int) `div` 8
415        sz = fromIntegral sz'
416        -- Change these if you change the representation
417        constOverhead = 14
418
419
420------------------------------
421-- Private functions follow --
422------------------------------
423
424
425------------------------------------------------------------------------------
426{-# INLINE insertRecord #-}
427insertRecord :: Int
428             -> U.IntArray s
429             -> MutableArray s k
430             -> MutableArray s v
431             -> Int
432             -> k
433             -> v
434             -> ST s ()
435insertRecord !sz !hashes !keys !values !h !key !value = do
436    let !b = whichBucket h sz
437    debug $ "insertRecord sz=" ++ show sz ++ " h=" ++ show h ++ " b=" ++ show b
438    probe b
439
440  where
441    he = hashToElem h
442
443    probe !i = {-# SCC "insertRecord/probe" #-} do
444        !idx <- forwardSearch2 hashes i sz emptyMarker deletedMarker
445        debug $ "forwardSearch2 returned " ++ show idx
446        assert (idx >= 0) $ do
447            U.writeArray hashes idx he
448            writeArray keys idx key
449            writeArray values idx value
450
451
452------------------------------------------------------------------------------
453checkOverflow :: (Eq k, Hashable k) =>
454                 (HashTable_ s k v)
455              -> ST s (HashTable_ s k v)
456checkOverflow ht@(HashTable sz ldRef _ _ _) = do
457    !ld <- readLoad ldRef
458    !dl <- readDelLoad ldRef
459
460    debug $ concat [ "checkOverflow: sz="
461                   , show sz
462                   , " entries="
463                   , show ld
464                   , " deleted="
465                   , show dl ]
466
467    if fromIntegral (ld + dl) / fromIntegral sz > maxLoad
468      then if dl > ld `div` 2
469             then rehashAll ht sz
470             else growTable ht
471      else return ht
472
473
474------------------------------------------------------------------------------
475rehashAll :: Hashable k => HashTable_ s k v -> Int -> ST s (HashTable_ s k v)
476rehashAll (HashTable sz loadRef hashes keys values) sz' = do
477    debug $ "rehashing: old size " ++ show sz ++ ", new size " ++ show sz'
478    ht' <- newSizedReal sz'
479    let (HashTable _ loadRef' newHashes newKeys newValues) = ht'
480    readLoad loadRef >>= writeLoad loadRef'
481    rehash newHashes newKeys newValues
482    return ht'
483
484  where
485    rehash newHashes newKeys newValues = go 0
486      where
487        go !i | i >= sz   = return ()
488              | otherwise = {-# SCC "growTable/rehash" #-} do
489                    h0 <- U.readArray hashes i
490                    when (not (recordIsEmpty h0 || recordIsDeleted h0)) $ do
491                        k <- readArray keys i
492                        v <- readArray values i
493                        insertRecord sz' newHashes newKeys newValues
494                                     (hash k) k v
495                    go $ i+1
496
497
498------------------------------------------------------------------------------
499growTable :: Hashable k => HashTable_ s k v -> ST s (HashTable_ s k v)
500growTable ht@(HashTable sz _ _ _ _) = do
501    let !sz' = bumpSize maxLoad sz
502    rehashAll ht sz'
503
504
505------------------------------------------------------------------------------
506-- Helper data structure for findSafeSlots
507newtype Slot = Slot { _slot :: Int } deriving (Show)
508
509
510------------------------------------------------------------------------------
511
512#if MIN_VERSION_base(4,9,0)
513instance Semigroup Slot where
514 (<>) = mappend
515#endif
516
517instance Monoid Slot where
518    mempty = Slot maxBound
519    (Slot x1) `mappend` (Slot x2) =
520        let !m = mask x1 maxBound
521        in Slot $! (complement m .&. x1) .|. (m .&. x2)
522
523
524------------------------------------------------------------------------------
525-- findSafeSlots return type
526data SlotFindResponse = SlotFindResponse {
527    _slotFound :: {-# UNPACK #-} !Int -- we use Int because Bool won't unpack
528  , _slotB0    :: {-# UNPACK #-} !Int
529  , _slotB1    :: {-# UNPACK #-} !Int
530} deriving (Show)
531
532
533------------------------------------------------------------------------------
534-- Returns ST s (SlotFoundResponse found b0 b1),
535-- where
536--     * found :: Int  - 1 if key-value mapping is already in the table,
537--                       0 otherwise.
538--     * b0    :: Int  - The index of a slot where it would be safe to write
539--                       the given key (if the key is already in the mapping,
540--                       you have to delete it before using this slot).
541--     * b1    :: Int  - The index of a slot where the key currently resides.
542--                       Or, if the key is not in the table, b1 is a slot
543--                       where it is safe to write the key (b1 == b0).
544findSafeSlots :: (Hashable k, Eq k) =>
545                 (HashTable_ s k v)
546              -> k
547              -> Int
548              -> ST s SlotFindResponse
549findSafeSlots (HashTable !sz _ hashes keys _) k h = do
550    debug $ "findSafeSlots: h=" ++ show h ++ " he=" ++ show he
551            ++ " sz=" ++ show sz ++ " b0=" ++ show b0
552    response <- go mempty b0 False
553    debug $ "go returned " ++ show response
554    return response
555
556  where
557    !he = hashToElem h
558    !b0 = whichBucket h sz
559    haveWrapped !(Slot fp) !b = if fp == maxBound
560                                    then False
561                                    else b <= fp
562
563    -- arguments:
564
565    --   * fp    maintains the slot in the array where it would be safe to
566    --           write the given key
567    --   * b     search the buckets array starting at this index.
568    --   * wrap  True if we've wrapped around, False otherwise
569
570    go !fp !b !wrap = do
571        debug $ concat [ "go: fp="
572                       , show fp
573                       , " b="
574                       , show b
575                       , ", wrap="
576                       , show wrap
577                       , ", he="
578                       , show he
579                       , ", emptyMarker="
580                       , show emptyMarker
581                       , ", deletedMarker="
582                       , show deletedMarker ]
583
584        !idx <- forwardSearch3 hashes b sz he emptyMarker deletedMarker
585        debug $ "forwardSearch3 returned " ++ show idx
586                ++ " with sz=" ++ show sz ++ ", b=" ++ show b
587
588        if wrap && idx >= b0
589          -- we wrapped around in the search and didn't find our hash code;
590          -- this means that the table is full of deleted elements. Just return
591          -- the first place we'd be allowed to insert.
592          --
593          -- TODO: if we get in this situation we should probably just rehash
594          -- the table, because every insert is going to be O(n).
595          then do
596            let !sl = fp `mappend` (Slot (error "impossible"))
597            return $! SlotFindResponse 0 (_slot sl) (_slot sl)
598          else do
599            -- because the table isn't full, we know that there must be either
600            -- an empty or a deleted marker somewhere in the table. Assert this
601            -- here.
602            assert (idx >= 0) $ return ()
603            h0 <- U.readArray hashes idx
604            debug $ "h0 was " ++ show h0
605
606            if recordIsEmpty h0
607              then do
608                  let pl = fp `mappend` (Slot idx)
609                  debug $ "empty, returning " ++ show pl
610                  return $! SlotFindResponse 0 (_slot pl) (_slot pl)
611              else do
612                let !wrap' = haveWrapped fp idx
613                if recordIsDeleted h0
614                  then do
615                      let !pl = fp `mappend` (Slot idx)
616                      debug $ "deleted, cont with pl=" ++ show pl
617                      go pl (idx + 1) wrap'
618                  else
619                    if he == h0
620                      then do
621                        debug $ "found he == h0 == " ++ show h0
622                        k' <- readArray keys idx
623                        if k == k'
624                          then do
625                            debug $ "found at " ++ show idx
626                            let !sl = fp `mappend` (Slot idx)
627                            return $! SlotFindResponse 1 (_slot sl) idx
628                          else go fp (idx + 1) wrap'
629                      else go fp (idx + 1) wrap'
630
631
632------------------------------------------------------------------------------
633{-# INLINE deleteFromSlot #-}
634deleteFromSlot :: (HashTable_ s k v) -> Int -> ST s ()
635deleteFromSlot (HashTable _ loadRef hashes keys values) idx = do
636    !he <- U.readArray hashes idx
637    when (recordIsFilled he) $ do
638        bumpDelLoad loadRef 1
639        bumpLoad loadRef (-1)
640        U.writeArray hashes idx deletedMarker
641        writeArray keys idx undefined
642        writeArray values idx undefined
643
644
645------------------------------------------------------------------------------
646{-# INLINE insertIntoSlot #-}
647insertIntoSlot :: (HashTable_ s k v) -> Int -> Elem -> k -> v -> ST s ()
648insertIntoSlot (HashTable _ loadRef hashes keys values) idx he k v = do
649    !heOld <- U.readArray hashes idx
650    let !heInt    = fromIntegral heOld :: Int
651        !delInt   = fromIntegral deletedMarker :: Int
652        !emptyInt = fromIntegral emptyMarker :: Int
653        !delBump  = mask heInt delInt -- -1 if heInt == delInt,
654                                      --  0  otherwise
655        !mLoad    = mask heInt delInt .|. mask heInt emptyInt
656        !loadBump = mLoad .&. 1 -- 1 if heInt == delInt || heInt == emptyInt,
657                                -- 0 otherwise
658    bumpDelLoad loadRef delBump
659    bumpLoad loadRef loadBump
660    U.writeArray hashes idx he
661    writeArray keys idx k
662    writeArray values idx v
663
664
665-------------------------------------------------------------------------------
666{-# INLINE bumpLoad #-}
667bumpLoad :: (SizeRefs s) -> Int -> ST s ()
668bumpLoad ref i = do
669    !ld <- readLoad ref
670    writeLoad ref $! ld + i
671
672
673------------------------------------------------------------------------------
674{-# INLINE bumpDelLoad #-}
675bumpDelLoad :: (SizeRefs s) -> Int -> ST s ()
676bumpDelLoad ref i = do
677    !ld <- readDelLoad ref
678    writeDelLoad ref $! ld + i
679
680
681-----------------------------------------------------------------------------
682maxLoad :: Double
683maxLoad = 0.82
684
685
686------------------------------------------------------------------------------
687emptyMarker :: Elem
688emptyMarker = 0
689
690
691------------------------------------------------------------------------------
692deletedMarker :: Elem
693deletedMarker = 1
694
695
696------------------------------------------------------------------------------
697{-# INLINE trueInt #-}
698trueInt :: Int -> Bool
699trueInt (I# i#) = tagToEnum# i#
700
701
702------------------------------------------------------------------------------
703{-# INLINE recordIsEmpty #-}
704recordIsEmpty :: Elem -> Bool
705recordIsEmpty = (== emptyMarker)
706
707
708------------------------------------------------------------------------------
709{-# INLINE recordIsDeleted #-}
710recordIsDeleted :: Elem -> Bool
711recordIsDeleted = (== deletedMarker)
712
713
714------------------------------------------------------------------------------
715{-# INLINE recordIsFilled #-}
716recordIsFilled :: Elem -> Bool
717recordIsFilled !el = tagToEnum# isFilled#
718  where
719    !el# = U.elemToInt# el
720    !deletedMarker# = U.elemToInt# deletedMarker
721    !emptyMarker# = U.elemToInt# emptyMarker
722#if __GLASGOW_HASKELL__ >= 708
723    !isFilled# = (el# /=# deletedMarker#) `andI#` (el# /=# emptyMarker#)
724#else
725    !delOrEmpty# = mask# el# deletedMarker# `orI#` mask# el# emptyMarker#
726    !isFilled# = 1# `andI#` notI# delOrEmpty#
727#endif
728
729
730------------------------------------------------------------------------------
731{-# INLINE hash #-}
732hash :: (Hashable k) => k -> Int
733hash = H.hash
734
735
736------------------------------------------------------------------------------
737{-# INLINE hashToElem #-}
738hashToElem :: Int -> Elem
739hashToElem !h = out
740  where
741    !(I# lo#) = h .&. U.elemMask
742
743    !m#  = maskw# lo# 0# `or#` maskw# lo# 1#
744    !nm# = not# m#
745
746    !r#  = ((int2Word# 2#) `and#` m#) `or#` (int2Word# lo# `and#` nm#)
747    !out = U.primWordToElem r#
748
749
750------------------------------------------------------------------------------
751newRef :: HashTable_ s k v -> ST s (HashTable s k v)
752newRef = liftM HT . newSTRef
753{-# INLINE newRef #-}
754
755writeRef :: HashTable s k v -> HashTable_ s k v -> ST s ()
756writeRef (HT ref) ht = writeSTRef ref ht
757{-# INLINE writeRef #-}
758
759readRef :: HashTable s k v -> ST s (HashTable_ s k v)
760readRef (HT ref) = readSTRef ref
761{-# INLINE readRef #-}
762
763
764------------------------------------------------------------------------------
765{-# INLINE debug #-}
766debug :: String -> ST s ()
767#ifdef DEBUG
768debug s = unsafeIOToST (putStrLn s)
769#else
770debug _ = return ()
771#endif
772
773lookupIndex :: (Eq k, Hashable k) => HashTable s k v -> k -> ST s (Maybe Word)
774lookupIndex htRef !k = do
775    ht <- readRef htRef
776    lookup' ht
777  where
778    lookup' (HashTable sz _ hashes keys _values) = do
779        let !b = whichBucket h sz
780        debug $ "lookup h=" ++ show h ++ " sz=" ++ show sz ++ " b=" ++ show b
781        go b 0 sz
782
783      where
784        !h  = hash k
785        !he = hashToElem h
786
787        go !b !start !end = {-# SCC "lookupIndex/go" #-} do
788            debug $ concat [ "lookupIndex/go: "
789                           , show b
790                           , "/"
791                           , show start
792                           , "/"
793                           , show end
794                           ]
795            idx <- forwardSearch2 hashes b end he emptyMarker
796            debug $ "forwardSearch2 returned " ++ show idx
797            if (idx < 0 || idx < start || idx >= end)
798               then return Nothing
799               else do
800                 h0  <- U.readArray hashes idx
801                 debug $ "h0 was " ++ show h0
802
803                 if recordIsEmpty h0
804                   then do
805                       debug $ "record empty, returning Nothing"
806                       return Nothing
807                   else do
808                     k' <- readArray keys idx
809                     if k == k'
810                       then do
811                         debug $ "value found at " ++ show idx
812                         return $! (Just $! fromIntegral idx)
813                       else do
814                         debug $ "value not found, recursing"
815                         if idx < b
816                           then go (idx + 1) (idx + 1) b
817                           else go (idx + 1) start end
818{-# INLINE lookupIndex #-}
819
820nextByIndex :: HashTable s k v -> Word -> ST s (Maybe (Word, k, v))
821nextByIndex htRef i0 = readRef htRef >>= work
822  where
823    work (HashTable sz _ hashes keys values) = go (fromIntegral i0)
824      where
825        go i | i >= sz = return Nothing
826             | otherwise = do
827            h <- U.readArray hashes i
828            if recordIsEmpty h || recordIsDeleted h
829              then go (i+1)
830              else do
831                k <- readArray keys i
832                v <- readArray values i
833                let !i' = fromIntegral i
834                return (Just (i', k, v))
835{-# INLINE nextByIndex #-}
836