1{-# LANGUAGE BangPatterns #-} 2{-# LANGUAGE DeriveDataTypeable #-} 3{-# LANGUAGE GADTs #-} 4{-# LANGUAGE PolyKinds #-} 5{-# LANGUAGE Safe #-} 6{-# LANGUAGE ScopedTypeVariables #-} 7{-# LANGUAGE TypeOperators #-} 8module Data.Dependent.Map.Internal where 9 10import Data.Dependent.Sum (DSum((:=>))) 11import Data.GADT.Compare (GCompare, GOrdering(..), gcompare) 12import Data.Some (Some, mkSome, withSome) 13import Data.Typeable (Typeable) 14 15-- |Dependent maps: 'k' is a GADT-like thing with a facility for 16-- rediscovering its type parameter, elements of which function as identifiers 17-- tagged with the type of the thing they identify. Real GADTs are one 18-- useful instantiation of @k@, as are 'Tag's from "Data.Unique.Tag" in the 19-- 'prim-uniq' package. 20-- 21-- Semantically, @'DMap' k f@ is equivalent to a set of @'DSum' k f@ where no two 22-- elements have the same tag. 23-- 24-- More informally, 'DMap' is to dependent products as 'M.Map' is to @(->)@. 25-- Thus it could also be thought of as a partial (in the sense of \"partial 26-- function\") dependent product. 27data DMap k f where 28 Tip :: DMap k f 29 Bin :: {- sz -} !Int 30 -> {- key -} !(k v) 31 -> {- value -} f v 32 -> {- left -} !(DMap k f) 33 -> {- right -} !(DMap k f) 34 -> DMap k f 35 deriving Typeable 36 37{-------------------------------------------------------------------- 38 Construction 39--------------------------------------------------------------------} 40 41-- | /O(1)/. The empty map. 42-- 43-- > empty == fromList [] 44-- > size empty == 0 45empty :: DMap k f 46empty = Tip 47 48-- | /O(1)/. A map with a single element. 49-- 50-- > singleton 1 'a' == fromList [(1, 'a')] 51-- > size (singleton 1 'a') == 1 52singleton :: k v -> f v -> DMap k f 53singleton k x = Bin 1 k x Tip Tip 54 55{-------------------------------------------------------------------- 56 Query 57--------------------------------------------------------------------} 58 59-- | /O(1)/. Is the map empty? 60null :: DMap k f -> Bool 61null Tip = True 62null Bin{} = False 63 64-- | /O(1)/. The number of elements in the map. 65size :: DMap k f -> Int 66size Tip = 0 67size (Bin n _ _ _ _) = n 68 69-- | /O(log n)/. Lookup the value at a key in the map. 70-- 71-- The function will return the corresponding value as @('Just' value)@, 72-- or 'Nothing' if the key isn't in the map. 73lookup :: forall k f v. GCompare k => k v -> DMap k f -> Maybe (f v) 74lookup k = k `seq` go 75 where 76 go :: DMap k f -> Maybe (f v) 77 go Tip = Nothing 78 go (Bin _ kx x l r) = 79 case gcompare k kx of 80 GLT -> go l 81 GGT -> go r 82 GEQ -> Just x 83 84lookupAssoc :: forall k f v. GCompare k => Some k -> DMap k f -> Maybe (DSum k f) 85lookupAssoc sk = withSome sk $ \k -> 86 let 87 go :: DMap k f -> Maybe (DSum k f) 88 go Tip = Nothing 89 go (Bin _ kx x l r) = 90 case gcompare k kx of 91 GLT -> go l 92 GGT -> go r 93 GEQ -> Just (kx :=> x) 94 in k `seq` go 95 96{-------------------------------------------------------------------- 97 Utility functions that maintain the balance properties of the tree. 98 All constructors assume that all values in [l] < [k] and all values 99 in [r] > [k], and that [l] and [r] are valid trees. 100 101 In order of sophistication: 102 [Bin sz k x l r] The type constructor. 103 [bin k x l r] Maintains the correct size, assumes that both [l] 104 and [r] are balanced with respect to each other. 105 [balance k x l r] Restores the balance and size. 106 Assumes that the original tree was balanced and 107 that [l] or [r] has changed by at most one element. 108 [combine k x l r] Restores balance and size. 109 110 Furthermore, we can construct a new tree from two trees. Both operations 111 assume that all values in [l] < all values in [r] and that [l] and [r] 112 are valid: 113 [glue l r] Glues [l] and [r] together. Assumes that [l] and 114 [r] are already balanced with respect to each other. 115 [merge l r] Merges two trees and restores balance. 116 117 Note: in contrast to Adam's paper, we use (<=) comparisons instead 118 of (<) comparisons in [combine], [merge] and [balance]. 119 Quickcheck (on [difference]) showed that this was necessary in order 120 to maintain the invariants. It is quite unsatisfactory that I haven't 121 been able to find out why this is actually the case! Fortunately, it 122 doesn't hurt to be a bit more conservative. 123--------------------------------------------------------------------} 124 125{-------------------------------------------------------------------- 126 Combine 127--------------------------------------------------------------------} 128combine :: GCompare k => k v -> f v -> DMap k f -> DMap k f -> DMap k f 129combine kx x Tip r = insertMin kx x r 130combine kx x l Tip = insertMax kx x l 131combine kx x l@(Bin sizeL ky y ly ry) r@(Bin sizeR kz z lz rz) 132 | delta*sizeL <= sizeR = balance kz z (combine kx x l lz) rz 133 | delta*sizeR <= sizeL = balance ky y ly (combine kx x ry r) 134 | otherwise = bin kx x l r 135 136 137-- insertMin and insertMax don't perform potentially expensive comparisons. 138insertMax,insertMin :: k v -> f v -> DMap k f -> DMap k f 139insertMax kx x t 140 = case t of 141 Tip -> singleton kx x 142 Bin _ ky y l r 143 -> balance ky y l (insertMax kx x r) 144 145insertMin kx x t 146 = case t of 147 Tip -> singleton kx x 148 Bin _ ky y l r 149 -> balance ky y (insertMin kx x l) r 150 151{-------------------------------------------------------------------- 152 [merge l r]: merges two trees. 153--------------------------------------------------------------------} 154merge :: DMap k f -> DMap k f -> DMap k f 155merge Tip r = r 156merge l Tip = l 157merge l@(Bin sizeL kx x lx rx) r@(Bin sizeR ky y ly ry) 158 | delta*sizeL <= sizeR = balance ky y (merge l ly) ry 159 | delta*sizeR <= sizeL = balance kx x lx (merge rx r) 160 | otherwise = glue l r 161 162{-------------------------------------------------------------------- 163 [glue l r]: glues two trees together. 164 Assumes that [l] and [r] are already balanced with respect to each other. 165--------------------------------------------------------------------} 166glue :: DMap k f -> DMap k f -> DMap k f 167glue Tip r = r 168glue l Tip = l 169glue l r 170 | size l > size r = case deleteFindMax l of (km :=> m,l') -> balance km m l' r 171 | otherwise = case deleteFindMin r of (km :=> m,r') -> balance km m l r' 172 173-- | /O(log n)/. Delete and find the minimal element. 174-- 175-- > deleteFindMin (fromList [(5,"a"), (3,"b"), (10,"c")]) == ((3,"b"), fromList[(5,"a"), (10,"c")]) 176-- > deleteFindMin Error: can not return the minimal element of an empty map 177 178deleteFindMin :: DMap k f -> (DSum k f, DMap k f) 179deleteFindMin t = case minViewWithKey t of 180 Nothing -> (error "Map.deleteFindMin: can not return the minimal element of an empty map", Tip) 181 Just p -> p 182 183-- | A strict pair. 184data (:*:) a b = !a :*: !b 185infixr 1 :*: 186 187-- | Convert a strict pair to a pair. 188toPair :: a :*: b -> (a, b) 189toPair (a :*: b) = (a, b) 190{-# INLINE toPair #-} 191 192data Triple' a b c = Triple' !a !b !c 193 194-- | Convert a strict triple to a triple. 195toTriple :: Triple' a b c -> (a, b, c) 196toTriple (Triple' a b c) = (a, b, c) 197{-# INLINE toTriple #-} 198 199-- | /O(log n)/. Retrieves the minimal (key :=> value) entry of the map, and 200-- the map stripped of that element, or 'Nothing' if passed an empty map. 201minViewWithKey :: forall k f . DMap k f -> Maybe (DSum k f, DMap k f) 202minViewWithKey Tip = Nothing 203minViewWithKey (Bin _ k0 x0 l0 r0) = Just $! toPair $ go k0 x0 l0 r0 204 where 205 go :: k v -> f v -> DMap k f -> DMap k f -> DSum k f :*: DMap k f 206 go k x Tip r = (k :=> x) :*: r 207 go k x (Bin _ kl xl ll lr) r = 208 let !(km :*: l') = go kl xl ll lr 209 in (km :*: balance k x l' r) 210 211-- | /O(log n)/. Retrieves the maximal (key :=> value) entry of the map, and 212-- the map stripped of that element, or 'Nothing' if passed an empty map. 213maxViewWithKey :: forall k f . DMap k f -> Maybe (DSum k f, DMap k f) 214maxViewWithKey Tip = Nothing 215maxViewWithKey (Bin _ k0 x0 l0 r0) = Just $! toPair $ go k0 x0 l0 r0 216 where 217 go :: k v -> f v -> DMap k f -> DMap k f -> DSum k f :*: DMap k f 218 go k x l Tip = (k :=> x) :*: l 219 go k x l (Bin _ kr xr rl rr) = 220 let !(km :*: r') = go kr xr rl rr 221 in (km :*: balance k x l r') 222 223-- | /O(log n)/. Delete and find the maximal element. 224-- 225-- > deleteFindMax (fromList [(5,"a"), (3,"b"), (10,"c")]) == ((10,"c"), fromList [(3,"b"), (5,"a")]) 226-- > deleteFindMax empty Error: can not return the maximal element of an empty map 227 228deleteFindMax :: DMap k f -> (DSum k f, DMap k f) 229deleteFindMax t 230 = case t of 231 Bin _ k x l Tip -> (k :=> x,l) 232 Bin _ k x l r -> let (km,r') = deleteFindMax r in (km,balance k x l r') 233 Tip -> (error "Map.deleteFindMax: can not return the maximal element of an empty map", Tip) 234 235 236{-------------------------------------------------------------------- 237 [balance l x r] balances two trees with value x. 238 The sizes of the trees should balance after decreasing the 239 size of one of them. (a rotation). 240 241 [delta] is the maximal relative difference between the sizes of 242 two trees, it corresponds with the [w] in Adams' paper. 243 [ratio] is the ratio between an outer and inner sibling of the 244 heavier subtree in an unbalanced setting. It determines 245 whether a double or single rotation should be performed 246 to restore balance. It corresponds with the inverse 247 of $\alpha$ in Adam's article. 248 249 Note that: 250 - [delta] should be larger than 4.646 with a [ratio] of 2. 251 - [delta] should be larger than 3.745 with a [ratio] of 1.534. 252 253 - A lower [delta] leads to a more 'perfectly' balanced tree. 254 - A higher [delta] performs less rebalancing. 255 256 - Balancing is automatic for random data and a balancing 257 scheme is only necessary to avoid pathological worst cases. 258 Almost any choice will do, and in practice, a rather large 259 [delta] may perform better than smaller one. 260 261 Note: in contrast to Adam's paper, we use a ratio of (at least) [2] 262 to decide whether a single or double rotation is needed. Although 263 he actually proves that this ratio is needed to maintain the 264 invariants, his implementation uses an invalid ratio of [1]. 265--------------------------------------------------------------------} 266delta,ratio :: Int 267delta = 4 268ratio = 2 269 270balance :: k v -> f v -> DMap k f -> DMap k f -> DMap k f 271balance k x l r 272 | sizeL + sizeR <= 1 = Bin sizeX k x l r 273 | sizeR >= delta*sizeL = rotateL k x l r 274 | sizeL >= delta*sizeR = rotateR k x l r 275 | otherwise = Bin sizeX k x l r 276 where 277 sizeL = size l 278 sizeR = size r 279 sizeX = sizeL + sizeR + 1 280 281-- rotate 282rotateL :: k v -> f v -> DMap k f -> DMap k f -> DMap k f 283rotateL k x l r@(Bin _ _ _ ly ry) 284 | size ly < ratio*size ry = singleL k x l r 285 | otherwise = doubleL k x l r 286rotateL _ _ _ Tip = error "rotateL Tip" 287 288rotateR :: k v -> f v -> DMap k f -> DMap k f -> DMap k f 289rotateR k x l@(Bin _ _ _ ly ry) r 290 | size ry < ratio*size ly = singleR k x l r 291 | otherwise = doubleR k x l r 292rotateR _ _ Tip _ = error "rotateR Tip" 293 294-- basic rotations 295singleL, singleR :: k v -> f v -> DMap k f -> DMap k f -> DMap k f 296singleL k1 x1 t1 (Bin _ k2 x2 t2 t3) = bin k2 x2 (bin k1 x1 t1 t2) t3 297singleL _ _ _ Tip = error "singleL Tip" 298singleR k1 x1 (Bin _ k2 x2 t1 t2) t3 = bin k2 x2 t1 (bin k1 x1 t2 t3) 299singleR _ _ Tip _ = error "singleR Tip" 300 301doubleL, doubleR :: k v -> f v -> DMap k f -> DMap k f -> DMap k f 302doubleL k1 x1 t1 (Bin _ k2 x2 (Bin _ k3 x3 t2 t3) t4) = bin k3 x3 (bin k1 x1 t1 t2) (bin k2 x2 t3 t4) 303doubleL _ _ _ _ = error "doubleL" 304doubleR k1 x1 (Bin _ k2 x2 t1 (Bin _ k3 x3 t2 t3)) t4 = bin k3 x3 (bin k2 x2 t1 t2) (bin k1 x1 t3 t4) 305doubleR _ _ _ _ = error "doubleR" 306 307{-------------------------------------------------------------------- 308 The bin constructor maintains the size of the tree 309--------------------------------------------------------------------} 310bin :: k v -> f v -> DMap k f -> DMap k f -> DMap k f 311bin k x l r 312 = Bin (size l + size r + 1) k x l r 313 314{-------------------------------------------------------------------- 315 Utility functions that return sub-ranges of the original 316 tree. Some functions take a comparison function as argument to 317 allow comparisons against infinite values. A function [cmplo k] 318 should be read as [compare lo k]. 319 320 [trim cmplo cmphi t] A tree that is either empty or where [cmplo k == LT] 321 and [cmphi k == GT] for the key [k] of the root. 322 [filterGt cmp t] A tree where for all keys [k]. [cmp k == LT] 323 [filterLt cmp t] A tree where for all keys [k]. [cmp k == GT] 324 325 [split k t] Returns two trees [l] and [r] where all keys 326 in [l] are <[k] and all keys in [r] are >[k]. 327 [splitLookup k t] Just like [split] but also returns whether [k] 328 was found in the tree. 329--------------------------------------------------------------------} 330 331{-------------------------------------------------------------------- 332 [trim lo hi t] trims away all subtrees that surely contain no 333 values between the range [lo] to [hi]. The returned tree is either 334 empty or the key of the root is between @lo@ and @hi@. 335--------------------------------------------------------------------} 336trim :: (Some k -> Ordering) -> (Some k -> Ordering) -> DMap k f -> DMap k f 337trim _ _ Tip = Tip 338trim cmplo cmphi t@(Bin _ kx _ l r) 339 = case cmplo (mkSome kx) of 340 LT -> case cmphi (mkSome kx) of 341 GT -> t 342 _ -> trim cmplo cmphi l 343 _ -> trim cmplo cmphi r 344 345trimLookupLo :: GCompare k => Some k -> (Some k -> Ordering) -> DMap k f -> (Maybe (DSum k f), DMap k f) 346trimLookupLo _ _ Tip = (Nothing,Tip) 347trimLookupLo lo cmphi t@(Bin _ kx x l r) 348 = case compare lo (mkSome kx) of 349 LT -> case cmphi (mkSome kx) of 350 GT -> (lookupAssoc lo t, t) 351 _ -> trimLookupLo lo cmphi l 352 GT -> trimLookupLo lo cmphi r 353 EQ -> (Just (kx :=> x),trim (compare lo) cmphi r) 354 355 356{-------------------------------------------------------------------- 357 [filterGt k t] filter all keys >[k] from tree [t] 358 [filterLt k t] filter all keys <[k] from tree [t] 359--------------------------------------------------------------------} 360filterGt :: GCompare k => (Some k -> Ordering) -> DMap k f -> DMap k f 361filterGt cmp = go 362 where 363 go Tip = Tip 364 go (Bin _ kx x l r) = case cmp (mkSome kx) of 365 LT -> combine kx x (go l) r 366 GT -> go r 367 EQ -> r 368 369filterLt :: GCompare k => (Some k -> Ordering) -> DMap k f -> DMap k f 370filterLt cmp = go 371 where 372 go Tip = Tip 373 go (Bin _ kx x l r) = case cmp (mkSome kx) of 374 LT -> go l 375 GT -> combine kx x l (go r) 376 EQ -> l 377