1{-# LANGUAGE BangPatterns #-}
2{-# LANGUAGE DeriveDataTypeable #-}
3{-# LANGUAGE GADTs #-}
4{-# LANGUAGE PolyKinds #-}
5{-# LANGUAGE Safe #-}
6{-# LANGUAGE ScopedTypeVariables #-}
7{-# LANGUAGE TypeOperators #-}
8module Data.Dependent.Map.Internal where
9
10import Data.Dependent.Sum (DSum((:=>)))
11import Data.GADT.Compare (GCompare, GOrdering(..), gcompare)
12import Data.Some (Some, mkSome, withSome)
13import Data.Typeable (Typeable)
14
15-- |Dependent maps: 'k' is a GADT-like thing with a facility for
16-- rediscovering its type parameter, elements of which function as identifiers
17-- tagged with the type of the thing they identify.  Real GADTs are one
18-- useful instantiation of @k@, as are 'Tag's from "Data.Unique.Tag" in the
19-- 'prim-uniq' package.
20--
21-- Semantically, @'DMap' k f@ is equivalent to a set of @'DSum' k f@ where no two
22-- elements have the same tag.
23--
24-- More informally, 'DMap' is to dependent products as 'M.Map' is to @(->)@.
25-- Thus it could also be thought of as a partial (in the sense of \"partial
26-- function\") dependent product.
27data DMap k f where
28    Tip :: DMap k f
29    Bin :: {- sz    -} !Int
30        -> {- key   -} !(k v)
31        -> {- value -} f v
32        -> {- left  -} !(DMap k f)
33        -> {- right -} !(DMap k f)
34        -> DMap k f
35    deriving Typeable
36
37{--------------------------------------------------------------------
38  Construction
39--------------------------------------------------------------------}
40
41-- | /O(1)/. The empty map.
42--
43-- > empty      == fromList []
44-- > size empty == 0
45empty :: DMap k f
46empty = Tip
47
48-- | /O(1)/. A map with a single element.
49--
50-- > singleton 1 'a'        == fromList [(1, 'a')]
51-- > size (singleton 1 'a') == 1
52singleton :: k v -> f v -> DMap k f
53singleton k x = Bin 1 k x Tip Tip
54
55{--------------------------------------------------------------------
56  Query
57--------------------------------------------------------------------}
58
59-- | /O(1)/. Is the map empty?
60null :: DMap k f -> Bool
61null Tip    = True
62null Bin{}  = False
63
64-- | /O(1)/. The number of elements in the map.
65size :: DMap k f -> Int
66size Tip                = 0
67size (Bin n _ _ _ _)    = n
68
69-- | /O(log n)/. Lookup the value at a key in the map.
70--
71-- The function will return the corresponding value as @('Just' value)@,
72-- or 'Nothing' if the key isn't in the map.
73lookup :: forall k f v. GCompare k => k v -> DMap k f -> Maybe (f v)
74lookup k = k `seq` go
75    where
76        go :: DMap k f -> Maybe (f v)
77        go Tip = Nothing
78        go (Bin _ kx x l r) =
79            case gcompare k kx of
80                GLT -> go l
81                GGT -> go r
82                GEQ -> Just x
83
84lookupAssoc :: forall k f v. GCompare k => Some k -> DMap k f -> Maybe (DSum k f)
85lookupAssoc sk = withSome sk $ \k ->
86  let
87    go :: DMap k f -> Maybe (DSum k f)
88    go Tip = Nothing
89    go (Bin _ kx x l r) =
90        case gcompare k kx of
91            GLT -> go l
92            GGT -> go r
93            GEQ -> Just (kx :=> x)
94  in k `seq` go
95
96{--------------------------------------------------------------------
97  Utility functions that maintain the balance properties of the tree.
98  All constructors assume that all values in [l] < [k] and all values
99  in [r] > [k], and that [l] and [r] are valid trees.
100
101  In order of sophistication:
102    [Bin sz k x l r]  The type constructor.
103    [bin k x l r]     Maintains the correct size, assumes that both [l]
104                      and [r] are balanced with respect to each other.
105    [balance k x l r] Restores the balance and size.
106                      Assumes that the original tree was balanced and
107                      that [l] or [r] has changed by at most one element.
108    [combine k x l r] Restores balance and size.
109
110  Furthermore, we can construct a new tree from two trees. Both operations
111  assume that all values in [l] < all values in [r] and that [l] and [r]
112  are valid:
113    [glue l r]        Glues [l] and [r] together. Assumes that [l] and
114                      [r] are already balanced with respect to each other.
115    [merge l r]       Merges two trees and restores balance.
116
117  Note: in contrast to Adam's paper, we use (<=) comparisons instead
118  of (<) comparisons in [combine], [merge] and [balance].
119  Quickcheck (on [difference]) showed that this was necessary in order
120  to maintain the invariants. It is quite unsatisfactory that I haven't
121  been able to find out why this is actually the case! Fortunately, it
122  doesn't hurt to be a bit more conservative.
123--------------------------------------------------------------------}
124
125{--------------------------------------------------------------------
126  Combine
127--------------------------------------------------------------------}
128combine :: GCompare k => k v -> f v -> DMap k f -> DMap k f -> DMap k f
129combine kx x Tip r  = insertMin kx x r
130combine kx x l Tip  = insertMax kx x l
131combine kx x l@(Bin sizeL ky y ly ry) r@(Bin sizeR kz z lz rz)
132  | delta*sizeL <= sizeR  = balance kz z (combine kx x l lz) rz
133  | delta*sizeR <= sizeL  = balance ky y ly (combine kx x ry r)
134  | otherwise             = bin kx x l r
135
136
137-- insertMin and insertMax don't perform potentially expensive comparisons.
138insertMax,insertMin :: k v -> f v -> DMap k f -> DMap k f
139insertMax kx x t
140  = case t of
141      Tip -> singleton kx x
142      Bin _ ky y l r
143          -> balance ky y l (insertMax kx x r)
144
145insertMin kx x t
146  = case t of
147      Tip -> singleton kx x
148      Bin _ ky y l r
149          -> balance ky y (insertMin kx x l) r
150
151{--------------------------------------------------------------------
152  [merge l r]: merges two trees.
153--------------------------------------------------------------------}
154merge :: DMap k f -> DMap k f -> DMap k f
155merge Tip r   = r
156merge l Tip   = l
157merge l@(Bin sizeL kx x lx rx) r@(Bin sizeR ky y ly ry)
158  | delta*sizeL <= sizeR = balance ky y (merge l ly) ry
159  | delta*sizeR <= sizeL = balance kx x lx (merge rx r)
160  | otherwise            = glue l r
161
162{--------------------------------------------------------------------
163  [glue l r]: glues two trees together.
164  Assumes that [l] and [r] are already balanced with respect to each other.
165--------------------------------------------------------------------}
166glue :: DMap k f -> DMap k f -> DMap k f
167glue Tip r = r
168glue l Tip = l
169glue l r
170  | size l > size r = case deleteFindMax l of (km :=> m,l') -> balance km m l' r
171  | otherwise       = case deleteFindMin r of (km :=> m,r') -> balance km m l r'
172
173-- | /O(log n)/. Delete and find the minimal element.
174--
175-- > deleteFindMin (fromList [(5,"a"), (3,"b"), (10,"c")]) == ((3,"b"), fromList[(5,"a"), (10,"c")])
176-- > deleteFindMin                                            Error: can not return the minimal element of an empty map
177
178deleteFindMin :: DMap k f -> (DSum k f, DMap k f)
179deleteFindMin t = case minViewWithKey t of
180      Nothing -> (error "Map.deleteFindMin: can not return the minimal element of an empty map", Tip)
181      Just p -> p
182
183-- | A strict pair.
184data (:*:) a b = !a :*: !b
185infixr 1 :*:
186
187-- | Convert a strict pair to a pair.
188toPair :: a :*: b -> (a, b)
189toPair (a :*: b) = (a, b)
190{-# INLINE toPair #-}
191
192data Triple' a b c = Triple' !a !b !c
193
194-- | Convert a strict triple to a triple.
195toTriple :: Triple' a b c -> (a, b, c)
196toTriple (Triple' a b c) = (a, b, c)
197{-# INLINE toTriple #-}
198
199-- | /O(log n)/. Retrieves the minimal (key :=> value) entry of the map, and
200-- the map stripped of that element, or 'Nothing' if passed an empty map.
201minViewWithKey :: forall k f . DMap k f -> Maybe (DSum k f, DMap k f)
202minViewWithKey Tip = Nothing
203minViewWithKey (Bin _ k0 x0 l0 r0) = Just $! toPair $ go k0 x0 l0 r0
204  where
205    go :: k v -> f v -> DMap k f -> DMap k f -> DSum k f :*: DMap k f
206    go k x Tip r = (k :=> x) :*: r
207    go k x (Bin _ kl xl ll lr) r =
208      let !(km :*: l') = go kl xl ll lr
209      in (km :*: balance k x l' r)
210
211-- | /O(log n)/. Retrieves the maximal (key :=> value) entry of the map, and
212-- the map stripped of that element, or 'Nothing' if passed an empty map.
213maxViewWithKey :: forall k f . DMap k f -> Maybe (DSum k f, DMap k f)
214maxViewWithKey Tip = Nothing
215maxViewWithKey (Bin _ k0 x0 l0 r0) = Just $! toPair $ go k0 x0 l0 r0
216  where
217    go :: k v -> f v -> DMap k f -> DMap k f -> DSum k f :*: DMap k f
218    go k x l Tip = (k :=> x) :*: l
219    go k x l (Bin _ kr xr rl rr) =
220      let !(km :*: r') = go kr xr rl rr
221      in (km :*: balance k x l r')
222
223-- | /O(log n)/. Delete and find the maximal element.
224--
225-- > deleteFindMax (fromList [(5,"a"), (3,"b"), (10,"c")]) == ((10,"c"), fromList [(3,"b"), (5,"a")])
226-- > deleteFindMax empty                                      Error: can not return the maximal element of an empty map
227
228deleteFindMax :: DMap k f -> (DSum k f, DMap k f)
229deleteFindMax t
230  = case t of
231      Bin _ k x l Tip -> (k :=> x,l)
232      Bin _ k x l r   -> let (km,r') = deleteFindMax r in (km,balance k x l r')
233      Tip             -> (error "Map.deleteFindMax: can not return the maximal element of an empty map", Tip)
234
235
236{--------------------------------------------------------------------
237  [balance l x r] balances two trees with value x.
238  The sizes of the trees should balance after decreasing the
239  size of one of them. (a rotation).
240
241  [delta] is the maximal relative difference between the sizes of
242          two trees, it corresponds with the [w] in Adams' paper.
243  [ratio] is the ratio between an outer and inner sibling of the
244          heavier subtree in an unbalanced setting. It determines
245          whether a double or single rotation should be performed
246          to restore balance. It corresponds with the inverse
247          of $\alpha$ in Adam's article.
248
249  Note that:
250  - [delta] should be larger than 4.646 with a [ratio] of 2.
251  - [delta] should be larger than 3.745 with a [ratio] of 1.534.
252
253  - A lower [delta] leads to a more 'perfectly' balanced tree.
254  - A higher [delta] performs less rebalancing.
255
256  - Balancing is automatic for random data and a balancing
257    scheme is only necessary to avoid pathological worst cases.
258    Almost any choice will do, and in practice, a rather large
259    [delta] may perform better than smaller one.
260
261  Note: in contrast to Adam's paper, we use a ratio of (at least) [2]
262  to decide whether a single or double rotation is needed. Although
263  he actually proves that this ratio is needed to maintain the
264  invariants, his implementation uses an invalid ratio of [1].
265--------------------------------------------------------------------}
266delta,ratio :: Int
267delta = 4
268ratio = 2
269
270balance :: k v -> f v -> DMap k f -> DMap k f -> DMap k f
271balance k x l r
272  | sizeL + sizeR <= 1    = Bin sizeX k x l r
273  | sizeR >= delta*sizeL  = rotateL k x l r
274  | sizeL >= delta*sizeR  = rotateR k x l r
275  | otherwise             = Bin sizeX k x l r
276  where
277    sizeL = size l
278    sizeR = size r
279    sizeX = sizeL + sizeR + 1
280
281-- rotate
282rotateL :: k v -> f v -> DMap k f -> DMap k f -> DMap k f
283rotateL k x l r@(Bin _ _ _ ly ry)
284  | size ly < ratio*size ry = singleL k x l r
285  | otherwise               = doubleL k x l r
286rotateL _ _ _ Tip = error "rotateL Tip"
287
288rotateR :: k v -> f v -> DMap k f -> DMap k f -> DMap k f
289rotateR k x l@(Bin _ _ _ ly ry) r
290  | size ry < ratio*size ly = singleR k x l r
291  | otherwise               = doubleR k x l r
292rotateR _ _ Tip _ = error "rotateR Tip"
293
294-- basic rotations
295singleL, singleR :: k v -> f v -> DMap k f -> DMap k f -> DMap k f
296singleL k1 x1 t1 (Bin _ k2 x2 t2 t3)  = bin k2 x2 (bin k1 x1 t1 t2) t3
297singleL _ _ _ Tip = error "singleL Tip"
298singleR k1 x1 (Bin _ k2 x2 t1 t2) t3  = bin k2 x2 t1 (bin k1 x1 t2 t3)
299singleR _ _ Tip _ = error "singleR Tip"
300
301doubleL, doubleR :: k v -> f v -> DMap k f -> DMap k f -> DMap k f
302doubleL k1 x1 t1 (Bin _ k2 x2 (Bin _ k3 x3 t2 t3) t4) = bin k3 x3 (bin k1 x1 t1 t2) (bin k2 x2 t3 t4)
303doubleL _ _ _ _ = error "doubleL"
304doubleR k1 x1 (Bin _ k2 x2 t1 (Bin _ k3 x3 t2 t3)) t4 = bin k3 x3 (bin k2 x2 t1 t2) (bin k1 x1 t3 t4)
305doubleR _ _ _ _ = error "doubleR"
306
307{--------------------------------------------------------------------
308  The bin constructor maintains the size of the tree
309--------------------------------------------------------------------}
310bin :: k v -> f v -> DMap k f -> DMap k f -> DMap k f
311bin k x l r
312  = Bin (size l + size r + 1) k x l r
313
314{--------------------------------------------------------------------
315  Utility functions that return sub-ranges of the original
316  tree. Some functions take a comparison function as argument to
317  allow comparisons against infinite values. A function [cmplo k]
318  should be read as [compare lo k].
319
320  [trim cmplo cmphi t]  A tree that is either empty or where [cmplo k == LT]
321                        and [cmphi k == GT] for the key [k] of the root.
322  [filterGt cmp t]      A tree where for all keys [k]. [cmp k == LT]
323  [filterLt cmp t]      A tree where for all keys [k]. [cmp k == GT]
324
325  [split k t]           Returns two trees [l] and [r] where all keys
326                        in [l] are <[k] and all keys in [r] are >[k].
327  [splitLookup k t]     Just like [split] but also returns whether [k]
328                        was found in the tree.
329--------------------------------------------------------------------}
330
331{--------------------------------------------------------------------
332  [trim lo hi t] trims away all subtrees that surely contain no
333  values between the range [lo] to [hi]. The returned tree is either
334  empty or the key of the root is between @lo@ and @hi@.
335--------------------------------------------------------------------}
336trim :: (Some k -> Ordering) -> (Some k -> Ordering) -> DMap k f -> DMap k f
337trim _     _     Tip = Tip
338trim cmplo cmphi t@(Bin _ kx _ l r)
339  = case cmplo (mkSome kx) of
340      LT -> case cmphi (mkSome kx) of
341              GT -> t
342              _  -> trim cmplo cmphi l
343      _  -> trim cmplo cmphi r
344
345trimLookupLo :: GCompare k => Some k -> (Some k -> Ordering) -> DMap k f -> (Maybe (DSum k f), DMap k f)
346trimLookupLo _  _     Tip = (Nothing,Tip)
347trimLookupLo lo cmphi t@(Bin _ kx x l r)
348  = case compare lo (mkSome kx) of
349      LT -> case cmphi (mkSome kx) of
350              GT -> (lookupAssoc lo t, t)
351              _  -> trimLookupLo lo cmphi l
352      GT -> trimLookupLo lo cmphi r
353      EQ -> (Just (kx :=> x),trim (compare lo) cmphi r)
354
355
356{--------------------------------------------------------------------
357  [filterGt k t] filter all keys >[k] from tree [t]
358  [filterLt k t] filter all keys <[k] from tree [t]
359--------------------------------------------------------------------}
360filterGt :: GCompare k => (Some k -> Ordering) -> DMap k f -> DMap k f
361filterGt cmp = go
362  where
363    go Tip              = Tip
364    go (Bin _ kx x l r) = case cmp (mkSome kx) of
365              LT -> combine kx x (go l) r
366              GT -> go r
367              EQ -> r
368
369filterLt :: GCompare k => (Some k -> Ordering) -> DMap k f -> DMap k f
370filterLt cmp = go
371  where
372    go Tip              = Tip
373    go (Bin _ kx x l r) = case cmp (mkSome kx) of
374          LT -> go l
375          GT -> combine kx x l (go r)
376          EQ -> l
377