1{-# LANGUAGE CPP #-}
2
3-----------------------------------------------------------------------------
4-- |
5-- Module      :  Data.List.Ordered
6-- Copyright   :  (c) 2009-2011 Leon P Smith
7-- License     :  BSD3
8--
9-- Maintainer  :  leon@melding-monads.com
10-- Stability   :  experimental
11-- Portability :  portable
12--
13-- This module implements bag and set operations on ordered lists.  For the
14-- purposes of this module,  a \"bag\" (or \"multiset\") is a non-decreasing
15-- list, whereas a \"set\" is a strictly ascending list.  Bags are sorted
16-- lists that may contain duplicates,  whereas sets are sorted lists that
17-- do not contain duplicates.
18--
19-- Except for the  'nub', 'sort', 'nubSort', and 'isSorted' families of
20-- functions, every function assumes that any list arguments are sorted
21-- lists. Assuming this precondition is met,  every resulting list is also
22-- sorted.
23--
24-- Because 'isect' handles multisets correctly, it does not return results
25-- comparable to @Data.List.'Data.List.intersect'@ on them.  Thus @isect@
26-- is more than just a more efficient @intersect@ on ordered lists. Similar
27-- statements apply to other associations between functions this module and
28-- functions in @Data.List@,  such as 'union' and @Data.List.'union'@.
29--
30-- All functions in this module are left biased.  Elements that appear in
31-- earlier arguments have priority over equal elements that appear in later
32-- arguments,  and elements that appear earlier in a single list have
33-- priority over equal elements that appear later in that list.
34--
35-----------------------------------------------------------------------------
36
37module  Data.List.Ordered
38     (
39        -- * Predicates
40        member, memberBy, has, hasBy
41     ,  subset, subsetBy
42     ,  isSorted, isSortedBy
43
44        -- * Insertion Functions
45     ,  insertBag, insertBagBy
46     ,  insertSet, insertSetBy
47
48        -- * Set-like operations
49     ,  isect, isectBy
50     ,  union, unionBy
51     ,  minus, minusBy
52     ,  minus', minusBy'
53     ,  xunion, xunionBy
54     ,  merge, mergeBy
55     ,  mergeAll, mergeAllBy
56     ,  unionAll, unionAllBy
57
58        -- * Lists to Ordered Lists
59     ,  nub, nubBy
60     ,  sort, sortBy
61     ,  sortOn, sortOn'
62     ,  nubSort, nubSortBy
63     ,  nubSortOn, nubSortOn'
64
65        -- * Miscellaneous folds
66     ,  foldt, foldt'
67
68     )  where
69
70import Data.List(sort,sortBy,intersect)
71#if  MIN_VERSION_base(4,7,1)
72import Data.List(sortOn)
73#endif
74
75-- |  The 'isSorted' predicate returns 'True' if the elements of a list occur
76-- in non-descending order,  equivalent to @'isSortedBy' ('<=')@.
77isSorted :: Ord a => [a] -> Bool
78isSorted = isSortedBy (<=)
79
80-- |  The 'isSortedBy' function returns 'True' iff the predicate returns true
81-- for all adjacent pairs of elements in the list.
82isSortedBy :: (a -> a -> Bool) -> [a] -> Bool
83isSortedBy lte = loop
84  where
85    loop []       = True
86    loop [_]      = True
87    loop (x:y:zs) = (x `lte` y) && loop (y:zs)
88
89-- |  The 'member' function returns 'True' if the element appears in the
90-- ordered list.
91member :: Ord a => a -> [a] -> Bool
92member = memberBy compare
93
94-- |  The 'memberBy' function is the non-overloaded version of 'member'.
95memberBy :: (a -> a -> Ordering) -> a -> [a] -> Bool
96memberBy cmp x = loop
97  where
98    loop []     = False
99    loop (y:ys) = case cmp x y of
100                    LT -> False
101                    EQ -> True
102                    GT -> loop ys
103
104-- |  The 'has' function returns 'True' if the element appears in the list;
105-- it is equivalent to 'member' except the order of the arguments is reversed,
106-- making it a function from an ordered list to its characteristic function.
107has :: Ord a => [a] -> a -> Bool
108has xs y = memberBy compare y xs
109
110-- |  The 'hasBy' function is the non-overloaded version of 'has'.
111hasBy :: (a -> a -> Ordering) -> [a] -> a -> Bool
112hasBy cmp xs y = memberBy cmp y xs
113
114-- |  The 'insertBag' function inserts an element into a list.  If the element
115-- is already there,  then another copy of the element is inserted.
116insertBag :: Ord a => a -> [a] -> [a]
117insertBag = insertBagBy compare
118
119-- |  The 'insertBagBy' function is the non-overloaded version of 'insertBag'.
120insertBagBy :: (a -> a -> Ordering) -> a -> [a] -> [a]
121insertBagBy cmp = loop
122  where
123    loop x [] = [x]
124    loop x (y:ys)
125      = case cmp x y of
126         GT -> y:loop x ys
127         _  -> x:y:ys
128
129-- |  The 'insertSet' function inserts an element into an ordered list.
130-- If the element is already there,  then the element replaces the existing
131-- element.
132insertSet :: Ord a => a -> [a] -> [a]
133insertSet = insertSetBy compare
134
135-- |  The 'insertSetBy' function is the non-overloaded version of 'insertSet'.
136insertSetBy :: (a -> a -> Ordering) -> a -> [a] -> [a]
137insertSetBy cmp = loop
138  where
139    loop x [] = [x]
140    loop x (y:ys) = case cmp x y of
141            LT -> x:y:ys
142            EQ -> x:ys
143            GT -> y:loop x ys
144
145{-
146-- This function is moderately interesting,  as it encompasses all the
147-- "Venn diagram" functions on two sets. (though not merge;  which isn't
148-- a set function)
149
150-- However, it doesn't seem that useful,  considering that of the 8 possible
151-- functions,  there are only 4 interesting variations:  isect, union, minus,
152-- and xunion.  Due to interactions with GHC's optimizer,  coded separately,
153-- these have a smaller combined object code size than the object code size
154-- for genSectBy.  (Or,  turn off certain optimizations and lose speed.)
155
156-- Each individual object code can be recovered from genSectBy via GHC's
157-- inliner and constant propagation;  but this doesn't save much in terms
158-- of source code size and reduces portability.
159
160-- Note that the Static Argument Transformation is necessary for this to work
161-- correctly;  inlining genSectBy allows for cmp and p to be inlined as well,
162-- or at least eliminate some indirect jumps.  All of the *By functions in
163-- this module follow this idiom for this reason.
164
165genSectBy :: (a -> a -> Ordering)
166          -> (Bool -> Bool -> Bool)
167          -> [a] -> [a] -> [a]
168genSectBy cmp p = loop
169  where
170    loop [] ys | p False True = ys
171               | otherwise    = []
172    loop xs [] | p True False = xs
173               | otherwise    = []
174    loop (x:xs) (y:ys)
175      = case cmp x y of
176          LT | p True False -> x : loop xs (y:ys)
177             | otherwise    ->     loop xs (y:ys)
178          EQ | p True True  -> x : loop xs ys
179             | otherwise    ->     loop xs ys
180          GT | p False True -> y : loop (x:xs) ys
181             | otherwise    ->     loop (x:xs) ys
182
183-- Here's another variation that was suggested to me.  It is more general
184-- than genSectBy, as it can implement a merge; but it cannot implement
185-- a left-biased merge
186
187foldrMergeBy :: (a -> b -> Ordering)
188             -> (a -> c -> c) -> (b -> c -> c) -> (a -> b -> c -> c) -> c
189             -> [a] -> [b] -> c
190foldrMergeBy cmp addA addB unify z = loop
191  where
192    loop xs [] = foldr addA z xs
193    loop [] ys = foldr addB z ys
194    loop (x:xs) (y:ys)
195      = case cmp x y of
196          LT -> x `addA` loop  xs (y:ys)
197          EQ -> unify x y (loop xs ys)
198          GT -> y `addB` loop (x:xs) ys
199-}
200
201-- |  The 'isect' function computes the intersection of two ordered lists.
202-- An element occurs in the output as many times as the minimum number of
203-- occurrences in either input.  If either input is a set,  then the output
204-- is a set.
205--
206-- > isect [ 1,2, 3,4 ] [ 3,4, 5,6 ]   == [ 3,4 ]
207-- > isect [ 1, 2,2,2 ] [ 1,1,1, 2,2 ] == [ 1, 2,2 ]
208isect :: Ord a => [a] -> [a] -> [a]
209isect = isectBy compare
210
211-- |  The 'isectBy' function is the non-overloaded version of 'isect'.
212isectBy :: (a -> b -> Ordering) -> [a] -> [b] -> [a]
213isectBy cmp = loop
214  where
215     loop [] _ys  = []
216     loop _xs []  = []
217     loop (x:xs) (y:ys)
218       = case cmp x y of
219          LT ->     loop xs (y:ys)
220          EQ -> x : loop xs ys
221          GT ->     loop (x:xs) ys
222
223-- |  The 'union' function computes the union of two ordered lists.
224-- An element occurs in the output as many times as the maximum number
225-- of occurrences in either input.  The output is a set if and only if
226-- both inputs are sets.
227--
228-- > union [ 1,2, 3,4 ] [ 3,4, 5,6 ]   == [ 1,2, 3,4, 5,6 ]
229-- > union [ 1, 2,2,2 ] [ 1,1,1, 2,2 ] == [ 1,1,1, 2,2,2 ]
230union :: Ord a => [a] -> [a] -> [a]
231union = unionBy compare
232
233-- |  The 'unionBy' function is the non-overloaded version of 'union'.
234unionBy :: (a -> a -> Ordering) -> [a] -> [a] -> [a]
235unionBy cmp = loop
236  where
237     loop [] ys = ys
238     loop xs [] = xs
239     loop (x:xs) (y:ys)
240       = case cmp x y of
241          LT -> x : loop xs (y:ys)
242          EQ -> x : loop xs ys
243          GT -> y : loop (x:xs) ys
244
245-- |  The 'minus' function computes the difference of two ordered lists.
246-- An element occurs in the output as many times as it occurs in
247-- the first input, minus the number of occurrences in the second input.
248-- If the first input is a set,  then the output is a set.
249--
250-- > minus [ 1,2, 3,4 ] [ 3,4, 5,6 ]   == [ 1,2 ]
251-- > minus [ 1, 2,2,2 ] [ 1,1,1, 2,2 ] == [ 2 ]
252minus :: Ord a => [a] -> [a] -> [a]
253minus = minusBy compare
254
255-- |  The 'minusBy' function is the non-overloaded version of 'minus'.
256minusBy :: (a -> b -> Ordering) -> [a] -> [b] -> [a]
257minusBy cmp = loop
258  where
259     loop [] _ys = []
260     loop xs [] = xs
261     loop (x:xs) (y:ys)
262       = case cmp x y of
263          LT -> x : loop xs (y:ys)
264          EQ ->     loop xs ys
265          GT ->     loop (x:xs) ys
266
267-- |  The 'minus'' function computes the difference of two ordered lists.
268-- The result consists of elements from the first list that do not appear
269-- in the second list.  If the first input is a set, then the output is
270-- a set.
271--
272-- > minus' [ 1,2, 3,4 ] [ 3,4, 5,6 ]   == [ 1,2 ]
273-- > minus' [ 1, 2,2,2 ] [ 1,1,1, 2,2 ] == []
274-- > minus' [ 1,1, 2,2 ] [ 2 ]          == [ 1,1 ]
275minus' :: Ord a => [a] -> [a] -> [a]
276minus' = minusBy' compare
277
278-- |  The 'minusBy'' function is the non-overloaded version of 'minus''.
279minusBy' :: (a -> b -> Ordering) -> [a] -> [b] -> [a]
280minusBy' cmp = loop
281  where
282     loop [] _ys = []
283     loop xs [] = xs
284     loop (x:xs) (y:ys)
285       = case cmp x y of
286          LT -> x : loop xs (y:ys)
287          EQ ->     loop xs (y:ys)
288          GT ->     loop (x:xs) ys
289
290-- |  The 'xunion' function computes the exclusive union of two ordered lists.
291-- An element occurs in the output as many times as the absolute difference
292-- between the number of occurrences in the inputs.  If both inputs
293-- are sets,  then the output is a set.
294--
295-- > xunion [ 1,2, 3,4 ] [ 3,4, 5,6 ]   == [ 1,2, 5,6 ]
296-- > xunion [ 1, 2,2,2 ] [ 1,1,1, 2,2 ] == [ 1,1, 2 ]
297xunion :: Ord a => [a] -> [a] -> [a]
298xunion = xunionBy compare
299
300-- |  The 'xunionBy' function is the non-overloaded version of 'xunion'.
301xunionBy :: (a -> a -> Ordering) -> [a] -> [a] -> [a]
302xunionBy cmp = loop
303  where
304     loop [] ys = ys
305     loop xs [] = xs
306     loop (x:xs) (y:ys)
307       = case cmp x y of
308          LT -> x : loop xs (y:ys)
309          EQ ->     loop xs ys
310          GT -> y : loop (x:xs) ys
311
312-- |  The 'merge' function combines all elements of two ordered lists.
313-- An element occurs in the output as many times as the sum of the
314-- occurrences in both lists.   The output is a set if and only if
315-- the inputs are disjoint sets.
316--
317-- > merge [ 1,2, 3,4 ] [ 3,4, 5,6 ]   == [ 1,2,  3,3,4,4,  5,6 ]
318-- > merge [ 1, 2,2,2 ] [ 1,1,1, 2,2 ] == [ 1,1,1,1,  2,2,2,2,2 ]
319merge :: Ord a => [a] -> [a] -> [a]
320merge = mergeBy compare
321
322-- |  The 'mergeBy' function is the non-overloaded version of 'merge'.
323mergeBy :: (a -> a -> Ordering) -> [a] -> [a] -> [a]
324mergeBy cmp = loop
325  where
326    loop [] ys  = ys
327    loop xs []  = xs
328    loop (x:xs) (y:ys)
329      = case cmp x y of
330         GT -> y : loop (x:xs) ys
331         _  -> x : loop xs (y:ys)
332
333-- |  The 'subset' function returns true if the first list is a sub-list
334-- of the second.
335subset :: Ord a => [a] -> [a] -> Bool
336subset = subsetBy compare
337
338-- |  The 'subsetBy' function is the non-overloaded version of 'subset'.
339subsetBy :: (a -> a -> Ordering) -> [a] -> [a] -> Bool
340subsetBy cmp = loop
341  where
342    loop [] _ys = True
343    loop _xs [] = False
344    loop (x:xs) (y:ys)
345      = case cmp x y of
346         LT -> False
347         EQ -> loop xs ys
348         GT -> loop (x:xs) ys
349
350{-
351-- This is Ian Lynagh's mergesort implementation,  which appeared as
352-- Data.List.sort, with the static argument transformation applied.
353-- It's not clear whether this modification is truly worthwhile or not.
354
355sort :: Ord a => [a] -> [a]
356sort = sortBy compare
357
358sortBy :: (a -> a -> Ordering) -> [a] -> [a]
359sortBy cmp = foldt (mergeBy cmp) [] . map (\x -> [x])
360-}
361
362#if !MIN_VERSION_base(4,7,1)
363-- |  The 'sortOn' function provides the decorate-sort-undecorate idiom,
364-- also known as the \"Schwartzian transform\".
365sortOn :: Ord b => (a -> b) -> [a] -> [a]
366sortOn f  = map snd . sortOn' fst .  map (\x -> let y = f x in y `seq` (y, x))
367#endif
368
369-- |  This variant of 'sortOn' recomputes the sorting key every comparison.
370-- This can be better for functions that are cheap to compute.
371-- This is definitely better for projections,  as the decorate-sort-undecorate
372-- saves nothing and adds two traversals of the list and extra memory
373-- allocation.
374sortOn' :: Ord b => (a -> b) -> [a] -> [a]
375sortOn' f = sortBy (\x y -> compare (f x) (f y))
376
377-- |  The 'nubSort' function is equivalent to @'nub' '.' 'sort'@,  except
378-- that duplicates are removed as it sorts. It is essentially the same
379-- implementation as @Data.List.sort@, with 'merge' replaced by 'union'.
380-- Thus the performance of 'nubSort' should better than or nearly equal
381-- to 'sort' alone.  It is faster than both 'sort' and @'nub' '.' 'sort'@
382-- when the input contains significant quantities of duplicated elements.
383nubSort :: Ord a => [a] -> [a]
384nubSort = nubSortBy compare
385
386-- |  The 'nubSortBy' function is the non-overloaded version of 'nubSort'.
387nubSortBy :: (a -> a -> Ordering) -> [a] -> [a]
388nubSortBy cmp = foldt' (unionBy cmp) [] . runs
389  where
390    -- 'runs' partitions the input into sublists that are monotonic,
391    -- contiguous,  and non-overlapping.   Descending runs are reversed
392    -- and adjacent duplicates are eliminated,  so every run returned is
393    -- strictly ascending.
394
395    runs (a:b:xs)
396      = case cmp a b of
397          LT -> asc b (a:) xs
398          EQ -> runs (a:xs)
399          GT -> desc b [a] xs
400    runs xs = [xs]
401
402    desc a as []  = [a:as]
403    desc a as (b:bs)
404      = case cmp a b of
405          LT -> (a:as) : runs (b:bs)
406          EQ -> desc a as bs
407          GT -> desc b (a:as) bs
408
409    asc a as [] = [as [a]]
410    asc a as (b:bs)
411      = case cmp a b of
412         LT -> asc b (\ys -> as (a:ys)) bs
413         EQ -> asc a as bs
414         GT -> as [a] : runs (b:bs)
415
416-- |  The 'nubSortOn' function provides decorate-sort-undecorate for 'nubSort'.
417nubSortOn :: Ord b => (a -> b) -> [a] -> [a]
418nubSortOn f = map snd . nubSortOn' fst . map (\x -> let y = f x in y `seq` (y, x))
419
420-- |  This variant of 'nubSortOn' recomputes the sorting key for each comparison
421nubSortOn' :: Ord b => (a -> b) -> [a] -> [a]
422nubSortOn' f = nubSortBy (\x y -> compare (f x) (f y))
423
424-- | On ordered lists,  'nub' is equivalent to 'Data.List.nub', except that
425-- it runs in linear time instead of quadratic.   On unordered lists it also
426-- removes elements that are smaller than any preceding element.
427--
428-- > nub [1,1,1,2,2] == [1,2]
429-- > nub [2,0,1,3,3] == [2,3]
430-- > nub = nubBy (<)
431nub :: Ord a => [a] -> [a]
432nub = nubBy (<)
433
434-- | The 'nubBy' function is the greedy algorithm that returns a
435-- sublist of its input such that:
436--
437-- > isSortedBy pred (nubBy pred xs) == True
438--
439-- This is true for all lists,  not just ordered lists,  and all binary
440-- predicates,  not just total orders.   On infinite lists,  this statement
441-- is true in a certain mathematical sense,  but not a computational one.
442nubBy :: (a -> a -> Bool) -> [a] -> [a]
443nubBy p []     = []
444nubBy p (x:xs) = x : loop x xs
445  where
446    loop _ [] = []
447    loop x (y:ys)
448       | p x y     = y : loop y ys
449       | otherwise = loop x ys
450
451-- | The function @'foldt'' plus zero@ computes the sum of a list
452-- using a balanced tree of operations.  'foldt'' necessarily diverges
453-- on infinite lists, hence it is a stricter variant of 'foldt'.
454-- 'foldt'' is used in the implementation of 'sort' and 'nubSort'.
455foldt' :: (a -> a -> a) -> a -> [a] -> a
456foldt' plus zero xs
457  = case xs of
458      []    -> zero
459      (_:_) -> loop xs
460  where
461    loop [x] = x
462    loop xs  = loop (pairs xs)
463
464    pairs (x:y:zs) = plus x y : pairs zs
465    pairs zs       = zs
466
467-- | The function @'foldt' plus zero@ computes the sum of a list using
468-- a sequence of balanced trees of operations.   Given an appropriate @plus@
469-- operator,  this function can be productive on an infinite list, hence it
470-- is lazier than 'foldt''.   'foldt' is used in the implementation of
471-- 'mergeAll' and 'unionAll'.
472foldt :: (a -> a -> a) -> a -> [a] -> a
473foldt plus zero = loop
474  where
475    loop []     = zero
476    loop (x:xs) = x `plus` loop (pairs xs)
477
478    pairs (x:y:zs) = plus x y : pairs zs
479    pairs zs       = zs
480
481-- helper functions used in 'mergeAll' and 'unionAll'
482
483data People a = VIP a (People a) | Crowd [a]
484
485serve (VIP x xs) = x:serve xs
486serve (Crowd xs) = xs
487
488vips xss = [ VIP x (Crowd xs) | (x:xs) <- xss ]
489
490-- | The 'mergeAll' function merges a (potentially) infinite number of
491-- ordered lists, under the assumption that the heads of the inner lists
492-- are sorted.  An element is duplicated in the result as many times as
493-- the total number of occurrences in all inner lists.
494--
495-- The 'mergeAll' function is closely related to @'foldr' 'merge' []@.
496-- The former does not assume that the outer list is finite, whereas
497-- the latter does not assume that the heads of the inner lists are sorted.
498-- When both sets of assumptions are met,  these two functions are
499-- equivalent.
500--
501-- This implementation of 'mergeAll'  uses a tree of comparisons, and is
502-- based on input from Dave Bayer, Heinrich Apfelmus, Omar Antolin Camarena,
503-- and Will Ness.  See @CHANGES@ for details.
504mergeAll :: Ord a => [[a]] -> [a]
505mergeAll = mergeAllBy compare
506
507-- | The 'mergeAllBy' function is the non-overloaded variant of the 'mergeAll'
508-- function.
509mergeAllBy :: (a -> a -> Ordering) -> [[a]] -> [a]
510mergeAllBy cmp = serve . foldt merge' (Crowd []) . vips
511  where
512    merge' (VIP x xs) ys = VIP x (merge' xs ys)
513    merge' (Crowd []) ys = ys
514    merge' (Crowd xs) (Crowd ys) = Crowd (mergeBy cmp xs ys)
515    merge' xs@(Crowd (x:xt)) ys@(VIP y yt)
516      = case cmp x y of
517         GT -> VIP y (merge' xs yt)
518         _  -> VIP x (merge' (Crowd xt) ys)
519
520-- | The 'unionAll' computes the union of a (potentially) infinite number
521-- of lists,  under the assumption that the heads of the inner lists
522-- are sorted.  The result will duplicate an element as many times as
523-- the maximum number of occurrences in any single list.  Thus, the result
524-- is a set if and only if every inner list is a set.
525--
526-- The 'unionAll' function is closely related to @'foldr' 'union' []@.
527-- The former does not assume that the outer list is finite, whereas
528-- the latter does not assume that the heads of the inner lists are sorted.
529-- When both sets of assumptions are met,  these two functions are
530-- equivalent.
531--
532-- Note that there is no simple way to express 'unionAll' in terms of
533-- 'mergeAll' or vice versa on arbitrary valid inputs.  They are related
534-- via 'nub' however,  as @'nub' . 'mergeAll' == 'unionAll' . 'map' 'nub'@.
535-- If every list is a set,  then @map nub == id@,  and in this special case
536-- (and only in this special case) does @nub . mergeAll == unionAll@.
537--
538-- This implementation of 'unionAll'  uses a tree of comparisons, and is
539-- based on input from Dave Bayer, Heinrich Apfelmus, Omar Antolin Camarena,
540-- and Will Ness.  See @CHANGES@ for details.
541unionAll :: Ord a => [[a]] -> [a]
542unionAll = unionAllBy compare
543
544-- | The 'unionAllBy' function is the non-overloaded variant of the 'unionAll'
545-- function.
546unionAllBy :: (a -> a -> Ordering) -> [[a]] -> [a]
547unionAllBy cmp = serve . foldt union' (Crowd []) . vips
548  where
549    msg = "Data.List.Ordered.unionAllBy:  the heads of the lists are not sorted"
550
551    union' (VIP x xs) ys
552       = VIP x $ case ys of
553                  Crowd _ -> union' xs ys
554                  VIP y yt -> case cmp x y of
555                               LT -> union' xs ys
556                               EQ -> union' xs yt
557                               GT -> error msg
558    union' (Crowd []) ys = ys
559    union' (Crowd xs) (Crowd ys) = Crowd (unionBy cmp xs ys)
560    union' xs@(Crowd (x:xt)) ys@(VIP y yt)
561       = case cmp x y of
562           LT -> VIP x (union' (Crowd xt) ys)
563           EQ -> VIP x (union' (Crowd xt) yt)
564           GT -> VIP y (union' xs yt)
565