1{-# LANGUAGE CPP #-} 2 3----------------------------------------------------------------------------- 4-- | 5-- Module : Data.List.Ordered 6-- Copyright : (c) 2009-2011 Leon P Smith 7-- License : BSD3 8-- 9-- Maintainer : leon@melding-monads.com 10-- Stability : experimental 11-- Portability : portable 12-- 13-- This module implements bag and set operations on ordered lists. For the 14-- purposes of this module, a \"bag\" (or \"multiset\") is a non-decreasing 15-- list, whereas a \"set\" is a strictly ascending list. Bags are sorted 16-- lists that may contain duplicates, whereas sets are sorted lists that 17-- do not contain duplicates. 18-- 19-- Except for the 'nub', 'sort', 'nubSort', and 'isSorted' families of 20-- functions, every function assumes that any list arguments are sorted 21-- lists. Assuming this precondition is met, every resulting list is also 22-- sorted. 23-- 24-- Because 'isect' handles multisets correctly, it does not return results 25-- comparable to @Data.List.'Data.List.intersect'@ on them. Thus @isect@ 26-- is more than just a more efficient @intersect@ on ordered lists. Similar 27-- statements apply to other associations between functions this module and 28-- functions in @Data.List@, such as 'union' and @Data.List.'union'@. 29-- 30-- All functions in this module are left biased. Elements that appear in 31-- earlier arguments have priority over equal elements that appear in later 32-- arguments, and elements that appear earlier in a single list have 33-- priority over equal elements that appear later in that list. 34-- 35----------------------------------------------------------------------------- 36 37module Data.List.Ordered 38 ( 39 -- * Predicates 40 member, memberBy, has, hasBy 41 , subset, subsetBy 42 , isSorted, isSortedBy 43 44 -- * Insertion Functions 45 , insertBag, insertBagBy 46 , insertSet, insertSetBy 47 48 -- * Set-like operations 49 , isect, isectBy 50 , union, unionBy 51 , minus, minusBy 52 , minus', minusBy' 53 , xunion, xunionBy 54 , merge, mergeBy 55 , mergeAll, mergeAllBy 56 , unionAll, unionAllBy 57 58 -- * Lists to Ordered Lists 59 , nub, nubBy 60 , sort, sortBy 61 , sortOn, sortOn' 62 , nubSort, nubSortBy 63 , nubSortOn, nubSortOn' 64 65 -- * Miscellaneous folds 66 , foldt, foldt' 67 68 ) where 69 70import Data.List(sort,sortBy,intersect) 71#if MIN_VERSION_base(4,7,1) 72import Data.List(sortOn) 73#endif 74 75-- | The 'isSorted' predicate returns 'True' if the elements of a list occur 76-- in non-descending order, equivalent to @'isSortedBy' ('<=')@. 77isSorted :: Ord a => [a] -> Bool 78isSorted = isSortedBy (<=) 79 80-- | The 'isSortedBy' function returns 'True' iff the predicate returns true 81-- for all adjacent pairs of elements in the list. 82isSortedBy :: (a -> a -> Bool) -> [a] -> Bool 83isSortedBy lte = loop 84 where 85 loop [] = True 86 loop [_] = True 87 loop (x:y:zs) = (x `lte` y) && loop (y:zs) 88 89-- | The 'member' function returns 'True' if the element appears in the 90-- ordered list. 91member :: Ord a => a -> [a] -> Bool 92member = memberBy compare 93 94-- | The 'memberBy' function is the non-overloaded version of 'member'. 95memberBy :: (a -> a -> Ordering) -> a -> [a] -> Bool 96memberBy cmp x = loop 97 where 98 loop [] = False 99 loop (y:ys) = case cmp x y of 100 LT -> False 101 EQ -> True 102 GT -> loop ys 103 104-- | The 'has' function returns 'True' if the element appears in the list; 105-- it is equivalent to 'member' except the order of the arguments is reversed, 106-- making it a function from an ordered list to its characteristic function. 107has :: Ord a => [a] -> a -> Bool 108has xs y = memberBy compare y xs 109 110-- | The 'hasBy' function is the non-overloaded version of 'has'. 111hasBy :: (a -> a -> Ordering) -> [a] -> a -> Bool 112hasBy cmp xs y = memberBy cmp y xs 113 114-- | The 'insertBag' function inserts an element into a list. If the element 115-- is already there, then another copy of the element is inserted. 116insertBag :: Ord a => a -> [a] -> [a] 117insertBag = insertBagBy compare 118 119-- | The 'insertBagBy' function is the non-overloaded version of 'insertBag'. 120insertBagBy :: (a -> a -> Ordering) -> a -> [a] -> [a] 121insertBagBy cmp = loop 122 where 123 loop x [] = [x] 124 loop x (y:ys) 125 = case cmp x y of 126 GT -> y:loop x ys 127 _ -> x:y:ys 128 129-- | The 'insertSet' function inserts an element into an ordered list. 130-- If the element is already there, then the element replaces the existing 131-- element. 132insertSet :: Ord a => a -> [a] -> [a] 133insertSet = insertSetBy compare 134 135-- | The 'insertSetBy' function is the non-overloaded version of 'insertSet'. 136insertSetBy :: (a -> a -> Ordering) -> a -> [a] -> [a] 137insertSetBy cmp = loop 138 where 139 loop x [] = [x] 140 loop x (y:ys) = case cmp x y of 141 LT -> x:y:ys 142 EQ -> x:ys 143 GT -> y:loop x ys 144 145{- 146-- This function is moderately interesting, as it encompasses all the 147-- "Venn diagram" functions on two sets. (though not merge; which isn't 148-- a set function) 149 150-- However, it doesn't seem that useful, considering that of the 8 possible 151-- functions, there are only 4 interesting variations: isect, union, minus, 152-- and xunion. Due to interactions with GHC's optimizer, coded separately, 153-- these have a smaller combined object code size than the object code size 154-- for genSectBy. (Or, turn off certain optimizations and lose speed.) 155 156-- Each individual object code can be recovered from genSectBy via GHC's 157-- inliner and constant propagation; but this doesn't save much in terms 158-- of source code size and reduces portability. 159 160-- Note that the Static Argument Transformation is necessary for this to work 161-- correctly; inlining genSectBy allows for cmp and p to be inlined as well, 162-- or at least eliminate some indirect jumps. All of the *By functions in 163-- this module follow this idiom for this reason. 164 165genSectBy :: (a -> a -> Ordering) 166 -> (Bool -> Bool -> Bool) 167 -> [a] -> [a] -> [a] 168genSectBy cmp p = loop 169 where 170 loop [] ys | p False True = ys 171 | otherwise = [] 172 loop xs [] | p True False = xs 173 | otherwise = [] 174 loop (x:xs) (y:ys) 175 = case cmp x y of 176 LT | p True False -> x : loop xs (y:ys) 177 | otherwise -> loop xs (y:ys) 178 EQ | p True True -> x : loop xs ys 179 | otherwise -> loop xs ys 180 GT | p False True -> y : loop (x:xs) ys 181 | otherwise -> loop (x:xs) ys 182 183-- Here's another variation that was suggested to me. It is more general 184-- than genSectBy, as it can implement a merge; but it cannot implement 185-- a left-biased merge 186 187foldrMergeBy :: (a -> b -> Ordering) 188 -> (a -> c -> c) -> (b -> c -> c) -> (a -> b -> c -> c) -> c 189 -> [a] -> [b] -> c 190foldrMergeBy cmp addA addB unify z = loop 191 where 192 loop xs [] = foldr addA z xs 193 loop [] ys = foldr addB z ys 194 loop (x:xs) (y:ys) 195 = case cmp x y of 196 LT -> x `addA` loop xs (y:ys) 197 EQ -> unify x y (loop xs ys) 198 GT -> y `addB` loop (x:xs) ys 199-} 200 201-- | The 'isect' function computes the intersection of two ordered lists. 202-- An element occurs in the output as many times as the minimum number of 203-- occurrences in either input. If either input is a set, then the output 204-- is a set. 205-- 206-- > isect [ 1,2, 3,4 ] [ 3,4, 5,6 ] == [ 3,4 ] 207-- > isect [ 1, 2,2,2 ] [ 1,1,1, 2,2 ] == [ 1, 2,2 ] 208isect :: Ord a => [a] -> [a] -> [a] 209isect = isectBy compare 210 211-- | The 'isectBy' function is the non-overloaded version of 'isect'. 212isectBy :: (a -> b -> Ordering) -> [a] -> [b] -> [a] 213isectBy cmp = loop 214 where 215 loop [] _ys = [] 216 loop _xs [] = [] 217 loop (x:xs) (y:ys) 218 = case cmp x y of 219 LT -> loop xs (y:ys) 220 EQ -> x : loop xs ys 221 GT -> loop (x:xs) ys 222 223-- | The 'union' function computes the union of two ordered lists. 224-- An element occurs in the output as many times as the maximum number 225-- of occurrences in either input. The output is a set if and only if 226-- both inputs are sets. 227-- 228-- > union [ 1,2, 3,4 ] [ 3,4, 5,6 ] == [ 1,2, 3,4, 5,6 ] 229-- > union [ 1, 2,2,2 ] [ 1,1,1, 2,2 ] == [ 1,1,1, 2,2,2 ] 230union :: Ord a => [a] -> [a] -> [a] 231union = unionBy compare 232 233-- | The 'unionBy' function is the non-overloaded version of 'union'. 234unionBy :: (a -> a -> Ordering) -> [a] -> [a] -> [a] 235unionBy cmp = loop 236 where 237 loop [] ys = ys 238 loop xs [] = xs 239 loop (x:xs) (y:ys) 240 = case cmp x y of 241 LT -> x : loop xs (y:ys) 242 EQ -> x : loop xs ys 243 GT -> y : loop (x:xs) ys 244 245-- | The 'minus' function computes the difference of two ordered lists. 246-- An element occurs in the output as many times as it occurs in 247-- the first input, minus the number of occurrences in the second input. 248-- If the first input is a set, then the output is a set. 249-- 250-- > minus [ 1,2, 3,4 ] [ 3,4, 5,6 ] == [ 1,2 ] 251-- > minus [ 1, 2,2,2 ] [ 1,1,1, 2,2 ] == [ 2 ] 252minus :: Ord a => [a] -> [a] -> [a] 253minus = minusBy compare 254 255-- | The 'minusBy' function is the non-overloaded version of 'minus'. 256minusBy :: (a -> b -> Ordering) -> [a] -> [b] -> [a] 257minusBy cmp = loop 258 where 259 loop [] _ys = [] 260 loop xs [] = xs 261 loop (x:xs) (y:ys) 262 = case cmp x y of 263 LT -> x : loop xs (y:ys) 264 EQ -> loop xs ys 265 GT -> loop (x:xs) ys 266 267-- | The 'minus'' function computes the difference of two ordered lists. 268-- The result consists of elements from the first list that do not appear 269-- in the second list. If the first input is a set, then the output is 270-- a set. 271-- 272-- > minus' [ 1,2, 3,4 ] [ 3,4, 5,6 ] == [ 1,2 ] 273-- > minus' [ 1, 2,2,2 ] [ 1,1,1, 2,2 ] == [] 274-- > minus' [ 1,1, 2,2 ] [ 2 ] == [ 1,1 ] 275minus' :: Ord a => [a] -> [a] -> [a] 276minus' = minusBy' compare 277 278-- | The 'minusBy'' function is the non-overloaded version of 'minus''. 279minusBy' :: (a -> b -> Ordering) -> [a] -> [b] -> [a] 280minusBy' cmp = loop 281 where 282 loop [] _ys = [] 283 loop xs [] = xs 284 loop (x:xs) (y:ys) 285 = case cmp x y of 286 LT -> x : loop xs (y:ys) 287 EQ -> loop xs (y:ys) 288 GT -> loop (x:xs) ys 289 290-- | The 'xunion' function computes the exclusive union of two ordered lists. 291-- An element occurs in the output as many times as the absolute difference 292-- between the number of occurrences in the inputs. If both inputs 293-- are sets, then the output is a set. 294-- 295-- > xunion [ 1,2, 3,4 ] [ 3,4, 5,6 ] == [ 1,2, 5,6 ] 296-- > xunion [ 1, 2,2,2 ] [ 1,1,1, 2,2 ] == [ 1,1, 2 ] 297xunion :: Ord a => [a] -> [a] -> [a] 298xunion = xunionBy compare 299 300-- | The 'xunionBy' function is the non-overloaded version of 'xunion'. 301xunionBy :: (a -> a -> Ordering) -> [a] -> [a] -> [a] 302xunionBy cmp = loop 303 where 304 loop [] ys = ys 305 loop xs [] = xs 306 loop (x:xs) (y:ys) 307 = case cmp x y of 308 LT -> x : loop xs (y:ys) 309 EQ -> loop xs ys 310 GT -> y : loop (x:xs) ys 311 312-- | The 'merge' function combines all elements of two ordered lists. 313-- An element occurs in the output as many times as the sum of the 314-- occurrences in both lists. The output is a set if and only if 315-- the inputs are disjoint sets. 316-- 317-- > merge [ 1,2, 3,4 ] [ 3,4, 5,6 ] == [ 1,2, 3,3,4,4, 5,6 ] 318-- > merge [ 1, 2,2,2 ] [ 1,1,1, 2,2 ] == [ 1,1,1,1, 2,2,2,2,2 ] 319merge :: Ord a => [a] -> [a] -> [a] 320merge = mergeBy compare 321 322-- | The 'mergeBy' function is the non-overloaded version of 'merge'. 323mergeBy :: (a -> a -> Ordering) -> [a] -> [a] -> [a] 324mergeBy cmp = loop 325 where 326 loop [] ys = ys 327 loop xs [] = xs 328 loop (x:xs) (y:ys) 329 = case cmp x y of 330 GT -> y : loop (x:xs) ys 331 _ -> x : loop xs (y:ys) 332 333-- | The 'subset' function returns true if the first list is a sub-list 334-- of the second. 335subset :: Ord a => [a] -> [a] -> Bool 336subset = subsetBy compare 337 338-- | The 'subsetBy' function is the non-overloaded version of 'subset'. 339subsetBy :: (a -> a -> Ordering) -> [a] -> [a] -> Bool 340subsetBy cmp = loop 341 where 342 loop [] _ys = True 343 loop _xs [] = False 344 loop (x:xs) (y:ys) 345 = case cmp x y of 346 LT -> False 347 EQ -> loop xs ys 348 GT -> loop (x:xs) ys 349 350{- 351-- This is Ian Lynagh's mergesort implementation, which appeared as 352-- Data.List.sort, with the static argument transformation applied. 353-- It's not clear whether this modification is truly worthwhile or not. 354 355sort :: Ord a => [a] -> [a] 356sort = sortBy compare 357 358sortBy :: (a -> a -> Ordering) -> [a] -> [a] 359sortBy cmp = foldt (mergeBy cmp) [] . map (\x -> [x]) 360-} 361 362#if !MIN_VERSION_base(4,7,1) 363-- | The 'sortOn' function provides the decorate-sort-undecorate idiom, 364-- also known as the \"Schwartzian transform\". 365sortOn :: Ord b => (a -> b) -> [a] -> [a] 366sortOn f = map snd . sortOn' fst . map (\x -> let y = f x in y `seq` (y, x)) 367#endif 368 369-- | This variant of 'sortOn' recomputes the sorting key every comparison. 370-- This can be better for functions that are cheap to compute. 371-- This is definitely better for projections, as the decorate-sort-undecorate 372-- saves nothing and adds two traversals of the list and extra memory 373-- allocation. 374sortOn' :: Ord b => (a -> b) -> [a] -> [a] 375sortOn' f = sortBy (\x y -> compare (f x) (f y)) 376 377-- | The 'nubSort' function is equivalent to @'nub' '.' 'sort'@, except 378-- that duplicates are removed as it sorts. It is essentially the same 379-- implementation as @Data.List.sort@, with 'merge' replaced by 'union'. 380-- Thus the performance of 'nubSort' should better than or nearly equal 381-- to 'sort' alone. It is faster than both 'sort' and @'nub' '.' 'sort'@ 382-- when the input contains significant quantities of duplicated elements. 383nubSort :: Ord a => [a] -> [a] 384nubSort = nubSortBy compare 385 386-- | The 'nubSortBy' function is the non-overloaded version of 'nubSort'. 387nubSortBy :: (a -> a -> Ordering) -> [a] -> [a] 388nubSortBy cmp = foldt' (unionBy cmp) [] . runs 389 where 390 -- 'runs' partitions the input into sublists that are monotonic, 391 -- contiguous, and non-overlapping. Descending runs are reversed 392 -- and adjacent duplicates are eliminated, so every run returned is 393 -- strictly ascending. 394 395 runs (a:b:xs) 396 = case cmp a b of 397 LT -> asc b (a:) xs 398 EQ -> runs (a:xs) 399 GT -> desc b [a] xs 400 runs xs = [xs] 401 402 desc a as [] = [a:as] 403 desc a as (b:bs) 404 = case cmp a b of 405 LT -> (a:as) : runs (b:bs) 406 EQ -> desc a as bs 407 GT -> desc b (a:as) bs 408 409 asc a as [] = [as [a]] 410 asc a as (b:bs) 411 = case cmp a b of 412 LT -> asc b (\ys -> as (a:ys)) bs 413 EQ -> asc a as bs 414 GT -> as [a] : runs (b:bs) 415 416-- | The 'nubSortOn' function provides decorate-sort-undecorate for 'nubSort'. 417nubSortOn :: Ord b => (a -> b) -> [a] -> [a] 418nubSortOn f = map snd . nubSortOn' fst . map (\x -> let y = f x in y `seq` (y, x)) 419 420-- | This variant of 'nubSortOn' recomputes the sorting key for each comparison 421nubSortOn' :: Ord b => (a -> b) -> [a] -> [a] 422nubSortOn' f = nubSortBy (\x y -> compare (f x) (f y)) 423 424-- | On ordered lists, 'nub' is equivalent to 'Data.List.nub', except that 425-- it runs in linear time instead of quadratic. On unordered lists it also 426-- removes elements that are smaller than any preceding element. 427-- 428-- > nub [1,1,1,2,2] == [1,2] 429-- > nub [2,0,1,3,3] == [2,3] 430-- > nub = nubBy (<) 431nub :: Ord a => [a] -> [a] 432nub = nubBy (<) 433 434-- | The 'nubBy' function is the greedy algorithm that returns a 435-- sublist of its input such that: 436-- 437-- > isSortedBy pred (nubBy pred xs) == True 438-- 439-- This is true for all lists, not just ordered lists, and all binary 440-- predicates, not just total orders. On infinite lists, this statement 441-- is true in a certain mathematical sense, but not a computational one. 442nubBy :: (a -> a -> Bool) -> [a] -> [a] 443nubBy p [] = [] 444nubBy p (x:xs) = x : loop x xs 445 where 446 loop _ [] = [] 447 loop x (y:ys) 448 | p x y = y : loop y ys 449 | otherwise = loop x ys 450 451-- | The function @'foldt'' plus zero@ computes the sum of a list 452-- using a balanced tree of operations. 'foldt'' necessarily diverges 453-- on infinite lists, hence it is a stricter variant of 'foldt'. 454-- 'foldt'' is used in the implementation of 'sort' and 'nubSort'. 455foldt' :: (a -> a -> a) -> a -> [a] -> a 456foldt' plus zero xs 457 = case xs of 458 [] -> zero 459 (_:_) -> loop xs 460 where 461 loop [x] = x 462 loop xs = loop (pairs xs) 463 464 pairs (x:y:zs) = plus x y : pairs zs 465 pairs zs = zs 466 467-- | The function @'foldt' plus zero@ computes the sum of a list using 468-- a sequence of balanced trees of operations. Given an appropriate @plus@ 469-- operator, this function can be productive on an infinite list, hence it 470-- is lazier than 'foldt''. 'foldt' is used in the implementation of 471-- 'mergeAll' and 'unionAll'. 472foldt :: (a -> a -> a) -> a -> [a] -> a 473foldt plus zero = loop 474 where 475 loop [] = zero 476 loop (x:xs) = x `plus` loop (pairs xs) 477 478 pairs (x:y:zs) = plus x y : pairs zs 479 pairs zs = zs 480 481-- helper functions used in 'mergeAll' and 'unionAll' 482 483data People a = VIP a (People a) | Crowd [a] 484 485serve (VIP x xs) = x:serve xs 486serve (Crowd xs) = xs 487 488vips xss = [ VIP x (Crowd xs) | (x:xs) <- xss ] 489 490-- | The 'mergeAll' function merges a (potentially) infinite number of 491-- ordered lists, under the assumption that the heads of the inner lists 492-- are sorted. An element is duplicated in the result as many times as 493-- the total number of occurrences in all inner lists. 494-- 495-- The 'mergeAll' function is closely related to @'foldr' 'merge' []@. 496-- The former does not assume that the outer list is finite, whereas 497-- the latter does not assume that the heads of the inner lists are sorted. 498-- When both sets of assumptions are met, these two functions are 499-- equivalent. 500-- 501-- This implementation of 'mergeAll' uses a tree of comparisons, and is 502-- based on input from Dave Bayer, Heinrich Apfelmus, Omar Antolin Camarena, 503-- and Will Ness. See @CHANGES@ for details. 504mergeAll :: Ord a => [[a]] -> [a] 505mergeAll = mergeAllBy compare 506 507-- | The 'mergeAllBy' function is the non-overloaded variant of the 'mergeAll' 508-- function. 509mergeAllBy :: (a -> a -> Ordering) -> [[a]] -> [a] 510mergeAllBy cmp = serve . foldt merge' (Crowd []) . vips 511 where 512 merge' (VIP x xs) ys = VIP x (merge' xs ys) 513 merge' (Crowd []) ys = ys 514 merge' (Crowd xs) (Crowd ys) = Crowd (mergeBy cmp xs ys) 515 merge' xs@(Crowd (x:xt)) ys@(VIP y yt) 516 = case cmp x y of 517 GT -> VIP y (merge' xs yt) 518 _ -> VIP x (merge' (Crowd xt) ys) 519 520-- | The 'unionAll' computes the union of a (potentially) infinite number 521-- of lists, under the assumption that the heads of the inner lists 522-- are sorted. The result will duplicate an element as many times as 523-- the maximum number of occurrences in any single list. Thus, the result 524-- is a set if and only if every inner list is a set. 525-- 526-- The 'unionAll' function is closely related to @'foldr' 'union' []@. 527-- The former does not assume that the outer list is finite, whereas 528-- the latter does not assume that the heads of the inner lists are sorted. 529-- When both sets of assumptions are met, these two functions are 530-- equivalent. 531-- 532-- Note that there is no simple way to express 'unionAll' in terms of 533-- 'mergeAll' or vice versa on arbitrary valid inputs. They are related 534-- via 'nub' however, as @'nub' . 'mergeAll' == 'unionAll' . 'map' 'nub'@. 535-- If every list is a set, then @map nub == id@, and in this special case 536-- (and only in this special case) does @nub . mergeAll == unionAll@. 537-- 538-- This implementation of 'unionAll' uses a tree of comparisons, and is 539-- based on input from Dave Bayer, Heinrich Apfelmus, Omar Antolin Camarena, 540-- and Will Ness. See @CHANGES@ for details. 541unionAll :: Ord a => [[a]] -> [a] 542unionAll = unionAllBy compare 543 544-- | The 'unionAllBy' function is the non-overloaded variant of the 'unionAll' 545-- function. 546unionAllBy :: (a -> a -> Ordering) -> [[a]] -> [a] 547unionAllBy cmp = serve . foldt union' (Crowd []) . vips 548 where 549 msg = "Data.List.Ordered.unionAllBy: the heads of the lists are not sorted" 550 551 union' (VIP x xs) ys 552 = VIP x $ case ys of 553 Crowd _ -> union' xs ys 554 VIP y yt -> case cmp x y of 555 LT -> union' xs ys 556 EQ -> union' xs yt 557 GT -> error msg 558 union' (Crowd []) ys = ys 559 union' (Crowd xs) (Crowd ys) = Crowd (unionBy cmp xs ys) 560 union' xs@(Crowd (x:xt)) ys@(VIP y yt) 561 = case cmp x y of 562 LT -> VIP x (union' (Crowd xt) ys) 563 EQ -> VIP x (union' (Crowd xt) yt) 564 GT -> VIP y (union' xs yt) 565