1{-# LANGUAGE CPP #-} 2{-# LANGUAGE BangPatterns #-} 3{-# LANGUAGE PatternGuards #-} 4#if __GLASGOW_HASKELL__ 5{-# LANGUAGE MagicHash, DeriveDataTypeable, StandaloneDeriving #-} 6#endif 7#if !defined(TESTING) && defined(__GLASGOW_HASKELL__) 8{-# LANGUAGE Trustworthy #-} 9#endif 10#if __GLASGOW_HASKELL__ >= 708 11{-# LANGUAGE TypeFamilies #-} 12#endif 13 14{-# OPTIONS_HADDOCK not-home #-} 15 16#include "containers.h" 17 18----------------------------------------------------------------------------- 19-- | 20-- Module : Data.IntSet.Internal 21-- Copyright : (c) Daan Leijen 2002 22-- (c) Joachim Breitner 2011 23-- License : BSD-style 24-- Maintainer : libraries@haskell.org 25-- Portability : portable 26-- 27-- = WARNING 28-- 29-- This module is considered __internal__. 30-- 31-- The Package Versioning Policy __does not apply__. 32-- 33-- The contents of this module may change __in any way whatsoever__ 34-- and __without any warning__ between minor versions of this package. 35-- 36-- Authors importing this module are expected to track development 37-- closely. 38-- 39-- = Description 40-- 41-- An efficient implementation of integer sets. 42-- 43-- These modules are intended to be imported qualified, to avoid name 44-- clashes with Prelude functions, e.g. 45-- 46-- > import Data.IntSet (IntSet) 47-- > import qualified Data.IntSet as IntSet 48-- 49-- The implementation is based on /big-endian patricia trees/. This data 50-- structure performs especially well on binary operations like 'union' 51-- and 'intersection'. However, my benchmarks show that it is also 52-- (much) faster on insertions and deletions when compared to a generic 53-- size-balanced set implementation (see "Data.Set"). 54-- 55-- * Chris Okasaki and Andy Gill, \"/Fast Mergeable Integer Maps/\", 56-- Workshop on ML, September 1998, pages 77-86, 57-- <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.37.5452> 58-- 59-- * D.R. Morrison, \"/PATRICIA -- Practical Algorithm To Retrieve Information Coded In Alphanumeric/\", 60-- Journal of the ACM, 15(4), October 1968, pages 514-534. 61-- 62-- Additionally, this implementation places bitmaps in the leaves of the tree. 63-- Their size is the natural size of a machine word (32 or 64 bits) and greatly 64-- reduce memory footprint and execution times for dense sets, e.g. sets where 65-- it is likely that many values lie close to each other. The asymptotics are 66-- not affected by this optimization. 67-- 68-- Many operations have a worst-case complexity of /O(min(n,W))/. 69-- This means that the operation can become linear in the number of 70-- elements with a maximum of /W/ -- the number of bits in an 'Int' 71-- (32 or 64). 72-- 73-- @since 0.5.9 74----------------------------------------------------------------------------- 75 76-- [Note: INLINE bit fiddling] 77-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 78-- It is essential that the bit fiddling functions like mask, zero, branchMask 79-- etc are inlined. If they do not, the memory allocation skyrockets. The GHC 80-- usually gets it right, but it is disastrous if it does not. Therefore we 81-- explicitly mark these functions INLINE. 82 83 84-- [Note: Local 'go' functions and capturing] 85-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 86-- Care must be taken when using 'go' function which captures an argument. 87-- Sometimes (for example when the argument is passed to a data constructor, 88-- as in insert), GHC heap-allocates more than necessary. Therefore C-- code 89-- must be checked for increased allocation when creating and modifying such 90-- functions. 91 92 93-- [Note: Order of constructors] 94-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 95-- The order of constructors of IntSet matters when considering performance. 96-- Currently in GHC 7.0, when type has 3 constructors, they are matched from 97-- the first to the last -- the best performance is achieved when the 98-- constructors are ordered by frequency. 99-- On GHC 7.0, reordering constructors from Nil | Tip | Bin to Bin | Tip | Nil 100-- improves the benchmark by circa 10%. 101 102module Data.IntSet.Internal ( 103 -- * Set type 104 IntSet(..), Key -- instance Eq,Show 105 , Prefix, Mask, BitMap 106 107 -- * Operators 108 , (\\) 109 110 -- * Query 111 , null 112 , size 113 , member 114 , notMember 115 , lookupLT 116 , lookupGT 117 , lookupLE 118 , lookupGE 119 , isSubsetOf 120 , isProperSubsetOf 121 , disjoint 122 123 -- * Construction 124 , empty 125 , singleton 126 , insert 127 , delete 128 , alterF 129 130 -- * Combine 131 , union 132 , unions 133 , difference 134 , intersection 135 136 -- * Filter 137 , filter 138 , partition 139 , split 140 , splitMember 141 , splitRoot 142 143 -- * Map 144 , map 145 , mapMonotonic 146 147 -- * Folds 148 , foldr 149 , foldl 150 -- ** Strict folds 151 , foldr' 152 , foldl' 153 -- ** Legacy folds 154 , fold 155 156 -- * Min\/Max 157 , findMin 158 , findMax 159 , deleteMin 160 , deleteMax 161 , deleteFindMin 162 , deleteFindMax 163 , maxView 164 , minView 165 166 -- * Conversion 167 168 -- ** List 169 , elems 170 , toList 171 , fromList 172 173 -- ** Ordered list 174 , toAscList 175 , toDescList 176 , fromAscList 177 , fromDistinctAscList 178 179 -- * Debugging 180 , showTree 181 , showTreeWith 182 183 -- * Internals 184 , match 185 , suffixBitMask 186 , prefixBitMask 187 , bitmapOf 188 , zero 189 ) where 190 191import Control.Applicative (Const(..)) 192import Control.DeepSeq (NFData(rnf)) 193import Data.Bits 194import qualified Data.List as List 195import Data.Maybe (fromMaybe) 196#if !MIN_VERSION_base(4,8,0) 197import Data.Monoid (Monoid(..)) 198import Data.Word (Word) 199#endif 200#if MIN_VERSION_base(4,9,0) 201import Data.Semigroup (Semigroup(stimes)) 202#endif 203#if !(MIN_VERSION_base(4,11,0)) && MIN_VERSION_base(4,9,0) 204import Data.Semigroup (Semigroup((<>))) 205#endif 206#if MIN_VERSION_base(4,9,0) 207import Data.Semigroup (stimesIdempotentMonoid) 208#endif 209import Data.Typeable 210import Prelude hiding (filter, foldr, foldl, null, map) 211 212import Utils.Containers.Internal.BitUtil 213import Utils.Containers.Internal.StrictPair 214 215#if __GLASGOW_HASKELL__ 216import Data.Data (Data(..), Constr, mkConstr, constrIndex, DataType, mkDataType) 217import qualified Data.Data 218import Text.Read 219#endif 220 221#if __GLASGOW_HASKELL__ 222import qualified GHC.Exts 223#if !(MIN_VERSION_base(4,8,0) && (WORD_SIZE_IN_BITS==64)) 224import qualified GHC.Int 225#endif 226#endif 227 228import qualified Data.Foldable as Foldable 229#if MIN_VERSION_base(4,8,0) 230import Data.Functor.Identity (Identity(..)) 231#else 232import Data.Foldable (Foldable()) 233#endif 234 235infixl 9 \\{-This comment teaches CPP correct behaviour -} 236 237-- A "Nat" is a natural machine word (an unsigned Int) 238type Nat = Word 239 240natFromInt :: Int -> Nat 241natFromInt i = fromIntegral i 242{-# INLINE natFromInt #-} 243 244intFromNat :: Nat -> Int 245intFromNat w = fromIntegral w 246{-# INLINE intFromNat #-} 247 248{-------------------------------------------------------------------- 249 Operators 250--------------------------------------------------------------------} 251-- | /O(n+m)/. See 'difference'. 252(\\) :: IntSet -> IntSet -> IntSet 253m1 \\ m2 = difference m1 m2 254 255{-------------------------------------------------------------------- 256 Types 257--------------------------------------------------------------------} 258 259-- | A set of integers. 260 261-- See Note: Order of constructors 262data IntSet = Bin {-# UNPACK #-} !Prefix {-# UNPACK #-} !Mask !IntSet !IntSet 263-- Invariant: Nil is never found as a child of Bin. 264-- Invariant: The Mask is a power of 2. It is the largest bit position at which 265-- two elements of the set differ. 266-- Invariant: Prefix is the common high-order bits that all elements share to 267-- the left of the Mask bit. 268-- Invariant: In Bin prefix mask left right, left consists of the elements that 269-- don't have the mask bit set; right is all the elements that do. 270 | Tip {-# UNPACK #-} !Prefix {-# UNPACK #-} !BitMap 271-- Invariant: The Prefix is zero for the last 5 (on 32 bit arches) or 6 bits 272-- (on 64 bit arches). The values of the set represented by a tip 273-- are the prefix plus the indices of the set bits in the bit map. 274 | Nil 275 276-- A number stored in a set is stored as 277-- * Prefix (all but last 5-6 bits) and 278-- * BitMap (last 5-6 bits stored as a bitmask) 279-- Last 5-6 bits are called a Suffix. 280 281type Prefix = Int 282type Mask = Int 283type BitMap = Word 284type Key = Int 285 286instance Monoid IntSet where 287 mempty = empty 288 mconcat = unions 289#if !(MIN_VERSION_base(4,9,0)) 290 mappend = union 291#else 292 mappend = (<>) 293 294-- | @since 0.5.7 295instance Semigroup IntSet where 296 (<>) = union 297 stimes = stimesIdempotentMonoid 298#endif 299 300#if __GLASGOW_HASKELL__ 301 302{-------------------------------------------------------------------- 303 A Data instance 304--------------------------------------------------------------------} 305 306-- This instance preserves data abstraction at the cost of inefficiency. 307-- We provide limited reflection services for the sake of data abstraction. 308 309instance Data IntSet where 310 gfoldl f z is = z fromList `f` (toList is) 311 toConstr _ = fromListConstr 312 gunfold k z c = case constrIndex c of 313 1 -> k (z fromList) 314 _ -> error "gunfold" 315 dataTypeOf _ = intSetDataType 316 317fromListConstr :: Constr 318fromListConstr = mkConstr intSetDataType "fromList" [] Data.Data.Prefix 319 320intSetDataType :: DataType 321intSetDataType = mkDataType "Data.IntSet.Internal.IntSet" [fromListConstr] 322 323#endif 324 325{-------------------------------------------------------------------- 326 Query 327--------------------------------------------------------------------} 328-- | /O(1)/. Is the set empty? 329null :: IntSet -> Bool 330null Nil = True 331null _ = False 332{-# INLINE null #-} 333 334-- | /O(n)/. Cardinality of the set. 335size :: IntSet -> Int 336size = go 0 337 where 338 go !acc (Bin _ _ l r) = go (go acc l) r 339 go acc (Tip _ bm) = acc + bitcount 0 bm 340 go acc Nil = acc 341 342-- | /O(min(n,W))/. Is the value a member of the set? 343 344-- See Note: Local 'go' functions and capturing. 345member :: Key -> IntSet -> Bool 346member !x = go 347 where 348 go (Bin p m l r) 349 | nomatch x p m = False 350 | zero x m = go l 351 | otherwise = go r 352 go (Tip y bm) = prefixOf x == y && bitmapOf x .&. bm /= 0 353 go Nil = False 354 355-- | /O(min(n,W))/. Is the element not in the set? 356notMember :: Key -> IntSet -> Bool 357notMember k = not . member k 358 359-- | /O(log n)/. Find largest element smaller than the given one. 360-- 361-- > lookupLT 3 (fromList [3, 5]) == Nothing 362-- > lookupLT 5 (fromList [3, 5]) == Just 3 363 364-- See Note: Local 'go' functions and capturing. 365lookupLT :: Key -> IntSet -> Maybe Key 366lookupLT !x t = case t of 367 Bin _ m l r | m < 0 -> if x >= 0 then go r l else go Nil r 368 _ -> go Nil t 369 where 370 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMax def else unsafeFindMax r 371 | zero x m = go def l 372 | otherwise = go l r 373 go def (Tip kx bm) | prefixOf x > kx = Just $ kx + highestBitSet bm 374 | prefixOf x == kx && maskLT /= 0 = Just $ kx + highestBitSet maskLT 375 | otherwise = unsafeFindMax def 376 where maskLT = (bitmapOf x - 1) .&. bm 377 go def Nil = unsafeFindMax def 378 379 380-- | /O(log n)/. Find smallest element greater than the given one. 381-- 382-- > lookupGT 4 (fromList [3, 5]) == Just 5 383-- > lookupGT 5 (fromList [3, 5]) == Nothing 384 385-- See Note: Local 'go' functions and capturing. 386lookupGT :: Key -> IntSet -> Maybe Key 387lookupGT !x t = case t of 388 Bin _ m l r | m < 0 -> if x >= 0 then go Nil l else go l r 389 _ -> go Nil t 390 where 391 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMin l else unsafeFindMin def 392 | zero x m = go r l 393 | otherwise = go def r 394 go def (Tip kx bm) | prefixOf x < kx = Just $ kx + lowestBitSet bm 395 | prefixOf x == kx && maskGT /= 0 = Just $ kx + lowestBitSet maskGT 396 | otherwise = unsafeFindMin def 397 where maskGT = (- ((bitmapOf x) `shiftLL` 1)) .&. bm 398 go def Nil = unsafeFindMin def 399 400 401-- | /O(log n)/. Find largest element smaller or equal to the given one. 402-- 403-- > lookupLE 2 (fromList [3, 5]) == Nothing 404-- > lookupLE 4 (fromList [3, 5]) == Just 3 405-- > lookupLE 5 (fromList [3, 5]) == Just 5 406 407-- See Note: Local 'go' functions and capturing. 408lookupLE :: Key -> IntSet -> Maybe Key 409lookupLE !x t = case t of 410 Bin _ m l r | m < 0 -> if x >= 0 then go r l else go Nil r 411 _ -> go Nil t 412 where 413 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMax def else unsafeFindMax r 414 | zero x m = go def l 415 | otherwise = go l r 416 go def (Tip kx bm) | prefixOf x > kx = Just $ kx + highestBitSet bm 417 | prefixOf x == kx && maskLE /= 0 = Just $ kx + highestBitSet maskLE 418 | otherwise = unsafeFindMax def 419 where maskLE = (((bitmapOf x) `shiftLL` 1) - 1) .&. bm 420 go def Nil = unsafeFindMax def 421 422 423-- | /O(log n)/. Find smallest element greater or equal to the given one. 424-- 425-- > lookupGE 3 (fromList [3, 5]) == Just 3 426-- > lookupGE 4 (fromList [3, 5]) == Just 5 427-- > lookupGE 6 (fromList [3, 5]) == Nothing 428 429-- See Note: Local 'go' functions and capturing. 430lookupGE :: Key -> IntSet -> Maybe Key 431lookupGE !x t = case t of 432 Bin _ m l r | m < 0 -> if x >= 0 then go Nil l else go l r 433 _ -> go Nil t 434 where 435 go def (Bin p m l r) | nomatch x p m = if x < p then unsafeFindMin l else unsafeFindMin def 436 | zero x m = go r l 437 | otherwise = go def r 438 go def (Tip kx bm) | prefixOf x < kx = Just $ kx + lowestBitSet bm 439 | prefixOf x == kx && maskGE /= 0 = Just $ kx + lowestBitSet maskGE 440 | otherwise = unsafeFindMin def 441 where maskGE = (- (bitmapOf x)) .&. bm 442 go def Nil = unsafeFindMin def 443 444 445 446-- Helper function for lookupGE and lookupGT. It assumes that if a Bin node is 447-- given, it has m > 0. 448unsafeFindMin :: IntSet -> Maybe Key 449unsafeFindMin Nil = Nothing 450unsafeFindMin (Tip kx bm) = Just $ kx + lowestBitSet bm 451unsafeFindMin (Bin _ _ l _) = unsafeFindMin l 452 453-- Helper function for lookupLE and lookupLT. It assumes that if a Bin node is 454-- given, it has m > 0. 455unsafeFindMax :: IntSet -> Maybe Key 456unsafeFindMax Nil = Nothing 457unsafeFindMax (Tip kx bm) = Just $ kx + highestBitSet bm 458unsafeFindMax (Bin _ _ _ r) = unsafeFindMax r 459 460{-------------------------------------------------------------------- 461 Construction 462--------------------------------------------------------------------} 463-- | /O(1)/. The empty set. 464empty :: IntSet 465empty 466 = Nil 467{-# INLINE empty #-} 468 469-- | /O(1)/. A set of one element. 470singleton :: Key -> IntSet 471singleton x 472 = Tip (prefixOf x) (bitmapOf x) 473{-# INLINE singleton #-} 474 475{-------------------------------------------------------------------- 476 Insert 477--------------------------------------------------------------------} 478-- | /O(min(n,W))/. Add a value to the set. There is no left- or right bias for 479-- IntSets. 480insert :: Key -> IntSet -> IntSet 481insert !x = insertBM (prefixOf x) (bitmapOf x) 482 483-- Helper function for insert and union. 484insertBM :: Prefix -> BitMap -> IntSet -> IntSet 485insertBM !kx !bm t@(Bin p m l r) 486 | nomatch kx p m = link kx (Tip kx bm) p t 487 | zero kx m = Bin p m (insertBM kx bm l) r 488 | otherwise = Bin p m l (insertBM kx bm r) 489insertBM kx bm t@(Tip kx' bm') 490 | kx' == kx = Tip kx' (bm .|. bm') 491 | otherwise = link kx (Tip kx bm) kx' t 492insertBM kx bm Nil = Tip kx bm 493 494-- | /O(min(n,W))/. Delete a value in the set. Returns the 495-- original set when the value was not present. 496delete :: Key -> IntSet -> IntSet 497delete !x = deleteBM (prefixOf x) (bitmapOf x) 498 499-- Deletes all values mentioned in the BitMap from the set. 500-- Helper function for delete and difference. 501deleteBM :: Prefix -> BitMap -> IntSet -> IntSet 502deleteBM !kx !bm t@(Bin p m l r) 503 | nomatch kx p m = t 504 | zero kx m = bin p m (deleteBM kx bm l) r 505 | otherwise = bin p m l (deleteBM kx bm r) 506deleteBM kx bm t@(Tip kx' bm') 507 | kx' == kx = tip kx (bm' .&. complement bm) 508 | otherwise = t 509deleteBM _ _ Nil = Nil 510 511-- | /O(min(n,W))/. @('alterF' f x s)@ can delete or insert @x@ in @s@ depending 512-- on whether it is already present in @s@. 513-- 514-- In short: 515-- 516-- @ 517-- 'member' x \<$\> 'alterF' f x s = f ('member' x s) 518-- @ 519-- 520-- Note: 'alterF' is a variant of the @at@ combinator from "Control.Lens.At". 521-- 522-- @since 0.6.3.1 523alterF :: Functor f => (Bool -> f Bool) -> Key -> IntSet -> f IntSet 524alterF f k s = fmap choose (f member_) 525 where 526 member_ = member k s 527 528 (inserted, deleted) 529 | member_ = (s , delete k s) 530 | otherwise = (insert k s, s ) 531 532 choose True = inserted 533 choose False = deleted 534#ifndef __GLASGOW_HASKELL__ 535{-# INLINE alterF #-} 536#else 537{-# INLINABLE [2] alterF #-} 538 539{-# RULES 540"alterF/Const" forall k (f :: Bool -> Const a Bool) . alterF f k = \s -> Const . getConst . f $ member k s 541 #-} 542#endif 543 544#if MIN_VERSION_base(4,8,0) 545{-# SPECIALIZE alterF :: (Bool -> Identity Bool) -> Key -> IntSet -> Identity IntSet #-} 546#endif 547 548{-------------------------------------------------------------------- 549 Union 550--------------------------------------------------------------------} 551-- | The union of a list of sets. 552unions :: Foldable f => f IntSet -> IntSet 553unions xs 554 = Foldable.foldl' union empty xs 555 556 557-- | /O(n+m)/. The union of two sets. 558union :: IntSet -> IntSet -> IntSet 559union t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2) 560 | shorter m1 m2 = union1 561 | shorter m2 m1 = union2 562 | p1 == p2 = Bin p1 m1 (union l1 l2) (union r1 r2) 563 | otherwise = link p1 t1 p2 t2 564 where 565 union1 | nomatch p2 p1 m1 = link p1 t1 p2 t2 566 | zero p2 m1 = Bin p1 m1 (union l1 t2) r1 567 | otherwise = Bin p1 m1 l1 (union r1 t2) 568 569 union2 | nomatch p1 p2 m2 = link p1 t1 p2 t2 570 | zero p1 m2 = Bin p2 m2 (union t1 l2) r2 571 | otherwise = Bin p2 m2 l2 (union t1 r2) 572 573union t@(Bin _ _ _ _) (Tip kx bm) = insertBM kx bm t 574union t@(Bin _ _ _ _) Nil = t 575union (Tip kx bm) t = insertBM kx bm t 576union Nil t = t 577 578 579{-------------------------------------------------------------------- 580 Difference 581--------------------------------------------------------------------} 582-- | /O(n+m)/. Difference between two sets. 583difference :: IntSet -> IntSet -> IntSet 584difference t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2) 585 | shorter m1 m2 = difference1 586 | shorter m2 m1 = difference2 587 | p1 == p2 = bin p1 m1 (difference l1 l2) (difference r1 r2) 588 | otherwise = t1 589 where 590 difference1 | nomatch p2 p1 m1 = t1 591 | zero p2 m1 = bin p1 m1 (difference l1 t2) r1 592 | otherwise = bin p1 m1 l1 (difference r1 t2) 593 594 difference2 | nomatch p1 p2 m2 = t1 595 | zero p1 m2 = difference t1 l2 596 | otherwise = difference t1 r2 597 598difference t@(Bin _ _ _ _) (Tip kx bm) = deleteBM kx bm t 599difference t@(Bin _ _ _ _) Nil = t 600 601difference t1@(Tip kx bm) t2 = differenceTip t2 602 where differenceTip (Bin p2 m2 l2 r2) | nomatch kx p2 m2 = t1 603 | zero kx m2 = differenceTip l2 604 | otherwise = differenceTip r2 605 differenceTip (Tip kx2 bm2) | kx == kx2 = tip kx (bm .&. complement bm2) 606 | otherwise = t1 607 differenceTip Nil = t1 608 609difference Nil _ = Nil 610 611 612 613{-------------------------------------------------------------------- 614 Intersection 615--------------------------------------------------------------------} 616-- | /O(n+m)/. The intersection of two sets. 617intersection :: IntSet -> IntSet -> IntSet 618intersection t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2) 619 | shorter m1 m2 = intersection1 620 | shorter m2 m1 = intersection2 621 | p1 == p2 = bin p1 m1 (intersection l1 l2) (intersection r1 r2) 622 | otherwise = Nil 623 where 624 intersection1 | nomatch p2 p1 m1 = Nil 625 | zero p2 m1 = intersection l1 t2 626 | otherwise = intersection r1 t2 627 628 intersection2 | nomatch p1 p2 m2 = Nil 629 | zero p1 m2 = intersection t1 l2 630 | otherwise = intersection t1 r2 631 632intersection t1@(Bin _ _ _ _) (Tip kx2 bm2) = intersectBM t1 633 where intersectBM (Bin p1 m1 l1 r1) | nomatch kx2 p1 m1 = Nil 634 | zero kx2 m1 = intersectBM l1 635 | otherwise = intersectBM r1 636 intersectBM (Tip kx1 bm1) | kx1 == kx2 = tip kx1 (bm1 .&. bm2) 637 | otherwise = Nil 638 intersectBM Nil = Nil 639 640intersection (Bin _ _ _ _) Nil = Nil 641 642intersection (Tip kx1 bm1) t2 = intersectBM t2 643 where intersectBM (Bin p2 m2 l2 r2) | nomatch kx1 p2 m2 = Nil 644 | zero kx1 m2 = intersectBM l2 645 | otherwise = intersectBM r2 646 intersectBM (Tip kx2 bm2) | kx1 == kx2 = tip kx1 (bm1 .&. bm2) 647 | otherwise = Nil 648 intersectBM Nil = Nil 649 650intersection Nil _ = Nil 651 652{-------------------------------------------------------------------- 653 Subset 654--------------------------------------------------------------------} 655-- | /O(n+m)/. Is this a proper subset? (ie. a subset but not equal). 656isProperSubsetOf :: IntSet -> IntSet -> Bool 657isProperSubsetOf t1 t2 658 = case subsetCmp t1 t2 of 659 LT -> True 660 _ -> False 661 662subsetCmp :: IntSet -> IntSet -> Ordering 663subsetCmp t1@(Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2) 664 | shorter m1 m2 = GT 665 | shorter m2 m1 = case subsetCmpLt of 666 GT -> GT 667 _ -> LT 668 | p1 == p2 = subsetCmpEq 669 | otherwise = GT -- disjoint 670 where 671 subsetCmpLt | nomatch p1 p2 m2 = GT 672 | zero p1 m2 = subsetCmp t1 l2 673 | otherwise = subsetCmp t1 r2 674 subsetCmpEq = case (subsetCmp l1 l2, subsetCmp r1 r2) of 675 (GT,_ ) -> GT 676 (_ ,GT) -> GT 677 (EQ,EQ) -> EQ 678 _ -> LT 679 680subsetCmp (Bin _ _ _ _) _ = GT 681subsetCmp (Tip kx1 bm1) (Tip kx2 bm2) 682 | kx1 /= kx2 = GT -- disjoint 683 | bm1 == bm2 = EQ 684 | bm1 .&. complement bm2 == 0 = LT 685 | otherwise = GT 686subsetCmp t1@(Tip kx _) (Bin p m l r) 687 | nomatch kx p m = GT 688 | zero kx m = case subsetCmp t1 l of GT -> GT ; _ -> LT 689 | otherwise = case subsetCmp t1 r of GT -> GT ; _ -> LT 690subsetCmp (Tip _ _) Nil = GT -- disjoint 691subsetCmp Nil Nil = EQ 692subsetCmp Nil _ = LT 693 694-- | /O(n+m)/. Is this a subset? 695-- @(s1 \`isSubsetOf\` s2)@ tells whether @s1@ is a subset of @s2@. 696 697isSubsetOf :: IntSet -> IntSet -> Bool 698isSubsetOf t1@(Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2) 699 | shorter m1 m2 = False 700 | shorter m2 m1 = match p1 p2 m2 && (if zero p1 m2 then isSubsetOf t1 l2 701 else isSubsetOf t1 r2) 702 | otherwise = (p1==p2) && isSubsetOf l1 l2 && isSubsetOf r1 r2 703isSubsetOf (Bin _ _ _ _) _ = False 704isSubsetOf (Tip kx1 bm1) (Tip kx2 bm2) = kx1 == kx2 && bm1 .&. complement bm2 == 0 705isSubsetOf t1@(Tip kx _) (Bin p m l r) 706 | nomatch kx p m = False 707 | zero kx m = isSubsetOf t1 l 708 | otherwise = isSubsetOf t1 r 709isSubsetOf (Tip _ _) Nil = False 710isSubsetOf Nil _ = True 711 712 713{-------------------------------------------------------------------- 714 Disjoint 715--------------------------------------------------------------------} 716-- | /O(n+m)/. Check whether two sets are disjoint (i.e. their intersection 717-- is empty). 718-- 719-- > disjoint (fromList [2,4,6]) (fromList [1,3]) == True 720-- > disjoint (fromList [2,4,6,8]) (fromList [2,3,5,7]) == False 721-- > disjoint (fromList [1,2]) (fromList [1,2,3,4]) == False 722-- > disjoint (fromList []) (fromList []) == True 723-- 724-- @since 0.5.11 725disjoint :: IntSet -> IntSet -> Bool 726disjoint t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2) 727 | shorter m1 m2 = disjoint1 728 | shorter m2 m1 = disjoint2 729 | p1 == p2 = disjoint l1 l2 && disjoint r1 r2 730 | otherwise = True 731 where 732 disjoint1 | nomatch p2 p1 m1 = True 733 | zero p2 m1 = disjoint l1 t2 734 | otherwise = disjoint r1 t2 735 736 disjoint2 | nomatch p1 p2 m2 = True 737 | zero p1 m2 = disjoint t1 l2 738 | otherwise = disjoint t1 r2 739 740disjoint t1@(Bin _ _ _ _) (Tip kx2 bm2) = disjointBM t1 741 where disjointBM (Bin p1 m1 l1 r1) | nomatch kx2 p1 m1 = True 742 | zero kx2 m1 = disjointBM l1 743 | otherwise = disjointBM r1 744 disjointBM (Tip kx1 bm1) | kx1 == kx2 = (bm1 .&. bm2) == 0 745 | otherwise = True 746 disjointBM Nil = True 747 748disjoint (Bin _ _ _ _) Nil = True 749 750disjoint (Tip kx1 bm1) t2 = disjointBM t2 751 where disjointBM (Bin p2 m2 l2 r2) | nomatch kx1 p2 m2 = True 752 | zero kx1 m2 = disjointBM l2 753 | otherwise = disjointBM r2 754 disjointBM (Tip kx2 bm2) | kx1 == kx2 = (bm1 .&. bm2) == 0 755 | otherwise = True 756 disjointBM Nil = True 757 758disjoint Nil _ = True 759 760 761{-------------------------------------------------------------------- 762 Filter 763--------------------------------------------------------------------} 764-- | /O(n)/. Filter all elements that satisfy some predicate. 765filter :: (Key -> Bool) -> IntSet -> IntSet 766filter predicate t 767 = case t of 768 Bin p m l r 769 -> bin p m (filter predicate l) (filter predicate r) 770 Tip kx bm 771 -> tip kx (foldl'Bits 0 (bitPred kx) 0 bm) 772 Nil -> Nil 773 where bitPred kx bm bi | predicate (kx + bi) = bm .|. bitmapOfSuffix bi 774 | otherwise = bm 775 {-# INLINE bitPred #-} 776 777-- | /O(n)/. partition the set according to some predicate. 778partition :: (Key -> Bool) -> IntSet -> (IntSet,IntSet) 779partition predicate0 t0 = toPair $ go predicate0 t0 780 where 781 go predicate t 782 = case t of 783 Bin p m l r 784 -> let (l1 :*: l2) = go predicate l 785 (r1 :*: r2) = go predicate r 786 in bin p m l1 r1 :*: bin p m l2 r2 787 Tip kx bm 788 -> let bm1 = foldl'Bits 0 (bitPred kx) 0 bm 789 in tip kx bm1 :*: tip kx (bm `xor` bm1) 790 Nil -> (Nil :*: Nil) 791 where bitPred kx bm bi | predicate (kx + bi) = bm .|. bitmapOfSuffix bi 792 | otherwise = bm 793 {-# INLINE bitPred #-} 794 795 796-- | /O(min(n,W))/. The expression (@'split' x set@) is a pair @(set1,set2)@ 797-- where @set1@ comprises the elements of @set@ less than @x@ and @set2@ 798-- comprises the elements of @set@ greater than @x@. 799-- 800-- > split 3 (fromList [1..5]) == (fromList [1,2], fromList [4,5]) 801split :: Key -> IntSet -> (IntSet,IntSet) 802split x t = 803 case t of 804 Bin _ m l r 805 | m < 0 -> if x >= 0 -- handle negative numbers. 806 then case go x l of (lt :*: gt) -> let !lt' = union lt r 807 in (lt', gt) 808 else case go x r of (lt :*: gt) -> let !gt' = union gt l 809 in (lt, gt') 810 _ -> case go x t of 811 (lt :*: gt) -> (lt, gt) 812 where 813 go !x' t'@(Bin p m l r) 814 | match x' p m = if zero x' m 815 then case go x' l of 816 (lt :*: gt) -> lt :*: union gt r 817 else case go x' r of 818 (lt :*: gt) -> union lt l :*: gt 819 | otherwise = if x' < p then (Nil :*: t') 820 else (t' :*: Nil) 821 go x' t'@(Tip kx' bm) 822 | kx' > x' = (Nil :*: t') 823 -- equivalent to kx' > prefixOf x' 824 | kx' < prefixOf x' = (t' :*: Nil) 825 | otherwise = tip kx' (bm .&. lowerBitmap) :*: tip kx' (bm .&. higherBitmap) 826 where lowerBitmap = bitmapOf x' - 1 827 higherBitmap = complement (lowerBitmap + bitmapOf x') 828 go _ Nil = (Nil :*: Nil) 829 830-- | /O(min(n,W))/. Performs a 'split' but also returns whether the pivot 831-- element was found in the original set. 832splitMember :: Key -> IntSet -> (IntSet,Bool,IntSet) 833splitMember x t = 834 case t of 835 Bin _ m l r | m < 0 -> if x >= 0 836 then case go x l of 837 (lt, fnd, gt) -> let !lt' = union lt r 838 in (lt', fnd, gt) 839 else case go x r of 840 (lt, fnd, gt) -> let !gt' = union gt l 841 in (lt, fnd, gt') 842 _ -> go x t 843 where 844 go x' t'@(Bin p m l r) 845 | match x' p m = if zero x' m 846 then case go x' l of 847 (lt, fnd, gt) -> (lt, fnd, union gt r) 848 else case go x' r of 849 (lt, fnd, gt) -> (union lt l, fnd, gt) 850 | otherwise = if x' < p then (Nil, False, t') else (t', False, Nil) 851 go x' t'@(Tip kx' bm) 852 | kx' > x' = (Nil, False, t') 853 -- equivalent to kx' > prefixOf x' 854 | kx' < prefixOf x' = (t', False, Nil) 855 | otherwise = let !lt = tip kx' (bm .&. lowerBitmap) 856 !found = (bm .&. bitmapOfx') /= 0 857 !gt = tip kx' (bm .&. higherBitmap) 858 in (lt, found, gt) 859 where bitmapOfx' = bitmapOf x' 860 lowerBitmap = bitmapOfx' - 1 861 higherBitmap = complement (lowerBitmap + bitmapOfx') 862 go _ Nil = (Nil, False, Nil) 863 864{---------------------------------------------------------------------- 865 Min/Max 866----------------------------------------------------------------------} 867 868-- | /O(min(n,W))/. Retrieves the maximal key of the set, and the set 869-- stripped of that element, or 'Nothing' if passed an empty set. 870maxView :: IntSet -> Maybe (Key, IntSet) 871maxView t = 872 case t of Nil -> Nothing 873 Bin p m l r | m < 0 -> case go l of (result, l') -> Just (result, bin p m l' r) 874 _ -> Just (go t) 875 where 876 go (Bin p m l r) = case go r of (result, r') -> (result, bin p m l r') 877 go (Tip kx bm) = case highestBitSet bm of bi -> (kx + bi, tip kx (bm .&. complement (bitmapOfSuffix bi))) 878 go Nil = error "maxView Nil" 879 880-- | /O(min(n,W))/. Retrieves the minimal key of the set, and the set 881-- stripped of that element, or 'Nothing' if passed an empty set. 882minView :: IntSet -> Maybe (Key, IntSet) 883minView t = 884 case t of Nil -> Nothing 885 Bin p m l r | m < 0 -> case go r of (result, r') -> Just (result, bin p m l r') 886 _ -> Just (go t) 887 where 888 go (Bin p m l r) = case go l of (result, l') -> (result, bin p m l' r) 889 go (Tip kx bm) = case lowestBitSet bm of bi -> (kx + bi, tip kx (bm .&. complement (bitmapOfSuffix bi))) 890 go Nil = error "minView Nil" 891 892-- | /O(min(n,W))/. Delete and find the minimal element. 893-- 894-- > deleteFindMin set = (findMin set, deleteMin set) 895deleteFindMin :: IntSet -> (Key, IntSet) 896deleteFindMin = fromMaybe (error "deleteFindMin: empty set has no minimal element") . minView 897 898-- | /O(min(n,W))/. Delete and find the maximal element. 899-- 900-- > deleteFindMax set = (findMax set, deleteMax set) 901deleteFindMax :: IntSet -> (Key, IntSet) 902deleteFindMax = fromMaybe (error "deleteFindMax: empty set has no maximal element") . maxView 903 904 905-- | /O(min(n,W))/. The minimal element of the set. 906findMin :: IntSet -> Key 907findMin Nil = error "findMin: empty set has no minimal element" 908findMin (Tip kx bm) = kx + lowestBitSet bm 909findMin (Bin _ m l r) 910 | m < 0 = find r 911 | otherwise = find l 912 where find (Tip kx bm) = kx + lowestBitSet bm 913 find (Bin _ _ l' _) = find l' 914 find Nil = error "findMin Nil" 915 916-- | /O(min(n,W))/. The maximal element of a set. 917findMax :: IntSet -> Key 918findMax Nil = error "findMax: empty set has no maximal element" 919findMax (Tip kx bm) = kx + highestBitSet bm 920findMax (Bin _ m l r) 921 | m < 0 = find l 922 | otherwise = find r 923 where find (Tip kx bm) = kx + highestBitSet bm 924 find (Bin _ _ _ r') = find r' 925 find Nil = error "findMax Nil" 926 927 928-- | /O(min(n,W))/. Delete the minimal element. Returns an empty set if the set is empty. 929-- 930-- Note that this is a change of behaviour for consistency with 'Data.Set.Set' – 931-- versions prior to 0.5 threw an error if the 'IntSet' was already empty. 932deleteMin :: IntSet -> IntSet 933deleteMin = maybe Nil snd . minView 934 935-- | /O(min(n,W))/. Delete the maximal element. Returns an empty set if the set is empty. 936-- 937-- Note that this is a change of behaviour for consistency with 'Data.Set.Set' – 938-- versions prior to 0.5 threw an error if the 'IntSet' was already empty. 939deleteMax :: IntSet -> IntSet 940deleteMax = maybe Nil snd . maxView 941 942{---------------------------------------------------------------------- 943 Map 944----------------------------------------------------------------------} 945 946-- | /O(n*min(n,W))/. 947-- @'map' f s@ is the set obtained by applying @f@ to each element of @s@. 948-- 949-- It's worth noting that the size of the result may be smaller if, 950-- for some @(x,y)@, @x \/= y && f x == f y@ 951 952map :: (Key -> Key) -> IntSet -> IntSet 953map f = fromList . List.map f . toList 954 955-- | /O(n)/. The 956-- 957-- @'mapMonotonic' f s == 'map' f s@, but works only when @f@ is strictly increasing. 958-- /The precondition is not checked./ 959-- Semi-formally, we have: 960-- 961-- > and [x < y ==> f x < f y | x <- ls, y <- ls] 962-- > ==> mapMonotonic f s == map f s 963-- > where ls = toList s 964-- 965-- @since 0.6.3.1 966 967-- Note that for now the test is insufficient to support any fancier implementation. 968mapMonotonic :: (Key -> Key) -> IntSet -> IntSet 969mapMonotonic f = fromDistinctAscList . List.map f . toAscList 970 971 972{-------------------------------------------------------------------- 973 Fold 974--------------------------------------------------------------------} 975-- | /O(n)/. Fold the elements in the set using the given right-associative 976-- binary operator. This function is an equivalent of 'foldr' and is present 977-- for compatibility only. 978-- 979-- /Please note that fold will be deprecated in the future and removed./ 980fold :: (Key -> b -> b) -> b -> IntSet -> b 981fold = foldr 982{-# INLINE fold #-} 983 984-- | /O(n)/. Fold the elements in the set using the given right-associative 985-- binary operator, such that @'foldr' f z == 'Prelude.foldr' f z . 'toAscList'@. 986-- 987-- For example, 988-- 989-- > toAscList set = foldr (:) [] set 990foldr :: (Key -> b -> b) -> b -> IntSet -> b 991foldr f z = \t -> -- Use lambda t to be inlinable with two arguments only. 992 case t of Bin _ m l r | m < 0 -> go (go z l) r -- put negative numbers before 993 | otherwise -> go (go z r) l 994 _ -> go z t 995 where 996 go z' Nil = z' 997 go z' (Tip kx bm) = foldrBits kx f z' bm 998 go z' (Bin _ _ l r) = go (go z' r) l 999{-# INLINE foldr #-} 1000 1001-- | /O(n)/. A strict version of 'foldr'. Each application of the operator is 1002-- evaluated before using the result in the next application. This 1003-- function is strict in the starting value. 1004foldr' :: (Key -> b -> b) -> b -> IntSet -> b 1005foldr' f z = \t -> -- Use lambda t to be inlinable with two arguments only. 1006 case t of Bin _ m l r | m < 0 -> go (go z l) r -- put negative numbers before 1007 | otherwise -> go (go z r) l 1008 _ -> go z t 1009 where 1010 go !z' Nil = z' 1011 go z' (Tip kx bm) = foldr'Bits kx f z' bm 1012 go z' (Bin _ _ l r) = go (go z' r) l 1013{-# INLINE foldr' #-} 1014 1015-- | /O(n)/. Fold the elements in the set using the given left-associative 1016-- binary operator, such that @'foldl' f z == 'Prelude.foldl' f z . 'toAscList'@. 1017-- 1018-- For example, 1019-- 1020-- > toDescList set = foldl (flip (:)) [] set 1021foldl :: (a -> Key -> a) -> a -> IntSet -> a 1022foldl f z = \t -> -- Use lambda t to be inlinable with two arguments only. 1023 case t of Bin _ m l r | m < 0 -> go (go z r) l -- put negative numbers before 1024 | otherwise -> go (go z l) r 1025 _ -> go z t 1026 where 1027 go z' Nil = z' 1028 go z' (Tip kx bm) = foldlBits kx f z' bm 1029 go z' (Bin _ _ l r) = go (go z' l) r 1030{-# INLINE foldl #-} 1031 1032-- | /O(n)/. A strict version of 'foldl'. Each application of the operator is 1033-- evaluated before using the result in the next application. This 1034-- function is strict in the starting value. 1035foldl' :: (a -> Key -> a) -> a -> IntSet -> a 1036foldl' f z = \t -> -- Use lambda t to be inlinable with two arguments only. 1037 case t of Bin _ m l r | m < 0 -> go (go z r) l -- put negative numbers before 1038 | otherwise -> go (go z l) r 1039 _ -> go z t 1040 where 1041 go !z' Nil = z' 1042 go z' (Tip kx bm) = foldl'Bits kx f z' bm 1043 go z' (Bin _ _ l r) = go (go z' l) r 1044{-# INLINE foldl' #-} 1045 1046{-------------------------------------------------------------------- 1047 List variations 1048--------------------------------------------------------------------} 1049-- | /O(n)/. An alias of 'toAscList'. The elements of a set in ascending order. 1050-- Subject to list fusion. 1051elems :: IntSet -> [Key] 1052elems 1053 = toAscList 1054 1055{-------------------------------------------------------------------- 1056 Lists 1057--------------------------------------------------------------------} 1058#if __GLASGOW_HASKELL__ >= 708 1059-- | @since 0.5.6.2 1060instance GHC.Exts.IsList IntSet where 1061 type Item IntSet = Key 1062 fromList = fromList 1063 toList = toList 1064#endif 1065 1066-- | /O(n)/. Convert the set to a list of elements. Subject to list fusion. 1067toList :: IntSet -> [Key] 1068toList 1069 = toAscList 1070 1071-- | /O(n)/. Convert the set to an ascending list of elements. Subject to list 1072-- fusion. 1073toAscList :: IntSet -> [Key] 1074toAscList = foldr (:) [] 1075 1076-- | /O(n)/. Convert the set to a descending list of elements. Subject to list 1077-- fusion. 1078toDescList :: IntSet -> [Key] 1079toDescList = foldl (flip (:)) [] 1080 1081-- List fusion for the list generating functions. 1082#if __GLASGOW_HASKELL__ 1083-- The foldrFB and foldlFB are foldr and foldl equivalents, used for list fusion. 1084-- They are important to convert unfused to{Asc,Desc}List back, see mapFB in prelude. 1085foldrFB :: (Key -> b -> b) -> b -> IntSet -> b 1086foldrFB = foldr 1087{-# INLINE[0] foldrFB #-} 1088foldlFB :: (a -> Key -> a) -> a -> IntSet -> a 1089foldlFB = foldl 1090{-# INLINE[0] foldlFB #-} 1091 1092-- Inline elems and toList, so that we need to fuse only toAscList. 1093{-# INLINE elems #-} 1094{-# INLINE toList #-} 1095 1096-- The fusion is enabled up to phase 2 included. If it does not succeed, 1097-- convert in phase 1 the expanded to{Asc,Desc}List calls back to 1098-- to{Asc,Desc}List. In phase 0, we inline fold{lr}FB (which were used in 1099-- a list fusion, otherwise it would go away in phase 1), and let compiler do 1100-- whatever it wants with to{Asc,Desc}List -- it was forbidden to inline it 1101-- before phase 0, otherwise the fusion rules would not fire at all. 1102{-# NOINLINE[0] toAscList #-} 1103{-# NOINLINE[0] toDescList #-} 1104{-# RULES "IntSet.toAscList" [~1] forall s . toAscList s = GHC.Exts.build (\c n -> foldrFB c n s) #-} 1105{-# RULES "IntSet.toAscListBack" [1] foldrFB (:) [] = toAscList #-} 1106{-# RULES "IntSet.toDescList" [~1] forall s . toDescList s = GHC.Exts.build (\c n -> foldlFB (\xs x -> c x xs) n s) #-} 1107{-# RULES "IntSet.toDescListBack" [1] foldlFB (\xs x -> x : xs) [] = toDescList #-} 1108#endif 1109 1110 1111-- | /O(n*min(n,W))/. Create a set from a list of integers. 1112fromList :: [Key] -> IntSet 1113fromList xs 1114 = Foldable.foldl' ins empty xs 1115 where 1116 ins t x = insert x t 1117 1118-- | /O(n)/. Build a set from an ascending list of elements. 1119-- /The precondition (input list is ascending) is not checked./ 1120fromAscList :: [Key] -> IntSet 1121fromAscList = fromMonoList 1122{-# NOINLINE fromAscList #-} 1123 1124-- | /O(n)/. Build a set from an ascending list of distinct elements. 1125-- /The precondition (input list is strictly ascending) is not checked./ 1126fromDistinctAscList :: [Key] -> IntSet 1127fromDistinctAscList = fromAscList 1128{-# INLINE fromDistinctAscList #-} 1129 1130-- | /O(n)/. Build a set from a monotonic list of elements. 1131-- 1132-- The precise conditions under which this function works are subtle: 1133-- For any branch mask, keys with the same prefix w.r.t. the branch 1134-- mask must occur consecutively in the list. 1135fromMonoList :: [Key] -> IntSet 1136fromMonoList [] = Nil 1137fromMonoList (kx : zs1) = addAll' (prefixOf kx) (bitmapOf kx) zs1 1138 where 1139 -- `addAll'` collects all keys with the prefix `px` into a single 1140 -- bitmap, and then proceeds with `addAll`. 1141 addAll' !px !bm [] 1142 = Tip px bm 1143 addAll' !px !bm (ky : zs) 1144 | px == prefixOf ky 1145 = addAll' px (bm .|. bitmapOf ky) zs 1146 -- inlined: | otherwise = addAll px (Tip px bm) (ky : zs) 1147 | py <- prefixOf ky 1148 , m <- branchMask px py 1149 , Inserted ty zs' <- addMany' m py (bitmapOf ky) zs 1150 = addAll px (linkWithMask m py ty {-px-} (Tip px bm)) zs' 1151 1152 -- for `addAll` and `addMany`, px is /a/ prefix inside the tree `tx` 1153 -- `addAll` consumes the rest of the list, adding to the tree `tx` 1154 addAll !_px !tx [] 1155 = tx 1156 addAll !px !tx (ky : zs) 1157 | py <- prefixOf ky 1158 , m <- branchMask px py 1159 , Inserted ty zs' <- addMany' m py (bitmapOf ky) zs 1160 = addAll px (linkWithMask m py ty {-px-} tx) zs' 1161 1162 -- `addMany'` is similar to `addAll'`, but proceeds with `addMany'`. 1163 addMany' !_m !px !bm [] 1164 = Inserted (Tip px bm) [] 1165 addMany' !m !px !bm zs0@(ky : zs) 1166 | px == prefixOf ky 1167 = addMany' m px (bm .|. bitmapOf ky) zs 1168 -- inlined: | otherwise = addMany m px (Tip px bm) (ky : zs) 1169 | mask px m /= mask ky m 1170 = Inserted (Tip (prefixOf px) bm) zs0 1171 | py <- prefixOf ky 1172 , mxy <- branchMask px py 1173 , Inserted ty zs' <- addMany' mxy py (bitmapOf ky) zs 1174 = addMany m px (linkWithMask mxy py ty {-px-} (Tip px bm)) zs' 1175 1176 -- `addAll` adds to `tx` all keys whose prefix w.r.t. `m` agrees with `px`. 1177 addMany !_m !_px tx [] 1178 = Inserted tx [] 1179 addMany !m !px tx zs0@(ky : zs) 1180 | mask px m /= mask ky m 1181 = Inserted tx zs0 1182 | py <- prefixOf ky 1183 , mxy <- branchMask px py 1184 , Inserted ty zs' <- addMany' mxy py (bitmapOf ky) zs 1185 = addMany m px (linkWithMask mxy py ty {-px-} tx) zs' 1186{-# INLINE fromMonoList #-} 1187 1188data Inserted = Inserted !IntSet ![Key] 1189 1190{-------------------------------------------------------------------- 1191 Eq 1192--------------------------------------------------------------------} 1193instance Eq IntSet where 1194 t1 == t2 = equal t1 t2 1195 t1 /= t2 = nequal t1 t2 1196 1197equal :: IntSet -> IntSet -> Bool 1198equal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2) 1199 = (m1 == m2) && (p1 == p2) && (equal l1 l2) && (equal r1 r2) 1200equal (Tip kx1 bm1) (Tip kx2 bm2) 1201 = kx1 == kx2 && bm1 == bm2 1202equal Nil Nil = True 1203equal _ _ = False 1204 1205nequal :: IntSet -> IntSet -> Bool 1206nequal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2) 1207 = (m1 /= m2) || (p1 /= p2) || (nequal l1 l2) || (nequal r1 r2) 1208nequal (Tip kx1 bm1) (Tip kx2 bm2) 1209 = kx1 /= kx2 || bm1 /= bm2 1210nequal Nil Nil = False 1211nequal _ _ = True 1212 1213{-------------------------------------------------------------------- 1214 Ord 1215--------------------------------------------------------------------} 1216 1217instance Ord IntSet where 1218 compare s1 s2 = compare (toAscList s1) (toAscList s2) 1219 -- tentative implementation. See if more efficient exists. 1220 1221{-------------------------------------------------------------------- 1222 Show 1223--------------------------------------------------------------------} 1224instance Show IntSet where 1225 showsPrec p xs = showParen (p > 10) $ 1226 showString "fromList " . shows (toList xs) 1227 1228{-------------------------------------------------------------------- 1229 Read 1230--------------------------------------------------------------------} 1231instance Read IntSet where 1232#ifdef __GLASGOW_HASKELL__ 1233 readPrec = parens $ prec 10 $ do 1234 Ident "fromList" <- lexP 1235 xs <- readPrec 1236 return (fromList xs) 1237 1238 readListPrec = readListPrecDefault 1239#else 1240 readsPrec p = readParen (p > 10) $ \ r -> do 1241 ("fromList",s) <- lex r 1242 (xs,t) <- reads s 1243 return (fromList xs,t) 1244#endif 1245 1246{-------------------------------------------------------------------- 1247 Typeable 1248--------------------------------------------------------------------} 1249 1250INSTANCE_TYPEABLE0(IntSet) 1251 1252{-------------------------------------------------------------------- 1253 NFData 1254--------------------------------------------------------------------} 1255 1256-- The IntSet constructors consist only of strict fields of Ints and 1257-- IntSets, thus the default NFData instance which evaluates to whnf 1258-- should suffice 1259instance NFData IntSet where rnf x = seq x () 1260 1261{-------------------------------------------------------------------- 1262 Debugging 1263--------------------------------------------------------------------} 1264-- | /O(n)/. Show the tree that implements the set. The tree is shown 1265-- in a compressed, hanging format. 1266showTree :: IntSet -> String 1267showTree s 1268 = showTreeWith True False s 1269 1270 1271{- | /O(n)/. The expression (@'showTreeWith' hang wide map@) shows 1272 the tree that implements the set. If @hang@ is 1273 'True', a /hanging/ tree is shown otherwise a rotated tree is shown. If 1274 @wide@ is 'True', an extra wide version is shown. 1275-} 1276showTreeWith :: Bool -> Bool -> IntSet -> String 1277showTreeWith hang wide t 1278 | hang = (showsTreeHang wide [] t) "" 1279 | otherwise = (showsTree wide [] [] t) "" 1280 1281showsTree :: Bool -> [String] -> [String] -> IntSet -> ShowS 1282showsTree wide lbars rbars t 1283 = case t of 1284 Bin p m l r 1285 -> showsTree wide (withBar rbars) (withEmpty rbars) r . 1286 showWide wide rbars . 1287 showsBars lbars . showString (showBin p m) . showString "\n" . 1288 showWide wide lbars . 1289 showsTree wide (withEmpty lbars) (withBar lbars) l 1290 Tip kx bm 1291 -> showsBars lbars . showString " " . shows kx . showString " + " . 1292 showsBitMap bm . showString "\n" 1293 Nil -> showsBars lbars . showString "|\n" 1294 1295showsTreeHang :: Bool -> [String] -> IntSet -> ShowS 1296showsTreeHang wide bars t 1297 = case t of 1298 Bin p m l r 1299 -> showsBars bars . showString (showBin p m) . showString "\n" . 1300 showWide wide bars . 1301 showsTreeHang wide (withBar bars) l . 1302 showWide wide bars . 1303 showsTreeHang wide (withEmpty bars) r 1304 Tip kx bm 1305 -> showsBars bars . showString " " . shows kx . showString " + " . 1306 showsBitMap bm . showString "\n" 1307 Nil -> showsBars bars . showString "|\n" 1308 1309showBin :: Prefix -> Mask -> String 1310showBin _ _ 1311 = "*" -- ++ show (p,m) 1312 1313showWide :: Bool -> [String] -> String -> String 1314showWide wide bars 1315 | wide = showString (concat (reverse bars)) . showString "|\n" 1316 | otherwise = id 1317 1318showsBars :: [String] -> ShowS 1319showsBars [] = id 1320showsBars bars = showString (concat (reverse (tail bars))) . showString node 1321 1322showsBitMap :: Word -> ShowS 1323showsBitMap = showString . showBitMap 1324 1325showBitMap :: Word -> String 1326showBitMap w = show $ foldrBits 0 (:) [] w 1327 1328node :: String 1329node = "+--" 1330 1331withBar, withEmpty :: [String] -> [String] 1332withBar bars = "| ":bars 1333withEmpty bars = " ":bars 1334 1335 1336{-------------------------------------------------------------------- 1337 Helpers 1338--------------------------------------------------------------------} 1339{-------------------------------------------------------------------- 1340 Link 1341--------------------------------------------------------------------} 1342link :: Prefix -> IntSet -> Prefix -> IntSet -> IntSet 1343link p1 t1 p2 t2 = linkWithMask (branchMask p1 p2) p1 t1 {-p2-} t2 1344{-# INLINE link #-} 1345 1346-- `linkWithMask` is useful when the `branchMask` has already been computed 1347linkWithMask :: Mask -> Prefix -> IntSet -> IntSet -> IntSet 1348linkWithMask m p1 t1 {-p2-} t2 1349 | zero p1 m = Bin p m t1 t2 1350 | otherwise = Bin p m t2 t1 1351 where 1352 p = mask p1 m 1353{-# INLINE linkWithMask #-} 1354 1355{-------------------------------------------------------------------- 1356 @bin@ assures that we never have empty trees within a tree. 1357--------------------------------------------------------------------} 1358bin :: Prefix -> Mask -> IntSet -> IntSet -> IntSet 1359bin _ _ l Nil = l 1360bin _ _ Nil r = r 1361bin p m l r = Bin p m l r 1362{-# INLINE bin #-} 1363 1364{-------------------------------------------------------------------- 1365 @tip@ assures that we never have empty bitmaps within a tree. 1366--------------------------------------------------------------------} 1367tip :: Prefix -> BitMap -> IntSet 1368tip _ 0 = Nil 1369tip kx bm = Tip kx bm 1370{-# INLINE tip #-} 1371 1372 1373{---------------------------------------------------------------------- 1374 Functions that generate Prefix and BitMap of a Key or a Suffix. 1375----------------------------------------------------------------------} 1376 1377suffixBitMask :: Int 1378#if MIN_VERSION_base(4,7,0) 1379suffixBitMask = finiteBitSize (undefined::Word) - 1 1380#else 1381suffixBitMask = bitSize (undefined::Word) - 1 1382#endif 1383{-# INLINE suffixBitMask #-} 1384 1385prefixBitMask :: Int 1386prefixBitMask = complement suffixBitMask 1387{-# INLINE prefixBitMask #-} 1388 1389prefixOf :: Int -> Prefix 1390prefixOf x = x .&. prefixBitMask 1391{-# INLINE prefixOf #-} 1392 1393suffixOf :: Int -> Int 1394suffixOf x = x .&. suffixBitMask 1395{-# INLINE suffixOf #-} 1396 1397bitmapOfSuffix :: Int -> BitMap 1398bitmapOfSuffix s = 1 `shiftLL` s 1399{-# INLINE bitmapOfSuffix #-} 1400 1401bitmapOf :: Int -> BitMap 1402bitmapOf x = bitmapOfSuffix (suffixOf x) 1403{-# INLINE bitmapOf #-} 1404 1405 1406{-------------------------------------------------------------------- 1407 Endian independent bit twiddling 1408--------------------------------------------------------------------} 1409-- Returns True iff the bits set in i and the Mask m are disjoint. 1410zero :: Int -> Mask -> Bool 1411zero i m 1412 = (natFromInt i) .&. (natFromInt m) == 0 1413{-# INLINE zero #-} 1414 1415nomatch,match :: Int -> Prefix -> Mask -> Bool 1416nomatch i p m 1417 = (mask i m) /= p 1418{-# INLINE nomatch #-} 1419 1420match i p m 1421 = (mask i m) == p 1422{-# INLINE match #-} 1423 1424-- Suppose a is largest such that 2^a divides 2*m. 1425-- Then mask i m is i with the low a bits zeroed out. 1426mask :: Int -> Mask -> Prefix 1427mask i m 1428 = maskW (natFromInt i) (natFromInt m) 1429{-# INLINE mask #-} 1430 1431{-------------------------------------------------------------------- 1432 Big endian operations 1433--------------------------------------------------------------------} 1434maskW :: Nat -> Nat -> Prefix 1435maskW i m 1436 = intFromNat (i .&. (complement (m-1) `xor` m)) 1437{-# INLINE maskW #-} 1438 1439shorter :: Mask -> Mask -> Bool 1440shorter m1 m2 1441 = (natFromInt m1) > (natFromInt m2) 1442{-# INLINE shorter #-} 1443 1444branchMask :: Prefix -> Prefix -> Mask 1445branchMask p1 p2 1446 = intFromNat (highestBitMask (natFromInt p1 `xor` natFromInt p2)) 1447{-# INLINE branchMask #-} 1448 1449{---------------------------------------------------------------------- 1450 To get best performance, we provide fast implementations of 1451 lowestBitSet, highestBitSet and fold[lr][l]Bits for GHC. 1452 If the intel bsf and bsr instructions ever become GHC primops, 1453 this code should be reimplemented using these. 1454 1455 Performance of this code is crucial for folds, toList, filter, partition. 1456 1457 The signatures of methods in question are placed after this comment. 1458----------------------------------------------------------------------} 1459 1460lowestBitSet :: Nat -> Int 1461highestBitSet :: Nat -> Int 1462foldlBits :: Int -> (a -> Int -> a) -> a -> Nat -> a 1463foldl'Bits :: Int -> (a -> Int -> a) -> a -> Nat -> a 1464foldrBits :: Int -> (Int -> a -> a) -> a -> Nat -> a 1465foldr'Bits :: Int -> (Int -> a -> a) -> a -> Nat -> a 1466 1467{-# INLINE lowestBitSet #-} 1468{-# INLINE highestBitSet #-} 1469{-# INLINE foldlBits #-} 1470{-# INLINE foldl'Bits #-} 1471{-# INLINE foldrBits #-} 1472{-# INLINE foldr'Bits #-} 1473 1474#if defined(__GLASGOW_HASKELL__) && (WORD_SIZE_IN_BITS==32 || WORD_SIZE_IN_BITS==64) 1475indexOfTheOnlyBit :: Nat -> Int 1476{-# INLINE indexOfTheOnlyBit #-} 1477#if MIN_VERSION_base(4,8,0) && (WORD_SIZE_IN_BITS==64) 1478indexOfTheOnlyBit bitmask = countTrailingZeros bitmask 1479 1480lowestBitSet x = countTrailingZeros x 1481 1482highestBitSet x = WORD_SIZE_IN_BITS - 1 - countLeadingZeros x 1483 1484#else 1485{---------------------------------------------------------------------- 1486 For lowestBitSet we use wordsize-dependant implementation based on 1487 multiplication and DeBrujn indeces, which was proposed by Edward Kmett 1488 <http://haskell.org/pipermail/libraries/2011-September/016749.html> 1489 1490 The core of this implementation is fast indexOfTheOnlyBit, 1491 which is given a Nat with exactly one bit set, and returns 1492 its index. 1493 1494 Lot of effort was put in these implementations, please benchmark carefully 1495 before changing this code. 1496----------------------------------------------------------------------} 1497 1498indexOfTheOnlyBit bitmask = 1499 fromIntegral (GHC.Int.I8# (lsbArray `GHC.Exts.indexInt8OffAddr#` unboxInt (intFromNat ((bitmask * magic) `shiftRL` offset)))) 1500 where unboxInt (GHC.Exts.I# i) = i 1501#if WORD_SIZE_IN_BITS==32 1502 magic = 0x077CB531 1503 offset = 27 1504 !lsbArray = "\0\1\28\2\29\14\24\3\30\22\20\15\25\17\4\8\31\27\13\23\21\19\16\7\26\12\18\6\11\5\10\9"# 1505#else 1506 magic = 0x07EDD5E59A4E28C2 1507 offset = 58 1508 !lsbArray = "\63\0\58\1\59\47\53\2\60\39\48\27\54\33\42\3\61\51\37\40\49\18\28\20\55\30\34\11\43\14\22\4\62\57\46\52\38\26\32\41\50\36\17\19\29\10\13\21\56\45\25\31\35\16\9\12\44\24\15\8\23\7\6\5"# 1509#endif 1510-- The lsbArray gets inlined to every call site of indexOfTheOnlyBit. 1511-- That cannot be easily avoided, as GHC forbids top-level Addr# literal. 1512-- One could go around that by supplying getLsbArray :: () -> Addr# marked 1513-- as NOINLINE. But the code size of calling it and processing the result 1514-- is 48B on 32-bit and 56B on 64-bit architectures -- so the 32B and 64B array 1515-- is actually improvement on 32-bit and only a 8B size increase on 64-bit. 1516 1517lowestBitSet x = indexOfTheOnlyBit (lowestBitMask x) 1518 1519highestBitSet x = indexOfTheOnlyBit (highestBitMask x) 1520 1521#endif 1522 1523lowestBitMask :: Nat -> Nat 1524lowestBitMask x = x .&. negate x 1525{-# INLINE lowestBitMask #-} 1526 1527-- Reverse the order of bits in the Nat. 1528revNat :: Nat -> Nat 1529#if WORD_SIZE_IN_BITS==32 1530revNat x1 = case ((x1 `shiftRL` 1) .&. 0x55555555) .|. ((x1 .&. 0x55555555) `shiftLL` 1) of 1531 x2 -> case ((x2 `shiftRL` 2) .&. 0x33333333) .|. ((x2 .&. 0x33333333) `shiftLL` 2) of 1532 x3 -> case ((x3 `shiftRL` 4) .&. 0x0F0F0F0F) .|. ((x3 .&. 0x0F0F0F0F) `shiftLL` 4) of 1533 x4 -> case ((x4 `shiftRL` 8) .&. 0x00FF00FF) .|. ((x4 .&. 0x00FF00FF) `shiftLL` 8) of 1534 x5 -> ( x5 `shiftRL` 16 ) .|. ( x5 `shiftLL` 16); 1535#else 1536revNat x1 = case ((x1 `shiftRL` 1) .&. 0x5555555555555555) .|. ((x1 .&. 0x5555555555555555) `shiftLL` 1) of 1537 x2 -> case ((x2 `shiftRL` 2) .&. 0x3333333333333333) .|. ((x2 .&. 0x3333333333333333) `shiftLL` 2) of 1538 x3 -> case ((x3 `shiftRL` 4) .&. 0x0F0F0F0F0F0F0F0F) .|. ((x3 .&. 0x0F0F0F0F0F0F0F0F) `shiftLL` 4) of 1539 x4 -> case ((x4 `shiftRL` 8) .&. 0x00FF00FF00FF00FF) .|. ((x4 .&. 0x00FF00FF00FF00FF) `shiftLL` 8) of 1540 x5 -> case ((x5 `shiftRL` 16) .&. 0x0000FFFF0000FFFF) .|. ((x5 .&. 0x0000FFFF0000FFFF) `shiftLL` 16) of 1541 x6 -> ( x6 `shiftRL` 32 ) .|. ( x6 `shiftLL` 32); 1542#endif 1543 1544foldlBits prefix f z bitmap = go bitmap z 1545 where go 0 acc = acc 1546 go bm acc = go (bm `xor` bitmask) ((f acc) $! (prefix+bi)) 1547 where 1548 !bitmask = lowestBitMask bm 1549 !bi = indexOfTheOnlyBit bitmask 1550 1551foldl'Bits prefix f z bitmap = go bitmap z 1552 where go 0 acc = acc 1553 go bm !acc = go (bm `xor` bitmask) ((f acc) $! (prefix+bi)) 1554 where !bitmask = lowestBitMask bm 1555 !bi = indexOfTheOnlyBit bitmask 1556 1557foldrBits prefix f z bitmap = go (revNat bitmap) z 1558 where go 0 acc = acc 1559 go bm acc = go (bm `xor` bitmask) ((f $! (prefix+(WORD_SIZE_IN_BITS-1)-bi)) acc) 1560 where !bitmask = lowestBitMask bm 1561 !bi = indexOfTheOnlyBit bitmask 1562 1563 1564foldr'Bits prefix f z bitmap = go (revNat bitmap) z 1565 where go 0 acc = acc 1566 go bm !acc = go (bm `xor` bitmask) ((f $! (prefix+(WORD_SIZE_IN_BITS-1)-bi)) acc) 1567 where !bitmask = lowestBitMask bm 1568 !bi = indexOfTheOnlyBit bitmask 1569 1570#else 1571{---------------------------------------------------------------------- 1572 In general case we use logarithmic implementation of 1573 lowestBitSet and highestBitSet, which works up to bit sizes of 64. 1574 1575 Folds are linear scans. 1576----------------------------------------------------------------------} 1577 1578lowestBitSet n0 = 1579 let (n1,b1) = if n0 .&. 0xFFFFFFFF /= 0 then (n0,0) else (n0 `shiftRL` 32, 32) 1580 (n2,b2) = if n1 .&. 0xFFFF /= 0 then (n1,b1) else (n1 `shiftRL` 16, 16+b1) 1581 (n3,b3) = if n2 .&. 0xFF /= 0 then (n2,b2) else (n2 `shiftRL` 8, 8+b2) 1582 (n4,b4) = if n3 .&. 0xF /= 0 then (n3,b3) else (n3 `shiftRL` 4, 4+b3) 1583 (n5,b5) = if n4 .&. 0x3 /= 0 then (n4,b4) else (n4 `shiftRL` 2, 2+b4) 1584 b6 = if n5 .&. 0x1 /= 0 then b5 else 1+b5 1585 in b6 1586 1587highestBitSet n0 = 1588 let (n1,b1) = if n0 .&. 0xFFFFFFFF00000000 /= 0 then (n0 `shiftRL` 32, 32) else (n0,0) 1589 (n2,b2) = if n1 .&. 0xFFFF0000 /= 0 then (n1 `shiftRL` 16, 16+b1) else (n1,b1) 1590 (n3,b3) = if n2 .&. 0xFF00 /= 0 then (n2 `shiftRL` 8, 8+b2) else (n2,b2) 1591 (n4,b4) = if n3 .&. 0xF0 /= 0 then (n3 `shiftRL` 4, 4+b3) else (n3,b3) 1592 (n5,b5) = if n4 .&. 0xC /= 0 then (n4 `shiftRL` 2, 2+b4) else (n4,b4) 1593 b6 = if n5 .&. 0x2 /= 0 then 1+b5 else b5 1594 in b6 1595 1596foldlBits prefix f z bm = let lb = lowestBitSet bm 1597 in go (prefix+lb) z (bm `shiftRL` lb) 1598 where go !_ acc 0 = acc 1599 go bi acc n | n `testBit` 0 = go (bi + 1) (f acc bi) (n `shiftRL` 1) 1600 | otherwise = go (bi + 1) acc (n `shiftRL` 1) 1601 1602foldl'Bits prefix f z bm = let lb = lowestBitSet bm 1603 in go (prefix+lb) z (bm `shiftRL` lb) 1604 where go !_ !acc 0 = acc 1605 go bi acc n | n `testBit` 0 = go (bi + 1) (f acc bi) (n `shiftRL` 1) 1606 | otherwise = go (bi + 1) acc (n `shiftRL` 1) 1607 1608foldrBits prefix f z bm = let lb = lowestBitSet bm 1609 in go (prefix+lb) (bm `shiftRL` lb) 1610 where go !_ 0 = z 1611 go bi n | n `testBit` 0 = f bi (go (bi + 1) (n `shiftRL` 1)) 1612 | otherwise = go (bi + 1) (n `shiftRL` 1) 1613 1614foldr'Bits prefix f z bm = let lb = lowestBitSet bm 1615 in go (prefix+lb) (bm `shiftRL` lb) 1616 where 1617 go !_ 0 = z 1618 go bi n | n `testBit` 0 = f bi $! go (bi + 1) (n `shiftRL` 1) 1619 | otherwise = go (bi + 1) (n `shiftRL` 1) 1620 1621#endif 1622 1623 1624{-------------------------------------------------------------------- 1625 Utilities 1626--------------------------------------------------------------------} 1627 1628-- | /O(1)/. Decompose a set into pieces based on the structure of the underlying 1629-- tree. This function is useful for consuming a set in parallel. 1630-- 1631-- No guarantee is made as to the sizes of the pieces; an internal, but 1632-- deterministic process determines this. However, it is guaranteed that the 1633-- pieces returned will be in ascending order (all elements in the first submap 1634-- less than all elements in the second, and so on). 1635-- 1636-- Examples: 1637-- 1638-- > splitRoot (fromList [1..120]) == [fromList [1..63],fromList [64..120]] 1639-- > splitRoot empty == [] 1640-- 1641-- Note that the current implementation does not return more than two subsets, 1642-- but you should not depend on this behaviour because it can change in the 1643-- future without notice. Also, the current version does not continue 1644-- splitting all the way to individual singleton sets -- it stops at some 1645-- point. 1646splitRoot :: IntSet -> [IntSet] 1647splitRoot Nil = [] 1648-- NOTE: we don't currently split below Tip, but we could. 1649splitRoot x@(Tip _ _) = [x] 1650splitRoot (Bin _ m l r) | m < 0 = [r, l] 1651 | otherwise = [l, r] 1652{-# INLINE splitRoot #-} 1653