1{-# LANGUAGE GADTs #-} 2module CmmSwitch ( 3 SwitchTargets, 4 mkSwitchTargets, 5 switchTargetsCases, switchTargetsDefault, switchTargetsRange, switchTargetsSigned, 6 mapSwitchTargets, switchTargetsToTable, switchTargetsFallThrough, 7 switchTargetsToList, eqSwitchTargetWith, 8 9 SwitchPlan(..), 10 targetSupportsSwitch, 11 createSwitchPlan, 12 ) where 13 14import GhcPrelude 15 16import Outputable 17import DynFlags 18import Hoopl.Label (Label) 19 20import Data.Maybe 21import Data.List (groupBy) 22import Data.Function (on) 23import qualified Data.Map as M 24 25-- Note [Cmm Switches, the general plan] 26-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 27-- 28-- Compiling a high-level switch statement, as it comes out of a STG case 29-- expression, for example, allows for a surprising amount of design decisions. 30-- Therefore, we cleanly separated this from the Stg → Cmm transformation, as 31-- well as from the actual code generation. 32-- 33-- The overall plan is: 34-- * The Stg → Cmm transformation creates a single `SwitchTargets` in 35-- emitSwitch and emitCmmLitSwitch in GHC.StgToCmm/Utils.hs. 36-- At this stage, they are unsuitable for code generation. 37-- * A dedicated Cmm transformation (CmmImplementSwitchPlans) replaces these 38-- switch statements with code that is suitable for code generation, i.e. 39-- a nice balanced tree of decisions with dense jump tables in the leafs. 40-- The actual planning of this tree is performed in pure code in createSwitchPlan 41-- in this module. See Note [createSwitchPlan]. 42-- * The actual code generation will not do any further processing and 43-- implement each CmmSwitch with a jump tables. 44-- 45-- When compiling to LLVM or C, CmmImplementSwitchPlans leaves the switch 46-- statements alone, as we can turn a SwitchTargets value into a nice 47-- switch-statement in LLVM resp. C, and leave the rest to the compiler. 48-- 49-- See Note [CmmSwitch vs. CmmImplementSwitchPlans] why the two module are 50-- separated. 51 52----------------------------------------------------------------------------- 53-- Note [Magic Constants in CmmSwitch] 54-- 55-- There are a lot of heuristics here that depend on magic values where it is 56-- hard to determine the "best" value (for whatever that means). These are the 57-- magic values: 58 59-- | Number of consecutive default values allowed in a jump table. If there are 60-- more of them, the jump tables are split. 61-- 62-- Currently 7, as it costs 7 words of additional code when a jump table is 63-- split (at least on x64, determined experimentally). 64maxJumpTableHole :: Integer 65maxJumpTableHole = 7 66 67-- | Minimum size of a jump table. If the number is smaller, the switch is 68-- implemented using conditionals. 69-- Currently 5, because an if-then-else tree of 4 values is nice and compact. 70minJumpTableSize :: Int 71minJumpTableSize = 5 72 73-- | Minimum non-zero offset for a jump table. See Note [Jump Table Offset]. 74minJumpTableOffset :: Integer 75minJumpTableOffset = 2 76 77 78----------------------------------------------------------------------------- 79-- Switch Targets 80 81-- Note [SwitchTargets]: 82-- ~~~~~~~~~~~~~~~~~~~~~ 83-- 84-- The branches of a switch are stored in a SwitchTargets, which consists of an 85-- (optional) default jump target, and a map from values to jump targets. 86-- 87-- If the default jump target is absent, the behaviour of the switch outside the 88-- values of the map is undefined. 89-- 90-- We use an Integer for the keys the map so that it can be used in switches on 91-- unsigned as well as signed integers. 92-- 93-- The map may be empty (we prune out-of-range branches here, so it could be us 94-- emptying it). 95-- 96-- Before code generation, the table needs to be brought into a form where all 97-- entries are non-negative, so that it can be compiled into a jump table. 98-- See switchTargetsToTable. 99 100 101-- | A value of type SwitchTargets contains the alternatives for a 'CmmSwitch' 102-- value, and knows whether the value is signed, the possible range, an 103-- optional default value and a map from values to jump labels. 104data SwitchTargets = 105 SwitchTargets 106 Bool -- Signed values 107 (Integer, Integer) -- Range 108 (Maybe Label) -- Default value 109 (M.Map Integer Label) -- The branches 110 deriving (Show, Eq) 111 112-- | The smart constructor mkSwitchTargets normalises the map a bit: 113-- * No entries outside the range 114-- * No entries equal to the default 115-- * No default if all elements have explicit values 116mkSwitchTargets :: Bool -> (Integer, Integer) -> Maybe Label -> M.Map Integer Label -> SwitchTargets 117mkSwitchTargets signed range@(lo,hi) mbdef ids 118 = SwitchTargets signed range mbdef' ids' 119 where 120 ids' = dropDefault $ restrict ids 121 mbdef' | defaultNeeded = mbdef 122 | otherwise = Nothing 123 124 -- Drop entries outside the range, if there is a range 125 restrict = restrictMap (lo,hi) 126 127 -- Drop entries that equal the default, if there is a default 128 dropDefault | Just l <- mbdef = M.filter (/= l) 129 | otherwise = id 130 131 -- Check if the default is still needed 132 defaultNeeded = fromIntegral (M.size ids') /= hi-lo+1 133 134 135-- | Changes all labels mentioned in the SwitchTargets value 136mapSwitchTargets :: (Label -> Label) -> SwitchTargets -> SwitchTargets 137mapSwitchTargets f (SwitchTargets signed range mbdef branches) 138 = SwitchTargets signed range (fmap f mbdef) (fmap f branches) 139 140-- | Returns the list of non-default branches of the SwitchTargets value 141switchTargetsCases :: SwitchTargets -> [(Integer, Label)] 142switchTargetsCases (SwitchTargets _ _ _ branches) = M.toList branches 143 144-- | Return the default label of the SwitchTargets value 145switchTargetsDefault :: SwitchTargets -> Maybe Label 146switchTargetsDefault (SwitchTargets _ _ mbdef _) = mbdef 147 148-- | Return the range of the SwitchTargets value 149switchTargetsRange :: SwitchTargets -> (Integer, Integer) 150switchTargetsRange (SwitchTargets _ range _ _) = range 151 152-- | Return whether this is used for a signed value 153switchTargetsSigned :: SwitchTargets -> Bool 154switchTargetsSigned (SwitchTargets signed _ _ _) = signed 155 156-- | switchTargetsToTable creates a dense jump table, usable for code generation. 157-- 158-- Also returns an offset to add to the value; the list is 0-based on the 159-- result of that addition. 160-- 161-- The conversion from Integer to Int is a bit of a wart, as the actual 162-- scrutinee might be an unsigned word, but it just works, due to wrap-around 163-- arithmetic (as verified by the CmmSwitchTest test case). 164switchTargetsToTable :: SwitchTargets -> (Int, [Maybe Label]) 165switchTargetsToTable (SwitchTargets _ (lo,hi) mbdef branches) 166 = (fromIntegral (-start), [ labelFor i | i <- [start..hi] ]) 167 where 168 labelFor i = case M.lookup i branches of Just l -> Just l 169 Nothing -> mbdef 170 start | lo >= 0 && lo < minJumpTableOffset = 0 -- See Note [Jump Table Offset] 171 | otherwise = lo 172 173-- Note [Jump Table Offset] 174-- ~~~~~~~~~~~~~~~~~~~~~~~~ 175-- 176-- Usually, the code for a jump table starting at x will first subtract x from 177-- the value, to avoid a large amount of empty entries. But if x is very small, 178-- the extra entries are no worse than the subtraction in terms of code size, and 179-- not having to do the subtraction is quicker. 180-- 181-- I.e. instead of 182-- _u20N: 183-- leaq -1(%r14),%rax 184-- jmp *_n20R(,%rax,8) 185-- _n20R: 186-- .quad _c20p 187-- .quad _c20q 188-- do 189-- _u20N: 190-- jmp *_n20Q(,%r14,8) 191-- 192-- _n20Q: 193-- .quad 0 194-- .quad _c20p 195-- .quad _c20q 196-- .quad _c20r 197 198-- | The list of all labels occuring in the SwitchTargets value. 199switchTargetsToList :: SwitchTargets -> [Label] 200switchTargetsToList (SwitchTargets _ _ mbdef branches) 201 = maybeToList mbdef ++ M.elems branches 202 203-- | Groups cases with equal targets, suitable for pretty-printing to a 204-- c-like switch statement with fall-through semantics. 205switchTargetsFallThrough :: SwitchTargets -> ([([Integer], Label)], Maybe Label) 206switchTargetsFallThrough (SwitchTargets _ _ mbdef branches) = (groups, mbdef) 207 where 208 groups = map (\xs -> (map fst xs, snd (head xs))) $ 209 groupBy ((==) `on` snd) $ 210 M.toList branches 211 212-- | Custom equality helper, needed for "CmmCommonBlockElim" 213eqSwitchTargetWith :: (Label -> Label -> Bool) -> SwitchTargets -> SwitchTargets -> Bool 214eqSwitchTargetWith eq (SwitchTargets signed1 range1 mbdef1 ids1) (SwitchTargets signed2 range2 mbdef2 ids2) = 215 signed1 == signed2 && range1 == range2 && goMB mbdef1 mbdef2 && goList (M.toList ids1) (M.toList ids2) 216 where 217 goMB Nothing Nothing = True 218 goMB (Just l1) (Just l2) = l1 `eq` l2 219 goMB _ _ = False 220 goList [] [] = True 221 goList ((i1,l1):ls1) ((i2,l2):ls2) = i1 == i2 && l1 `eq` l2 && goList ls1 ls2 222 goList _ _ = False 223 224----------------------------------------------------------------------------- 225-- Code generation for Switches 226 227 228-- | A SwitchPlan abstractly describes how a Switch statement ought to be 229-- implemented. See Note [createSwitchPlan] 230data SwitchPlan 231 = Unconditionally Label 232 | IfEqual Integer Label SwitchPlan 233 | IfLT Bool Integer SwitchPlan SwitchPlan 234 | JumpTable SwitchTargets 235 deriving Show 236-- 237-- Note [createSwitchPlan] 238-- ~~~~~~~~~~~~~~~~~~~~~~~ 239-- 240-- A SwitchPlan describes how a Switch statement is to be broken down into 241-- smaller pieces suitable for code generation. 242-- 243-- createSwitchPlan creates such a switch plan, in these steps: 244-- 1. It splits the switch statement at segments of non-default values that 245-- are too large. See splitAtHoles and Note [Magic Constants in CmmSwitch] 246-- 2. Too small jump tables should be avoided, so we break up smaller pieces 247-- in breakTooSmall. 248-- 3. We fill in the segments between those pieces with a jump to the default 249-- label (if there is one), returning a SeparatedList in mkFlatSwitchPlan 250-- 4. We find and replace two less-than branches by a single equal-to-test in 251-- findSingleValues 252-- 5. The thus collected pieces are assembled to a balanced binary tree. 253 254{- 255 Note [Two alts + default] 256 ~~~~~~~~~~~~~~~~~~~~~~~~~ 257 258Discussion and a bit more info at #14644 259 260When dealing with a switch of the form: 261switch(e) { 262 case 1: goto l1; 263 case 3000: goto l2; 264 default: goto ldef; 265} 266 267If we treat it as a sparse jump table we would generate: 268 269if (e > 3000) //Check if value is outside of the jump table. 270 goto ldef; 271else { 272 if (e < 3000) { //Compare to upper value 273 if(e != 1) //Compare to remaining value 274 goto ldef; 275 else 276 goto l2; 277 } 278 else 279 goto l1; 280} 281 282Instead we special case this to : 283 284if (e==1) goto l1; 285else if (e==3000) goto l2; 286else goto l3; 287 288This means we have: 289* Less comparisons for: 1,<3000 290* Unchanged for 3000 291* One more for >3000 292 293This improves code in a few ways: 294* One comparison less means smaller code which helps with cache. 295* It exchanges a taken jump for two jumps no taken in the >range case. 296 Jumps not taken are cheaper (See Agner guides) making this about as fast. 297* For all other cases the first range check is removed making it faster. 298 299The end result is that the change is not measurably slower for the case 300>3000 and faster for the other cases. 301 302This makes running this kind of match in an inner loop cheaper by 10-20% 303depending on the data. 304In nofib this improves wheel-sieve1 by 4-9% depending on problem 305size. 306 307We could also add a second conditional jump after the comparison to 308keep the range check like this: 309 cmp 3000, rArgument 310 jg <default> 311 je <branch 2> 312While this is fairly cheap it made no big difference for the >3000 case 313and slowed down all other cases making it not worthwhile. 314-} 315 316 317-- | Does the target support switch out of the box? Then leave this to the 318-- target! 319targetSupportsSwitch :: HscTarget -> Bool 320targetSupportsSwitch HscC = True 321targetSupportsSwitch HscLlvm = True 322targetSupportsSwitch _ = False 323 324-- | This function creates a SwitchPlan from a SwitchTargets value, breaking it 325-- down into smaller pieces suitable for code generation. 326createSwitchPlan :: SwitchTargets -> SwitchPlan 327-- Lets do the common case of a singleton map quicky and efficiently (#10677) 328createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m) 329 | [(x, l)] <- M.toList m 330 = IfEqual x l (Unconditionally defLabel) 331-- And another common case, matching "booleans" 332createSwitchPlan (SwitchTargets _signed (lo,hi) Nothing m) 333 | [(x1, l1), (_x2,l2)] <- M.toAscList m 334 --Checking If |range| = 2 is enough if we have two unique literals 335 , hi - lo == 1 336 = IfEqual x1 l1 (Unconditionally l2) 337-- See Note [Two alts + default] 338createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m) 339 | [(x1, l1), (x2,l2)] <- M.toAscList m 340 = IfEqual x1 l1 (IfEqual x2 l2 (Unconditionally defLabel)) 341createSwitchPlan (SwitchTargets signed range mbdef m) = 342 -- pprTrace "createSwitchPlan" (text (show ids) $$ text (show (range,m)) $$ text (show pieces) $$ text (show flatPlan) $$ text (show plan)) $ 343 plan 344 where 345 pieces = concatMap breakTooSmall $ splitAtHoles maxJumpTableHole m 346 flatPlan = findSingleValues $ mkFlatSwitchPlan signed mbdef range pieces 347 plan = buildTree signed $ flatPlan 348 349 350--- 351--- Step 1: Splitting at large holes 352--- 353splitAtHoles :: Integer -> M.Map Integer a -> [M.Map Integer a] 354splitAtHoles _ m | M.null m = [] 355splitAtHoles holeSize m = map (\range -> restrictMap range m) nonHoles 356 where 357 holes = filter (\(l,h) -> h - l > holeSize) $ zip (M.keys m) (tail (M.keys m)) 358 nonHoles = reassocTuples lo holes hi 359 360 (lo,_) = M.findMin m 361 (hi,_) = M.findMax m 362 363--- 364--- Step 2: Avoid small jump tables 365--- 366-- We do not want jump tables below a certain size. This breaks them up 367-- (into singleton maps, for now). 368breakTooSmall :: M.Map Integer a -> [M.Map Integer a] 369breakTooSmall m 370 | M.size m > minJumpTableSize = [m] 371 | otherwise = [M.singleton k v | (k,v) <- M.toList m] 372 373--- 374--- Step 3: Fill in the blanks 375--- 376 377-- | A FlatSwitchPlan is a list of SwitchPlans, with an integer inbetween every 378-- two entries, dividing the range. 379-- So if we have (abusing list syntax) [plan1,n,plan2], then we use plan1 if 380-- the expression is < n, and plan2 otherwise. 381 382type FlatSwitchPlan = SeparatedList Integer SwitchPlan 383 384mkFlatSwitchPlan :: Bool -> Maybe Label -> (Integer, Integer) -> [M.Map Integer Label] -> FlatSwitchPlan 385 386-- If we have no default (i.e. undefined where there is no entry), we can 387-- branch at the minimum of each map 388mkFlatSwitchPlan _ Nothing _ [] = pprPanic "mkFlatSwitchPlan with nothing left to do" empty 389mkFlatSwitchPlan signed Nothing _ (m:ms) 390 = (mkLeafPlan signed Nothing m , [ (fst (M.findMin m'), mkLeafPlan signed Nothing m') | m' <- ms ]) 391 392-- If we have a default, we have to interleave segments that jump 393-- to the default between the maps 394mkFlatSwitchPlan signed (Just l) r ms = let ((_,p1):ps) = go r ms in (p1, ps) 395 where 396 go (lo,hi) [] 397 | lo > hi = [] 398 | otherwise = [(lo, Unconditionally l)] 399 go (lo,hi) (m:ms) 400 | lo < min 401 = (lo, Unconditionally l) : go (min,hi) (m:ms) 402 | lo == min 403 = (lo, mkLeafPlan signed (Just l) m) : go (max+1,hi) ms 404 | otherwise 405 = pprPanic "mkFlatSwitchPlan" (integer lo <+> integer min) 406 where 407 min = fst (M.findMin m) 408 max = fst (M.findMax m) 409 410 411mkLeafPlan :: Bool -> Maybe Label -> M.Map Integer Label -> SwitchPlan 412mkLeafPlan signed mbdef m 413 | [(_,l)] <- M.toList m -- singleton map 414 = Unconditionally l 415 | otherwise 416 = JumpTable $ mkSwitchTargets signed (min,max) mbdef m 417 where 418 min = fst (M.findMin m) 419 max = fst (M.findMax m) 420 421--- 422--- Step 4: Reduce the number of branches using == 423--- 424 425-- A sequence of three unconditional jumps, with the outer two pointing to the 426-- same value and the bounds off by exactly one can be improved 427findSingleValues :: FlatSwitchPlan -> FlatSwitchPlan 428findSingleValues (Unconditionally l, (i, Unconditionally l2) : (i', Unconditionally l3) : xs) 429 | l == l3 && i + 1 == i' 430 = findSingleValues (IfEqual i l2 (Unconditionally l), xs) 431findSingleValues (p, (i,p'):xs) 432 = (p,i) `consSL` findSingleValues (p', xs) 433findSingleValues (p, []) 434 = (p, []) 435 436--- 437--- Step 5: Actually build the tree 438--- 439 440-- Build a balanced tree from a separated list 441buildTree :: Bool -> FlatSwitchPlan -> SwitchPlan 442buildTree _ (p,[]) = p 443buildTree signed sl = IfLT signed m (buildTree signed sl1) (buildTree signed sl2) 444 where 445 (sl1, m, sl2) = divideSL sl 446 447 448 449-- 450-- Utility data type: Non-empty lists with extra markers in between each 451-- element: 452-- 453 454type SeparatedList b a = (a, [(b,a)]) 455 456consSL :: (a, b) -> SeparatedList b a -> SeparatedList b a 457consSL (a, b) (a', xs) = (a, (b,a'):xs) 458 459divideSL :: SeparatedList b a -> (SeparatedList b a, b, SeparatedList b a) 460divideSL (_,[]) = error "divideSL: Singleton SeparatedList" 461divideSL (p,xs) = ((p, xs1), m, (p', xs2)) 462 where 463 (xs1, (m,p'):xs2) = splitAt (length xs `div` 2) xs 464 465-- 466-- Other Utilities 467-- 468 469restrictMap :: (Integer,Integer) -> M.Map Integer b -> M.Map Integer b 470restrictMap (lo,hi) m = mid 471 where (_, mid_hi) = M.split (lo-1) m 472 (mid, _) = M.split (hi+1) mid_hi 473 474-- for example: reassocTuples a [(b,c),(d,e)] f == [(a,b),(c,d),(e,f)] 475reassocTuples :: a -> [(a,a)] -> a -> [(a,a)] 476reassocTuples initial [] last 477 = [(initial,last)] 478reassocTuples initial ((a,b):tuples) last 479 = (initial,a) : reassocTuples b tuples last 480 481-- Note [CmmSwitch vs. CmmImplementSwitchPlans] 482-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 483-- I (Joachim) separated the two somewhat closely related modules 484-- 485-- - CmmSwitch, which provides the CmmSwitchTargets type and contains the strategy 486-- for implementing a Cmm switch (createSwitchPlan), and 487-- - CmmImplementSwitchPlans, which contains the actuall Cmm graph modification, 488-- 489-- for these reasons: 490-- 491-- * CmmSwitch is very low in the dependency tree, i.e. does not depend on any 492-- GHC specific modules at all (with the exception of Output and Hoople 493-- (Literal)). CmmImplementSwitchPlans is the Cmm transformation and hence very 494-- high in the dependency tree. 495-- * CmmSwitch provides the CmmSwitchTargets data type, which is abstract, but 496-- used in CmmNodes. 497-- * Because CmmSwitch is low in the dependency tree, the separation allows 498-- for more parallelism when building GHC. 499-- * The interaction between the modules is very explicit and easy to 500-- understand, due to the small and simple interface. 501